graphiti-core 0.12.0rc1__py3-none-any.whl → 0.24.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. graphiti_core/cross_encoder/bge_reranker_client.py +12 -2
  2. graphiti_core/cross_encoder/gemini_reranker_client.py +161 -0
  3. graphiti_core/cross_encoder/openai_reranker_client.py +7 -5
  4. graphiti_core/decorators.py +110 -0
  5. graphiti_core/driver/__init__.py +19 -0
  6. graphiti_core/driver/driver.py +124 -0
  7. graphiti_core/driver/falkordb_driver.py +362 -0
  8. graphiti_core/driver/graph_operations/graph_operations.py +191 -0
  9. graphiti_core/driver/kuzu_driver.py +182 -0
  10. graphiti_core/driver/neo4j_driver.py +117 -0
  11. graphiti_core/driver/neptune_driver.py +305 -0
  12. graphiti_core/driver/search_interface/search_interface.py +89 -0
  13. graphiti_core/edges.py +287 -172
  14. graphiti_core/embedder/azure_openai.py +71 -0
  15. graphiti_core/embedder/client.py +2 -1
  16. graphiti_core/embedder/gemini.py +116 -22
  17. graphiti_core/embedder/voyage.py +13 -2
  18. graphiti_core/errors.py +8 -0
  19. graphiti_core/graph_queries.py +162 -0
  20. graphiti_core/graphiti.py +705 -193
  21. graphiti_core/graphiti_types.py +4 -2
  22. graphiti_core/helpers.py +87 -10
  23. graphiti_core/llm_client/__init__.py +16 -0
  24. graphiti_core/llm_client/anthropic_client.py +159 -56
  25. graphiti_core/llm_client/azure_openai_client.py +115 -0
  26. graphiti_core/llm_client/client.py +98 -21
  27. graphiti_core/llm_client/config.py +1 -1
  28. graphiti_core/llm_client/gemini_client.py +290 -41
  29. graphiti_core/llm_client/groq_client.py +14 -3
  30. graphiti_core/llm_client/openai_base_client.py +261 -0
  31. graphiti_core/llm_client/openai_client.py +56 -132
  32. graphiti_core/llm_client/openai_generic_client.py +91 -56
  33. graphiti_core/models/edges/edge_db_queries.py +259 -35
  34. graphiti_core/models/nodes/node_db_queries.py +311 -32
  35. graphiti_core/nodes.py +420 -205
  36. graphiti_core/prompts/dedupe_edges.py +46 -32
  37. graphiti_core/prompts/dedupe_nodes.py +67 -42
  38. graphiti_core/prompts/eval.py +4 -4
  39. graphiti_core/prompts/extract_edges.py +27 -16
  40. graphiti_core/prompts/extract_nodes.py +74 -31
  41. graphiti_core/prompts/prompt_helpers.py +39 -0
  42. graphiti_core/prompts/snippets.py +29 -0
  43. graphiti_core/prompts/summarize_nodes.py +23 -25
  44. graphiti_core/search/search.py +158 -82
  45. graphiti_core/search/search_config.py +39 -4
  46. graphiti_core/search/search_filters.py +126 -35
  47. graphiti_core/search/search_helpers.py +5 -6
  48. graphiti_core/search/search_utils.py +1405 -485
  49. graphiti_core/telemetry/__init__.py +9 -0
  50. graphiti_core/telemetry/telemetry.py +117 -0
  51. graphiti_core/tracer.py +193 -0
  52. graphiti_core/utils/bulk_utils.py +364 -285
  53. graphiti_core/utils/datetime_utils.py +13 -0
  54. graphiti_core/utils/maintenance/community_operations.py +67 -49
  55. graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
  56. graphiti_core/utils/maintenance/edge_operations.py +339 -197
  57. graphiti_core/utils/maintenance/graph_data_operations.py +50 -114
  58. graphiti_core/utils/maintenance/node_operations.py +319 -238
  59. graphiti_core/utils/maintenance/temporal_operations.py +11 -3
  60. graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
  61. graphiti_core/utils/text_utils.py +53 -0
  62. graphiti_core-0.24.3.dist-info/METADATA +726 -0
  63. graphiti_core-0.24.3.dist-info/RECORD +86 -0
  64. {graphiti_core-0.12.0rc1.dist-info → graphiti_core-0.24.3.dist-info}/WHEEL +1 -1
  65. graphiti_core-0.12.0rc1.dist-info/METADATA +0 -350
  66. graphiti_core-0.12.0rc1.dist-info/RECORD +0 -66
  67. /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
  68. {graphiti_core-0.12.0rc1.dist-info → graphiti_core-0.24.3.dist-info/licenses}/LICENSE +0 -0
graphiti_core/prompts/dedupe_edges.py

@@ -14,22 +14,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class EdgeDuplicate(BaseModel):
-    duplicate_fact_id: int = Field(
+    duplicate_facts: list[int] = Field(
         ...,
-        description='id of the duplicate fact. If no duplicate facts are found, default to -1.',
+        description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
     )
     contradicted_facts: list[int] = Field(
         ...,
-        description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
+        description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
     )
     fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')
 
@@ -67,16 +67,17 @@ def edge(context: dict[str, Any]) -> list[Message]:
         Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
 
         <EXISTING EDGES>
-        {json.dumps(context['related_edges'], indent=2)}
+        {to_prompt_json(context['related_edges'])}
         </EXISTING EDGES>
 
         <NEW EDGE>
-        {json.dumps(context['extracted_edges'], indent=2)}
+        {to_prompt_json(context['extracted_edges'])}
         </NEW EDGE>
-
+
         Task:
-        If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact.
-        If the NEW EDGE is not a duplicate of any of the EXISTING EDGES, return -1.
+        If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
+        as part of the list of duplicate_facts.
+        If the NEW EDGE is not a duplicate of any of the EXISTING EDGES, return an empty list.
 
         Guidelines:
         1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
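Throughout these prompt modules, the `json.dumps(..., indent=2)` calls are swapped for the new `to_prompt_json` helper imported from `graphiti_core/prompts/prompt_helpers.py` (a file added in this release, +39 lines, not shown in this diff). A minimal sketch of what such a serialization wrapper might look like, assuming it only centralizes the JSON defaults used in prompts; the signature and behavior are assumptions, not the library's actual implementation:

```python
# Hypothetical sketch of a to_prompt_json-style helper; the real implementation
# in graphiti_core/prompts/prompt_helpers.py is not shown in this diff.
import json
from datetime import datetime
from typing import Any


def to_prompt_json(data: Any, indent: int | None = None) -> str:
    """Serialize prompt context to JSON, tolerating datetimes and other
    non-JSON-native values (assumed behavior)."""
    return json.dumps(data, indent=indent, default=str)


if __name__ == '__main__':
    # Compact output by default, unlike the old json.dumps(..., indent=2) calls.
    print(to_prompt_json({'fact': 'Alice works at Acme', 'valid_at': datetime(2024, 1, 1)}))
```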
@@ -97,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
         Given the following context, find all of the duplicates in a list of facts:
 
         Facts:
-        {json.dumps(context['edges'], indent=2)}
+        {to_prompt_json(context['edges'])}
 
         Task:
         If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
@@ -123,35 +124,48 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        <NEW FACT>
-        {context['new_edge']}
-        </NEW FACT>
-
+        Task:
+        You will receive TWO separate lists of facts. Each list uses 'idx' as its index field, starting from 0.
+
+        1. DUPLICATE DETECTION:
+           - If the NEW FACT represents identical factual information as any fact in EXISTING FACTS, return those idx values in duplicate_facts.
+           - Facts with similar information that contain key differences should NOT be marked as duplicates.
+           - Return idx values from EXISTING FACTS.
+           - If no duplicates, return an empty list for duplicate_facts.
+
+        2. FACT TYPE CLASSIFICATION:
+           - Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
+           - Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
+
+        3. CONTRADICTION DETECTION:
+           - Based on FACT INVALIDATION CANDIDATES and NEW FACT, determine which facts the new fact contradicts.
+           - Return idx values from FACT INVALIDATION CANDIDATES.
+           - If no contradictions, return an empty list for contradicted_facts.
+
+        IMPORTANT:
+        - duplicate_facts: Use ONLY 'idx' values from EXISTING FACTS
+        - contradicted_facts: Use ONLY 'idx' values from FACT INVALIDATION CANDIDATES
+        - These are two separate lists with independent idx ranges starting from 0
+
+        Guidelines:
+        1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
+           Do not mark these facts as duplicates.
+
+        <FACT TYPES>
+        {context['edge_types']}
+        </FACT TYPES>
+
         <EXISTING FACTS>
         {context['existing_edges']}
         </EXISTING FACTS>
+
         <FACT INVALIDATION CANDIDATES>
         {context['edge_invalidation_candidates']}
         </FACT INVALIDATION CANDIDATES>
-
-        <FACT TYPES>
-        {context['edge_types']}
-        </FACT TYPES>
-
 
-        Task:
-        If the NEW FACT represents the same factual information as any fact in EXISTING FACTS, return the idx of the duplicate fact.
-        If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
-
-        Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
-        Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
-
-        Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
-        Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
-        If there are no contradicted facts, return an empty list.
-
-        Guidelines:
-        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
+        <NEW FACT>
+        {context['new_edge']}
+        </NEW FACT>
         """,
         ),
     ]
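The restructured resolve_edge prompt returns idx values drawn from two independently indexed lists, matching the updated EdgeDuplicate model above. A hedged sketch of validating such a response with the fields exactly as they appear in this diff (descriptions abridged); the sample payload is invented for illustration:

```python
# Sketch: validating a resolve_edge-style response against the EdgeDuplicate
# schema shown in this diff. Field names and types are taken from the diff
# (descriptions abridged); the sample values are invented.
from pydantic import BaseModel, Field


class EdgeDuplicate(BaseModel):
    duplicate_facts: list[int] = Field(
        ..., description='List of idx values of any duplicate facts, or an empty list.'
    )
    contradicted_facts: list[int] = Field(
        ..., description='List of idx values of facts that should be invalidated, or an empty list.'
    )
    fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')


# Example model output: no duplicates among EXISTING FACTS, one contradiction
# at idx 0 of FACT INVALIDATION CANDIDATES, classified under a provided type.
resolution = EdgeDuplicate(duplicate_facts=[], contradicted_facts=[0], fact_type='WORKS_AT')
assert resolution.duplicate_facts == []      # idx values refer to EXISTING FACTS
assert resolution.contradicted_facts == [0]  # idx values refer to the invalidation candidates
```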
graphiti_core/prompts/dedupe_nodes.py

@@ -14,23 +14,27 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class NodeDuplicate(BaseModel):
     id: int = Field(..., description='integer id of the entity')
     duplicate_idx: int = Field(
         ...,
-        description='idx of the duplicate node. If no duplicate nodes are found, default to -1.',
+        description='idx of the duplicate entity. If no duplicate entities are found, default to -1.',
     )
     name: str = Field(
         ...,
-        description='Name of the entity. Should be the most complete and descriptive name possible.',
+        description='Name of the entity. Should be the most complete and descriptive name of the entity. Do not include any JSON formatting in the Entity name such as {}.',
+    )
+    duplicates: list[int] = Field(
+        ...,
+        description='idx of all entities that are a duplicate of the entity with the above id.',
     )
 
 
@@ -60,40 +64,51 @@ def node(context: dict[str, Any]) -> list[Message]:
             role='user',
             content=f"""
         <PREVIOUS MESSAGES>
-        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        {to_prompt_json([ep for ep in context['previous_episodes']])}
         </PREVIOUS MESSAGES>
         <CURRENT MESSAGE>
         {context['episode_content']}
         </CURRENT MESSAGE>
         <NEW ENTITY>
-        {json.dumps(context['extracted_node'], indent=2)}
+        {to_prompt_json(context['extracted_node'])}
         </NEW ENTITY>
         <ENTITY TYPE DESCRIPTION>
-        {json.dumps(context['entity_type_description'], indent=2)}
+        {to_prompt_json(context['entity_type_description'])}
         </ENTITY TYPE DESCRIPTION>
 
         <EXISTING ENTITIES>
-        {json.dumps(context['existing_nodes'], indent=2)}
+        {to_prompt_json(context['existing_nodes'])}
         </EXISTING ENTITIES>
 
         Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
         is a duplicate entity of one of the EXISTING ENTITIES.
 
         Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
+        Semantic Equivalence: if a descriptive label in existing_entities clearly refers to a named entity in context, treat them as duplicates.
 
         Do NOT mark entities as duplicates if:
         - They are related but distinct.
         - They have similar names or purposes but refer to separate instances or concepts.
 
-        Task:
-        If the NEW ENTITY represents a duplicate entity of any entity in EXISTING ENTITIES, set duplicate_entity_id to the
-        id of the EXISTING ENTITY that is the duplicate.
-
-        If the NEW ENTITY is not a duplicate of any of the EXISTING ENTITIES,
-        duplicate_entity_id should be set to -1.
-
-        Also return the name that best describes the NEW ENTITY (whether it is the name of the NEW ENTITY, a node it
-        is a duplicate of, or a combination of the two).
+        TASK:
+        1. Compare `new_entity` against each item in `existing_entities`.
+        2. If it refers to the same real-world object or concept, collect its index.
+        3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
+        4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
+
+        Respond with a JSON object containing an "entity_resolutions" array with a single entry:
+        {{
+            "entity_resolutions": [
+                {{
+                    "id": integer id from NEW ENTITY,
+                    "name": the best full name for the entity,
+                    "duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
+                    "duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
+                }}
+            ]
+        }}
+
+        Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
         """,
         ),
     ]
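The rewritten node() prompt now spells out the exact JSON shape it expects, mirroring the duplicate_idx/duplicates fields added to NodeDuplicate above. An illustrative response that satisfies the stated invariants (duplicate_idx is the smallest collected index; duplicates is sorted and deduplicated; -1 and [] when there is no match); the entity data is made up:

```python
# Illustrative only: a node() response that satisfies the constraints stated in
# the rewritten prompt. The wrapper key "entity_resolutions" comes from the
# prompt text; the entity itself is invented sample data.
sample_response = {
    'entity_resolutions': [
        {
            'id': 0,                     # id taken from NEW ENTITY
            'name': 'Acme Corporation',  # most complete name available
            'duplicate_idx': 2,          # smallest collected index in EXISTING ENTITIES
            'duplicates': [2, 5],        # sorted, deduplicated indices
        }
    ]
}

resolution = sample_response['entity_resolutions'][0]
assert resolution['duplicates'] == sorted(set(resolution['duplicates']))
assert resolution['duplicate_idx'] == (
    min(resolution['duplicates']) if resolution['duplicates'] else -1
)
```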
@@ -104,41 +119,45 @@ def nodes(context: dict[str, Any]) -> list[Message]:
         Message(
             role='system',
             content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
-            'of existing entities.',
+            ' of existing entities.',
         ),
         Message(
             role='user',
             content=f"""
         <PREVIOUS MESSAGES>
-        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        {to_prompt_json([ep for ep in context['previous_episodes']])}
         </PREVIOUS MESSAGES>
         <CURRENT MESSAGE>
         {context['episode_content']}
         </CURRENT MESSAGE>
-
-
+
+
         Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
         Each entity in ENTITIES is represented as a JSON object with the following structure:
         {{
             id: integer id of the entity,
             name: "name of the entity",
-            entity_type: "ontological classification of the entity",
-            entity_type_description: "Description of what the entity type represents",
-            duplication_candidates: [
-                {{
-                    idx: integer index of the candidate entity,
-                    name: "name of the candidate entity",
-                    entity_type: "ontological classification of the candidate entity",
-                    ...<additional attributes>
-                }}
-            ]
+            entity_type: ["Entity", "<optional additional label>", ...],
+            entity_type_description: "Description of what the entity type represents"
         }}
-
+
         <ENTITIES>
-        {json.dumps(context['extracted_nodes'], indent=2)}
+        {to_prompt_json(context['extracted_nodes'])}
         </ENTITIES>
 
-        For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
+        <EXISTING ENTITIES>
+        {to_prompt_json(context['existing_nodes'])}
+        </EXISTING ENTITIES>
+
+        Each entry in EXISTING ENTITIES is an object with the following structure:
+        {{
+            idx: integer index of the candidate entity (use this when referencing a duplicate),
+            name: "name of the candidate entity",
+            entity_types: ["Entity", "<optional additional label>", ...],
+            ...<additional attributes such as summaries or metadata>
+        }}
+
+        For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.
 
         Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
 
@@ -147,14 +166,20 @@ def nodes(context: dict[str, Any]) -> list[Message]:
         - They have similar names or purposes but refer to separate instances or concepts.
 
         Task:
-        Your response will be a list called entity_resolutions which contains one entry for each entity.
-
-        For each entity, return the id of the entity as id, the name of the entity as name, and the duplicate_idx
-        as an integer.
-
-        - If an entity is a duplicate of one of its duplication_candidates, return the idx of the candidate it is a
-          duplicate of.
-        - If an entity is not a duplicate of one of its duplication candidates, return the -1 as the duplication_idx
+        ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
+        Your response MUST include EXACTLY {len(context['extracted_nodes'])} resolutions with IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip or add IDs.
+
+        For every entity, return an object with the following keys:
+        {{
+            "id": integer id from ENTITIES,
+            "name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
+            "duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
+            "duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
+        }}
+
+        - Only use idx values that appear in EXISTING ENTITIES.
+        - Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
+        - Never fabricate entities or indices.
         """,
         ),
     ]
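Because the bulk nodes() prompt now requires exactly one resolution per extracted entity (IDs 0 through N-1), a caller can cheaply verify coverage before trusting the output. This is a hypothetical client-side guard, not code from the package:

```python
# Hypothetical guard: check that a bulk dedupe response covers every extracted
# entity ID exactly once, as the rewritten nodes() prompt demands.
def covers_all_ids(resolutions: list[dict], extracted_count: int) -> bool:
    return sorted(r['id'] for r in resolutions) == list(range(extracted_count))


resolutions = [
    {'id': 0, 'name': 'Alice', 'duplicate_idx': -1, 'duplicates': []},
    {'id': 1, 'name': 'Acme Corporation', 'duplicate_idx': 3, 'duplicates': [3]},
]
assert covers_all_ids(resolutions, extracted_count=2)
assert not covers_all_ids(resolutions, extracted_count=3)  # ID 2 is missing
```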
@@ -172,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
         Given the following context, deduplicate a list of nodes:
 
         Nodes:
-        {json.dumps(context['nodes'], indent=2)}
+        {to_prompt_json(context['nodes'])}
 
         Task:
         1. Group nodes together such that all duplicate nodes are in the same list of uuids
graphiti_core/prompts/eval.py

@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class QueryExpansion(BaseModel):
@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
     Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
     that maintains the relevant context?
     <QUESTION>
-    {json.dumps(context['query'])}
+    {to_prompt_json(context['query'])}
     </QUESTION>
     """
     return [
@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
     Your task is to briefly answer the question in the way that you think Alice would answer the question.
     You are given the following entity summaries and facts to help you determine the answer to your question.
     <ENTITY_SUMMARIES>
-    {json.dumps(context['entity_summaries'])}
+    {to_prompt_json(context['entity_summaries'])}
     </ENTITY_SUMMARIES>
     <FACTS>
-    {json.dumps(context['facts'])}
+    {to_prompt_json(context['facts'])}
    </FACTS>
    <QUESTION>
    {context['query']}
graphiti_core/prompts/extract_edges.py

@@ -14,19 +14,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class Edge(BaseModel):
     relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
-    source_entity_name: str = Field(..., description='The name of the source entity of the fact.')
-    target_entity_name: str = Field(..., description='The name of the target entity of the fact.')
-    fact: str = Field(..., description='')
+    source_entity_id: int = Field(
+        ..., description='The id of the source entity from the ENTITIES list'
+    )
+    target_entity_id: int = Field(
+        ..., description='The id of the target entity from the ENTITIES list'
+    )
+    fact: str = Field(
+        ...,
+        description='A natural language description of the relationship between the entities, paraphrased from the source text',
+    )
     valid_at: str | None = Field(
         None,
         description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
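extract_edges.py now identifies edge endpoints by integer id into the ENTITIES list rather than by entity name. A brief sketch that reproduces the updated Edge fields from this diff (descriptions abridged) and constructs one edge against a tiny entity list; the sample entities and values are invented:

```python
# Sketch: the updated Edge schema references entities by their id in the
# ENTITIES list. Field names and types are taken from this diff (descriptions
# abridged); the entity list and values are invented for illustration.
from pydantic import BaseModel, Field


class Edge(BaseModel):
    relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
    source_entity_id: int = Field(..., description='The id of the source entity from the ENTITIES list')
    target_entity_id: int = Field(..., description='The id of the target entity from the ENTITIES list')
    fact: str = Field(..., description='Natural language paraphrase of the relationship')
    valid_at: str | None = Field(None, description='ISO 8601 timestamp when the fact became true, if stated')


entities = [{'id': 0, 'name': 'Alice'}, {'id': 1, 'name': 'Acme Corporation'}]
edge = Edge(
    relation_type='WORKS_AT',
    source_entity_id=0,  # Alice
    target_entity_id=1,  # Acme Corporation
    fact='Alice works at Acme Corporation',
)
```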
@@ -68,8 +75,12 @@ def edge(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
+        <FACT TYPES>
+        {context['edge_types']}
+        </FACT TYPES>
+
         <PREVIOUS_MESSAGES>
-        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        {to_prompt_json([ep for ep in context['previous_episodes']])}
         </PREVIOUS_MESSAGES>
 
         <CURRENT_MESSAGE>
@@ -77,25 +88,24 @@ def edge(context: dict[str, Any]) -> list[Message]:
         </CURRENT_MESSAGE>
 
         <ENTITIES>
-        {context['nodes']}  # Each has: id, label (e.g., Person, Org), name, aliases
+        {to_prompt_json(context['nodes'])}
         </ENTITIES>
 
         <REFERENCE_TIME>
         {context['reference_time']}  # ISO 8601 (UTC); used to resolve relative time mentions
         </REFERENCE_TIME>
 
-        <FACT TYPES>
-        {context['edge_types']}
-        </FACT TYPES>
-
         # TASK
         Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
         Only extract facts that:
         - involve two DISTINCT ENTITIES from the ENTITIES list,
         - are clearly stated or unambiguously implied in the CURRENT MESSAGE,
           and can be represented as edges in a knowledge graph.
-        - The FACT TYPES provide a list of the most important types of facts, make sure to extract any facts that
-          could be classified into one of the provided fact types
+        - Facts should include entity names rather than pronouns whenever possible.
+        - The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
+        - The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
+          of the FACT TYPES
+        - The FACT TYPES each contain their fact_type_signature which represents the source and target entity types.
 
         You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
 
@@ -104,11 +114,12 @@ You may use information from the PREVIOUS MESSAGES only to disambiguate referenc
 
         # EXTRACTION RULES
 
-        1. Only emit facts where both the subject and object match IDs in ENTITIES.
+        1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
+           - **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
         2. Each fact must involve two **distinct** entities.
         3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
         4. Do not emit duplicate or semantically redundant facts.
-        5. The `fact_text` should quote or closely paraphrase the original source sentence(s).
+        5. The `fact` should closely paraphrase the original source sentence(s). Do not verbatim quote the original text.
         6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
         7. Do **not** hallucinate or infer temporal bounds from unrelated events.
 
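Rule 1 now states that edges using ids outside the ENTITIES list will be rejected. A hypothetical post-extraction filter illustrating how a caller could enforce that rule; this is not the package's actual validation code:

```python
# Hypothetical post-extraction filter enforcing extraction rule 1: drop any
# edge whose source or target id does not appear in the ENTITIES list.
def filter_valid_edges(edges: list[dict], entities: list[dict]) -> list[dict]:
    valid_ids = {entity['id'] for entity in entities}
    return [
        e for e in edges
        if e['source_entity_id'] in valid_ids and e['target_entity_id'] in valid_ids
    ]


entities = [{'id': 0, 'name': 'Alice'}, {'id': 1, 'name': 'Acme Corporation'}]
edges = [
    {'relation_type': 'WORKS_AT', 'source_entity_id': 0, 'target_entity_id': 1,
     'fact': 'Alice works at Acme Corporation'},
    {'relation_type': 'FOUNDED', 'source_entity_id': 0, 'target_entity_id': 7,
     'fact': 'references an id that is not in ENTITIES'},
]
assert len(filter_valid_edges(edges, entities)) == 1  # the second edge is rejected
```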
@@ -130,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
 
     user_prompt = f"""
     <PREVIOUS MESSAGES>
-    {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+    {to_prompt_json([ep for ep in context['previous_episodes']])}
     </PREVIOUS MESSAGES>
     <CURRENT MESSAGE>
     {context['episode_content']}
@@ -164,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
             content=f"""
 
         <MESSAGE>
-        {json.dumps(context['episode_content'], indent=2)}
+        {to_prompt_json(context['episode_content'])}
         </MESSAGE>
         <REFERENCE TIME>
         {context['reference_time']}