graphiti-core: 0.20.4 (py3-none-any.whl) → 0.21.0 (py3-none-any.whl)
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/driver/driver.py +28 -0
- graphiti_core/driver/falkordb_driver.py +112 -0
- graphiti_core/driver/kuzu_driver.py +1 -0
- graphiti_core/driver/neo4j_driver.py +10 -2
- graphiti_core/driver/neptune_driver.py +4 -6
- graphiti_core/edges.py +67 -7
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +35 -6
- graphiti_core/graphiti.py +27 -23
- graphiti_core/graphiti_types.py +0 -1
- graphiti_core/helpers.py +2 -2
- graphiti_core/llm_client/client.py +19 -4
- graphiti_core/llm_client/gemini_client.py +4 -2
- graphiti_core/llm_client/openai_base_client.py +3 -2
- graphiti_core/llm_client/openai_generic_client.py +3 -2
- graphiti_core/models/edges/edge_db_queries.py +36 -16
- graphiti_core/models/nodes/node_db_queries.py +30 -10
- graphiti_core/nodes.py +126 -25
- graphiti_core/prompts/dedupe_edges.py +40 -29
- graphiti_core/prompts/dedupe_nodes.py +51 -34
- graphiti_core/prompts/eval.py +3 -3
- graphiti_core/prompts/extract_edges.py +17 -9
- graphiti_core/prompts/extract_nodes.py +10 -9
- graphiti_core/prompts/prompt_helpers.py +3 -3
- graphiti_core/prompts/summarize_nodes.py +5 -5
- graphiti_core/search/search_filters.py +53 -0
- graphiti_core/search/search_helpers.py +5 -7
- graphiti_core/search/search_utils.py +227 -57
- graphiti_core/utils/bulk_utils.py +168 -69
- graphiti_core/utils/maintenance/community_operations.py +8 -20
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +187 -50
- graphiti_core/utils/maintenance/graph_data_operations.py +9 -5
- graphiti_core/utils/maintenance/node_operations.py +244 -88
- graphiti_core/utils/maintenance/temporal_operations.py +0 -4
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/METADATA +7 -1
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/RECORD +39 -38
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/WHEEL +0 -0
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -64,20 +64,20 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|
|
64
64
|
role='user',
|
|
65
65
|
content=f"""
|
|
66
66
|
<PREVIOUS MESSAGES>
|
|
67
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
67
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
68
68
|
</PREVIOUS MESSAGES>
|
|
69
69
|
<CURRENT MESSAGE>
|
|
70
70
|
{context['episode_content']}
|
|
71
71
|
</CURRENT MESSAGE>
|
|
72
72
|
<NEW ENTITY>
|
|
73
|
-
{to_prompt_json(context['extracted_node'],
|
|
73
|
+
{to_prompt_json(context['extracted_node'], indent=2)}
|
|
74
74
|
</NEW ENTITY>
|
|
75
75
|
<ENTITY TYPE DESCRIPTION>
|
|
76
|
-
{to_prompt_json(context['entity_type_description'],
|
|
76
|
+
{to_prompt_json(context['entity_type_description'], indent=2)}
|
|
77
77
|
</ENTITY TYPE DESCRIPTION>
|
|
78
78
|
|
|
79
79
|
<EXISTING ENTITIES>
|
|
80
|
-
{to_prompt_json(context['existing_nodes'],
|
|
80
|
+
{to_prompt_json(context['existing_nodes'], indent=2)}
|
|
81
81
|
</EXISTING ENTITIES>
|
|
82
82
|
|
|
83
83
|
Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
|
|
@@ -92,12 +92,23 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|
|
92
92
|
|
|
93
93
|
TASK:
|
|
94
94
|
1. Compare `new_entity` against each item in `existing_entities`.
|
|
95
|
-
2. If it refers to the same real
|
|
96
|
-
3. Let `duplicate_idx` = the
|
|
97
|
-
4. Let `duplicates` = the list of
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
2. If it refers to the same real-world object or concept, collect its index.
|
|
96
|
+
3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
|
|
97
|
+
4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
|
|
98
|
+
|
|
99
|
+
Respond with a JSON object containing an "entity_resolutions" array with a single entry:
|
|
100
|
+
{{
|
|
101
|
+
"entity_resolutions": [
|
|
102
|
+
{{
|
|
103
|
+
"id": integer id from NEW ENTITY,
|
|
104
|
+
"name": the best full name for the entity,
|
|
105
|
+
"duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
|
|
106
|
+
"duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
|
|
107
|
+
}}
|
|
108
|
+
]
|
|
109
|
+
}}
|
|
110
|
+
|
|
111
|
+
Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
|
|
101
112
|
""",
|
|
102
113
|
),
|
|
103
114
|
]
|
|
@@ -114,7 +125,7 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
114
125
|
role='user',
|
|
115
126
|
content=f"""
|
|
116
127
|
<PREVIOUS MESSAGES>
|
|
117
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
128
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
118
129
|
</PREVIOUS MESSAGES>
|
|
119
130
|
<CURRENT MESSAGE>
|
|
120
131
|
{context['episode_content']}
|
|
@@ -126,26 +137,26 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
126
137
|
{{
|
|
127
138
|
id: integer id of the entity,
|
|
128
139
|
name: "name of the entity",
|
|
129
|
-
entity_type: "
|
|
130
|
-
entity_type_description: "Description of what the entity type represents"
|
|
131
|
-
duplication_candidates: [
|
|
132
|
-
{{
|
|
133
|
-
idx: integer index of the candidate entity,
|
|
134
|
-
name: "name of the candidate entity",
|
|
135
|
-
entity_type: "ontological classification of the candidate entity",
|
|
136
|
-
...<additional attributes>
|
|
137
|
-
}}
|
|
138
|
-
]
|
|
140
|
+
entity_type: ["Entity", "<optional additional label>", ...],
|
|
141
|
+
entity_type_description: "Description of what the entity type represents"
|
|
139
142
|
}}
|
|
140
|
-
|
|
143
|
+
|
|
141
144
|
<ENTITIES>
|
|
142
|
-
{to_prompt_json(context['extracted_nodes'],
|
|
145
|
+
{to_prompt_json(context['extracted_nodes'], indent=2)}
|
|
143
146
|
</ENTITIES>
|
|
144
|
-
|
|
147
|
+
|
|
145
148
|
<EXISTING ENTITIES>
|
|
146
|
-
{to_prompt_json(context['existing_nodes'],
|
|
149
|
+
{to_prompt_json(context['existing_nodes'], indent=2)}
|
|
147
150
|
</EXISTING ENTITIES>
|
|
148
151
|
|
|
152
|
+
Each entry in EXISTING ENTITIES is an object with the following structure:
|
|
153
|
+
{{
|
|
154
|
+
idx: integer index of the candidate entity (use this when referencing a duplicate),
|
|
155
|
+
name: "name of the candidate entity",
|
|
156
|
+
entity_types: ["Entity", "<optional additional label>", ...],
|
|
157
|
+
...<additional attributes such as summaries or metadata>
|
|
158
|
+
}}
|
|
159
|
+
|
|
149
160
|
For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.
|
|
150
161
|
|
|
151
162
|
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
|
|
@@ -155,14 +166,20 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
155
166
|
- They have similar names or purposes but refer to separate instances or concepts.
|
|
156
167
|
|
|
157
168
|
Task:
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
169
|
+
ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
|
|
170
|
+
Your response MUST include EXACTLY {len(context['extracted_nodes'])} resolutions with IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip or add IDs.
|
|
171
|
+
|
|
172
|
+
For every entity, return an object with the following keys:
|
|
173
|
+
{{
|
|
174
|
+
"id": integer id from ENTITIES,
|
|
175
|
+
"name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
|
|
176
|
+
"duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
|
|
177
|
+
"duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
|
|
178
|
+
}}
|
|
179
|
+
|
|
180
|
+
- Only use idx values that appear in EXISTING ENTITIES.
|
|
181
|
+
- Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
|
|
182
|
+
- Never fabricate entities or indices.
|
|
166
183
|
""",
|
|
167
184
|
),
|
|
168
185
|
]
|
|
@@ -180,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
|
|
|
180
197
|
Given the following context, deduplicate a list of nodes:
|
|
181
198
|
|
|
182
199
|
Nodes:
|
|
183
|
-
{to_prompt_json(context['nodes'],
|
|
200
|
+
{to_prompt_json(context['nodes'], indent=2)}
|
|
184
201
|
|
|
185
202
|
Task:
|
|
186
203
|
1. Group nodes together such that all duplicate nodes are in the same list of uuids
|
graphiti_core/prompts/eval.py
CHANGED
|
@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
|
|
|
68
68
|
Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
|
|
69
69
|
that maintains the relevant context?
|
|
70
70
|
<QUESTION>
|
|
71
|
-
{to_prompt_json(context['query']
|
|
71
|
+
{to_prompt_json(context['query'])}
|
|
72
72
|
</QUESTION>
|
|
73
73
|
"""
|
|
74
74
|
return [
|
|
@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
|
|
|
84
84
|
Your task is to briefly answer the question in the way that you think Alice would answer the question.
|
|
85
85
|
You are given the following entity summaries and facts to help you determine the answer to your question.
|
|
86
86
|
<ENTITY_SUMMARIES>
|
|
87
|
-
{to_prompt_json(context['entity_summaries']
|
|
87
|
+
{to_prompt_json(context['entity_summaries'])}
|
|
88
88
|
</ENTITY_SUMMARIES>
|
|
89
89
|
<FACTS>
|
|
90
|
-
{to_prompt_json(context['facts']
|
|
90
|
+
{to_prompt_json(context['facts'])}
|
|
91
91
|
</FACTS>
|
|
92
92
|
<QUESTION>
|
|
93
93
|
{context['query']}
|
|
@@ -24,9 +24,16 @@ from .prompt_helpers import to_prompt_json
|
|
|
24
24
|
|
|
25
25
|
class Edge(BaseModel):
|
|
26
26
|
relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
|
|
27
|
-
source_entity_id: int = Field(
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
source_entity_id: int = Field(
|
|
28
|
+
..., description='The id of the source entity from the ENTITIES list'
|
|
29
|
+
)
|
|
30
|
+
target_entity_id: int = Field(
|
|
31
|
+
..., description='The id of the target entity from the ENTITIES list'
|
|
32
|
+
)
|
|
33
|
+
fact: str = Field(
|
|
34
|
+
...,
|
|
35
|
+
description='A natural language description of the relationship between the entities, paraphrased from the source text',
|
|
36
|
+
)
|
|
30
37
|
valid_at: str | None = Field(
|
|
31
38
|
None,
|
|
32
39
|
description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
|
|
@@ -73,7 +80,7 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
73
80
|
</FACT TYPES>
|
|
74
81
|
|
|
75
82
|
<PREVIOUS_MESSAGES>
|
|
76
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
83
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
77
84
|
</PREVIOUS_MESSAGES>
|
|
78
85
|
|
|
79
86
|
<CURRENT_MESSAGE>
|
|
@@ -81,7 +88,7 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
81
88
|
</CURRENT_MESSAGE>
|
|
82
89
|
|
|
83
90
|
<ENTITIES>
|
|
84
|
-
{context['nodes']}
|
|
91
|
+
{to_prompt_json(context['nodes'], indent=2)}
|
|
85
92
|
</ENTITIES>
|
|
86
93
|
|
|
87
94
|
<REFERENCE_TIME>
|
|
@@ -107,11 +114,12 @@ You may use information from the PREVIOUS MESSAGES only to disambiguate referenc
|
|
|
107
114
|
|
|
108
115
|
# EXTRACTION RULES
|
|
109
116
|
|
|
110
|
-
1.
|
|
117
|
+
1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
|
|
118
|
+
- **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
|
|
111
119
|
2. Each fact must involve two **distinct** entities.
|
|
112
120
|
3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
|
|
113
121
|
4. Do not emit duplicate or semantically redundant facts.
|
|
114
|
-
5. The `
|
|
122
|
+
5. The `fact` should closely paraphrase the original source sentence(s). Do not verbatim quote the original text.
|
|
115
123
|
6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
|
|
116
124
|
7. Do **not** hallucinate or infer temporal bounds from unrelated events.
|
|
117
125
|
|
|
@@ -133,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
|
133
141
|
|
|
134
142
|
user_prompt = f"""
|
|
135
143
|
<PREVIOUS MESSAGES>
|
|
136
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
144
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
137
145
|
</PREVIOUS MESSAGES>
|
|
138
146
|
<CURRENT MESSAGE>
|
|
139
147
|
{context['episode_content']}
|
|
@@ -167,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|
|
167
175
|
content=f"""
|
|
168
176
|
|
|
169
177
|
<MESSAGE>
|
|
170
|
-
{to_prompt_json(context['episode_content'],
|
|
178
|
+
{to_prompt_json(context['episode_content'], indent=2)}
|
|
171
179
|
</MESSAGE>
|
|
172
180
|
<REFERENCE TIME>
|
|
173
181
|
{context['reference_time']}
|
|
@@ -89,7 +89,7 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
|
|
|
89
89
|
</ENTITY TYPES>
|
|
90
90
|
|
|
91
91
|
<PREVIOUS MESSAGES>
|
|
92
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
92
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
93
93
|
</PREVIOUS MESSAGES>
|
|
94
94
|
|
|
95
95
|
<CURRENT MESSAGE>
|
|
@@ -151,8 +151,9 @@ For each entity extracted, also determine its entity type based on the provided
|
|
|
151
151
|
Indicate the classified entity type by providing its entity_type_id.
|
|
152
152
|
|
|
153
153
|
Guidelines:
|
|
154
|
-
1.
|
|
155
|
-
2.
|
|
154
|
+
1. Extract all entities that the JSON represents. This will often be something like a "name" or "user" field
|
|
155
|
+
2. Extract all entities mentioned in all other properties throughout the JSON structure
|
|
156
|
+
3. Do NOT extract any properties that contain dates
|
|
156
157
|
"""
|
|
157
158
|
return [
|
|
158
159
|
Message(role='system', content=sys_prompt),
|
|
@@ -196,7 +197,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
|
196
197
|
|
|
197
198
|
user_prompt = f"""
|
|
198
199
|
<PREVIOUS MESSAGES>
|
|
199
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
200
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
200
201
|
</PREVIOUS MESSAGES>
|
|
201
202
|
<CURRENT MESSAGE>
|
|
202
203
|
{context['episode_content']}
|
|
@@ -220,7 +221,7 @@ def classify_nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
220
221
|
|
|
221
222
|
user_prompt = f"""
|
|
222
223
|
<PREVIOUS MESSAGES>
|
|
223
|
-
{to_prompt_json([ep for ep in context['previous_episodes']],
|
|
224
|
+
{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
|
|
224
225
|
</PREVIOUS MESSAGES>
|
|
225
226
|
<CURRENT MESSAGE>
|
|
226
227
|
{context['episode_content']}
|
|
@@ -258,8 +259,8 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|
|
258
259
|
content=f"""
|
|
259
260
|
|
|
260
261
|
<MESSAGES>
|
|
261
|
-
{to_prompt_json(context['previous_episodes'],
|
|
262
|
-
{to_prompt_json(context['episode_content'],
|
|
262
|
+
{to_prompt_json(context['previous_episodes'], indent=2)}
|
|
263
|
+
{to_prompt_json(context['episode_content'], indent=2)}
|
|
263
264
|
</MESSAGES>
|
|
264
265
|
|
|
265
266
|
Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
|
|
@@ -288,8 +289,8 @@ def extract_summary(context: dict[str, Any]) -> list[Message]:
|
|
|
288
289
|
content=f"""
|
|
289
290
|
|
|
290
291
|
<MESSAGES>
|
|
291
|
-
{to_prompt_json(context['previous_episodes'],
|
|
292
|
-
{to_prompt_json(context['episode_content'],
|
|
292
|
+
{to_prompt_json(context['previous_episodes'], indent=2)}
|
|
293
|
+
{to_prompt_json(context['episode_content'], indent=2)}
|
|
293
294
|
</MESSAGES>
|
|
294
295
|
|
|
295
296
|
Given the above MESSAGES and the following ENTITY, update the summary that combines relevant information about the entity
|
|
@@ -4,20 +4,20 @@ from typing import Any
|
|
|
4
4
|
DO_NOT_ESCAPE_UNICODE = '\nDo not escape unicode characters.\n'
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def to_prompt_json(data: Any, ensure_ascii: bool =
|
|
7
|
+
def to_prompt_json(data: Any, ensure_ascii: bool = False, indent: int = 2) -> str:
|
|
8
8
|
"""
|
|
9
9
|
Serialize data to JSON for use in prompts.
|
|
10
10
|
|
|
11
11
|
Args:
|
|
12
12
|
data: The data to serialize
|
|
13
|
-
ensure_ascii: If True, escape non-ASCII characters. If False, preserve them.
|
|
13
|
+
ensure_ascii: If True, escape non-ASCII characters. If False (default), preserve them.
|
|
14
14
|
indent: Number of spaces for indentation
|
|
15
15
|
|
|
16
16
|
Returns:
|
|
17
17
|
JSON string representation of the data
|
|
18
18
|
|
|
19
19
|
Notes:
|
|
20
|
-
|
|
20
|
+
By default (ensure_ascii=False), non-ASCII characters (e.g., Korean, Japanese, Chinese)
|
|
21
21
|
are preserved in their original form in the prompt, making them readable
|
|
22
22
|
in LLM logs and improving model understanding.
|
|
23
23
|
"""
|
|
@@ -59,7 +59,7 @@ def summarize_pair(context: dict[str, Any]) -> list[Message]:
|
|
|
59
59
|
Summaries must be under 250 words.
|
|
60
60
|
|
|
61
61
|
Summaries:
|
|
62
|
-
{to_prompt_json(context['node_summaries'],
|
|
62
|
+
{to_prompt_json(context['node_summaries'], indent=2)}
|
|
63
63
|
""",
|
|
64
64
|
),
|
|
65
65
|
]
|
|
@@ -76,8 +76,8 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
|
|
|
76
76
|
content=f"""
|
|
77
77
|
|
|
78
78
|
<MESSAGES>
|
|
79
|
-
{to_prompt_json(context['previous_episodes'],
|
|
80
|
-
{to_prompt_json(context['episode_content'],
|
|
79
|
+
{to_prompt_json(context['previous_episodes'], indent=2)}
|
|
80
|
+
{to_prompt_json(context['episode_content'], indent=2)}
|
|
81
81
|
</MESSAGES>
|
|
82
82
|
|
|
83
83
|
Given the above MESSAGES and the following ENTITY name, create a summary for the ENTITY. Your summary must only use
|
|
@@ -100,7 +100,7 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
|
|
|
100
100
|
</ENTITY CONTEXT>
|
|
101
101
|
|
|
102
102
|
<ATTRIBUTES>
|
|
103
|
-
{to_prompt_json(context['attributes'],
|
|
103
|
+
{to_prompt_json(context['attributes'], indent=2)}
|
|
104
104
|
</ATTRIBUTES>
|
|
105
105
|
""",
|
|
106
106
|
),
|
|
@@ -120,7 +120,7 @@ def summary_description(context: dict[str, Any]) -> list[Message]:
|
|
|
120
120
|
Summaries must be under 250 words.
|
|
121
121
|
|
|
122
122
|
Summary:
|
|
123
|
-
{to_prompt_json(context['summary'],
|
|
123
|
+
{to_prompt_json(context['summary'], indent=2)}
|
|
124
124
|
""",
|
|
125
125
|
),
|
|
126
126
|
]
|
|
@@ -52,6 +52,17 @@ class SearchFilters(BaseModel):
|
|
|
52
52
|
invalid_at: list[list[DateFilter]] | None = Field(default=None)
|
|
53
53
|
created_at: list[list[DateFilter]] | None = Field(default=None)
|
|
54
54
|
expired_at: list[list[DateFilter]] | None = Field(default=None)
|
|
55
|
+
edge_uuids: list[str] | None = Field(default=None)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def cypher_to_opensearch_operator(op: ComparisonOperator) -> str:
|
|
59
|
+
mapping = {
|
|
60
|
+
ComparisonOperator.greater_than: 'gt',
|
|
61
|
+
ComparisonOperator.less_than: 'lt',
|
|
62
|
+
ComparisonOperator.greater_than_equal: 'gte',
|
|
63
|
+
ComparisonOperator.less_than_equal: 'lte',
|
|
64
|
+
}
|
|
65
|
+
return mapping.get(op, op.value)
|
|
55
66
|
|
|
56
67
|
|
|
57
68
|
def node_search_filter_query_constructor(
|
|
@@ -98,6 +109,10 @@ def edge_search_filter_query_constructor(
|
|
|
98
109
|
filter_queries.append('e.name in $edge_types')
|
|
99
110
|
filter_params['edge_types'] = edge_types
|
|
100
111
|
|
|
112
|
+
if filters.edge_uuids is not None:
|
|
113
|
+
filter_queries.append('e.uuid in $edge_uuids')
|
|
114
|
+
filter_params['edge_uuids'] = filters.edge_uuids
|
|
115
|
+
|
|
101
116
|
if filters.node_labels is not None:
|
|
102
117
|
if provider == GraphProvider.KUZU:
|
|
103
118
|
node_label_filter = (
|
|
@@ -234,3 +249,41 @@ def edge_search_filter_query_constructor(
|
|
|
234
249
|
filter_queries.append(expired_at_filter)
|
|
235
250
|
|
|
236
251
|
return filter_queries, filter_params
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def build_aoss_node_filters(group_ids: list[str], search_filters: SearchFilters) -> list[dict]:
|
|
255
|
+
filters = [{'terms': {'group_id': group_ids}}]
|
|
256
|
+
|
|
257
|
+
if search_filters.node_labels:
|
|
258
|
+
filters.append({'terms': {'node_labels': search_filters.node_labels}})
|
|
259
|
+
|
|
260
|
+
return filters
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def build_aoss_edge_filters(group_ids: list[str], search_filters: SearchFilters) -> list[dict]:
|
|
264
|
+
filters: list[dict] = [{'terms': {'group_id': group_ids}}]
|
|
265
|
+
|
|
266
|
+
if search_filters.edge_types:
|
|
267
|
+
filters.append({'terms': {'edge_types': search_filters.edge_types}})
|
|
268
|
+
|
|
269
|
+
if search_filters.edge_uuids:
|
|
270
|
+
filters.append({'terms': {'uuid': search_filters.edge_uuids}})
|
|
271
|
+
|
|
272
|
+
for field in ['valid_at', 'invalid_at', 'created_at', 'expired_at']:
|
|
273
|
+
ranges = getattr(search_filters, field)
|
|
274
|
+
if ranges:
|
|
275
|
+
# OR of ANDs
|
|
276
|
+
should_clauses = []
|
|
277
|
+
for and_group in ranges:
|
|
278
|
+
and_filters = []
|
|
279
|
+
for df in and_group: # df is a DateFilter
|
|
280
|
+
range_query = {
|
|
281
|
+
'range': {
|
|
282
|
+
field: {cypher_to_opensearch_operator(df.comparison_operator): df.date}
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
and_filters.append(range_query)
|
|
286
|
+
should_clauses.append({'bool': {'filter': and_filters}})
|
|
287
|
+
filters.append({'bool': {'should': should_clauses, 'minimum_should_match': 1}})
|
|
288
|
+
|
|
289
|
+
return filters
|
|
@@ -24,9 +24,7 @@ def format_edge_date_range(edge: EntityEdge) -> str:
|
|
|
24
24
|
return f'{edge.valid_at if edge.valid_at else "date unknown"} - {(edge.invalid_at if edge.invalid_at else "present")}'
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def search_results_to_context_string(
|
|
28
|
-
search_results: SearchResults, ensure_ascii: bool = False
|
|
29
|
-
) -> str:
|
|
27
|
+
def search_results_to_context_string(search_results: SearchResults) -> str:
|
|
30
28
|
"""Reformats a set of SearchResults into a single string to pass directly to an LLM as context"""
|
|
31
29
|
fact_json = [
|
|
32
30
|
{
|
|
@@ -58,16 +56,16 @@ def search_results_to_context_string(
|
|
|
58
56
|
These are the most relevant facts and their valid and invalid dates. Facts are considered valid
|
|
59
57
|
between their valid_at and invalid_at dates. Facts with an invalid_at date of "Present" are considered valid.
|
|
60
58
|
<FACTS>
|
|
61
|
-
{to_prompt_json(fact_json,
|
|
59
|
+
{to_prompt_json(fact_json, indent=12)}
|
|
62
60
|
</FACTS>
|
|
63
61
|
<ENTITIES>
|
|
64
|
-
{to_prompt_json(entity_json,
|
|
62
|
+
{to_prompt_json(entity_json, indent=12)}
|
|
65
63
|
</ENTITIES>
|
|
66
64
|
<EPISODES>
|
|
67
|
-
{to_prompt_json(episode_json,
|
|
65
|
+
{to_prompt_json(episode_json, indent=12)}
|
|
68
66
|
</EPISODES>
|
|
69
67
|
<COMMUNITIES>
|
|
70
|
-
{to_prompt_json(community_json,
|
|
68
|
+
{to_prompt_json(community_json, indent=12)}
|
|
71
69
|
</COMMUNITIES>
|
|
72
70
|
"""
|
|
73
71
|
|