graphiti-core 0.22.0rc4__py3-none-any.whl → 0.22.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/driver/driver.py +5 -7
- graphiti_core/driver/falkordb_driver.py +54 -3
- graphiti_core/driver/graph_operations/graph_operations.py +195 -0
- graphiti_core/driver/neo4j_driver.py +9 -0
- graphiti_core/driver/search_interface/__init__.py +0 -0
- graphiti_core/driver/search_interface/search_interface.py +89 -0
- graphiti_core/edges.py +11 -34
- graphiti_core/llm_client/anthropic_client.py +3 -1
- graphiti_core/llm_client/openai_base_client.py +5 -1
- graphiti_core/llm_client/openai_generic_client.py +5 -1
- graphiti_core/models/edges/edge_db_queries.py +1 -0
- graphiti_core/models/nodes/node_db_queries.py +1 -0
- graphiti_core/nodes.py +26 -99
- graphiti_core/prompts/dedupe_edges.py +4 -4
- graphiti_core/prompts/dedupe_nodes.py +10 -10
- graphiti_core/prompts/extract_edges.py +4 -4
- graphiti_core/prompts/extract_nodes.py +13 -13
- graphiti_core/prompts/prompt_helpers.py +2 -2
- graphiti_core/prompts/summarize_nodes.py +12 -12
- graphiti_core/search/search_filters.py +0 -38
- graphiti_core/search/search_helpers.py +4 -4
- graphiti_core/search/search_utils.py +84 -220
- graphiti_core/utils/bulk_utils.py +14 -28
- graphiti_core/utils/maintenance/edge_operations.py +20 -15
- graphiti_core/utils/maintenance/graph_data_operations.py +6 -25
- {graphiti_core-0.22.0rc4.dist-info → graphiti_core-0.22.1rc1.dist-info}/METADATA +36 -3
- {graphiti_core-0.22.0rc4.dist-info → graphiti_core-0.22.1rc1.dist-info}/RECORD +30 -27
- /graphiti_core/{utils/maintenance/utils.py → driver/graph_operations/__init__.py} +0 -0
- {graphiti_core-0.22.0rc4.dist-info → graphiti_core-0.22.1rc1.dist-info}/WHEEL +0 -0
- {graphiti_core-0.22.0rc4.dist-info → graphiti_core-0.22.1rc1.dist-info}/licenses/LICENSE +0 -0
graphiti_core/nodes.py
CHANGED
|
@@ -27,10 +27,6 @@ from pydantic import BaseModel, Field
|
|
|
27
27
|
from typing_extensions import LiteralString
|
|
28
28
|
|
|
29
29
|
from graphiti_core.driver.driver import (
|
|
30
|
-
COMMUNITY_INDEX_NAME,
|
|
31
|
-
ENTITY_EDGE_INDEX_NAME,
|
|
32
|
-
ENTITY_INDEX_NAME,
|
|
33
|
-
EPISODE_INDEX_NAME,
|
|
34
30
|
GraphDriver,
|
|
35
31
|
GraphProvider,
|
|
36
32
|
)
|
|
@@ -99,6 +95,9 @@ class Node(BaseModel, ABC):
|
|
|
99
95
|
async def save(self, driver: GraphDriver): ...
|
|
100
96
|
|
|
101
97
|
async def delete(self, driver: GraphDriver):
|
|
98
|
+
if driver.graph_operations_interface:
|
|
99
|
+
return await driver.graph_operations_interface.node_delete(self, driver)
|
|
100
|
+
|
|
102
101
|
match driver.provider:
|
|
103
102
|
case GraphProvider.NEO4J:
|
|
104
103
|
records, _, _ = await driver.execute_query(
|
|
@@ -113,27 +112,6 @@ class Node(BaseModel, ABC):
|
|
|
113
112
|
uuid=self.uuid,
|
|
114
113
|
)
|
|
115
114
|
|
|
116
|
-
edge_uuids: list[str] = records[0].get('edge_uuids', []) if records else []
|
|
117
|
-
|
|
118
|
-
if driver.aoss_client:
|
|
119
|
-
# Delete the node from OpenSearch indices
|
|
120
|
-
for index in (EPISODE_INDEX_NAME, ENTITY_INDEX_NAME, COMMUNITY_INDEX_NAME):
|
|
121
|
-
await driver.aoss_client.delete(
|
|
122
|
-
index=index,
|
|
123
|
-
id=self.uuid,
|
|
124
|
-
params={'routing': self.group_id},
|
|
125
|
-
)
|
|
126
|
-
|
|
127
|
-
# Bulk delete the detached edges
|
|
128
|
-
if edge_uuids:
|
|
129
|
-
actions = []
|
|
130
|
-
for eid in edge_uuids:
|
|
131
|
-
actions.append(
|
|
132
|
-
{'delete': {'_index': ENTITY_EDGE_INDEX_NAME, '_id': eid}}
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
await driver.aoss_client.bulk(body=actions)
|
|
136
|
-
|
|
137
115
|
case GraphProvider.KUZU:
|
|
138
116
|
for label in ['Episodic', 'Community']:
|
|
139
117
|
await driver.execute_query(
|
|
@@ -181,14 +159,18 @@ class Node(BaseModel, ABC):
|
|
|
181
159
|
|
|
182
160
|
@classmethod
|
|
183
161
|
async def delete_by_group_id(cls, driver: GraphDriver, group_id: str, batch_size: int = 100):
|
|
162
|
+
if driver.graph_operations_interface:
|
|
163
|
+
return await driver.graph_operations_interface.node_delete_by_group_id(
|
|
164
|
+
cls, driver, group_id, batch_size
|
|
165
|
+
)
|
|
166
|
+
|
|
184
167
|
match driver.provider:
|
|
185
168
|
case GraphProvider.NEO4J:
|
|
186
169
|
async with driver.session() as session:
|
|
187
170
|
await session.run(
|
|
188
171
|
"""
|
|
189
172
|
MATCH (n:Entity|Episodic|Community {group_id: $group_id})
|
|
190
|
-
CALL {
|
|
191
|
-
WITH n
|
|
173
|
+
CALL (n) {
|
|
192
174
|
DETACH DELETE n
|
|
193
175
|
} IN TRANSACTIONS OF $batch_size ROWS
|
|
194
176
|
""",
|
|
@@ -196,31 +178,6 @@ class Node(BaseModel, ABC):
|
|
|
196
178
|
batch_size=batch_size,
|
|
197
179
|
)
|
|
198
180
|
|
|
199
|
-
if driver.aoss_client:
|
|
200
|
-
await driver.aoss_client.delete_by_query(
|
|
201
|
-
index=EPISODE_INDEX_NAME,
|
|
202
|
-
body={'query': {'term': {'group_id': group_id}}},
|
|
203
|
-
params={'routing': group_id},
|
|
204
|
-
)
|
|
205
|
-
|
|
206
|
-
await driver.aoss_client.delete_by_query(
|
|
207
|
-
index=ENTITY_INDEX_NAME,
|
|
208
|
-
body={'query': {'term': {'group_id': group_id}}},
|
|
209
|
-
params={'routing': group_id},
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
await driver.aoss_client.delete_by_query(
|
|
213
|
-
index=COMMUNITY_INDEX_NAME,
|
|
214
|
-
body={'query': {'term': {'group_id': group_id}}},
|
|
215
|
-
params={'routing': group_id},
|
|
216
|
-
)
|
|
217
|
-
|
|
218
|
-
await driver.aoss_client.delete_by_query(
|
|
219
|
-
index=ENTITY_EDGE_INDEX_NAME,
|
|
220
|
-
body={'query': {'term': {'group_id': group_id}}},
|
|
221
|
-
params={'routing': group_id},
|
|
222
|
-
)
|
|
223
|
-
|
|
224
181
|
case GraphProvider.KUZU:
|
|
225
182
|
for label in ['Episodic', 'Community']:
|
|
226
183
|
await driver.execute_query(
|
|
@@ -258,6 +215,11 @@ class Node(BaseModel, ABC):
|
|
|
258
215
|
|
|
259
216
|
@classmethod
|
|
260
217
|
async def delete_by_uuids(cls, driver: GraphDriver, uuids: list[str], batch_size: int = 100):
|
|
218
|
+
if driver.graph_operations_interface:
|
|
219
|
+
return await driver.graph_operations_interface.node_delete_by_uuids(
|
|
220
|
+
cls, driver, uuids, group_id=None, batch_size=batch_size
|
|
221
|
+
)
|
|
222
|
+
|
|
261
223
|
match driver.provider:
|
|
262
224
|
case GraphProvider.FALKORDB:
|
|
263
225
|
for label in ['Entity', 'Episodic', 'Community']:
|
|
@@ -300,7 +262,7 @@ class Node(BaseModel, ABC):
|
|
|
300
262
|
case _: # Neo4J, Neptune
|
|
301
263
|
async with driver.session() as session:
|
|
302
264
|
# Collect all edge UUIDs before deleting nodes
|
|
303
|
-
result = await session.run(
|
|
265
|
+
await session.run(
|
|
304
266
|
"""
|
|
305
267
|
MATCH (n:Entity|Episodic|Community)
|
|
306
268
|
WHERE n.uuid IN $uuids
|
|
@@ -310,18 +272,12 @@ class Node(BaseModel, ABC):
|
|
|
310
272
|
uuids=uuids,
|
|
311
273
|
)
|
|
312
274
|
|
|
313
|
-
record = await result.single()
|
|
314
|
-
edge_uuids: list[str] = (
|
|
315
|
-
record['edge_uuids'] if record and record['edge_uuids'] else []
|
|
316
|
-
)
|
|
317
|
-
|
|
318
275
|
# Now delete the nodes in batches
|
|
319
276
|
await session.run(
|
|
320
277
|
"""
|
|
321
278
|
MATCH (n:Entity|Episodic|Community)
|
|
322
279
|
WHERE n.uuid IN $uuids
|
|
323
|
-
CALL {
|
|
324
|
-
WITH n
|
|
280
|
+
CALL (n) {
|
|
325
281
|
DETACH DELETE n
|
|
326
282
|
} IN TRANSACTIONS OF $batch_size ROWS
|
|
327
283
|
""",
|
|
@@ -329,20 +285,6 @@ class Node(BaseModel, ABC):
|
|
|
329
285
|
batch_size=batch_size,
|
|
330
286
|
)
|
|
331
287
|
|
|
332
|
-
if driver.aoss_client:
|
|
333
|
-
for index in (EPISODE_INDEX_NAME, ENTITY_INDEX_NAME, COMMUNITY_INDEX_NAME):
|
|
334
|
-
await driver.aoss_client.delete_by_query(
|
|
335
|
-
index=index,
|
|
336
|
-
body={'query': {'terms': {'uuid': uuids}}},
|
|
337
|
-
)
|
|
338
|
-
|
|
339
|
-
if edge_uuids:
|
|
340
|
-
actions = [
|
|
341
|
-
{'delete': {'_index': ENTITY_EDGE_INDEX_NAME, '_id': eid}}
|
|
342
|
-
for eid in edge_uuids
|
|
343
|
-
]
|
|
344
|
-
await driver.aoss_client.bulk(body=actions)
|
|
345
|
-
|
|
346
288
|
@classmethod
|
|
347
289
|
async def get_by_uuid(cls, driver: GraphDriver, uuid: str): ...
|
|
348
290
|
|
|
@@ -363,6 +305,9 @@ class EpisodicNode(Node):
|
|
|
363
305
|
)
|
|
364
306
|
|
|
365
307
|
async def save(self, driver: GraphDriver):
|
|
308
|
+
if driver.graph_operations_interface:
|
|
309
|
+
return await driver.graph_operations_interface.episodic_node_save(self, driver)
|
|
310
|
+
|
|
366
311
|
episode_args = {
|
|
367
312
|
'uuid': self.uuid,
|
|
368
313
|
'name': self.name,
|
|
@@ -375,12 +320,6 @@ class EpisodicNode(Node):
|
|
|
375
320
|
'source': self.source.value,
|
|
376
321
|
}
|
|
377
322
|
|
|
378
|
-
if driver.aoss_client:
|
|
379
|
-
await driver.save_to_aoss( # pyright: ignore reportAttributeAccessIssue
|
|
380
|
-
'episodes',
|
|
381
|
-
[episode_args],
|
|
382
|
-
)
|
|
383
|
-
|
|
384
323
|
result = await driver.execute_query(
|
|
385
324
|
get_episode_node_save_query(driver.provider), **episode_args
|
|
386
325
|
)
|
|
@@ -510,26 +449,14 @@ class EntityNode(Node):
|
|
|
510
449
|
return self.name_embedding
|
|
511
450
|
|
|
512
451
|
async def load_name_embedding(self, driver: GraphDriver):
|
|
452
|
+
if driver.graph_operations_interface:
|
|
453
|
+
return await driver.graph_operations_interface.node_load_embeddings(self, driver)
|
|
454
|
+
|
|
513
455
|
if driver.provider == GraphProvider.NEPTUNE:
|
|
514
456
|
query: LiteralString = """
|
|
515
457
|
MATCH (n:Entity {uuid: $uuid})
|
|
516
458
|
RETURN [x IN split(n.name_embedding, ",") | toFloat(x)] as name_embedding
|
|
517
459
|
"""
|
|
518
|
-
elif driver.aoss_client:
|
|
519
|
-
resp = await driver.aoss_client.search(
|
|
520
|
-
body={
|
|
521
|
-
'query': {'multi_match': {'query': self.uuid, 'fields': ['uuid']}},
|
|
522
|
-
'size': 1,
|
|
523
|
-
},
|
|
524
|
-
index=ENTITY_INDEX_NAME,
|
|
525
|
-
params={'routing': self.group_id},
|
|
526
|
-
)
|
|
527
|
-
|
|
528
|
-
if resp['hits']['hits']:
|
|
529
|
-
self.name_embedding = resp['hits']['hits'][0]['_source']['name_embedding']
|
|
530
|
-
return
|
|
531
|
-
else:
|
|
532
|
-
raise NodeNotFoundError(self.uuid)
|
|
533
460
|
|
|
534
461
|
else:
|
|
535
462
|
query: LiteralString = """
|
|
@@ -548,6 +475,9 @@ class EntityNode(Node):
|
|
|
548
475
|
self.name_embedding = records[0]['name_embedding']
|
|
549
476
|
|
|
550
477
|
async def save(self, driver: GraphDriver):
|
|
478
|
+
if driver.graph_operations_interface:
|
|
479
|
+
return await driver.graph_operations_interface.node_save(self, driver)
|
|
480
|
+
|
|
551
481
|
entity_data: dict[str, Any] = {
|
|
552
482
|
'uuid': self.uuid,
|
|
553
483
|
'name': self.name,
|
|
@@ -568,11 +498,8 @@ class EntityNode(Node):
|
|
|
568
498
|
entity_data.update(self.attributes or {})
|
|
569
499
|
labels = ':'.join(self.labels + ['Entity'])
|
|
570
500
|
|
|
571
|
-
if driver.aoss_client:
|
|
572
|
-
await driver.save_to_aoss(ENTITY_INDEX_NAME, [entity_data]) # pyright: ignore reportAttributeAccessIssue
|
|
573
|
-
|
|
574
501
|
result = await driver.execute_query(
|
|
575
|
-
get_entity_node_save_query(driver.provider, labels
|
|
502
|
+
get_entity_node_save_query(driver.provider, labels),
|
|
576
503
|
entity_data=entity_data,
|
|
577
504
|
)
|
|
578
505
|
|
|
@@ -67,13 +67,13 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
67
67
|
Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
|
|
68
68
|
|
|
69
69
|
<EXISTING EDGES>
|
|
70
|
-
{to_prompt_json(context['related_edges']
|
|
70
|
+
{to_prompt_json(context['related_edges'])}
|
|
71
71
|
</EXISTING EDGES>
|
|
72
72
|
|
|
73
73
|
<NEW EDGE>
|
|
74
|
-
{to_prompt_json(context['extracted_edges']
|
|
74
|
+
{to_prompt_json(context['extracted_edges'])}
|
|
75
75
|
</NEW EDGE>
|
|
76
|
-
|
|
76
|
+
|
|
77
77
|
Task:
|
|
78
78
|
If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
|
|
79
79
|
as part of the list of duplicate_facts.
|
|
@@ -98,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
|
|
|
98
98
|
Given the following context, find all of the duplicates in a list of facts:
|
|
99
99
|
|
|
100
100
|
Facts:
|
|
101
|
-
{to_prompt_json(context['edges']
|
|
101
|
+
{to_prompt_json(context['edges'])}
|
|
102
102
|
|
|
103
103
|
Task:
|
|
104
104
|
If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
|
|
@@ -64,20 +64,20 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|
|
64
64
|
role='user',
|
|
65
65
|
content=f"""
|
|
66
66
|
<PREVIOUS MESSAGES>
|
|
67
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
67
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
68
68
|
</PREVIOUS MESSAGES>
|
|
69
69
|
<CURRENT MESSAGE>
|
|
70
70
|
{context['episode_content']}
|
|
71
71
|
</CURRENT MESSAGE>
|
|
72
72
|
<NEW ENTITY>
|
|
73
|
-
{to_prompt_json(context['extracted_node']
|
|
73
|
+
{to_prompt_json(context['extracted_node'])}
|
|
74
74
|
</NEW ENTITY>
|
|
75
75
|
<ENTITY TYPE DESCRIPTION>
|
|
76
|
-
{to_prompt_json(context['entity_type_description']
|
|
76
|
+
{to_prompt_json(context['entity_type_description'])}
|
|
77
77
|
</ENTITY TYPE DESCRIPTION>
|
|
78
78
|
|
|
79
79
|
<EXISTING ENTITIES>
|
|
80
|
-
{to_prompt_json(context['existing_nodes']
|
|
80
|
+
{to_prompt_json(context['existing_nodes'])}
|
|
81
81
|
</EXISTING ENTITIES>
|
|
82
82
|
|
|
83
83
|
Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
|
|
@@ -125,13 +125,13 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
125
125
|
role='user',
|
|
126
126
|
content=f"""
|
|
127
127
|
<PREVIOUS MESSAGES>
|
|
128
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
128
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
129
129
|
</PREVIOUS MESSAGES>
|
|
130
130
|
<CURRENT MESSAGE>
|
|
131
131
|
{context['episode_content']}
|
|
132
132
|
</CURRENT MESSAGE>
|
|
133
|
-
|
|
134
|
-
|
|
133
|
+
|
|
134
|
+
|
|
135
135
|
Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
|
|
136
136
|
Each entity in ENTITIES is represented as a JSON object with the following structure:
|
|
137
137
|
{{
|
|
@@ -142,11 +142,11 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
142
142
|
}}
|
|
143
143
|
|
|
144
144
|
<ENTITIES>
|
|
145
|
-
{to_prompt_json(context['extracted_nodes']
|
|
145
|
+
{to_prompt_json(context['extracted_nodes'])}
|
|
146
146
|
</ENTITIES>
|
|
147
147
|
|
|
148
148
|
<EXISTING ENTITIES>
|
|
149
|
-
{to_prompt_json(context['existing_nodes']
|
|
149
|
+
{to_prompt_json(context['existing_nodes'])}
|
|
150
150
|
</EXISTING ENTITIES>
|
|
151
151
|
|
|
152
152
|
Each entry in EXISTING ENTITIES is an object with the following structure:
|
|
@@ -197,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
|
|
|
197
197
|
Given the following context, deduplicate a list of nodes:
|
|
198
198
|
|
|
199
199
|
Nodes:
|
|
200
|
-
{to_prompt_json(context['nodes']
|
|
200
|
+
{to_prompt_json(context['nodes'])}
|
|
201
201
|
|
|
202
202
|
Task:
|
|
203
203
|
1. Group nodes together such that all duplicate nodes are in the same list of uuids
|
|
@@ -80,7 +80,7 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
80
80
|
</FACT TYPES>
|
|
81
81
|
|
|
82
82
|
<PREVIOUS_MESSAGES>
|
|
83
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
83
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
84
84
|
</PREVIOUS_MESSAGES>
|
|
85
85
|
|
|
86
86
|
<CURRENT_MESSAGE>
|
|
@@ -88,7 +88,7 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
88
88
|
</CURRENT_MESSAGE>
|
|
89
89
|
|
|
90
90
|
<ENTITIES>
|
|
91
|
-
{to_prompt_json(context['nodes']
|
|
91
|
+
{to_prompt_json(context['nodes'])}
|
|
92
92
|
</ENTITIES>
|
|
93
93
|
|
|
94
94
|
<REFERENCE_TIME>
|
|
@@ -141,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
|
141
141
|
|
|
142
142
|
user_prompt = f"""
|
|
143
143
|
<PREVIOUS MESSAGES>
|
|
144
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
144
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
145
145
|
</PREVIOUS MESSAGES>
|
|
146
146
|
<CURRENT MESSAGE>
|
|
147
147
|
{context['episode_content']}
|
|
@@ -175,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|
|
175
175
|
content=f"""
|
|
176
176
|
|
|
177
177
|
<MESSAGE>
|
|
178
|
-
{to_prompt_json(context['episode_content']
|
|
178
|
+
{to_prompt_json(context['episode_content'])}
|
|
179
179
|
</MESSAGE>
|
|
180
180
|
<REFERENCE TIME>
|
|
181
181
|
{context['reference_time']}
|
|
@@ -93,7 +93,7 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
|
|
|
93
93
|
</ENTITY TYPES>
|
|
94
94
|
|
|
95
95
|
<PREVIOUS MESSAGES>
|
|
96
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
96
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
97
97
|
</PREVIOUS MESSAGES>
|
|
98
98
|
|
|
99
99
|
<CURRENT MESSAGE>
|
|
@@ -201,7 +201,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
|
|
|
201
201
|
|
|
202
202
|
user_prompt = f"""
|
|
203
203
|
<PREVIOUS MESSAGES>
|
|
204
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
204
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
205
205
|
</PREVIOUS MESSAGES>
|
|
206
206
|
<CURRENT MESSAGE>
|
|
207
207
|
{context['episode_content']}
|
|
@@ -225,22 +225,22 @@ def classify_nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
225
225
|
|
|
226
226
|
user_prompt = f"""
|
|
227
227
|
<PREVIOUS MESSAGES>
|
|
228
|
-
{to_prompt_json([ep for ep in context['previous_episodes']]
|
|
228
|
+
{to_prompt_json([ep for ep in context['previous_episodes']])}
|
|
229
229
|
</PREVIOUS MESSAGES>
|
|
230
230
|
<CURRENT MESSAGE>
|
|
231
231
|
{context['episode_content']}
|
|
232
232
|
</CURRENT MESSAGE>
|
|
233
|
-
|
|
233
|
+
|
|
234
234
|
<EXTRACTED ENTITIES>
|
|
235
235
|
{context['extracted_entities']}
|
|
236
236
|
</EXTRACTED ENTITIES>
|
|
237
|
-
|
|
237
|
+
|
|
238
238
|
<ENTITY TYPES>
|
|
239
239
|
{context['entity_types']}
|
|
240
240
|
</ENTITY TYPES>
|
|
241
|
-
|
|
241
|
+
|
|
242
242
|
Given the above conversation, extracted entities, and provided entity types and their descriptions, classify the extracted entities.
|
|
243
|
-
|
|
243
|
+
|
|
244
244
|
Guidelines:
|
|
245
245
|
1. Each entity must have exactly one type
|
|
246
246
|
2. Only use the provided ENTITY TYPES as types, do not use additional types to classify entities.
|
|
@@ -269,10 +269,10 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|
|
269
269
|
2. Only use the provided MESSAGES and ENTITY to set attribute values.
|
|
270
270
|
|
|
271
271
|
<MESSAGES>
|
|
272
|
-
{to_prompt_json(context['previous_episodes']
|
|
273
|
-
{to_prompt_json(context['episode_content']
|
|
272
|
+
{to_prompt_json(context['previous_episodes'])}
|
|
273
|
+
{to_prompt_json(context['episode_content'])}
|
|
274
274
|
</MESSAGES>
|
|
275
|
-
|
|
275
|
+
|
|
276
276
|
<ENTITY>
|
|
277
277
|
{context['node']}
|
|
278
278
|
</ENTITY>
|
|
@@ -292,12 +292,12 @@ def extract_summary(context: dict[str, Any]) -> list[Message]:
|
|
|
292
292
|
content=f"""
|
|
293
293
|
Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
|
|
294
294
|
from the messages and relevant information from the existing summary.
|
|
295
|
-
|
|
295
|
+
|
|
296
296
|
{summary_instructions}
|
|
297
297
|
|
|
298
298
|
<MESSAGES>
|
|
299
|
-
{to_prompt_json(context['previous_episodes']
|
|
300
|
-
{to_prompt_json(context['episode_content']
|
|
299
|
+
{to_prompt_json(context['previous_episodes'])}
|
|
300
|
+
{to_prompt_json(context['episode_content'])}
|
|
301
301
|
</MESSAGES>
|
|
302
302
|
|
|
303
303
|
<ENTITY>
|
|
@@ -20,14 +20,14 @@ from typing import Any
|
|
|
20
20
|
DO_NOT_ESCAPE_UNICODE = '\nDo not escape unicode characters.\n'
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
def to_prompt_json(data: Any, ensure_ascii: bool = False, indent: int = 2) -> str:
|
|
23
|
+
def to_prompt_json(data: Any, ensure_ascii: bool = False, indent: int | None = None) -> str:
|
|
24
24
|
"""
|
|
25
25
|
Serialize data to JSON for use in prompts.
|
|
26
26
|
|
|
27
27
|
Args:
|
|
28
28
|
data: The data to serialize
|
|
29
29
|
ensure_ascii: If True, escape non-ASCII characters. If False (default), preserve them.
|
|
30
|
-
indent: Number of spaces for indentation
|
|
30
|
+
indent: Number of spaces for indentation. Defaults to None (minified).
|
|
31
31
|
|
|
32
32
|
Returns:
|
|
33
33
|
JSON string representation of the data
|
|
@@ -56,11 +56,11 @@ def summarize_pair(context: dict[str, Any]) -> list[Message]:
|
|
|
56
56
|
role='user',
|
|
57
57
|
content=f"""
|
|
58
58
|
Synthesize the information from the following two summaries into a single succinct summary.
|
|
59
|
-
|
|
59
|
+
|
|
60
60
|
IMPORTANT: Keep the summary concise and to the point. SUMMARIES MUST BE LESS THAN 250 CHARACTERS.
|
|
61
61
|
|
|
62
62
|
Summaries:
|
|
63
|
-
{to_prompt_json(context['node_summaries']
|
|
63
|
+
{to_prompt_json(context['node_summaries'])}
|
|
64
64
|
""",
|
|
65
65
|
),
|
|
66
66
|
]
|
|
@@ -77,28 +77,28 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
|
|
|
77
77
|
content=f"""
|
|
78
78
|
Given the MESSAGES and the ENTITY name, create a summary for the ENTITY. Your summary must only use
|
|
79
79
|
information from the provided MESSAGES. Your summary should also only contain information relevant to the
|
|
80
|
-
provided ENTITY.
|
|
81
|
-
|
|
80
|
+
provided ENTITY.
|
|
81
|
+
|
|
82
82
|
In addition, extract any values for the provided entity properties based on their descriptions.
|
|
83
83
|
If the value of the entity property cannot be found in the current context, set the value of the property to the Python value None.
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
{summary_instructions}
|
|
86
86
|
|
|
87
87
|
<MESSAGES>
|
|
88
|
-
{to_prompt_json(context['previous_episodes']
|
|
89
|
-
{to_prompt_json(context['episode_content']
|
|
88
|
+
{to_prompt_json(context['previous_episodes'])}
|
|
89
|
+
{to_prompt_json(context['episode_content'])}
|
|
90
90
|
</MESSAGES>
|
|
91
|
-
|
|
91
|
+
|
|
92
92
|
<ENTITY>
|
|
93
93
|
{context['node_name']}
|
|
94
94
|
</ENTITY>
|
|
95
|
-
|
|
95
|
+
|
|
96
96
|
<ENTITY CONTEXT>
|
|
97
97
|
{context['node_summary']}
|
|
98
98
|
</ENTITY CONTEXT>
|
|
99
|
-
|
|
99
|
+
|
|
100
100
|
<ATTRIBUTES>
|
|
101
|
-
{to_prompt_json(context['attributes']
|
|
101
|
+
{to_prompt_json(context['attributes'])}
|
|
102
102
|
</ATTRIBUTES>
|
|
103
103
|
""",
|
|
104
104
|
),
|
|
@@ -118,7 +118,7 @@ def summary_description(context: dict[str, Any]) -> list[Message]:
|
|
|
118
118
|
Summaries must be under 250 characters.
|
|
119
119
|
|
|
120
120
|
Summary:
|
|
121
|
-
{to_prompt_json(context['summary']
|
|
121
|
+
{to_prompt_json(context['summary'])}
|
|
122
122
|
""",
|
|
123
123
|
),
|
|
124
124
|
]
|
|
@@ -249,41 +249,3 @@ def edge_search_filter_query_constructor(
|
|
|
249
249
|
filter_queries.append(expired_at_filter)
|
|
250
250
|
|
|
251
251
|
return filter_queries, filter_params
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
def build_aoss_node_filters(group_ids: list[str], search_filters: SearchFilters) -> list[dict]:
|
|
255
|
-
filters = [{'terms': {'group_id': group_ids}}]
|
|
256
|
-
|
|
257
|
-
if search_filters.node_labels:
|
|
258
|
-
filters.append({'terms': {'node_labels': search_filters.node_labels}})
|
|
259
|
-
|
|
260
|
-
return filters
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
def build_aoss_edge_filters(group_ids: list[str], search_filters: SearchFilters) -> list[dict]:
|
|
264
|
-
filters: list[dict] = [{'terms': {'group_id': group_ids}}]
|
|
265
|
-
|
|
266
|
-
if search_filters.edge_types:
|
|
267
|
-
filters.append({'terms': {'edge_types': search_filters.edge_types}})
|
|
268
|
-
|
|
269
|
-
if search_filters.edge_uuids:
|
|
270
|
-
filters.append({'terms': {'uuid': search_filters.edge_uuids}})
|
|
271
|
-
|
|
272
|
-
for field in ['valid_at', 'invalid_at', 'created_at', 'expired_at']:
|
|
273
|
-
ranges = getattr(search_filters, field)
|
|
274
|
-
if ranges:
|
|
275
|
-
# OR of ANDs
|
|
276
|
-
should_clauses = []
|
|
277
|
-
for and_group in ranges:
|
|
278
|
-
and_filters = []
|
|
279
|
-
for df in and_group: # df is a DateFilter
|
|
280
|
-
range_query = {
|
|
281
|
-
'range': {
|
|
282
|
-
field: {cypher_to_opensearch_operator(df.comparison_operator): df.date}
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
and_filters.append(range_query)
|
|
286
|
-
should_clauses.append({'bool': {'filter': and_filters}})
|
|
287
|
-
filters.append({'bool': {'should': should_clauses, 'minimum_should_match': 1}})
|
|
288
|
-
|
|
289
|
-
return filters
|
|
@@ -56,16 +56,16 @@ def search_results_to_context_string(search_results: SearchResults) -> str:
|
|
|
56
56
|
These are the most relevant facts and their valid and invalid dates. Facts are considered valid
|
|
57
57
|
between their valid_at and invalid_at dates. Facts with an invalid_at date of "Present" are considered valid.
|
|
58
58
|
<FACTS>
|
|
59
|
-
{to_prompt_json(fact_json
|
|
59
|
+
{to_prompt_json(fact_json)}
|
|
60
60
|
</FACTS>
|
|
61
61
|
<ENTITIES>
|
|
62
|
-
{to_prompt_json(entity_json
|
|
62
|
+
{to_prompt_json(entity_json)}
|
|
63
63
|
</ENTITIES>
|
|
64
64
|
<EPISODES>
|
|
65
|
-
{to_prompt_json(episode_json
|
|
65
|
+
{to_prompt_json(episode_json)}
|
|
66
66
|
</EPISODES>
|
|
67
67
|
<COMMUNITIES>
|
|
68
|
-
{to_prompt_json(community_json
|
|
68
|
+
{to_prompt_json(community_json)}
|
|
69
69
|
</COMMUNITIES>
|
|
70
70
|
"""
|
|
71
71
|
|