graphiti_core-0.1.0-py3-none-any.whl → graphiti_core-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of graphiti-core might be problematic.
- graphiti_core/graphiti.py +105 -85
- graphiti_core/llm_client/openai_client.py +0 -1
- graphiti_core/prompts/dedupe_edges.py +46 -8
- graphiti_core/prompts/dedupe_nodes.py +61 -13
- graphiti_core/prompts/extract_edges.py +2 -1
- graphiti_core/prompts/extract_nodes.py +2 -0
- graphiti_core/search/search.py +8 -8
- graphiti_core/search/search_utils.py +44 -26
- graphiti_core/utils/bulk_utils.py +138 -20
- graphiti_core/utils/maintenance/edge_operations.py +76 -9
- graphiti_core/utils/maintenance/node_operations.py +98 -40
- graphiti_core/utils/maintenance/temporal_operations.py +3 -4
- graphiti_core/utils/utils.py +22 -1
- {graphiti_core-0.1.0.dist-info → graphiti_core-0.2.0.dist-info}/METADATA +38 -38
- {graphiti_core-0.1.0.dist-info → graphiti_core-0.2.0.dist-info}/RECORD +17 -17
- {graphiti_core-0.1.0.dist-info → graphiti_core-0.2.0.dist-info}/LICENSE +0 -0
- {graphiti_core-0.1.0.dist-info → graphiti_core-0.2.0.dist-info}/WHEEL +0 -0
graphiti_core/graphiti.py
CHANGED
@@ -29,6 +29,7 @@ from graphiti_core.llm_client.utils import generate_embedding
 from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
 from graphiti_core.search.search import Reranker, SearchConfig, SearchMethod, hybrid_search
 from graphiti_core.search.search_utils import (
+    RELEVANT_SCHEMA_LIMIT,
     get_relevant_edges,
     get_relevant_nodes,
     hybrid_node_search,
@@ -41,19 +42,23 @@ from graphiti_core.utils.bulk_utils import (
     RawEpisode,
     dedupe_edges_bulk,
     dedupe_nodes_bulk,
+    extract_edge_dates_bulk,
     extract_nodes_and_edges_bulk,
     resolve_edge_pointers,
     retrieve_previous_episodes_bulk,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
-    dedupe_extracted_edges,
     extract_edges,
+    resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
     EPISODE_WINDOW_LEN,
     build_indices_and_constraints,
 )
-from graphiti_core.utils.maintenance.node_operations import
+from graphiti_core.utils.maintenance.node_operations import (
+    extract_nodes,
+    resolve_extracted_nodes,
+)
 from graphiti_core.utils.maintenance.temporal_operations import (
     extract_edge_dates,
     invalidate_edges,
@@ -175,9 +180,9 @@ class Graphiti:
         await build_indices_and_constraints(self.driver)
 
     async def retrieve_episodes(
-
-
-
+        self,
+        reference_time: datetime,
+        last_n: int = EPISODE_WINDOW_LEN,
     ) -> list[EpisodicNode]:
         """
         Retrieve the last n episodic nodes from the graph.
@@ -205,14 +210,14 @@ class Graphiti:
         return await retrieve_episodes(self.driver, reference_time, last_n)
 
     async def add_episode(
-
-
-
-
-
-
-
-
+        self,
+        name: str,
+        episode_body: str,
+        source_description: str,
+        reference_time: datetime,
+        source: EpisodeType = EpisodeType.message,
+        success_callback: Callable | None = None,
+        error_callback: Callable | None = None,
     ):
         """
         Process an episode and update the graph.
@@ -263,7 +268,6 @@ class Graphiti:
 
         nodes: list[EntityNode] = []
         entity_edges: list[EntityEdge] = []
-        episodic_edges: list[EpisodicEdge] = []
         embedder = self.llm_client.get_embedder()
         now = datetime.now()
 
@@ -278,6 +282,8 @@ class Graphiti:
             valid_at=reference_time,
         )
 
+        # Extract entities as nodes
+
         extracted_nodes = await extract_nodes(self.llm_client, episode, previous_episodes)
         logger.info(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
 
@@ -286,59 +292,82 @@ class Graphiti:
         await asyncio.gather(
             *[node.generate_name_embedding(embedder) for node in extracted_nodes]
         )
-
+
+        # Resolve extracted nodes with nodes already in the graph
+        existing_nodes_lists: list[list[EntityNode]] = list(
+            await asyncio.gather(
+                *[get_relevant_nodes([node], self.driver) for node in extracted_nodes]
+            )
+        )
+
         logger.info(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
-
-
+
+        mentioned_nodes, _ = await resolve_extracted_nodes(
+            self.llm_client, extracted_nodes, existing_nodes_lists
         )
-        logger.info(f'Adjusted
-        nodes.extend(
+        logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
+        nodes.extend(mentioned_nodes)
 
+        # Extract facts as edges given entity nodes
         extracted_edges = await extract_edges(
-            self.llm_client, episode,
+            self.llm_client, episode, mentioned_nodes, previous_episodes
         )
 
+        # calculate embeddings
         await asyncio.gather(*[edge.generate_embedding(embedder) for edge in extracted_edges])
 
-
-
+        # Resolve extracted edges with edges already in the graph
+        existing_edges_list: list[list[EntityEdge]] = list(
+            await asyncio.gather(
+                *[
+                    get_relevant_edges(
+                        [edge],
+                        self.driver,
+                        RELEVANT_SCHEMA_LIMIT,
+                        edge.source_node_uuid,
+                        edge.target_node_uuid,
+                    )
+                    for edge in extracted_edges
+                ]
+            )
+        )
+        logger.info(
+            f'Existing edges lists: {[(e.name, e.uuid) for edges_lst in existing_edges_list for e in edges_lst]}'
+        )
         logger.info(f'Extracted edges: {[(e.name, e.uuid) for e in extracted_edges]}')
 
-        deduped_edges = await
-            self.llm_client,
-            extracted_edges,
-            existing_edges,
+        deduped_edges: list[EntityEdge] = await resolve_extracted_edges(
+            self.llm_client, extracted_edges, existing_edges_list
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        for edge in existing_edges:
-            valid_at, invalid_at, _ = await extract_edge_dates(
-                self.llm_client,
-                edge,
-                episode.valid_at,
-                episode,
-                previous_episodes,
-            )
+        # Extract dates for the newly extracted edges
+        edge_dates = await asyncio.gather(
+            *[
+                extract_edge_dates(
+                    self.llm_client,
+                    edge,
+                    episode,
+                    previous_episodes,
+                )
+                for edge in deduped_edges
+            ]
+        )
+
+        for i, edge in enumerate(deduped_edges):
+            valid_at = edge_dates[i][0]
+            invalid_at = edge_dates[i][1]
+
             edge.valid_at = valid_at
             edge.invalid_at = invalid_at
-            if edge.invalid_at:
-                edge.expired_at =
+            if edge.invalid_at is not None:
+                edge.expired_at = now
+
+        entity_edges.extend(deduped_edges)
+
+        existing_edges: list[EntityEdge] = [
+            e for edge_lst in existing_edges_list for e in edge_lst
+        ]
+
         (
             old_edges_with_nodes_pending_invalidation,
             new_edges_with_nodes,
@@ -361,30 +390,18 @@ class Graphiti:
             for deduped_edge in deduped_edges:
                 if deduped_edge.uuid == edge.uuid:
                     deduped_edge.expired_at = edge.expired_at
-            edge_touched_node_uuids.append(edge.source_node_uuid)
-            edge_touched_node_uuids.append(edge.target_node_uuid)
         logger.info(f'Invalidated edges: {[(e.name, e.uuid) for e in invalidated_edges]}')
 
-
-
-        entity_edges.extend(edges_to_save)
-
-        edge_touched_node_uuids = list(set(edge_touched_node_uuids))
-        involved_nodes = [node for node in nodes if node.uuid in edge_touched_node_uuids]
-
-        logger.info(f'Edge touched nodes: {[(n.name, n.uuid) for n in involved_nodes]}')
+        entity_edges.extend(existing_edges)
 
         logger.info(f'Deduped edges: {[(e.name, e.uuid) for e in deduped_edges]}')
 
-        episodic_edges
-
-
-
-            episode,
-            now,
-        )
+        episodic_edges: list[EpisodicEdge] = build_episodic_edges(
+            mentioned_nodes,
+            episode,
+            now,
         )
-
+
         logger.info(f'Built episodic edges: {episodic_edges}')
 
         # Future optimization would be using batch operations to save nodes and edges
@@ -395,9 +412,7 @@ class Graphiti:
 
         end = time()
         logger.info(f'Completed add_episode in {(end - start) * 1000} ms')
-
-        # if isinstance(node, EntityNode):
-        #     await node.update_summary(self.driver)
+
         if success_callback:
             await success_callback(episode)
         except Exception as e:
@@ -407,8 +422,8 @@ class Graphiti:
             raise e
 
     async def add_episode_bulk(
-
-
+        self,
+        bulk_episodes: list[RawEpisode],
    ):
         """
         Process multiple episodes in bulk and update the graph.
@@ -481,15 +496,18 @@ class Graphiti:
             *[edge.generate_embedding(embedder) for edge in extracted_edges],
         )
 
-        # Dedupe extracted nodes
-        nodes, uuid_map = await
+        # Dedupe extracted nodes, compress extracted edges
+        (nodes, uuid_map), extracted_edges_timestamped = await asyncio.gather(
+            dedupe_nodes_bulk(self.driver, self.llm_client, extracted_nodes),
+            extract_edge_dates_bulk(self.llm_client, extracted_edges, episode_pairs),
+        )
 
         # save nodes to KG
         await asyncio.gather(*[node.save(self.driver) for node in nodes])
 
         # re-map edge pointers so that they don't point to discard dupe nodes
         extracted_edges_with_resolved_pointers: list[EntityEdge] = resolve_edge_pointers(
-
+            extracted_edges_timestamped, uuid_map
         )
         episodic_edges_with_resolved_pointers: list[EpisodicEdge] = resolve_edge_pointers(
             episodic_edges, uuid_map
@@ -569,17 +587,19 @@ class Graphiti:
         return edges
 
     async def _search(
-
-
-
-
-
+        self,
+        query: str,
+        timestamp: datetime,
+        config: SearchConfig,
+        center_node_uuid: str | None = None,
     ):
         return await hybrid_search(
             self.driver, self.llm_client.get_embedder(), query, timestamp, config, center_node_uuid
         )
 
-    async def get_nodes_by_query(
+    async def get_nodes_by_query(
+        self, query: str, limit: int = RELEVANT_SCHEMA_LIMIT
+    ) -> list[EntityNode]:
         """
         Retrieve nodes from the graph database based on a text query.
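Taken together, the graphiti.py changes replace the old flat dedupe pass with per-item resolution: each extracted node and edge is matched against its own list of relevant candidates (get_relevant_nodes / get_relevant_edges, fetched concurrently with asyncio.gather) and then merged by resolve_extracted_nodes / resolve_extracted_edges, with edge dates extracted concurrently afterwards. A minimal sketch of driving the new add_episode signature, assuming the top-level Graphiti export from the project README; the connection details and episode text are placeholders, not from this diff:

import asyncio
from datetime import datetime

from graphiti_core import Graphiti
from graphiti_core.nodes import EpisodeType


async def main() -> None:
    # Placeholder Neo4j credentials; substitute your own instance.
    graphiti = Graphiti('bolt://localhost:7687', 'neo4j', 'password')

    # The 0.2.0 signature shown in the diff above.
    await graphiti.add_episode(
        name='conversation-42',
        episode_body='Alice told Bob she moved to Paris in May.',
        source_description='chat transcript',
        reference_time=datetime.now(),
        source=EpisodeType.message,
    )


asyncio.run(main())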
graphiti_core/prompts/dedupe_edges.py
CHANGED

@@ -23,12 +23,14 @@ from .models import Message, PromptFunction, PromptVersion
 class Prompt(Protocol):
     v1: PromptVersion
     v2: PromptVersion
+    v3: PromptVersion
     edge_list: PromptVersion
 
 
 class Versions(TypedDict):
     v1: PromptFunction
     v2: PromptFunction
+    v3: PromptFunction
     edge_list: PromptFunction
 
 
@@ -41,17 +43,17 @@ def v1(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        Given the following context, deduplicate facts from a list of new facts given a list of existing
+        Given the following context, deduplicate facts from a list of new facts given a list of existing edges:
 
-        Existing
+        Existing Edges:
         {json.dumps(context['existing_edges'], indent=2)}
 
-        New
+        New Edges:
         {json.dumps(context['extracted_edges'], indent=2)}
 
         Task:
-        If any
-
+        If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
+        When finding duplicates edges, synthesize their facts into a short new fact.
 
         Guidelines:
         1. identical or near identical facts are duplicates
@@ -60,9 +62,11 @@ def v1(context: dict[str, Any]) -> list[Message]:
 
         Respond with a JSON object in the following format:
         {{
-            "
+            "duplicates": [
                 {{
-                    "uuid": "
+                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
+                    "duplicate_of": "uuid of the existing node",
+                    "fact": "one sentence description of the fact"
                 }}
             ]
         }}
@@ -113,6 +117,40 @@ def v2(context: dict[str, Any]) -> list[Message]:
     ]
 
 
+def v3(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates edges from edge lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
+
+        Existing Edges:
+        {json.dumps(context['existing_edges'], indent=2)}
+
+        New Edge:
+        {json.dumps(context['extracted_edges'], indent=2)}
+        Task:
+        1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
+        response. Otherwise, return 'is_duplicate: false'
+        2. If is_duplicate is true, also return the uuid of the existing edge in the response
+
+        Guidelines:
+        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
+
+        Respond with a JSON object in the following format:
+        {{
+            "is_duplicate": true or false,
+            "uuid": uuid of the existing edge like "5d643020624c42fa9de13f97b1b3fa39" or null,
+        }}
+        """,
+        ),
+    ]
+
+
 def edge_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
@@ -151,4 +189,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]
 
 
-versions: Versions = {'v1': v1, 'v2': v2, 'edge_list': edge_list}
+versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'edge_list': edge_list}
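Unlike v2, which dedupes a whole batch at once, the new v3 prompt classifies a single candidate: it answers is_duplicate (plus the matching uuid) for one new edge against a list of existing ones, which is the per-edge question resolve_extracted_edges asks. A rough sketch of rendering it; the context values here are made up for illustration:

from graphiti_core.prompts.dedupe_edges import versions

# Hypothetical context: one freshly extracted edge vs. one already in the graph.
context = {
    'existing_edges': [
        {'uuid': '5d643020624c42fa9de13f97b1b3fa39', 'name': 'LIVES_IN',
         'fact': 'Alice lives in Paris'}
    ],
    'extracted_edges': [
        {'uuid': '0f1e2d3c4b5a69788766554433221100', 'name': 'MOVED_TO',
         'fact': 'Alice moved to Paris'}
    ],
}

for message in versions['v3'](context):
    print(message.role, message.content[:60], sep=': ')
# The model is expected to reply with JSON like
# {"is_duplicate": true, "uuid": "5d643020624c42fa9de13f97b1b3fa39"}.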
graphiti_core/prompts/dedupe_nodes.py
CHANGED

@@ -23,13 +23,15 @@ from .models import Message, PromptFunction, PromptVersion
 class Prompt(Protocol):
     v1: PromptVersion
     v2: PromptVersion
+    v3: PromptVersion
     node_list: PromptVersion
 
 
 class Versions(TypedDict):
     v1: PromptFunction
     v2: PromptFunction
-
+    v3: PromptFunction
+    node_list: PromptFunction
 
 
 def v1(context: dict[str, Any]) -> list[Message]:
@@ -53,7 +55,9 @@ def v1(context: dict[str, Any]) -> list[Message]:
         1. start with the list of nodes from New Nodes
         2. If any node in New Nodes is a duplicate of a node in Existing Nodes, replace the new node with the existing
         node in the list
-        3.
+        3. when deduplicating nodes, synthesize their summaries into a short new summary that contains the relevant information
+        of the summaries of the new and existing nodes
+        4. Respond with the resulting list of nodes
 
         Guidelines:
         1. Use both the name and summary of nodes to determine if they are duplicates,
@@ -64,6 +68,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
             "new_nodes": [
                 {{
                     "name": "Unique identifier for the node",
+                    "summary": "Brief summary of the node's role or significance"
                 }}
             ]
         }}
@@ -91,20 +96,23 @@ def v2(context: dict[str, Any]) -> list[Message]:
         Important:
         If a node in the new nodes is describing the same entity as a node in the existing nodes, mark it as a duplicate!!!
         Task:
-        If any node in New Nodes is a duplicate of a node in Existing Nodes, add their
+        If any node in New Nodes is a duplicate of a node in Existing Nodes, add their uuids to the output list
+        When finding duplicates nodes, synthesize their summaries into a short new summary that contains the
+        relevant information of the summaries of the new and existing nodes.
 
         Guidelines:
         1. Use both the name and summary of nodes to determine if they are duplicates,
         duplicate nodes may have different names
-        2. In the output,
-        the
+        2. In the output, uuid should always be the uuid of the New Node that is a duplicate. duplicate_of should be
+        the uuid of the Existing Node.
 
         Respond with a JSON object in the following format:
         {{
             "duplicates": [
                 {{
-                    "
-                    "duplicate_of": "
+                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
+                    "duplicate_of": "uuid of the existing node",
+                    "summary": "Brief summary of the node's role or significance. Takes information from the new and existing nodes"
                 }}
             ]
         }}
@@ -113,6 +121,44 @@ def v2(context: dict[str, Any]) -> list[Message]:
     ]
 
 
+def v3(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates nodes from node lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Given the following context, determine whether the New Node represents any of the entities in the list of Existing Nodes.
+
+        Existing Nodes:
+        {json.dumps(context['existing_nodes'], indent=2)}
+
+        New Node:
+        {json.dumps(context['extracted_nodes'], indent=2)}
+        Task:
+        1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
+        response. Otherwise, return 'is_duplicate: false'
+        2. If is_duplicate is true, also return the uuid of the existing node in the response
+        3. If is_duplicate is true, return a summary that synthesizes the information in the New Node summary and the
+        summary of the Existing Node it is a duplicate of.
+
+        Guidelines:
+        1. Use both the name and summary of nodes to determine if the entities are duplicates,
+        duplicate nodes may have different names
+
+        Respond with a JSON object in the following format:
+        {{
+            "is_duplicate": true or false,
+            "uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
+            "summary": "Brief summary of the node's role or significance. Takes information from the new and existing node"
+        }}
+        """,
+        ),
+    ]
+
+
 def node_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
@@ -128,18 +174,20 @@ def node_list(context: dict[str, Any]) -> list[Message]:
         {json.dumps(context['nodes'], indent=2)}
 
         Task:
-        1. Group nodes together such that all duplicate nodes are in the same list of
-        2. All duplicate
+        1. Group nodes together such that all duplicate nodes are in the same list of uuids
+        2. All duplicate uuids should be grouped together in the same list
+        3. Also return a new summary that synthesizes the summary into a new short summary
 
         Guidelines:
-        1. Each
-        2. If a node has no duplicates, it should appear in the response in a list of only one
+        1. Each uuid from the list of nodes should appear EXACTLY once in your response
+        2. If a node has no duplicates, it should appear in the response in a list of only one uuid
 
         Respond with a JSON object in the following format:
         {{
             "nodes": [
                 {{
-                    "
+                    "uuids": ["5d643020624c42fa9de13f97b1b3fa39", "node that is a duplicate of 5d643020624c42fa9de13f97b1b3fa39"],
+                    "summary": "Brief summary of the node summaries that appear in the list of names."
                 }}
             ]
         }}
@@ -148,4 +196,4 @@ def node_list(context: dict[str, Any]) -> list[Message]:
     ]
 
 
-versions: Versions = {'v1': v1, 'v2': v2, 'node_list': node_list}
+versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'node_list': node_list}
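The node-side v3 mirrors the edge version but additionally asks for a merged summary, which resolve_extracted_nodes can write back onto the surviving node. A sketch under the same assumptions, with made-up context values:

from graphiti_core.prompts.dedupe_nodes import versions

# Hypothetical context: one new node vs. the graph's existing candidates.
context = {
    'existing_nodes': [
        {'uuid': '5d643020624c42fa9de13f97b1b3fa39', 'name': 'Alice Smith',
         'summary': 'Software engineer based in Paris.'}
    ],
    'extracted_nodes': [
        {'uuid': '0f1e2d3c4b5a69788766554433221100', 'name': 'Alice',
         'summary': 'Recently moved to Paris.'}
    ],
}

messages = versions['v3'](context)
# Expected reply shape:
# {"is_duplicate": true, "uuid": "5d643020624c42fa9de13f97b1b3fa39",
#  "summary": "Alice Smith, a software engineer who recently moved to Paris."}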
graphiti_core/prompts/extract_edges.py
CHANGED

@@ -110,10 +110,11 @@ def v2(context: dict[str, Any]) -> list[Message]:
 
         Guidelines:
         1. Create edges only between the provided nodes.
-        2. Each edge should represent a clear relationship between two nodes.
+        2. Each edge should represent a clear relationship between two DISTINCT nodes.
         3. The relation_type should be a concise, all-caps description of the relationship (e.g., LOVES, IS_FRIENDS_WITH, WORKS_FOR).
         4. Provide a more detailed fact describing the relationship.
         5. Consider temporal aspects of relationships when relevant.
+        6. Avoid using the same node as the source and target of a relationship
 
         Respond with a JSON object in the following format:
         {{
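The two new guidelines only steer the model away from self-referential edges; this diff adds no code-level guard for them. If you wanted one downstream, a filter like the following would do. It is illustrative only and assumes EntityEdge is importable from graphiti_core.edges, as the other 0.2.0 modules suggest:

from graphiti_core.edges import EntityEdge


def drop_self_loops(edges: list[EntityEdge]) -> list[EntityEdge]:
    # Keep only edges whose source and target are distinct nodes,
    # mirroring the prompt's "two DISTINCT nodes" guideline.
    return [e for e in edges if e.source_node_uuid != e.target_node_uuid]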
graphiti_core/prompts/extract_nodes.py
CHANGED

@@ -55,6 +55,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
         1. Focus on entities, concepts, or actors that are central to the current episode.
         2. Avoid creating nodes for relationships or actions (these will be handled as edges later).
         3. Provide a brief but informative summary for each node.
+        4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
 
         Respond with a JSON object in the following format:
         {{
@@ -90,6 +91,7 @@ Guidelines:
         3. Provide concise but informative summaries for each extracted node.
         4. Avoid creating nodes for relationships or actions.
         5. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
+        6. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
 
         Respond with a JSON object in the following format:
         {{
graphiti_core/search/search.py
CHANGED
@@ -63,12 +63,12 @@ class SearchResults(BaseModel):
 
 
 async def hybrid_search(
-
-
-
-
-
-
+    driver: AsyncDriver,
+    embedder,
+    query: str,
+    timestamp: datetime,
+    config: SearchConfig,
+    center_node_uuid: str | None = None,
 ) -> SearchResults:
     start = time()
 
@@ -83,7 +83,7 @@ async def hybrid_search(
         nodes.extend(await get_mentioned_nodes(driver, episodes))
 
     if SearchMethod.bm25 in config.search_methods:
-        text_search = await edge_fulltext_search(
+        text_search = await edge_fulltext_search(driver, query, 2 * config.num_edges)
         search_results.append(text_search)
 
     if SearchMethod.cosine_similarity in config.search_methods:
@@ -95,7 +95,7 @@ async def hybrid_search(
         )
 
         similarity_search = await edge_similarity_search(
-
+            driver, search_vector, 2 * config.num_edges
         )
         search_results.append(similarity_search)
 
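With the parameters now spelled out, hybrid_search can be called directly rather than only through Graphiti._search. A sketch, assuming SearchConfig's remaining fields have defaults (only search_methods and num_edges appear in this diff):

from datetime import datetime

from graphiti_core.search.search import SearchConfig, SearchMethod, hybrid_search


async def find_edges(graphiti, query: str):
    config = SearchConfig(
        search_methods=[SearchMethod.bm25, SearchMethod.cosine_similarity],
    )
    # driver and embedder are taken from an initialized Graphiti instance,
    # exactly as Graphiti._search does in the graphiti.py diff above.
    return await hybrid_search(
        graphiti.driver,
        graphiti.llm_client.get_embedder(),
        query,
        datetime.now(),
        config,
    )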