graphiti-core 0.3.20__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/cross_encoder/openai_reranker_client.py +2 -2
- graphiti_core/graphiti.py +50 -71
- graphiti_core/helpers.py +1 -0
- graphiti_core/models/edges/edge_db_queries.py +16 -0
- graphiti_core/models/nodes/node_db_queries.py +16 -0
- graphiti_core/nodes.py +2 -2
- graphiti_core/prompts/dedupe_edges.py +9 -93
- graphiti_core/prompts/dedupe_nodes.py +19 -101
- graphiti_core/prompts/extract_edge_dates.py +14 -7
- graphiti_core/prompts/extract_edges.py +55 -81
- graphiti_core/prompts/extract_nodes.py +72 -96
- graphiti_core/prompts/summarize_nodes.py +40 -1
- graphiti_core/search/search.py +20 -0
- graphiti_core/search/search_config_recipes.py +35 -0
- graphiti_core/search/search_utils.py +5 -4
- graphiti_core/utils/bulk_utils.py +3 -3
- graphiti_core/utils/maintenance/community_operations.py +3 -3
- graphiti_core/utils/maintenance/edge_operations.py +87 -55
- graphiti_core/utils/maintenance/node_operations.py +122 -52
- {graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/METADATA +6 -5
- {graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/RECORD +23 -23
- {graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/LICENSE +0 -0
- {graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -64,10 +64,10 @@ class OpenAIRerankerClient(CrossEncoderClient):
|
|
|
64
64
|
content=f"""
|
|
65
65
|
Respond with "True" if PASSAGE is relevant to QUERY and "False" otherwise.
|
|
66
66
|
<PASSAGE>
|
|
67
|
-
{query}
|
|
68
|
-
</PASSAGE>
|
|
69
67
|
{passage}
|
|
68
|
+
</PASSAGE>
|
|
70
69
|
<QUERY>
|
|
70
|
+
{query}
|
|
71
71
|
</QUERY>
|
|
72
72
|
""",
|
|
73
73
|
),
|
graphiti_core/graphiti.py
CHANGED
|
@@ -16,7 +16,7 @@ limitations under the License.
|
|
|
16
16
|
|
|
17
17
|
import asyncio
|
|
18
18
|
import logging
|
|
19
|
-
from datetime import datetime
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
20
|
from time import time
|
|
21
21
|
|
|
22
22
|
from dotenv import load_dotenv
|
|
@@ -35,8 +35,6 @@ from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResul
|
|
|
35
35
|
from graphiti_core.search.search_config_recipes import (
|
|
36
36
|
EDGE_HYBRID_SEARCH_NODE_DISTANCE,
|
|
37
37
|
EDGE_HYBRID_SEARCH_RRF,
|
|
38
|
-
NODE_HYBRID_SEARCH_NODE_DISTANCE,
|
|
39
|
-
NODE_HYBRID_SEARCH_RRF,
|
|
40
38
|
)
|
|
41
39
|
from graphiti_core.search.search_utils import (
|
|
42
40
|
RELEVANT_SCHEMA_LIMIT,
|
|
@@ -65,7 +63,9 @@ from graphiti_core.utils.maintenance.community_operations import (
|
|
|
65
63
|
update_community,
|
|
66
64
|
)
|
|
67
65
|
from graphiti_core.utils.maintenance.edge_operations import (
|
|
66
|
+
dedupe_extracted_edge,
|
|
68
67
|
extract_edges,
|
|
68
|
+
resolve_edge_contradictions,
|
|
69
69
|
resolve_extracted_edges,
|
|
70
70
|
)
|
|
71
71
|
from graphiti_core.utils.maintenance.graph_data_operations import (
|
|
@@ -76,6 +76,7 @@ from graphiti_core.utils.maintenance.node_operations import (
|
|
|
76
76
|
extract_nodes,
|
|
77
77
|
resolve_extracted_nodes,
|
|
78
78
|
)
|
|
79
|
+
from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
|
|
79
80
|
|
|
80
81
|
logger = logging.getLogger(__name__)
|
|
81
82
|
|
|
@@ -312,10 +313,10 @@ class Graphiti:
|
|
|
312
313
|
start = time()
|
|
313
314
|
|
|
314
315
|
entity_edges: list[EntityEdge] = []
|
|
315
|
-
now = datetime.now()
|
|
316
|
+
now = datetime.now(timezone.utc)
|
|
316
317
|
|
|
317
318
|
previous_episodes = await self.retrieve_episodes(
|
|
318
|
-
reference_time, last_n=
|
|
319
|
+
reference_time, last_n=RELEVANT_SCHEMA_LIMIT, group_ids=[group_id]
|
|
319
320
|
)
|
|
320
321
|
episode = EpisodicNode(
|
|
321
322
|
name=name,
|
|
@@ -340,17 +341,24 @@ class Graphiti:
|
|
|
340
341
|
*[node.generate_name_embedding(self.embedder) for node in extracted_nodes]
|
|
341
342
|
)
|
|
342
343
|
|
|
343
|
-
#
|
|
344
|
+
# Find relevant nodes already in the graph
|
|
344
345
|
existing_nodes_lists: list[list[EntityNode]] = list(
|
|
345
346
|
await asyncio.gather(
|
|
346
|
-
*[get_relevant_nodes([node]
|
|
347
|
+
*[get_relevant_nodes(self.driver, [node]) for node in extracted_nodes]
|
|
347
348
|
)
|
|
348
349
|
)
|
|
349
350
|
|
|
351
|
+
# Resolve extracted nodes with nodes already in the graph and extract facts
|
|
350
352
|
logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
|
|
351
353
|
|
|
352
354
|
(mentioned_nodes, uuid_map), extracted_edges = await asyncio.gather(
|
|
353
|
-
resolve_extracted_nodes(
|
|
355
|
+
resolve_extracted_nodes(
|
|
356
|
+
self.llm_client,
|
|
357
|
+
extracted_nodes,
|
|
358
|
+
existing_nodes_lists,
|
|
359
|
+
episode,
|
|
360
|
+
previous_episodes,
|
|
361
|
+
),
|
|
354
362
|
extract_edges(
|
|
355
363
|
self.llm_client, episode, extracted_nodes, previous_episodes, group_id
|
|
356
364
|
),
|
|
@@ -448,7 +456,6 @@ class Graphiti:
|
|
|
448
456
|
|
|
449
457
|
episode.entity_edges = [edge.uuid for edge in entity_edges]
|
|
450
458
|
|
|
451
|
-
# Future optimization would be using batch operations to save nodes and edges
|
|
452
459
|
if not self.store_raw_episode_content:
|
|
453
460
|
episode.content = ''
|
|
454
461
|
|
|
@@ -511,7 +518,7 @@ class Graphiti:
|
|
|
511
518
|
"""
|
|
512
519
|
try:
|
|
513
520
|
start = time()
|
|
514
|
-
now = datetime.now()
|
|
521
|
+
now = datetime.now(timezone.utc)
|
|
515
522
|
|
|
516
523
|
episodes = [
|
|
517
524
|
EpisodicNode(
|
|
@@ -685,67 +692,6 @@ class Graphiti:
|
|
|
685
692
|
bfs_origin_node_uuids,
|
|
686
693
|
)
|
|
687
694
|
|
|
688
|
-
async def get_nodes_by_query(
|
|
689
|
-
self,
|
|
690
|
-
query: str,
|
|
691
|
-
center_node_uuid: str | None = None,
|
|
692
|
-
group_ids: list[str] | None = None,
|
|
693
|
-
limit: int = DEFAULT_SEARCH_LIMIT,
|
|
694
|
-
) -> list[EntityNode]:
|
|
695
|
-
"""
|
|
696
|
-
Retrieve nodes from the graph database based on a text query.
|
|
697
|
-
|
|
698
|
-
This method performs a hybrid search using both text-based and
|
|
699
|
-
embedding-based approaches to find relevant nodes.
|
|
700
|
-
|
|
701
|
-
Parameters
|
|
702
|
-
----------
|
|
703
|
-
query : str
|
|
704
|
-
The text query to search for in the graph
|
|
705
|
-
center_node_uuid: str, optional
|
|
706
|
-
Facts will be reranked based on proximity to this node.
|
|
707
|
-
group_ids : list[str | None] | None, optional
|
|
708
|
-
The graph partitions to return data from.
|
|
709
|
-
limit : int | None, optional
|
|
710
|
-
The maximum number of results to return per search method.
|
|
711
|
-
If None, a default limit will be applied.
|
|
712
|
-
|
|
713
|
-
Returns
|
|
714
|
-
-------
|
|
715
|
-
list[EntityNode]
|
|
716
|
-
A list of EntityNode objects that match the search criteria.
|
|
717
|
-
|
|
718
|
-
Notes
|
|
719
|
-
-----
|
|
720
|
-
This method uses the following steps:
|
|
721
|
-
1. Generates an embedding for the input query using the LLM client's embedder.
|
|
722
|
-
2. Calls the hybrid_node_search function with both the text query and its embedding.
|
|
723
|
-
3. The hybrid search combines fulltext search and vector similarity search
|
|
724
|
-
to find the most relevant nodes.
|
|
725
|
-
|
|
726
|
-
The method leverages the LLM client's embedding capabilities to enhance
|
|
727
|
-
the search with semantic similarity matching. The 'limit' parameter is applied
|
|
728
|
-
to each individual search method before results are combined and deduplicated.
|
|
729
|
-
If not specified, a default limit (defined in the search functions) will be used.
|
|
730
|
-
"""
|
|
731
|
-
search_config = (
|
|
732
|
-
NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
|
|
733
|
-
)
|
|
734
|
-
search_config.limit = limit
|
|
735
|
-
|
|
736
|
-
nodes = (
|
|
737
|
-
await search(
|
|
738
|
-
self.driver,
|
|
739
|
-
self.embedder,
|
|
740
|
-
self.cross_encoder,
|
|
741
|
-
query,
|
|
742
|
-
group_ids,
|
|
743
|
-
search_config,
|
|
744
|
-
center_node_uuid,
|
|
745
|
-
)
|
|
746
|
-
).nodes
|
|
747
|
-
return nodes
|
|
748
|
-
|
|
749
695
|
async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
|
|
750
696
|
episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
|
|
751
697
|
|
|
@@ -760,3 +706,36 @@ class Graphiti:
|
|
|
760
706
|
communities = await get_communities_by_nodes(self.driver, nodes)
|
|
761
707
|
|
|
762
708
|
return SearchResults(edges=edges, nodes=nodes, communities=communities)
|
|
709
|
+
|
|
710
|
+
async def add_triplet(self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode):
|
|
711
|
+
if source_node.name_embedding is None:
|
|
712
|
+
await source_node.generate_name_embedding(self.embedder)
|
|
713
|
+
if target_node.name_embedding is None:
|
|
714
|
+
await target_node.generate_name_embedding(self.embedder)
|
|
715
|
+
if edge.fact_embedding is None:
|
|
716
|
+
await edge.generate_embedding(self.embedder)
|
|
717
|
+
|
|
718
|
+
resolved_nodes, _ = await resolve_extracted_nodes(
|
|
719
|
+
self.llm_client,
|
|
720
|
+
[source_node, target_node],
|
|
721
|
+
[
|
|
722
|
+
await get_relevant_nodes(self.driver, [source_node]),
|
|
723
|
+
await get_relevant_nodes(self.driver, [target_node]),
|
|
724
|
+
],
|
|
725
|
+
)
|
|
726
|
+
|
|
727
|
+
related_edges = await get_relevant_edges(
|
|
728
|
+
self.driver,
|
|
729
|
+
[edge],
|
|
730
|
+
source_node_uuid=resolved_nodes[0].uuid,
|
|
731
|
+
target_node_uuid=resolved_nodes[1].uuid,
|
|
732
|
+
)
|
|
733
|
+
|
|
734
|
+
resolved_edge = await dedupe_extracted_edge(self.llm_client, edge, related_edges)
|
|
735
|
+
|
|
736
|
+
contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges)
|
|
737
|
+
invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
|
|
738
|
+
|
|
739
|
+
await add_nodes_and_edges_bulk(
|
|
740
|
+
self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges
|
|
741
|
+
)
|
graphiti_core/helpers.py
CHANGED
|
@@ -25,6 +25,7 @@ load_dotenv()
|
|
|
25
25
|
|
|
26
26
|
DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', None)
|
|
27
27
|
USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
|
|
28
|
+
MAX_REFLEXION_ITERATIONS = 2
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None:
|
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, Zep Software, Inc.
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
1
17
|
EPISODIC_EDGE_SAVE = """
|
|
2
18
|
MATCH (episode:Episodic {uuid: $episode_uuid})
|
|
3
19
|
MATCH (node:Entity {uuid: $entity_uuid})
|
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024, Zep Software, Inc.
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
1
17
|
EPISODIC_NODE_SAVE = """
|
|
2
18
|
MERGE (n:Episodic {uuid: $uuid})
|
|
3
19
|
SET n = {uuid: $uuid, name: $name, group_id: $group_id, source_description: $source_description, source: $source, content: $content,
|
graphiti_core/nodes.py
CHANGED
|
@@ -16,7 +16,7 @@ limitations under the License.
|
|
|
16
16
|
|
|
17
17
|
import logging
|
|
18
18
|
from abc import ABC, abstractmethod
|
|
19
|
-
from datetime import datetime
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
20
|
from enum import Enum
|
|
21
21
|
from time import time
|
|
22
22
|
from typing import Any
|
|
@@ -78,7 +78,7 @@ class Node(BaseModel, ABC):
|
|
|
78
78
|
name: str = Field(description='name of the node')
|
|
79
79
|
group_id: str = Field(description='partition of the graph')
|
|
80
80
|
labels: list[str] = Field(default_factory=list)
|
|
81
|
-
created_at: datetime = Field(default_factory=lambda: datetime.now())
|
|
81
|
+
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
82
82
|
|
|
83
83
|
@abstractmethod
|
|
84
84
|
async def save(self, driver: AsyncDriver): ...
|
|
@@ -21,103 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class Prompt(Protocol):
|
|
24
|
-
|
|
25
|
-
v2: PromptVersion
|
|
26
|
-
v3: PromptVersion
|
|
24
|
+
edge: PromptVersion
|
|
27
25
|
edge_list: PromptVersion
|
|
28
26
|
|
|
29
27
|
|
|
30
28
|
class Versions(TypedDict):
|
|
31
|
-
|
|
32
|
-
v2: PromptFunction
|
|
33
|
-
v3: PromptFunction
|
|
29
|
+
edge: PromptFunction
|
|
34
30
|
edge_list: PromptFunction
|
|
35
31
|
|
|
36
32
|
|
|
37
|
-
def
|
|
38
|
-
return [
|
|
39
|
-
Message(
|
|
40
|
-
role='system',
|
|
41
|
-
content='You are a helpful assistant that de-duplicates relationship from edge lists.',
|
|
42
|
-
),
|
|
43
|
-
Message(
|
|
44
|
-
role='user',
|
|
45
|
-
content=f"""
|
|
46
|
-
Given the following context, deduplicate facts from a list of new facts given a list of existing edges:
|
|
47
|
-
|
|
48
|
-
Existing Edges:
|
|
49
|
-
{json.dumps(context['existing_edges'], indent=2)}
|
|
50
|
-
|
|
51
|
-
New Edges:
|
|
52
|
-
{json.dumps(context['extracted_edges'], indent=2)}
|
|
53
|
-
|
|
54
|
-
Task:
|
|
55
|
-
If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
|
|
56
|
-
When finding duplicates edges, synthesize their facts into a short new fact.
|
|
57
|
-
|
|
58
|
-
Guidelines:
|
|
59
|
-
1. identical or near identical facts are duplicates
|
|
60
|
-
2. Facts are also duplicates if they are represented by similar sentences
|
|
61
|
-
3. Facts will often discuss the same or similar relation between identical entities
|
|
62
|
-
|
|
63
|
-
Respond with a JSON object in the following format:
|
|
64
|
-
{{
|
|
65
|
-
"duplicates": [
|
|
66
|
-
{{
|
|
67
|
-
"uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
|
|
68
|
-
"duplicate_of": "uuid of the existing node",
|
|
69
|
-
"fact": "one sentence description of the fact"
|
|
70
|
-
}}
|
|
71
|
-
]
|
|
72
|
-
}}
|
|
73
|
-
""",
|
|
74
|
-
),
|
|
75
|
-
]
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def v2(context: dict[str, Any]) -> list[Message]:
|
|
79
|
-
return [
|
|
80
|
-
Message(
|
|
81
|
-
role='system',
|
|
82
|
-
content='You are a helpful assistant that de-duplicates relationship from edge lists.',
|
|
83
|
-
),
|
|
84
|
-
Message(
|
|
85
|
-
role='user',
|
|
86
|
-
content=f"""
|
|
87
|
-
Given the following context, deduplicate edges from a list of new edges given a list of existing edges:
|
|
88
|
-
|
|
89
|
-
Existing Edges:
|
|
90
|
-
{json.dumps(context['existing_edges'], indent=2)}
|
|
91
|
-
|
|
92
|
-
New Edges:
|
|
93
|
-
{json.dumps(context['extracted_edges'], indent=2)}
|
|
94
|
-
|
|
95
|
-
Task:
|
|
96
|
-
1. start with the list of edges from New Edges
|
|
97
|
-
2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
|
|
98
|
-
edge in the list
|
|
99
|
-
3. Respond with the resulting list of edges
|
|
100
|
-
|
|
101
|
-
Guidelines:
|
|
102
|
-
1. Use both the triplet name and fact of edges to determine if they are duplicates,
|
|
103
|
-
duplicate edges may have different names meaning the same thing and slight variations in the facts.
|
|
104
|
-
2. If you encounter facts that are semantically equivalent or very similar, keep the original edge
|
|
105
|
-
|
|
106
|
-
Respond with a JSON object in the following format:
|
|
107
|
-
{{
|
|
108
|
-
"new_edges": [
|
|
109
|
-
{{
|
|
110
|
-
"triplet": "source_node_name-edge_name-target_node_name",
|
|
111
|
-
"fact": "one sentence description of the fact"
|
|
112
|
-
}}
|
|
113
|
-
]
|
|
114
|
-
}}
|
|
115
|
-
""",
|
|
116
|
-
),
|
|
117
|
-
]
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def v3(context: dict[str, Any]) -> list[Message]:
|
|
33
|
+
def edge(context: dict[str, Any]) -> list[Message]:
|
|
121
34
|
return [
|
|
122
35
|
Message(
|
|
123
36
|
role='system',
|
|
@@ -128,11 +41,14 @@ def v3(context: dict[str, Any]) -> list[Message]:
|
|
|
128
41
|
content=f"""
|
|
129
42
|
Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
|
|
130
43
|
|
|
131
|
-
|
|
44
|
+
<EXISTING EDGES>
|
|
132
45
|
{json.dumps(context['related_edges'], indent=2)}
|
|
46
|
+
</EXISTING EDGES>
|
|
133
47
|
|
|
134
|
-
|
|
48
|
+
<NEW EDGE>
|
|
135
49
|
{json.dumps(context['extracted_edges'], indent=2)}
|
|
50
|
+
</NEW EDGE>
|
|
51
|
+
|
|
136
52
|
Task:
|
|
137
53
|
1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
|
|
138
54
|
response. Otherwise, return 'is_duplicate: false'
|
|
@@ -189,4 +105,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
|
|
|
189
105
|
]
|
|
190
106
|
|
|
191
107
|
|
|
192
|
-
versions: Versions = {'
|
|
108
|
+
versions: Versions = {'edge': edge, 'edge_list': edge_list}
|
|
@@ -21,20 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class Prompt(Protocol):
|
|
24
|
-
|
|
25
|
-
v2: PromptVersion
|
|
26
|
-
v3: PromptVersion
|
|
24
|
+
node: PromptVersion
|
|
27
25
|
node_list: PromptVersion
|
|
28
26
|
|
|
29
27
|
|
|
30
28
|
class Versions(TypedDict):
|
|
31
|
-
|
|
32
|
-
v2: PromptFunction
|
|
33
|
-
v3: PromptFunction
|
|
29
|
+
node: PromptFunction
|
|
34
30
|
node_list: PromptFunction
|
|
35
31
|
|
|
36
32
|
|
|
37
|
-
def
|
|
33
|
+
def node(context: dict[str, Any]) -> list[Message]:
|
|
38
34
|
return [
|
|
39
35
|
Message(
|
|
40
36
|
role='system',
|
|
@@ -43,106 +39,28 @@ def v1(context: dict[str, Any]) -> list[Message]:
|
|
|
43
39
|
Message(
|
|
44
40
|
role='user',
|
|
45
41
|
content=f"""
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
42
|
+
<PREVIOUS MESSAGES>
|
|
43
|
+
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
44
|
+
</PREVIOUS MESSAGES>
|
|
45
|
+
<CURRENT MESSAGE>
|
|
46
|
+
{context["episode_content"]}
|
|
47
|
+
</CURRENT MESSAGE>
|
|
48
|
+
|
|
49
|
+
<EXISTING NODES>
|
|
49
50
|
{json.dumps(context['existing_nodes'], indent=2)}
|
|
50
|
-
|
|
51
|
-
New Nodes:
|
|
52
|
-
{json.dumps(context['extracted_nodes'], indent=2)}
|
|
51
|
+
</EXISTING NODES>
|
|
53
52
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
2. If any node in New Nodes is a duplicate of a node in Existing Nodes, replace the new node with the existing
|
|
57
|
-
node in the list
|
|
58
|
-
3. when deduplicating nodes, synthesize their summaries into a short new summary that contains the relevant information
|
|
59
|
-
of the summaries of the new and existing nodes
|
|
60
|
-
4. Respond with the resulting list of nodes
|
|
61
|
-
|
|
62
|
-
Guidelines:
|
|
63
|
-
1. Use both the name and summary of nodes to determine if they are duplicates,
|
|
64
|
-
duplicate nodes may have different names
|
|
65
|
-
|
|
66
|
-
Respond with a JSON object in the following format:
|
|
67
|
-
{{
|
|
68
|
-
"new_nodes": [
|
|
69
|
-
{{
|
|
70
|
-
"name": "Unique identifier for the node",
|
|
71
|
-
"summary": "Brief summary of the node's role or significance"
|
|
72
|
-
}}
|
|
73
|
-
]
|
|
74
|
-
}}
|
|
75
|
-
""",
|
|
76
|
-
),
|
|
77
|
-
]
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def v2(context: dict[str, Any]) -> list[Message]:
|
|
81
|
-
return [
|
|
82
|
-
Message(
|
|
83
|
-
role='system',
|
|
84
|
-
content='You are a helpful assistant that de-duplicates nodes from node lists.',
|
|
85
|
-
),
|
|
86
|
-
Message(
|
|
87
|
-
role='user',
|
|
88
|
-
content=f"""
|
|
89
|
-
Given the following context, deduplicate nodes from a list of new nodes given a list of existing nodes:
|
|
90
|
-
|
|
91
|
-
Existing Nodes:
|
|
92
|
-
{json.dumps(context['existing_nodes'], indent=2)}
|
|
93
|
-
|
|
94
|
-
New Nodes:
|
|
95
|
-
{json.dumps(context['extracted_nodes'], indent=2)}
|
|
96
|
-
Important:
|
|
97
|
-
If a node in the new nodes is describing the same entity as a node in the existing nodes, mark it as a duplicate!!!
|
|
98
|
-
Task:
|
|
99
|
-
If any node in New Nodes is a duplicate of a node in Existing Nodes, add their uuids to the output list
|
|
100
|
-
When finding duplicates nodes, synthesize their summaries into a short new summary that contains the
|
|
101
|
-
relevant information of the summaries of the new and existing nodes.
|
|
102
|
-
|
|
103
|
-
Guidelines:
|
|
104
|
-
1. Use both the name and summary of nodes to determine if they are duplicates,
|
|
105
|
-
duplicate nodes may have different names
|
|
106
|
-
2. In the output, uuid should always be the uuid of the New Node that is a duplicate. duplicate_of should be
|
|
107
|
-
the uuid of the Existing Node.
|
|
108
|
-
|
|
109
|
-
Respond with a JSON object in the following format:
|
|
110
|
-
{{
|
|
111
|
-
"duplicates": [
|
|
112
|
-
{{
|
|
113
|
-
"uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
|
|
114
|
-
"duplicate_of": "uuid of the existing node",
|
|
115
|
-
"summary": "Brief summary of the node's role or significance. Takes information from the new and existing nodes"
|
|
116
|
-
}}
|
|
117
|
-
]
|
|
118
|
-
}}
|
|
119
|
-
""",
|
|
120
|
-
),
|
|
121
|
-
]
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def v3(context: dict[str, Any]) -> list[Message]:
|
|
125
|
-
return [
|
|
126
|
-
Message(
|
|
127
|
-
role='system',
|
|
128
|
-
content='You are a helpful assistant that de-duplicates nodes from node lists.',
|
|
129
|
-
),
|
|
130
|
-
Message(
|
|
131
|
-
role='user',
|
|
132
|
-
content=f"""
|
|
133
|
-
Given the following context, determine whether the New Node represents any of the entities in the list of Existing Nodes.
|
|
134
|
-
|
|
135
|
-
Existing Nodes:
|
|
136
|
-
{json.dumps(context['existing_nodes'], indent=2)}
|
|
53
|
+
Given the above EXISTING NODES, MESSAGE, and PREVIOUS MESSAGES. Determine if the NEW NODE extracted from the conversation
|
|
54
|
+
is a duplicate entity of one of the EXISTING NODES.
|
|
137
55
|
|
|
138
|
-
|
|
56
|
+
<NEW NODE>
|
|
139
57
|
{json.dumps(context['extracted_nodes'], indent=2)}
|
|
58
|
+
</NEW NODE>
|
|
140
59
|
Task:
|
|
141
60
|
1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
|
|
142
61
|
response. Otherwise, return 'is_duplicate: false'
|
|
143
62
|
2. If is_duplicate is true, also return the uuid of the existing node in the response
|
|
144
|
-
3. If is_duplicate is true, return a
|
|
145
|
-
summary of the Existing Node it is a duplicate of.
|
|
63
|
+
3. If is_duplicate is true, return a name for the node that is the most complete full name.
|
|
146
64
|
|
|
147
65
|
Guidelines:
|
|
148
66
|
1. Use both the name and summary of nodes to determine if the entities are duplicates,
|
|
@@ -152,7 +70,7 @@ def v3(context: dict[str, Any]) -> list[Message]:
|
|
|
152
70
|
{{
|
|
153
71
|
"is_duplicate": true or false,
|
|
154
72
|
"uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
|
|
155
|
-
"
|
|
73
|
+
"name": "Updated name of the new node (use the best name between the new node's name, an existing duplicate name, or a combination of both)"
|
|
156
74
|
}}
|
|
157
75
|
""",
|
|
158
76
|
),
|
|
@@ -196,4 +114,4 @@ def node_list(context: dict[str, Any]) -> list[Message]:
|
|
|
196
114
|
]
|
|
197
115
|
|
|
198
116
|
|
|
199
|
-
versions: Versions = {'
|
|
117
|
+
versions: Versions = {'node': node, 'node_list': node_list}
|
|
@@ -36,12 +36,19 @@ def v1(context: dict[str, Any]) -> list[Message]:
|
|
|
36
36
|
Message(
|
|
37
37
|
role='user',
|
|
38
38
|
content=f"""
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
39
|
+
<PREVIOUS MESSAGES>
|
|
40
|
+
{context['previous_episodes']}
|
|
41
|
+
</PREVIOUS MESSAGES>
|
|
42
|
+
<CURRENT MESSAGE>
|
|
43
|
+
{context["current_episode"]}
|
|
44
|
+
</CURRENT MESSAGE>
|
|
45
|
+
<REFERENCE TIMESTAMP>
|
|
46
|
+
{context['reference_timestamp']}
|
|
47
|
+
</REFERENCE TIMESTAMP>
|
|
48
|
+
|
|
49
|
+
<FACT>
|
|
50
|
+
{context['edge_fact']}
|
|
51
|
+
</FACT>
|
|
45
52
|
|
|
46
53
|
IMPORTANT: Only extract time information if it is part of the provided fact. Otherwise ignore the time mentioned. Make sure to do your best to determine the dates if only the relative time is mentioned. (eg 10 years ago, 2 mins ago) based on the provided reference timestamp
|
|
47
54
|
If the relationship is not of spanning nature, but you are still able to determine the dates, set the valid_at only.
|
|
@@ -60,7 +67,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
|
|
|
60
67
|
5. Do not infer dates from related events. Only use dates that are directly stated to establish or change the relationship.
|
|
61
68
|
6. For relative time mentions directly related to the relationship, calculate the actual datetime based on the reference timestamp.
|
|
62
69
|
7. If only a date is mentioned without a specific time, use 00:00:00 (midnight) for that date.
|
|
63
|
-
8. If only
|
|
70
|
+
8. If only year is mentioned, use January 1st of that year at 00:00:00.
|
|
64
71
|
9. Always include the time zone offset (use Z for UTC if no specific time zone is mentioned).
|
|
65
72
|
Respond with a JSON object:
|
|
66
73
|
{{
|