graphiti-core 0.3.20__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graphiti-core has been flagged as potentially problematic.
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/PKG-INFO +6 -5
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/README.md +2 -2
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/graphiti.py +50 -71
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/helpers.py +1 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/edges/edge_db_queries.py +16 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/nodes/node_db_queries.py +16 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/nodes.py +2 -2
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/dedupe_edges.py +9 -93
- graphiti_core-0.4.0/graphiti_core/prompts/dedupe_nodes.py +117 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/extract_edge_dates.py +14 -7
- graphiti_core-0.4.0/graphiti_core/prompts/extract_edges.py +114 -0
- graphiti_core-0.4.0/graphiti_core/prompts/extract_nodes.py +165 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/summarize_nodes.py +40 -1
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search.py +20 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_config_recipes.py +35 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_utils.py +5 -4
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/bulk_utils.py +3 -3
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/community_operations.py +3 -3
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/edge_operations.py +87 -55
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/node_operations.py +122 -52
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/pyproject.toml +6 -6
- graphiti_core-0.3.20/graphiti_core/prompts/dedupe_nodes.py +0 -199
- graphiti_core-0.3.20/graphiti_core/prompts/extract_edges.py +0 -140
- graphiti_core-0.3.20/graphiti_core/prompts/extract_nodes.py +0 -189
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/LICENSE +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/bge_reranker_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/openai_reranker_client.py +2 -2
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/edges.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/openai.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/voyage.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/errors.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/anthropic_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/config.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/errors.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/groq_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/openai_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/utils.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/edges/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/nodes/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/eval.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/invalidate_edges.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/lib.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/models.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/py.typed +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_config.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/utils.py +0 -0
````diff
--- graphiti_core-0.3.20/PKG-INFO
+++ graphiti_core-0.4.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphiti-core
-Version: 0.3.20
+Version: 0.4.0
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
@@ -14,9 +14,10 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: diskcache (>=5.6.3,<6.0.0)
 Requires-Dist: neo4j (>=5.23.0,<6.0.0)
 Requires-Dist: numpy (>=1.0.0)
-Requires-Dist: openai (>=1.…
+Requires-Dist: openai (>=1.53.0,<2.0.0)
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
-Requires-Dist: …
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+Requires-Dist: tenacity (==9.0.0)
 Description-Content-Type: text/markdown
 
 <div align="center">
@@ -129,7 +130,7 @@ poetry add graphiti-core
 ```python
 from graphiti_core import Graphiti
 from graphiti_core.nodes import EpisodeType
-from datetime import datetime
+from datetime import datetime, timezone
 
 # Initialize Graphiti
 graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password")
@@ -149,7 +150,7 @@ for i, episode in enumerate(episodes):
         episode_body=episode,
         source=EpisodeType.text,
         source_description="podcast",
-        reference_time=datetime.now()
+        reference_time=datetime.now(timezone.utc)
     )
 
 # Search the graph
````
````diff
--- graphiti_core-0.3.20/README.md
+++ graphiti_core-0.4.0/README.md
@@ -108,7 +108,7 @@ poetry add graphiti-core
 ```python
 from graphiti_core import Graphiti
 from graphiti_core.nodes import EpisodeType
-from datetime import datetime
+from datetime import datetime, timezone
 
 # Initialize Graphiti
 graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password")
@@ -128,7 +128,7 @@ for i, episode in enumerate(episodes):
         episode_body=episode,
         source=EpisodeType.text,
         source_description="podcast",
-        reference_time=datetime.now()
+        reference_time=datetime.now(timezone.utc)
     )
 
 # Search the graph
````
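Both README variants (the standalone file and the copy embedded in PKG-INFO) now build timezone-aware timestamps. This matters because Python refuses to compare offset-naive and offset-aware datetimes, so a naive `reference_time` can break temporal comparisons against stored UTC values. A minimal illustration, not taken from the package:

```python
from datetime import datetime, timezone

naive = datetime.now()              # no tzinfo attached
aware = datetime.now(timezone.utc)  # explicit UTC offset

print(naive.tzinfo)  # None
print(aware.tzinfo)  # UTC

# Mixing the two in a comparison raises TypeError
# ("can't compare offset-naive and offset-aware datetimes"),
# which is why the examples switched to datetime.now(timezone.utc).
try:
    print(naive < aware)
except TypeError as err:
    print(err)
```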
```diff
--- graphiti_core-0.3.20/graphiti_core/graphiti.py
+++ graphiti_core-0.4.0/graphiti_core/graphiti.py
@@ -16,7 +16,7 @@ limitations under the License.
 
 import asyncio
 import logging
-from datetime import datetime
+from datetime import datetime, timezone
 from time import time
 
 from dotenv import load_dotenv
@@ -35,8 +35,6 @@ from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResul
 from graphiti_core.search.search_config_recipes import (
     EDGE_HYBRID_SEARCH_NODE_DISTANCE,
     EDGE_HYBRID_SEARCH_RRF,
-    NODE_HYBRID_SEARCH_NODE_DISTANCE,
-    NODE_HYBRID_SEARCH_RRF,
 )
 from graphiti_core.search.search_utils import (
     RELEVANT_SCHEMA_LIMIT,
@@ -65,7 +63,9 @@ from graphiti_core.utils.maintenance.community_operations import (
     update_community,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
+    dedupe_extracted_edge,
     extract_edges,
+    resolve_edge_contradictions,
     resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
@@ -76,6 +76,7 @@ from graphiti_core.utils.maintenance.node_operations import (
     extract_nodes,
     resolve_extracted_nodes,
 )
+from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
 
 logger = logging.getLogger(__name__)
 
@@ -312,10 +313,10 @@ class Graphiti:
             start = time()
 
             entity_edges: list[EntityEdge] = []
-            now = datetime.now()
+            now = datetime.now(timezone.utc)
 
             previous_episodes = await self.retrieve_episodes(
-                reference_time, last_n=…
+                reference_time, last_n=RELEVANT_SCHEMA_LIMIT, group_ids=[group_id]
             )
             episode = EpisodicNode(
                 name=name,
@@ -340,17 +341,24 @@ class Graphiti:
                 *[node.generate_name_embedding(self.embedder) for node in extracted_nodes]
             )
 
-            # …
+            # Find relevant nodes already in the graph
             existing_nodes_lists: list[list[EntityNode]] = list(
                 await asyncio.gather(
-                    *[get_relevant_nodes([node]…
+                    *[get_relevant_nodes(self.driver, [node]) for node in extracted_nodes]
                 )
             )
 
+            # Resolve extracted nodes with nodes already in the graph and extract facts
             logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
 
             (mentioned_nodes, uuid_map), extracted_edges = await asyncio.gather(
-                resolve_extracted_nodes(…
+                resolve_extracted_nodes(
+                    self.llm_client,
+                    extracted_nodes,
+                    existing_nodes_lists,
+                    episode,
+                    previous_episodes,
+                ),
                 extract_edges(
                     self.llm_client, episode, extracted_nodes, previous_episodes, group_id
                 ),
@@ -448,7 +456,6 @@ class Graphiti:
 
             episode.entity_edges = [edge.uuid for edge in entity_edges]
 
-            # Future optimization would be using batch operations to save nodes and edges
             if not self.store_raw_episode_content:
                 episode.content = ''
 
@@ -511,7 +518,7 @@ class Graphiti:
         """
         try:
             start = time()
-            now = datetime.now()
+            now = datetime.now(timezone.utc)
 
             episodes = [
                 EpisodicNode(
@@ -685,67 +692,6 @@ class Graphiti:
             bfs_origin_node_uuids,
         )
 
-    async def get_nodes_by_query(
-        self,
-        query: str,
-        center_node_uuid: str | None = None,
-        group_ids: list[str] | None = None,
-        limit: int = DEFAULT_SEARCH_LIMIT,
-    ) -> list[EntityNode]:
-        """
-        Retrieve nodes from the graph database based on a text query.
-
-        This method performs a hybrid search using both text-based and
-        embedding-based approaches to find relevant nodes.
-
-        Parameters
-        ----------
-        query : str
-            The text query to search for in the graph
-        center_node_uuid: str, optional
-            Facts will be reranked based on proximity to this node.
-        group_ids : list[str | None] | None, optional
-            The graph partitions to return data from.
-        limit : int | None, optional
-            The maximum number of results to return per search method.
-            If None, a default limit will be applied.
-
-        Returns
-        -------
-        list[EntityNode]
-            A list of EntityNode objects that match the search criteria.
-
-        Notes
-        -----
-        This method uses the following steps:
-        1. Generates an embedding for the input query using the LLM client's embedder.
-        2. Calls the hybrid_node_search function with both the text query and its embedding.
-        3. The hybrid search combines fulltext search and vector similarity search
-        to find the most relevant nodes.
-
-        The method leverages the LLM client's embedding capabilities to enhance
-        the search with semantic similarity matching. The 'limit' parameter is applied
-        to each individual search method before results are combined and deduplicated.
-        If not specified, a default limit (defined in the search functions) will be used.
-        """
-        search_config = (
-            NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
-        )
-        search_config.limit = limit
-
-        nodes = (
-            await search(
-                self.driver,
-                self.embedder,
-                self.cross_encoder,
-                query,
-                group_ids,
-                search_config,
-                center_node_uuid,
-            )
-        ).nodes
-        return nodes
-
     async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
         episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
 
```
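`get_nodes_by_query` is gone, but its whole body was a thin wrapper over the recipe-based search API, and the recipes it used remain exported from `search_config_recipes` (which gains 35 lines in this release). A sketch of the equivalent call, reconstructed from the removed body above rather than from 0.4.0's documented API:

```python
from graphiti_core.search.search import search
from graphiti_core.search.search_config_recipes import (
    NODE_HYBRID_SEARCH_NODE_DISTANCE,
    NODE_HYBRID_SEARCH_RRF,
)


async def nodes_by_query(graphiti, query, center_node_uuid=None, group_ids=None, limit=10):
    # Same recipe selection the removed method used: RRF reranking by default,
    # node-distance reranking when a center node is supplied.
    config = (
        NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
    )
    config.limit = limit

    results = await search(
        graphiti.driver,
        graphiti.embedder,
        graphiti.cross_encoder,
        query,
        group_ids,
        config,
        center_node_uuid,
    )
    return results.nodes
```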
graphiti_core/graphiti.py (continued):

```diff
@@ -760,3 +706,36 @@ class Graphiti:
         communities = await get_communities_by_nodes(self.driver, nodes)
 
         return SearchResults(edges=edges, nodes=nodes, communities=communities)
+
+    async def add_triplet(self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode):
+        if source_node.name_embedding is None:
+            await source_node.generate_name_embedding(self.embedder)
+        if target_node.name_embedding is None:
+            await target_node.generate_name_embedding(self.embedder)
+        if edge.fact_embedding is None:
+            await edge.generate_embedding(self.embedder)
+
+        resolved_nodes, _ = await resolve_extracted_nodes(
+            self.llm_client,
+            [source_node, target_node],
+            [
+                await get_relevant_nodes(self.driver, [source_node]),
+                await get_relevant_nodes(self.driver, [target_node]),
+            ],
+        )
+
+        related_edges = await get_relevant_edges(
+            self.driver,
+            [edge],
+            source_node_uuid=resolved_nodes[0].uuid,
+            target_node_uuid=resolved_nodes[1].uuid,
+        )
+
+        resolved_edge = await dedupe_extracted_edge(self.llm_client, edge, related_edges)
+
+        contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges)
+        invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
+
+        await add_nodes_and_edges_bulk(
+            self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges
+        )
```
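`add_triplet` is a new public entry point: it writes a single source-edge-target fact into the graph, generating any missing embeddings, deduplicating the pieces against existing nodes and edges, and invalidating contradicted edges before a bulk save. A hedged usage sketch; the `EntityNode`/`EntityEdge` constructor fields below are assumptions, not confirmed by this diff:

```python
from datetime import datetime, timezone

from graphiti_core import Graphiti
from graphiti_core.edges import EntityEdge
from graphiti_core.nodes import EntityNode


async def main():
    graphiti = Graphiti('bolt://localhost:7687', 'neo4j', 'password')

    # Field names here are illustrative guesses at the model constructors.
    alice = EntityNode(name='Alice', group_id='demo', labels=['Entity'], summary='')
    acme = EntityNode(name='Acme Corp', group_id='demo', labels=['Entity'], summary='')
    works_at = EntityEdge(
        source_node_uuid=alice.uuid,
        target_node_uuid=acme.uuid,
        name='WORKS_AT',
        fact='Alice works at Acme Corp',
        episodes=[],
        group_id='demo',
        created_at=datetime.now(timezone.utc),
    )

    # Embeddings are generated inside add_triplet when missing.
    await graphiti.add_triplet(alice, works_at, acme)
```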
```diff
--- graphiti_core-0.3.20/graphiti_core/helpers.py
+++ graphiti_core-0.4.0/graphiti_core/helpers.py
@@ -25,6 +25,7 @@ load_dotenv()
 
 DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', None)
 USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
+MAX_REFLEXION_ITERATIONS = 2
 
 
 def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None:
```
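`MAX_REFLEXION_ITERATIONS` is a new module-level constant; given the reworked `extract_nodes.py`/`extract_edges.py` prompts in this release, it presumably caps a reflexion-style re-prompting loop during extraction. An illustrative sketch of such a bounded loop, not the package's actual code:

```python
MAX_REFLEXION_ITERATIONS = 2  # mirrors the new constant in helpers.py


async def extract_with_reflexion(run_extraction, find_missed_entities):
    """Hypothetical helpers: re-prompt until nothing is missed or the cap is hit."""
    entities = await run_extraction(missed=[])
    for _ in range(MAX_REFLEXION_ITERATIONS):
        missed = await find_missed_entities(entities)
        if not missed:
            break
        # Feed the critique back into the next extraction prompt.
        entities = await run_extraction(missed=missed)
    return entities
```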
```diff
--- graphiti_core-0.3.20/graphiti_core/models/edges/edge_db_queries.py
+++ graphiti_core-0.4.0/graphiti_core/models/edges/edge_db_queries.py
@@ -1,3 +1,19 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
 EPISODIC_EDGE_SAVE = """
     MATCH (episode:Episodic {uuid: $episode_uuid})
     MATCH (node:Entity {uuid: $entity_uuid})
```
```diff
--- graphiti_core-0.3.20/graphiti_core/models/nodes/node_db_queries.py
+++ graphiti_core-0.4.0/graphiti_core/models/nodes/node_db_queries.py
@@ -1,3 +1,19 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
 EPISODIC_NODE_SAVE = """
     MERGE (n:Episodic {uuid: $uuid})
     SET n = {uuid: $uuid, name: $name, group_id: $group_id, source_description: $source_description, source: $source, content: $content,
```
```diff
--- graphiti_core-0.3.20/graphiti_core/nodes.py
+++ graphiti_core-0.4.0/graphiti_core/nodes.py
@@ -16,7 +16,7 @@ limitations under the License.
 
 import logging
 from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime, timezone
 from enum import Enum
 from time import time
 from typing import Any
@@ -78,7 +78,7 @@ class Node(BaseModel, ABC):
     name: str = Field(description='name of the node')
     group_id: str = Field(description='partition of the graph')
     labels: list[str] = Field(default_factory=list)
-    created_at: datetime = Field(default_factory=lambda: datetime.now())
+    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
 
     @abstractmethod
     async def save(self, driver: AsyncDriver): ...
```
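Because `default_factory` is evaluated once per instance, every node still gets its own creation time, now offset-aware. The same pattern outside the package:

```python
from datetime import datetime, timezone

from pydantic import BaseModel, Field


class Stamped(BaseModel):
    # The lambda runs at each instantiation, so timestamps differ per object
    # and always carry an explicit UTC offset.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))


a, b = Stamped(), Stamped()
print(a.created_at.tzinfo)           # UTC
print(a.created_at <= b.created_at)  # True: a was created first
```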
```diff
--- graphiti_core-0.3.20/graphiti_core/prompts/dedupe_edges.py
+++ graphiti_core-0.4.0/graphiti_core/prompts/dedupe_edges.py
@@ -21,103 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
 
 
 class Prompt(Protocol):
-    v1: PromptVersion
-    v2: PromptVersion
-    v3: PromptVersion
+    edge: PromptVersion
     edge_list: PromptVersion
 
 
 class Versions(TypedDict):
-    v1: PromptFunction
-    v2: PromptFunction
-    v3: PromptFunction
+    edge: PromptFunction
     edge_list: PromptFunction
 
 
-def v1(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates relationship from edge lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate facts from a list of new facts given a list of existing edges:
-
-        Existing Edges:
-        {json.dumps(context['existing_edges'], indent=2)}
-
-        New Edges:
-        {json.dumps(context['extracted_edges'], indent=2)}
-
-        Task:
-        If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
-        When finding duplicates edges, synthesize their facts into a short new fact.
-
-        Guidelines:
-        1. identical or near identical facts are duplicates
-        2. Facts are also duplicates if they are represented by similar sentences
-        3. Facts will often discuss the same or similar relation between identical entities
-
-        Respond with a JSON object in the following format:
-        {{
-            "duplicates": [
-                {{
-                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
-                    "duplicate_of": "uuid of the existing node",
-                    "fact": "one sentence description of the fact"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-
-
-def v2(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates relationship from edge lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate edges from a list of new edges given a list of existing edges:
-
-        Existing Edges:
-        {json.dumps(context['existing_edges'], indent=2)}
-
-        New Edges:
-        {json.dumps(context['extracted_edges'], indent=2)}
-
-        Task:
-        1. start with the list of edges from New Edges
-        2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
-        edge in the list
-        3. Respond with the resulting list of edges
-
-        Guidelines:
-        1. Use both the triplet name and fact of edges to determine if they are duplicates,
-        duplicate edges may have different names meaning the same thing and slight variations in the facts.
-        2. If you encounter facts that are semantically equivalent or very similar, keep the original edge
-
-        Respond with a JSON object in the following format:
-        {{
-            "new_edges": [
-                {{
-                    "triplet": "source_node_name-edge_name-target_node_name",
-                    "fact": "one sentence description of the fact"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-
-
-def v3(context: dict[str, Any]) -> list[Message]:
+def edge(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -128,11 +41,14 @@ def v3(context: dict[str, Any]) -> list[Message]:
             content=f"""
         Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
 
-        Existing Edges:
+        <EXISTING EDGES>
         {json.dumps(context['related_edges'], indent=2)}
+        </EXISTING EDGES>
 
-        New Edge:
+        <NEW EDGE>
         {json.dumps(context['extracted_edges'], indent=2)}
+        </NEW EDGE>
+
         Task:
         1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
         response. Otherwise, return 'is_duplicate: false'
@@ -189,4 +105,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]
 
 
-versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'edge_list': edge_list}
+versions: Versions = {'edge': edge, 'edge_list': edge_list}
```
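The surviving per-edge prompt is now addressed as `versions['edge']` and, as the f-string shows, expects `related_edges` and `extracted_edges` keys in its context. A hedged sketch of invoking it directly (the context payloads are made up):

```python
from graphiti_core.prompts.dedupe_edges import versions

# Keys match the f-string above; the values are illustrative.
context = {
    'related_edges': [
        {'uuid': '5d643020624c42fa9de13f97b1b3fa39', 'fact': 'Alice works at Acme Corp'},
    ],
    'extracted_edges': {'uuid': 'a1b2c3', 'fact': 'Alice is employed by Acme'},
}

messages = versions['edge'](context)
for message in messages:
    print(message.role, message.content[:60])
```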
```diff
--- /dev/null
+++ graphiti_core-0.4.0/graphiti_core/prompts/dedupe_nodes.py
@@ -0,0 +1,117 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+from typing import Any, Protocol, TypedDict
+
+from .models import Message, PromptFunction, PromptVersion
+
+
+class Prompt(Protocol):
+    node: PromptVersion
+    node_list: PromptVersion
+
+
+class Versions(TypedDict):
+    node: PromptFunction
+    node_list: PromptFunction
+
+
+def node(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates nodes from node lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <PREVIOUS MESSAGES>
+        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context["episode_content"]}
+        </CURRENT MESSAGE>
+
+        <EXISTING NODES>
+        {json.dumps(context['existing_nodes'], indent=2)}
+        </EXISTING NODES>
+
+        Given the above EXISTING NODES, MESSAGE, and PREVIOUS MESSAGES. Determine if the NEW NODE extracted from the conversation
+        is a duplicate entity of one of the EXISTING NODES.
+
+        <NEW NODE>
+        {json.dumps(context['extracted_nodes'], indent=2)}
+        </NEW NODE>
+        Task:
+        1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
+        response. Otherwise, return 'is_duplicate: false'
+        2. If is_duplicate is true, also return the uuid of the existing node in the response
+        3. If is_duplicate is true, return a name for the node that is the most complete full name.
+
+        Guidelines:
+        1. Use both the name and summary of nodes to determine if the entities are duplicates,
+        duplicate nodes may have different names
+
+        Respond with a JSON object in the following format:
+        {{
+            "is_duplicate": true or false,
+            "uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
+            "name": "Updated name of the new node (use the best name between the new node's name, an existing duplicate name, or a combination of both)"
+        }}
+        """,
+        ),
+    ]
+
+
+def node_list(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates nodes from node lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Given the following context, deduplicate a list of nodes:
+
+        Nodes:
+        {json.dumps(context['nodes'], indent=2)}
+
+        Task:
+        1. Group nodes together such that all duplicate nodes are in the same list of uuids
+        2. All duplicate uuids should be grouped together in the same list
+        3. Also return a new summary that synthesizes the summary into a new short summary
+
+        Guidelines:
+        1. Each uuid from the list of nodes should appear EXACTLY once in your response
+        2. If a node has no duplicates, it should appear in the response in a list of only one uuid
+
+        Respond with a JSON object in the following format:
+        {{
+            "nodes": [
+                {{
+                    "uuids": ["5d643020624c42fa9de13f97b1b3fa39", "node that is a duplicate of 5d643020624c42fa9de13f97b1b3fa39"],
+                    "summary": "Brief summary of the node summaries that appear in the list of names."
+                }}
+            ]
+        }}
+        """,
+        ),
+    ]
+
+
+versions: Versions = {'node': node, 'node_list': node_list}
```
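The new `node` prompt pins the model to a fixed JSON shape (`is_duplicate`, `uuid`, `name`). A sketch of validating such a response with pydantic (already a dependency); the model class here is mine, not part of the package:

```python
import json

from pydantic import BaseModel


class NodeDuplicate(BaseModel):
    is_duplicate: bool
    uuid: str | None = None
    name: str


# e.g. the raw text an LLM returned for the 'node' prompt above
raw = '{"is_duplicate": true, "uuid": "5d643020624c42fa9de13f97b1b3fa39", "name": "Alice Smith"}'
result = NodeDuplicate.model_validate(json.loads(raw))
print(result.is_duplicate, result.uuid, result.name)
```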
```diff
--- graphiti_core-0.3.20/graphiti_core/prompts/extract_edge_dates.py
+++ graphiti_core-0.4.0/graphiti_core/prompts/extract_edge_dates.py
@@ -36,12 +36,19 @@ def v1(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        …
-        …
-        …
-        …
-        …
-        …
+        <PREVIOUS MESSAGES>
+        {context['previous_episodes']}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context["current_episode"]}
+        </CURRENT MESSAGE>
+        <REFERENCE TIMESTAMP>
+        {context['reference_timestamp']}
+        </REFERENCE TIMESTAMP>
+
+        <FACT>
+        {context['edge_fact']}
+        </FACT>
 
         IMPORTANT: Only extract time information if it is part of the provided fact. Otherwise ignore the time mentioned. Make sure to do your best to determine the dates if only the relative time is mentioned. (eg 10 years ago, 2 mins ago) based on the provided reference timestamp
         If the relationship is not of spanning nature, but you are still able to determine the dates, set the valid_at only.
@@ -60,7 +67,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
         5. Do not infer dates from related events. Only use dates that are directly stated to establish or change the relationship.
         6. For relative time mentions directly related to the relationship, calculate the actual datetime based on the reference timestamp.
         7. If only a date is mentioned without a specific time, use 00:00:00 (midnight) for that date.
-        8. If only …
+        8. If only year is mentioned, use January 1st of that year at 00:00:00.
         9. Always include the time zone offset (use Z for UTC if no specific time zone is mentioned).
         Respond with a JSON object:
         {{
```
|