PyPI - graphiti-core - Versions diffs - 0.3.20__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

graphiti-core 0.3.20__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graphiti-core might be problematic. Click here for more details.

Files changed (23) hide show

graphiti_core/cross_encoder/openai_reranker_client.py +2 -2
graphiti_core/graphiti.py +50 -71
graphiti_core/helpers.py +1 -0
graphiti_core/models/edges/edge_db_queries.py +16 -0
graphiti_core/models/nodes/node_db_queries.py +16 -0
graphiti_core/nodes.py +2 -2
graphiti_core/prompts/dedupe_edges.py +9 -93
graphiti_core/prompts/dedupe_nodes.py +19 -101
graphiti_core/prompts/extract_edge_dates.py +14 -7
graphiti_core/prompts/extract_edges.py +55 -81
graphiti_core/prompts/extract_nodes.py +72 -96
graphiti_core/prompts/summarize_nodes.py +40 -1
graphiti_core/search/search.py +20 -0
graphiti_core/search/search_config_recipes.py +35 -0
graphiti_core/search/search_utils.py +5 -4
graphiti_core/utils/bulk_utils.py +3 -3
graphiti_core/utils/maintenance/community_operations.py +3 -3
graphiti_core/utils/maintenance/edge_operations.py +87 -55
graphiti_core/utils/maintenance/node_operations.py +122 -52
{graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/METADATA +6 -5
{graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/RECORD +23 -23
{graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/LICENSE +0 -0
{graphiti_core-0.3.20.dist-info → graphiti_core-0.4.0.dist-info}/WHEEL +0 -0

graphiti_core/cross_encoder/openai_reranker_client.py CHANGED Viewed

@@ -64,10 +64,10 @@ class OpenAIRerankerClient(CrossEncoderClient):
                     content=f"""
                            Respond with "True" if PASSAGE is relevant to QUERY and "False" otherwise.
                            <PASSAGE>
-                           {query}
-                           </PASSAGE>
                            {passage}
+                           </PASSAGE>
                            <QUERY>
+                           {query}
                            </QUERY>
                            """,
                 ),

graphiti_core/graphiti.py CHANGED Viewed

@@ -16,7 +16,7 @@ limitations under the License.
 import asyncio
 import logging
-from datetime import datetime
+from datetime import datetime, timezone
 from time import time
 from dotenv import load_dotenv
@@ -35,8 +35,6 @@ from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResul
 from graphiti_core.search.search_config_recipes import (
     EDGE_HYBRID_SEARCH_NODE_DISTANCE,
     EDGE_HYBRID_SEARCH_RRF,
-    NODE_HYBRID_SEARCH_NODE_DISTANCE,
-    NODE_HYBRID_SEARCH_RRF,
 )
 from graphiti_core.search.search_utils import (
     RELEVANT_SCHEMA_LIMIT,
@@ -65,7 +63,9 @@ from graphiti_core.utils.maintenance.community_operations import (
     update_community,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
+    dedupe_extracted_edge,
     extract_edges,
+    resolve_edge_contradictions,
     resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
@@ -76,6 +76,7 @@ from graphiti_core.utils.maintenance.node_operations import (
     extract_nodes,
     resolve_extracted_nodes,
 )
+from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
 logger = logging.getLogger(__name__)
@@ -312,10 +313,10 @@ class Graphiti:
             start = time()
             entity_edges: list[EntityEdge] = []
-            now = datetime.now()
+            now = datetime.now(timezone.utc)
             previous_episodes = await self.retrieve_episodes(
-                reference_time, last_n=3, group_ids=[group_id]
+                reference_time, last_n=RELEVANT_SCHEMA_LIMIT, group_ids=[group_id]
             )
             episode = EpisodicNode(
                 name=name,
@@ -340,17 +341,24 @@ class Graphiti:
                 *[node.generate_name_embedding(self.embedder) for node in extracted_nodes]
             )
-            # Resolve extracted nodes with nodes already in the graph and extract facts
+            # Find relevant nodes already in the graph
             existing_nodes_lists: list[list[EntityNode]] = list(
                 await asyncio.gather(
-                    *[get_relevant_nodes([node], self.driver) for node in extracted_nodes]
+                    *[get_relevant_nodes(self.driver, [node]) for node in extracted_nodes]
                 )
             )
+            # Resolve extracted nodes with nodes already in the graph and extract facts
             logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
             (mentioned_nodes, uuid_map), extracted_edges = await asyncio.gather(
-                resolve_extracted_nodes(self.llm_client, extracted_nodes, existing_nodes_lists),
+                resolve_extracted_nodes(
+                    self.llm_client,
+                    extracted_nodes,
+                    existing_nodes_lists,
+                    episode,
+                    previous_episodes,
+                ),
                 extract_edges(
                     self.llm_client, episode, extracted_nodes, previous_episodes, group_id
                 ),
@@ -448,7 +456,6 @@ class Graphiti:
             episode.entity_edges = [edge.uuid for edge in entity_edges]
-            # Future optimization would be using batch operations to save nodes and edges
             if not self.store_raw_episode_content:
                 episode.content = ''
@@ -511,7 +518,7 @@ class Graphiti:
         """
         try:
             start = time()
-            now = datetime.now()
+            now = datetime.now(timezone.utc)
             episodes = [
                 EpisodicNode(
@@ -685,67 +692,6 @@ class Graphiti:
             bfs_origin_node_uuids,
         )
-    async def get_nodes_by_query(
-        self,
-        query: str,
-        center_node_uuid: str | None = None,
-        group_ids: list[str] | None = None,
-        limit: int = DEFAULT_SEARCH_LIMIT,
-    ) -> list[EntityNode]:
-        """
-        Retrieve nodes from the graph database based on a text query.
-        This method performs a hybrid search using both text-based and
-        embedding-based approaches to find relevant nodes.
-        Parameters
-        ----------
-        query : str
-            The text query to search for in the graph
-        center_node_uuid: str, optional
-            Facts will be reranked based on proximity to this node.
-        group_ids : list[str | None] | None, optional
-            The graph partitions to return data from.
-        limit : int | None, optional
-            The maximum number of results to return per search method.
-            If None, a default limit will be applied.
-        Returns
-        -------
-        list[EntityNode]
-            A list of EntityNode objects that match the search criteria.
-        Notes
-        -----
-        This method uses the following steps:
-        1. Generates an embedding for the input query using the LLM client's embedder.
-        2. Calls the hybrid_node_search function with both the text query and its embedding.
-        3. The hybrid search combines fulltext search and vector similarity search
-           to find the most relevant nodes.
-        The method leverages the LLM client's embedding capabilities to enhance
-        the search with semantic similarity matching. The 'limit' parameter is applied
-        to each individual search method before results are combined and deduplicated.
-        If not specified, a default limit (defined in the search functions) will be used.
-        """
-        search_config = (
-            NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
-        )
-        search_config.limit = limit
-        nodes = (
-            await search(
-                self.driver,
-                self.embedder,
-                self.cross_encoder,
-                query,
-                group_ids,
-                search_config,
-                center_node_uuid,
-            )
-        ).nodes
-        return nodes
     async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
         episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
@@ -760,3 +706,36 @@ class Graphiti:
         communities = await get_communities_by_nodes(self.driver, nodes)
         return SearchResults(edges=edges, nodes=nodes, communities=communities)
+    async def add_triplet(self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode):
+        if source_node.name_embedding is None:
+            await source_node.generate_name_embedding(self.embedder)
+        if target_node.name_embedding is None:
+            await target_node.generate_name_embedding(self.embedder)
+        if edge.fact_embedding is None:
+            await edge.generate_embedding(self.embedder)
+        resolved_nodes, _ = await resolve_extracted_nodes(
+            self.llm_client,
+            [source_node, target_node],
+            [
+                await get_relevant_nodes(self.driver, [source_node]),
+                await get_relevant_nodes(self.driver, [target_node]),
+            ],
+        )
+        related_edges = await get_relevant_edges(
+            self.driver,
+            [edge],
+            source_node_uuid=resolved_nodes[0].uuid,
+            target_node_uuid=resolved_nodes[1].uuid,
+        )
+        resolved_edge = await dedupe_extracted_edge(self.llm_client, edge, related_edges)
+        contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges)
+        invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
+        await add_nodes_and_edges_bulk(
+            self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges
+        )

graphiti_core/helpers.py CHANGED Viewed

@@ -25,6 +25,7 @@ load_dotenv()
 DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', None)
 USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
+MAX_REFLEXION_ITERATIONS = 2
 def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None:

graphiti_core/models/edges/edge_db_queries.py CHANGED Viewed

@@ -1,3 +1,19 @@
+"""
+Copyright 2024, Zep Software, Inc.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
 EPISODIC_EDGE_SAVE = """
         MATCH (episode:Episodic {uuid: $episode_uuid})
         MATCH (node:Entity {uuid: $entity_uuid})

graphiti_core/models/nodes/node_db_queries.py CHANGED Viewed

@@ -1,3 +1,19 @@
+"""
+Copyright 2024, Zep Software, Inc.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
 EPISODIC_NODE_SAVE = """
         MERGE (n:Episodic {uuid: $uuid})
         SET n = {uuid: $uuid, name: $name, group_id: $group_id, source_description: $source_description, source: $source, content: $content,

graphiti_core/nodes.py CHANGED Viewed

@@ -16,7 +16,7 @@ limitations under the License.
 import logging
 from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime, timezone
 from enum import Enum
 from time import time
 from typing import Any
@@ -78,7 +78,7 @@ class Node(BaseModel, ABC):
     name: str = Field(description='name of the node')
     group_id: str = Field(description='partition of the graph')
     labels: list[str] = Field(default_factory=list)
-    created_at: datetime = Field(default_factory=lambda: datetime.now())
+    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
     @abstractmethod
     async def save(self, driver: AsyncDriver): ...

graphiti_core/prompts/dedupe_edges.py CHANGED Viewed

@@ -21,103 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
 class Prompt(Protocol):
-    v1: PromptVersion
-    v2: PromptVersion
-    v3: PromptVersion
+    edge: PromptVersion
     edge_list: PromptVersion
 class Versions(TypedDict):
-    v1: PromptFunction
-    v2: PromptFunction
-    v3: PromptFunction
+    edge: PromptFunction
     edge_list: PromptFunction
-def v1(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates relationship from edge lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate facts from a list of new facts given a list of existing edges:
-        Existing Edges:
-        {json.dumps(context['existing_edges'], indent=2)}
-        New Edges:
-        {json.dumps(context['extracted_edges'], indent=2)}
-        Task:
-        If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
-        When finding duplicates edges, synthesize their facts into a short new fact.
-        Guidelines:
-        1. identical or near identical facts are duplicates
-        2. Facts are also duplicates if they are represented by similar sentences
-        3. Facts will often discuss the same or similar relation between identical entities
-        Respond with a JSON object in the following format:
-        {{
-            "duplicates": [
-                {{
-                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
-                    "duplicate_of": "uuid of the existing node",
-                    "fact": "one sentence description of the fact"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-def v2(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates relationship from edge lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate edges from a list of new edges given a list of existing edges:
-        Existing Edges:
-        {json.dumps(context['existing_edges'], indent=2)}
-        New Edges:
-        {json.dumps(context['extracted_edges'], indent=2)}
-        Task:
-        1. start with the list of edges from New Edges
-        2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
-            edge in the list
-        3. Respond with the resulting list of edges
-        Guidelines:
-        1. Use both the triplet name and fact of edges to determine if they are duplicates,
-            duplicate edges may have different names meaning the same thing and slight variations in the facts.
-        2. If you encounter facts that are semantically equivalent or very similar, keep the original edge
-        Respond with a JSON object in the following format:
-        {{
-            "new_edges": [
-                {{
-                    "triplet": "source_node_name-edge_name-target_node_name",
-                    "fact": "one sentence description of the fact"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-def v3(context: dict[str, Any]) -> list[Message]:
+def edge(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -128,11 +41,14 @@ def v3(context: dict[str, Any]) -> list[Message]:
             content=f"""
         Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
-        Existing Edges:
+        <EXISTING EDGES>
         {json.dumps(context['related_edges'], indent=2)}
+        </EXISTING EDGES>
-        New Edge:
+        <NEW EDGE>
         {json.dumps(context['extracted_edges'], indent=2)}
+        </NEW EDGE>
         Task:
         1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
             response. Otherwise, return 'is_duplicate: false'
@@ -189,4 +105,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]
-versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'edge_list': edge_list}
+versions: Versions = {'edge': edge, 'edge_list': edge_list}

graphiti_core/prompts/dedupe_nodes.py CHANGED Viewed

@@ -21,20 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
 class Prompt(Protocol):
-    v1: PromptVersion
-    v2: PromptVersion
-    v3: PromptVersion
+    node: PromptVersion
     node_list: PromptVersion
 class Versions(TypedDict):
-    v1: PromptFunction
-    v2: PromptFunction
-    v3: PromptFunction
+    node: PromptFunction
     node_list: PromptFunction
-def v1(context: dict[str, Any]) -> list[Message]:
+def node(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -43,106 +39,28 @@ def v1(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        Given the following context, deduplicate nodes from a list of new nodes given a list of existing nodes:
-        Existing Nodes:
+        <PREVIOUS MESSAGES>
+        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context["episode_content"]}
+        </CURRENT MESSAGE>
+        <EXISTING NODES>
         {json.dumps(context['existing_nodes'], indent=2)}
-        New Nodes:
-        {json.dumps(context['extracted_nodes'], indent=2)}
+        </EXISTING NODES>
-        Task:
-        1. start with the list of nodes from New Nodes
-        2. If any node in New Nodes is a duplicate of a node in Existing Nodes, replace the new node with the existing
-            node in the list
-        3. when deduplicating nodes, synthesize their summaries into a short new summary that contains the relevant information
-            of the summaries of the new and existing nodes
-        4. Respond with the resulting list of nodes
-        Guidelines:
-        1. Use both the name and summary of nodes to determine if they are duplicates,
-            duplicate nodes may have different names
-        Respond with a JSON object in the following format:
-        {{
-            "new_nodes": [
-                {{
-                    "name": "Unique identifier for the node",
-                    "summary": "Brief summary of the node's role or significance"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-def v2(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates nodes from node lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate nodes from a list of new nodes given a list of existing nodes:
-        Existing Nodes:
-        {json.dumps(context['existing_nodes'], indent=2)}
-        New Nodes:
-        {json.dumps(context['extracted_nodes'], indent=2)}
-        Important:
-        If a node in the new nodes is describing the same entity as a node in the existing nodes, mark it as a duplicate!!!
-        Task:
-        If any node in New Nodes is a duplicate of a node in Existing Nodes, add their uuids to the output list
-        When finding duplicates nodes, synthesize their summaries into a short new summary that contains the
-        relevant information of the summaries of the new and existing nodes.
-        Guidelines:
-        1. Use both the name and summary of nodes to determine if they are duplicates,
-            duplicate nodes may have different names
-        2. In the output, uuid should always be the uuid of the New Node that is a duplicate. duplicate_of should be
-            the uuid of the Existing Node.
-        Respond with a JSON object in the following format:
-        {{
-            "duplicates": [
-                {{
-                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
-                    "duplicate_of": "uuid of the existing node",
-                    "summary": "Brief summary of the node's role or significance. Takes information from the new and existing nodes"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-def v3(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates nodes from node lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, determine whether the New Node represents any of the entities in the list of Existing Nodes.
-        Existing Nodes:
-        {json.dumps(context['existing_nodes'], indent=2)}
+        Given the above EXISTING NODES, MESSAGE, and PREVIOUS MESSAGES. Determine if the NEW NODE extracted from the conversation
+        is a duplicate entity of one of the EXISTING NODES.
-        New Node:
+        <NEW NODE>
         {json.dumps(context['extracted_nodes'], indent=2)}
+        </NEW NODE>
         Task:
         1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
             response. Otherwise, return 'is_duplicate: false'
         2. If is_duplicate is true, also return the uuid of the existing node in the response
-        3. If is_duplicate is true, return a summary that synthesizes the information in the New Node summary and the
-        summary of the Existing Node it is a duplicate of.
+        3. If is_duplicate is true, return a name for the node that is the most complete full name.
         Guidelines:
         1. Use both the name and summary of nodes to determine if the entities are duplicates,
@@ -152,7 +70,7 @@ def v3(context: dict[str, Any]) -> list[Message]:
             {{
                 "is_duplicate": true or false,
                 "uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
-                "summary": "Brief summary of the node's role or significance. Takes information from the new and existing node"
+                "name": "Updated name of the new node (use the best name between the new node's name, an existing duplicate name, or a combination of both)"
             }}
         """,
         ),
@@ -196,4 +114,4 @@ def node_list(context: dict[str, Any]) -> list[Message]:
     ]
-versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'node_list': node_list}
+versions: Versions = {'node': node, 'node_list': node_list}

graphiti_core/prompts/extract_edge_dates.py CHANGED Viewed

@@ -36,12 +36,19 @@ def v1(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-            Edge:
-            Fact: {context['edge_fact']}
-            Current Episode: {context['current_episode']}
-            Previous Episodes: {context['previous_episodes']}
-            Reference Timestamp: {context['reference_timestamp']}
+            <PREVIOUS MESSAGES>
+            {context['previous_episodes']}
+            </PREVIOUS MESSAGES>
+            <CURRENT MESSAGE>
+            {context["current_episode"]}
+            </CURRENT MESSAGE>
+            <REFERENCE TIMESTAMP>
+            {context['reference_timestamp']}
+            </REFERENCE TIMESTAMP>
+            <FACT>
+            {context['edge_fact']}
+            </FACT>
             IMPORTANT: Only extract time information if it is part of the provided fact. Otherwise ignore the time mentioned. Make sure to do your best to determine the dates if only the relative time is mentioned. (eg 10 years ago, 2 mins ago) based on the provided reference timestamp
             If the relationship is not of spanning nature, but you are still able to determine the dates, set the valid_at only.
@@ -60,7 +67,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
             5. Do not infer dates from related events. Only use dates that are directly stated to establish or change the relationship.
 			6. For relative time mentions directly related to the relationship, calculate the actual datetime based on the reference timestamp.
             7. If only a date is mentioned without a specific time, use 00:00:00 (midnight) for that date.
-            8. If only a year is mentioned, use January 1st of that year at 00:00:00.
+            8. If only year is mentioned, use January 1st of that year at 00:00:00.
             9. Always include the time zone offset (use Z for UTC if no specific time zone is mentioned).
             Respond with a JSON object:
             {{

graphiti-core 0.3.20__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

graphiti-core 0.3.20__py3-none-any.whl → 0.4.0__py3-none-any.whl