graphiti-core 0.3.20__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graphiti-core has been flagged as potentially problematic.
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/PKG-INFO +6 -5
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/README.md +2 -2
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/graphiti.py +50 -71
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/helpers.py +1 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/edges/edge_db_queries.py +16 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/nodes/node_db_queries.py +16 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/nodes.py +2 -2
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/dedupe_edges.py +9 -93
- graphiti_core-0.4.0/graphiti_core/prompts/dedupe_nodes.py +117 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/extract_edge_dates.py +14 -7
- graphiti_core-0.4.0/graphiti_core/prompts/extract_edges.py +114 -0
- graphiti_core-0.4.0/graphiti_core/prompts/extract_nodes.py +165 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/summarize_nodes.py +40 -1
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search.py +20 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_config_recipes.py +35 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_utils.py +5 -4
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/bulk_utils.py +3 -3
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/community_operations.py +3 -3
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/edge_operations.py +87 -55
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/node_operations.py +122 -52
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/pyproject.toml +6 -6
- graphiti_core-0.3.20/graphiti_core/prompts/dedupe_nodes.py +0 -199
- graphiti_core-0.3.20/graphiti_core/prompts/extract_edges.py +0 -140
- graphiti_core-0.3.20/graphiti_core/prompts/extract_nodes.py +0 -189
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/LICENSE +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/bge_reranker_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/cross_encoder/openai_reranker_client.py +2 -2
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/edges.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/openai.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/embedder/voyage.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/errors.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/anthropic_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/config.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/errors.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/groq_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/openai_client.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/llm_client/utils.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/edges/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/models/nodes/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/eval.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/invalidate_edges.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/lib.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/prompts/models.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/py.typed +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/search/search_config.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/__init__.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
- {graphiti_core-0.3.20 → graphiti_core-0.4.0}/graphiti_core/utils/maintenance/utils.py +0 -0
````diff
--- graphiti_core-0.3.20/PKG-INFO
+++ graphiti_core-0.4.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphiti-core
-Version: 0.3.20
+Version: 0.4.0
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
@@ -14,9 +14,10 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: diskcache (>=5.6.3,<6.0.0)
 Requires-Dist: neo4j (>=5.23.0,<6.0.0)
 Requires-Dist: numpy (>=1.0.0)
-Requires-Dist: openai (>=1.…
+Requires-Dist: openai (>=1.53.0,<2.0.0)
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
-Requires-Dist: …
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+Requires-Dist: tenacity (==9.0.0)
 Description-Content-Type: text/markdown
 
 <div align="center">
@@ -129,7 +130,7 @@ poetry add graphiti-core
 ```python
 from graphiti_core import Graphiti
 from graphiti_core.nodes import EpisodeType
-from datetime import datetime
+from datetime import datetime, timezone
 
 # Initialize Graphiti
 graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password")
@@ -149,7 +150,7 @@ for i, episode in enumerate(episodes):
         episode_body=episode,
         source=EpisodeType.text,
         source_description="podcast",
-        reference_time=datetime.now()
+        reference_time=datetime.now(timezone.utc)
     )
 
 # Search the graph
````
````diff
--- graphiti_core-0.3.20/README.md
+++ graphiti_core-0.4.0/README.md
@@ -108,7 +108,7 @@ poetry add graphiti-core
 ```python
 from graphiti_core import Graphiti
 from graphiti_core.nodes import EpisodeType
-from datetime import datetime
+from datetime import datetime, timezone
 
 # Initialize Graphiti
 graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password")
@@ -128,7 +128,7 @@ for i, episode in enumerate(episodes):
         episode_body=episode,
         source=EpisodeType.text,
         source_description="podcast",
-        reference_time=datetime.now()
+        reference_time=datetime.now(timezone.utc)
     )
 
 # Search the graph
````
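Both README variants (the standalone file and the copy embedded in PKG-INFO) now build timezone-aware timestamps. This matters because Python refuses to compare offset-naive and offset-aware datetimes, so a naive `reference_time` can break temporal comparisons against stored UTC values. A minimal illustration, not taken from the package:

```python
from datetime import datetime, timezone

naive = datetime.now()              # no tzinfo attached
aware = datetime.now(timezone.utc)  # explicit UTC offset

print(naive.tzinfo)  # None
print(aware.tzinfo)  # UTC

# Mixing the two in a comparison raises TypeError
# ("can't compare offset-naive and offset-aware datetimes"),
# which is why the examples switched to datetime.now(timezone.utc).
try:
    print(naive < aware)
except TypeError as err:
    print(err)
```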
```diff
--- graphiti_core-0.3.20/graphiti_core/graphiti.py
+++ graphiti_core-0.4.0/graphiti_core/graphiti.py
@@ -16,7 +16,7 @@ limitations under the License.
 
 import asyncio
 import logging
-from datetime import datetime
+from datetime import datetime, timezone
 from time import time
 
 from dotenv import load_dotenv
@@ -35,8 +35,6 @@ from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResul
 from graphiti_core.search.search_config_recipes import (
     EDGE_HYBRID_SEARCH_NODE_DISTANCE,
     EDGE_HYBRID_SEARCH_RRF,
-    NODE_HYBRID_SEARCH_NODE_DISTANCE,
-    NODE_HYBRID_SEARCH_RRF,
 )
 from graphiti_core.search.search_utils import (
     RELEVANT_SCHEMA_LIMIT,
@@ -65,7 +63,9 @@ from graphiti_core.utils.maintenance.community_operations import (
     update_community,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
+    dedupe_extracted_edge,
     extract_edges,
+    resolve_edge_contradictions,
     resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
@@ -76,6 +76,7 @@ from graphiti_core.utils.maintenance.node_operations import (
     extract_nodes,
     resolve_extracted_nodes,
 )
+from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
 
 logger = logging.getLogger(__name__)
 
@@ -312,10 +313,10 @@ class Graphiti:
             start = time()
 
             entity_edges: list[EntityEdge] = []
-            now = datetime.now()
+            now = datetime.now(timezone.utc)
 
             previous_episodes = await self.retrieve_episodes(
-                reference_time, last_n=…
+                reference_time, last_n=RELEVANT_SCHEMA_LIMIT, group_ids=[group_id]
             )
             episode = EpisodicNode(
                 name=name,
@@ -340,17 +341,24 @@ class Graphiti:
                 *[node.generate_name_embedding(self.embedder) for node in extracted_nodes]
             )
 
-            # …
+            # Find relevant nodes already in the graph
             existing_nodes_lists: list[list[EntityNode]] = list(
                 await asyncio.gather(
-                    *[get_relevant_nodes([node]…
+                    *[get_relevant_nodes(self.driver, [node]) for node in extracted_nodes]
                 )
             )
 
+            # Resolve extracted nodes with nodes already in the graph and extract facts
             logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
 
             (mentioned_nodes, uuid_map), extracted_edges = await asyncio.gather(
-                resolve_extracted_nodes(…
+                resolve_extracted_nodes(
+                    self.llm_client,
+                    extracted_nodes,
+                    existing_nodes_lists,
+                    episode,
+                    previous_episodes,
+                ),
                 extract_edges(
                     self.llm_client, episode, extracted_nodes, previous_episodes, group_id
                 ),
@@ -448,7 +456,6 @@ class Graphiti:
 
             episode.entity_edges = [edge.uuid for edge in entity_edges]
 
-            # Future optimization would be using batch operations to save nodes and edges
             if not self.store_raw_episode_content:
                 episode.content = ''
 
@@ -511,7 +518,7 @@ class Graphiti:
         """
         try:
             start = time()
-            now = datetime.now()
+            now = datetime.now(timezone.utc)
 
             episodes = [
                 EpisodicNode(
@@ -685,67 +692,6 @@ class Graphiti:
             bfs_origin_node_uuids,
         )
 
-    async def get_nodes_by_query(
-        self,
-        query: str,
-        center_node_uuid: str | None = None,
-        group_ids: list[str] | None = None,
-        limit: int = DEFAULT_SEARCH_LIMIT,
-    ) -> list[EntityNode]:
-        """
-        Retrieve nodes from the graph database based on a text query.
-
-        This method performs a hybrid search using both text-based and
-        embedding-based approaches to find relevant nodes.
-
-        Parameters
-        ----------
-        query : str
-            The text query to search for in the graph
-        center_node_uuid: str, optional
-            Facts will be reranked based on proximity to this node.
-        group_ids : list[str | None] | None, optional
-            The graph partitions to return data from.
-        limit : int | None, optional
-            The maximum number of results to return per search method.
-            If None, a default limit will be applied.
-
-        Returns
-        -------
-        list[EntityNode]
-            A list of EntityNode objects that match the search criteria.
-
-        Notes
-        -----
-        This method uses the following steps:
-        1. Generates an embedding for the input query using the LLM client's embedder.
-        2. Calls the hybrid_node_search function with both the text query and its embedding.
-        3. The hybrid search combines fulltext search and vector similarity search
-        to find the most relevant nodes.
-
-        The method leverages the LLM client's embedding capabilities to enhance
-        the search with semantic similarity matching. The 'limit' parameter is applied
-        to each individual search method before results are combined and deduplicated.
-        If not specified, a default limit (defined in the search functions) will be used.
-        """
-        search_config = (
-            NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
-        )
-        search_config.limit = limit
-
-        nodes = (
-            await search(
-                self.driver,
-                self.embedder,
-                self.cross_encoder,
-                query,
-                group_ids,
-                search_config,
-                center_node_uuid,
-            )
-        ).nodes
-        return nodes
-
     async def get_episode_mentions(self, episode_uuids: list[str]) -> SearchResults:
         episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
 
```
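`get_nodes_by_query` is gone, but its whole body was a thin wrapper over the recipe-based search API, and the recipes it used remain exported from `search_config_recipes` (which gains 35 lines in this release). A sketch of the equivalent call, reconstructed from the removed body above rather than from 0.4.0's documented API:

```python
from graphiti_core.search.search import search
from graphiti_core.search.search_config_recipes import (
    NODE_HYBRID_SEARCH_NODE_DISTANCE,
    NODE_HYBRID_SEARCH_RRF,
)


async def nodes_by_query(graphiti, query, center_node_uuid=None, group_ids=None, limit=10):
    # Same recipe selection the removed method used: RRF reranking by default,
    # node-distance reranking when a center node is supplied.
    config = (
        NODE_HYBRID_SEARCH_RRF if center_node_uuid is None else NODE_HYBRID_SEARCH_NODE_DISTANCE
    )
    config.limit = limit

    results = await search(
        graphiti.driver,
        graphiti.embedder,
        graphiti.cross_encoder,
        query,
        group_ids,
        config,
        center_node_uuid,
    )
    return results.nodes
```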
graphiti_core/graphiti.py (continued):

```diff
@@ -760,3 +706,36 @@ class Graphiti:
         communities = await get_communities_by_nodes(self.driver, nodes)
 
         return SearchResults(edges=edges, nodes=nodes, communities=communities)
+
+    async def add_triplet(self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode):
+        if source_node.name_embedding is None:
+            await source_node.generate_name_embedding(self.embedder)
+        if target_node.name_embedding is None:
+            await target_node.generate_name_embedding(self.embedder)
+        if edge.fact_embedding is None:
+            await edge.generate_embedding(self.embedder)
+
+        resolved_nodes, _ = await resolve_extracted_nodes(
+            self.llm_client,
+            [source_node, target_node],
+            [
+                await get_relevant_nodes(self.driver, [source_node]),
+                await get_relevant_nodes(self.driver, [target_node]),
+            ],
+        )
+
+        related_edges = await get_relevant_edges(
+            self.driver,
+            [edge],
+            source_node_uuid=resolved_nodes[0].uuid,
+            target_node_uuid=resolved_nodes[1].uuid,
+        )
+
+        resolved_edge = await dedupe_extracted_edge(self.llm_client, edge, related_edges)
+
+        contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges)
+        invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
+
+        await add_nodes_and_edges_bulk(
+            self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges
+        )
```
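`add_triplet` is a new public entry point: it writes a single source-edge-target fact into the graph, generating any missing embeddings, deduplicating the pieces against existing nodes and edges, and invalidating contradicted edges before a bulk save. A hedged usage sketch; the `EntityNode`/`EntityEdge` constructor fields below are assumptions, not confirmed by this diff:

```python
from datetime import datetime, timezone

from graphiti_core import Graphiti
from graphiti_core.edges import EntityEdge
from graphiti_core.nodes import EntityNode


async def main():
    graphiti = Graphiti('bolt://localhost:7687', 'neo4j', 'password')

    # Field names here are illustrative guesses at the model constructors.
    alice = EntityNode(name='Alice', group_id='demo', labels=['Entity'], summary='')
    acme = EntityNode(name='Acme Corp', group_id='demo', labels=['Entity'], summary='')
    works_at = EntityEdge(
        source_node_uuid=alice.uuid,
        target_node_uuid=acme.uuid,
        name='WORKS_AT',
        fact='Alice works at Acme Corp',
        episodes=[],
        group_id='demo',
        created_at=datetime.now(timezone.utc),
    )

    # Embeddings are generated inside add_triplet when missing.
    await graphiti.add_triplet(alice, works_at, acme)
```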
```diff
--- graphiti_core-0.3.20/graphiti_core/helpers.py
+++ graphiti_core-0.4.0/graphiti_core/helpers.py
@@ -25,6 +25,7 @@ load_dotenv()
 
 DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', None)
 USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
+MAX_REFLEXION_ITERATIONS = 2
 
 
 def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None:
```
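`MAX_REFLEXION_ITERATIONS` is a new module-level constant; given the reworked `extract_nodes.py`/`extract_edges.py` prompts in this release, it presumably caps a reflexion-style re-prompting loop during extraction. An illustrative sketch of such a bounded loop, not the package's actual code:

```python
MAX_REFLEXION_ITERATIONS = 2  # mirrors the new constant in helpers.py


async def extract_with_reflexion(run_extraction, find_missed_entities):
    """Hypothetical helpers: re-prompt until nothing is missed or the cap is hit."""
    entities = await run_extraction(missed=[])
    for _ in range(MAX_REFLEXION_ITERATIONS):
        missed = await find_missed_entities(entities)
        if not missed:
            break
        # Feed the critique back into the next extraction prompt.
        entities = await run_extraction(missed=missed)
    return entities
```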
```diff
--- graphiti_core-0.3.20/graphiti_core/models/edges/edge_db_queries.py
+++ graphiti_core-0.4.0/graphiti_core/models/edges/edge_db_queries.py
@@ -1,3 +1,19 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
 EPISODIC_EDGE_SAVE = """
     MATCH (episode:Episodic {uuid: $episode_uuid})
     MATCH (node:Entity {uuid: $entity_uuid})
```
```diff
--- graphiti_core-0.3.20/graphiti_core/models/nodes/node_db_queries.py
+++ graphiti_core-0.4.0/graphiti_core/models/nodes/node_db_queries.py
@@ -1,3 +1,19 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
 EPISODIC_NODE_SAVE = """
     MERGE (n:Episodic {uuid: $uuid})
     SET n = {uuid: $uuid, name: $name, group_id: $group_id, source_description: $source_description, source: $source, content: $content,
```
```diff
--- graphiti_core-0.3.20/graphiti_core/nodes.py
+++ graphiti_core-0.4.0/graphiti_core/nodes.py
@@ -16,7 +16,7 @@ limitations under the License.
 
 import logging
 from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime, timezone
 from enum import Enum
 from time import time
 from typing import Any
@@ -78,7 +78,7 @@ class Node(BaseModel, ABC):
     name: str = Field(description='name of the node')
     group_id: str = Field(description='partition of the graph')
     labels: list[str] = Field(default_factory=list)
-    created_at: datetime = Field(default_factory=lambda: datetime.now())
+    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
 
     @abstractmethod
     async def save(self, driver: AsyncDriver): ...
```
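Because `default_factory` is evaluated once per instance, every node still gets its own creation time, now offset-aware. The same pattern outside the package:

```python
from datetime import datetime, timezone

from pydantic import BaseModel, Field


class Stamped(BaseModel):
    # The lambda runs at each instantiation, so timestamps differ per object
    # and always carry an explicit UTC offset.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))


a, b = Stamped(), Stamped()
print(a.created_at.tzinfo)           # UTC
print(a.created_at <= b.created_at)  # True: a was created first
```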
```diff
--- graphiti_core-0.3.20/graphiti_core/prompts/dedupe_edges.py
+++ graphiti_core-0.4.0/graphiti_core/prompts/dedupe_edges.py
@@ -21,103 +21,16 @@ from .models import Message, PromptFunction, PromptVersion
 
 
 class Prompt(Protocol):
-    v1: PromptVersion
-    v2: PromptVersion
-    v3: PromptVersion
+    edge: PromptVersion
     edge_list: PromptVersion
 
 
 class Versions(TypedDict):
-    v1: PromptFunction
-    v2: PromptFunction
-    v3: PromptFunction
+    edge: PromptFunction
     edge_list: PromptFunction
 
 
-def v1(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates relationship from edge lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate facts from a list of new facts given a list of existing edges:
-
-        Existing Edges:
-        {json.dumps(context['existing_edges'], indent=2)}
-
-        New Edges:
-        {json.dumps(context['extracted_edges'], indent=2)}
-
-        Task:
-        If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
-        When finding duplicates edges, synthesize their facts into a short new fact.
-
-        Guidelines:
-        1. identical or near identical facts are duplicates
-        2. Facts are also duplicates if they are represented by similar sentences
-        3. Facts will often discuss the same or similar relation between identical entities
-
-        Respond with a JSON object in the following format:
-        {{
-            "duplicates": [
-                {{
-                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
-                    "duplicate_of": "uuid of the existing node",
-                    "fact": "one sentence description of the fact"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-
-
-def v2(context: dict[str, Any]) -> list[Message]:
-    return [
-        Message(
-            role='system',
-            content='You are a helpful assistant that de-duplicates relationship from edge lists.',
-        ),
-        Message(
-            role='user',
-            content=f"""
-        Given the following context, deduplicate edges from a list of new edges given a list of existing edges:
-
-        Existing Edges:
-        {json.dumps(context['existing_edges'], indent=2)}
-
-        New Edges:
-        {json.dumps(context['extracted_edges'], indent=2)}
-
-        Task:
-        1. start with the list of edges from New Edges
-        2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
-        edge in the list
-        3. Respond with the resulting list of edges
-
-        Guidelines:
-        1. Use both the triplet name and fact of edges to determine if they are duplicates,
-        duplicate edges may have different names meaning the same thing and slight variations in the facts.
-        2. If you encounter facts that are semantically equivalent or very similar, keep the original edge
-
-        Respond with a JSON object in the following format:
-        {{
-            "new_edges": [
-                {{
-                    "triplet": "source_node_name-edge_name-target_node_name",
-                    "fact": "one sentence description of the fact"
-                }}
-            ]
-        }}
-        """,
-        ),
-    ]
-
-
-def v3(context: dict[str, Any]) -> list[Message]:
+def edge(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -128,11 +41,14 @@ def v3(context: dict[str, Any]) -> list[Message]:
             content=f"""
         Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
 
-        Existing Edges:
+        <EXISTING EDGES>
         {json.dumps(context['related_edges'], indent=2)}
+        </EXISTING EDGES>
 
-        New Edge:
+        <NEW EDGE>
         {json.dumps(context['extracted_edges'], indent=2)}
+        </NEW EDGE>
+
         Task:
         1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
         response. Otherwise, return 'is_duplicate: false'
@@ -189,4 +105,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]
 
 
-versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'edge_list': edge_list}
+versions: Versions = {'edge': edge, 'edge_list': edge_list}
```
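The surviving per-edge prompt is now addressed as `versions['edge']` and, as the f-string shows, expects `related_edges` and `extracted_edges` keys in its context. A hedged sketch of invoking it directly (the context payloads are made up):

```python
from graphiti_core.prompts.dedupe_edges import versions

# Keys match the f-string above; the values are illustrative.
context = {
    'related_edges': [
        {'uuid': '5d643020624c42fa9de13f97b1b3fa39', 'fact': 'Alice works at Acme Corp'},
    ],
    'extracted_edges': {'uuid': 'a1b2c3', 'fact': 'Alice is employed by Acme'},
}

messages = versions['edge'](context)
for message in messages:
    print(message.role, message.content[:60])
```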
```diff
--- /dev/null
+++ graphiti_core-0.4.0/graphiti_core/prompts/dedupe_nodes.py
@@ -0,0 +1,117 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+from typing import Any, Protocol, TypedDict
+
+from .models import Message, PromptFunction, PromptVersion
+
+
+class Prompt(Protocol):
+    node: PromptVersion
+    node_list: PromptVersion
+
+
+class Versions(TypedDict):
+    node: PromptFunction
+    node_list: PromptFunction
+
+
+def node(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates nodes from node lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <PREVIOUS MESSAGES>
+        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context["episode_content"]}
+        </CURRENT MESSAGE>
+
+        <EXISTING NODES>
+        {json.dumps(context['existing_nodes'], indent=2)}
+        </EXISTING NODES>
+
+        Given the above EXISTING NODES, MESSAGE, and PREVIOUS MESSAGES. Determine if the NEW NODE extracted from the conversation
+        is a duplicate entity of one of the EXISTING NODES.
+
+        <NEW NODE>
+        {json.dumps(context['extracted_nodes'], indent=2)}
+        </NEW NODE>
+        Task:
+        1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
+        response. Otherwise, return 'is_duplicate: false'
+        2. If is_duplicate is true, also return the uuid of the existing node in the response
+        3. If is_duplicate is true, return a name for the node that is the most complete full name.
+
+        Guidelines:
+        1. Use both the name and summary of nodes to determine if the entities are duplicates,
+        duplicate nodes may have different names
+
+        Respond with a JSON object in the following format:
+        {{
+            "is_duplicate": true or false,
+            "uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
+            "name": "Updated name of the new node (use the best name between the new node's name, an existing duplicate name, or a combination of both)"
+        }}
+        """,
+        ),
+    ]
+
+
+def node_list(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates nodes from node lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Given the following context, deduplicate a list of nodes:
+
+        Nodes:
+        {json.dumps(context['nodes'], indent=2)}
+
+        Task:
+        1. Group nodes together such that all duplicate nodes are in the same list of uuids
+        2. All duplicate uuids should be grouped together in the same list
+        3. Also return a new summary that synthesizes the summary into a new short summary
+
+        Guidelines:
+        1. Each uuid from the list of nodes should appear EXACTLY once in your response
+        2. If a node has no duplicates, it should appear in the response in a list of only one uuid
+
+        Respond with a JSON object in the following format:
+        {{
+            "nodes": [
+                {{
+                    "uuids": ["5d643020624c42fa9de13f97b1b3fa39", "node that is a duplicate of 5d643020624c42fa9de13f97b1b3fa39"],
+                    "summary": "Brief summary of the node summaries that appear in the list of names."
+                }}
+            ]
+        }}
+        """,
+        ),
+    ]
+
+
+versions: Versions = {'node': node, 'node_list': node_list}
```
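The new `node` prompt pins the model to a fixed JSON shape (`is_duplicate`, `uuid`, `name`). A sketch of validating such a response with pydantic (already a dependency); the model class here is mine, not part of the package:

```python
import json

from pydantic import BaseModel


class NodeDuplicate(BaseModel):
    is_duplicate: bool
    uuid: str | None = None
    name: str


# e.g. the raw text an LLM returned for the 'node' prompt above
raw = '{"is_duplicate": true, "uuid": "5d643020624c42fa9de13f97b1b3fa39", "name": "Alice Smith"}'
result = NodeDuplicate.model_validate(json.loads(raw))
print(result.is_duplicate, result.uuid, result.name)
```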
```diff
--- graphiti_core-0.3.20/graphiti_core/prompts/extract_edge_dates.py
+++ graphiti_core-0.4.0/graphiti_core/prompts/extract_edge_dates.py
@@ -36,12 +36,19 @@ def v1(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        …
-        …
-        …
-        …
-        …
-        …
+        <PREVIOUS MESSAGES>
+        {context['previous_episodes']}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context["current_episode"]}
+        </CURRENT MESSAGE>
+        <REFERENCE TIMESTAMP>
+        {context['reference_timestamp']}
+        </REFERENCE TIMESTAMP>
+
+        <FACT>
+        {context['edge_fact']}
+        </FACT>
 
         IMPORTANT: Only extract time information if it is part of the provided fact. Otherwise ignore the time mentioned. Make sure to do your best to determine the dates if only the relative time is mentioned. (eg 10 years ago, 2 mins ago) based on the provided reference timestamp
         If the relationship is not of spanning nature, but you are still able to determine the dates, set the valid_at only.
@@ -60,7 +67,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
         5. Do not infer dates from related events. Only use dates that are directly stated to establish or change the relationship.
         6. For relative time mentions directly related to the relationship, calculate the actual datetime based on the reference timestamp.
         7. If only a date is mentioned without a specific time, use 00:00:00 (midnight) for that date.
-        8. If only …
+        8. If only year is mentioned, use January 1st of that year at 00:00:00.
         9. Always include the time zone offset (use Z for UTC if no specific time zone is mentioned).
         Respond with a JSON object:
         {{
```
|