PyPI - graphiti-core - Versions diffs - 0.2.3__tar.gz → 0.3.0__tar.gz - Mend

graphiti-core: graphiti_core-0.2.3.tar.gz → graphiti_core-0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graphiti-core might be problematic. Click here for more details.

Files changed (42)

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphiti-core
-Version: 0.2.3
+Version: 0.3.0
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: diskcache (>=5.6.3,<6.0.0)
 Requires-Dist: neo4j (>=5.23.0,<6.0.0)
-Requires-Dist: numpy (>=2.1.1,<3.0.0)
+Requires-Dist: numpy (>=1.0.0)
 Requires-Dist: openai (>=1.38.0,<2.0.0)
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
 Requires-Dist: tenacity (<9.0.0)
@@ -170,6 +170,12 @@ await graphiti.search('Who was the California Attorney General?', center_node_uu
 graphiti.close()
 ```
+## Graph Service
+The `server` directory contains an API service for interacting with the Graphiti API. It is built using FastAPI.
+Please see the [server README](./server/README.md) for more information.
 ## Documentation
 - [Guides and API documentation](https://help.getzep.com/graphiti).

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/README.md RENAMED Viewed

@@ -149,6 +149,12 @@ await graphiti.search('Who was the California Attorney General?', center_node_uu
 graphiti.close()
 ```
+## Graph Service
+The `server` directory contains an API service for interacting with the Graphiti API. It is built using FastAPI.
+Please see the [server README](./server/README.md) for more information.
 ## Documentation
 - [Guides and API documentation](https://help.getzep.com/graphiti).

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/edges.py RENAMED Viewed

@@ -24,6 +24,7 @@ from uuid import uuid4
 from neo4j import AsyncDriver
 from pydantic import BaseModel, Field
+from graphiti_core.errors import EdgeNotFoundError
 from graphiti_core.helpers import parse_db_date
 from graphiti_core.llm_client.config import EMBEDDING_DIM
 from graphiti_core.nodes import Node
@@ -41,8 +42,18 @@ class Edge(BaseModel, ABC):
     @abstractmethod
     async def save(self, driver: AsyncDriver): ...
-    @abstractmethod
-    async def delete(self, driver: AsyncDriver): ...
+    async def delete(self, driver: AsyncDriver):
+        result = await driver.execute_query(
+            """
+        MATCH (n)-[e {uuid: $uuid}]->(m)
+        DELETE e
+        """,
+            uuid=self.uuid,
+        )
+        logger.info(f'Deleted Edge: {self.uuid}')
+        return result
     def __hash__(self):
         return hash(self.uuid)
@@ -76,19 +87,6 @@ class EpisodicEdge(Edge):
         return result
-    async def delete(self, driver: AsyncDriver):
-        result = await driver.execute_query(
-            """
-        MATCH (n:Episodic)-[e:MENTIONS {uuid: $uuid}]->(m:Entity)
-        DELETE e
-        """,
-            uuid=self.uuid,
-        )
-        logger.info(f'Deleted Edge: {self.uuid}')
-        return result
     @classmethod
     async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
         records, _, _ = await driver.execute_query(
@@ -107,7 +105,8 @@ class EpisodicEdge(Edge):
         edges = [get_episodic_edge_from_record(record) for record in records]
         logger.info(f'Found Edge: {uuid}')
+        if len(edges) == 0:
+            raise EdgeNotFoundError(uuid)
         return edges[0]
@@ -169,19 +168,6 @@ class EntityEdge(Edge):
         return result
-    async def delete(self, driver: AsyncDriver):
-        result = await driver.execute_query(
-            """
-        MATCH (n:Entity)-[e:RELATES_TO {uuid: $uuid}]->(m:Entity)
-        DELETE e
-        """,
-            uuid=self.uuid,
-        )
-        logger.info(f'Deleted Edge: {self.uuid}')
-        return result
     @classmethod
     async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
         records, _, _ = await driver.execute_query(
@@ -206,6 +192,49 @@ class EntityEdge(Edge):
         edges = [get_entity_edge_from_record(record) for record in records]
+        logger.info(f'Found Edge: {uuid}')
+        if len(edges) == 0:
+            raise EdgeNotFoundError(uuid)
+        return edges[0]
+class CommunityEdge(Edge):
+    async def save(self, driver: AsyncDriver):
+        result = await driver.execute_query(
+            """
+        MATCH (community:Community {uuid: $community_uuid})
+        MATCH (node:Entity | Community {uuid: $entity_uuid})
+        MERGE (community)-[r:HAS_MEMBER {uuid: $uuid}]->(node)
+        SET r = {uuid: $uuid, group_id: $group_id, created_at: $created_at}
+        RETURN r.uuid AS uuid""",
+            community_uuid=self.source_node_uuid,
+            entity_uuid=self.target_node_uuid,
+            uuid=self.uuid,
+            group_id=self.group_id,
+            created_at=self.created_at,
+        )
+        logger.info(f'Saved edge to neo4j: {self.uuid}')
+        return result
+    @classmethod
+    async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
+        records, _, _ = await driver.execute_query(
+            """
+        MATCH (n:Community)-[e:HAS_MEMBER {uuid: $uuid}]->(m:Entity | Community)
+        RETURN
+            e.uuid As uuid,
+            e.group_id AS group_id,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            e.created_at AS created_at
+        """,
+            uuid=uuid,
+        )
+        edges = [get_community_edge_from_record(record) for record in records]
         logger.info(f'Found Edge: {uuid}')
         return edges[0]
@@ -237,3 +266,13 @@ def get_entity_edge_from_record(record: Any) -> EntityEdge:
         valid_at=parse_db_date(record['valid_at']),
         invalid_at=parse_db_date(record['invalid_at']),
     )
+def get_community_edge_from_record(record: Any):
+    return CommunityEdge(
+        uuid=record['uuid'],
+        group_id=record['group_id'],
+        source_node_uuid=record['source_node_uuid'],
+        target_node_uuid=record['target_node_uuid'],
+        created_at=record['created_at'].to_native(),
+    )

graphiti_core-0.3.0/graphiti_core/errors.py ADDED Viewed

@@ -0,0 +1,18 @@
+class GraphitiError(Exception):
+    """Base exception class for Graphiti Core."""
+class EdgeNotFoundError(GraphitiError):
+    """Raised when an edge is not found."""
+    def __init__(self, uuid: str):
+        self.message = f'edge {uuid} not found'
+        super().__init__(self.message)
+class NodeNotFoundError(GraphitiError):
+    """Raised when a node is not found."""
+    def __init__(self, uuid: str):
+        self.message = f'node {uuid} not found'
+        super().__init__(self.message)

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/graphiti.py RENAMED Viewed

@@ -46,6 +46,10 @@ from graphiti_core.utils.bulk_utils import (
     resolve_edge_pointers,
     retrieve_previous_episodes_bulk,
 )
+from graphiti_core.utils.maintenance.community_operations import (
+    build_communities,
+    remove_communities,
+)
 from graphiti_core.utils.maintenance.edge_operations import (
     extract_edges,
     resolve_extracted_edges,
@@ -412,7 +416,7 @@ class Graphiti:
         except Exception as e:
             raise e
-    async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str | None):
+    async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str | None = None):
         """
         Process multiple episodes in bulk and update the graph.
@@ -526,6 +530,19 @@ class Graphiti:
         except Exception as e:
             raise e
+    async def build_communities(self):
+        embedder = self.llm_client.get_embedder()
+        # Clear existing communities
+        await remove_communities(self.driver)
+        community_nodes, community_edges = await build_communities(self.driver, self.llm_client)
+        await asyncio.gather(*[node.generate_name_embedding(embedder) for node in community_nodes])
+        await asyncio.gather(*[node.save(self.driver) for node in community_nodes])
+        await asyncio.gather(*[edge.save(self.driver) for edge in community_edges])
     async def search(
         self,
         query: str,

graphiti_core-0.3.0/graphiti_core/llm_client/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .client import LLMClient
+from .config import LLMConfig
+from .errors import RateLimitError
+from .openai_client import OpenAIClient
+__all__ = ['LLMClient', 'OpenAIClient', 'LLMConfig', 'RateLimitError']

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/llm_client/anthropic_client.py RENAMED Viewed

@@ -18,12 +18,14 @@ import json
 import logging
 import typing
+import anthropic
 from anthropic import AsyncAnthropic
 from openai import AsyncOpenAI
 from ..prompts.models import Message
 from .client import LLMClient
 from .config import LLMConfig
+from .errors import RateLimitError
 logger = logging.getLogger(__name__)
@@ -35,7 +37,11 @@ class AnthropicClient(LLMClient):
         if config is None:
             config = LLMConfig()
         super().__init__(config, cache)
-        self.client = AsyncAnthropic(api_key=config.api_key)
+        self.client = AsyncAnthropic(
+            api_key=config.api_key,
+            # we'll use tenacity to retry
+            max_retries=1,
+        )
     def get_embedder(self) -> typing.Any:
         openai_client = AsyncOpenAI()
@@ -58,6 +64,8 @@ class AnthropicClient(LLMClient):
             )
             return json.loads('{' + result.content[0].text)  # type: ignore
+        except anthropic.RateLimitError as e:
+            raise RateLimitError from e
         except Exception as e:
             logger.error(f'Error in generating LLM response: {e}')
             raise

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/llm_client/client.py RENAMED Viewed

@@ -22,10 +22,11 @@ from abc import ABC, abstractmethod
 import httpx
 from diskcache import Cache
-from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
+from tenacity import retry, retry_if_exception, stop_after_attempt, wait_random_exponential
 from ..prompts.models import Message
 from .config import LLMConfig
+from .errors import RateLimitError
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
@@ -33,7 +34,10 @@ DEFAULT_CACHE_DIR = './llm_cache'
 logger = logging.getLogger(__name__)
-def is_server_error(exception):
+def is_server_or_retry_error(exception):
+    if isinstance(exception, RateLimitError):
+        return True
     return (
         isinstance(exception, httpx.HTTPStatusError) and 500 <= exception.response.status_code < 600
     )
@@ -56,18 +60,21 @@ class LLMClient(ABC):
         pass
     @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_exponential(multiplier=1, min=4, max=10),
-        retry=retry_if_exception(is_server_error),
+        stop=stop_after_attempt(4),
+        wait=wait_random_exponential(multiplier=10, min=5, max=120),
+        retry=retry_if_exception(is_server_or_retry_error),
+        after=lambda retry_state: logger.warning(
+            f'Retrying {retry_state.fn.__name__ if retry_state.fn else "function"} after {retry_state.attempt_number} attempts...'
+        )
+        if retry_state.attempt_number > 1
+        else None,
+        reraise=True,
     )
     async def _generate_response_with_retry(self, messages: list[Message]) -> dict[str, typing.Any]:
         try:
             return await self._generate_response(messages)
-        except httpx.HTTPStatusError as e:
-            if not is_server_error(e):
-                raise Exception(f'LLM request error: {e}') from e
-            else:
-                raise
+        except (httpx.HTTPStatusError, RateLimitError) as e:
+            raise e
     @abstractmethod
     async def _generate_response(self, messages: list[Message]) -> dict[str, typing.Any]:

graphiti_core-0.3.0/graphiti_core/llm_client/errors.py ADDED Viewed

@@ -0,0 +1,6 @@
+class RateLimitError(Exception):
+    """Exception raised when the rate limit is exceeded."""
+    def __init__(self, message='Rate limit exceeded. Please try again later.'):
+        self.message = message
+        super().__init__(self.message)

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/llm_client/groq_client.py RENAMED Viewed

@@ -18,6 +18,7 @@ import json
 import logging
 import typing
+import groq
 from groq import AsyncGroq
 from groq.types.chat import ChatCompletionMessageParam
 from openai import AsyncOpenAI
@@ -25,6 +26,7 @@ from openai import AsyncOpenAI
 from ..prompts.models import Message
 from .client import LLMClient
 from .config import LLMConfig
+from .errors import RateLimitError
 logger = logging.getLogger(__name__)
@@ -59,6 +61,8 @@ class GroqClient(LLMClient):
             )
             result = response.choices[0].message.content or ''
             return json.loads(result)
+        except groq.RateLimitError as e:
+            raise RateLimitError from e
         except Exception as e:
             logger.error(f'Error in generating LLM response: {e}')
             raise

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/llm_client/openai_client.py RENAMED Viewed

@@ -18,12 +18,14 @@ import json
 import logging
 import typing
+import openai
 from openai import AsyncOpenAI
 from openai.types.chat import ChatCompletionMessageParam
 from ..prompts.models import Message
 from .client import LLMClient
 from .config import LLMConfig
+from .errors import RateLimitError
 logger = logging.getLogger(__name__)
@@ -59,6 +61,8 @@ class OpenAIClient(LLMClient):
             )
             result = response.choices[0].message.content or ''
             return json.loads(result)
+        except openai.RateLimitError as e:
+            raise RateLimitError from e
         except Exception as e:
             logger.error(f'Error in generating LLM response: {e}')
             raise

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/nodes.py RENAMED Viewed

@@ -25,6 +25,7 @@ from uuid import uuid4
 from neo4j import AsyncDriver
 from pydantic import BaseModel, Field
+from graphiti_core.errors import NodeNotFoundError
 from graphiti_core.llm_client.config import EMBEDDING_DIM
 logger = logging.getLogger(__name__)
@@ -76,8 +77,18 @@ class Node(BaseModel, ABC):
     @abstractmethod
     async def save(self, driver: AsyncDriver): ...
-    @abstractmethod
-    async def delete(self, driver: AsyncDriver): ...
+    async def delete(self, driver: AsyncDriver):
+        result = await driver.execute_query(
+            """
+        MATCH (n {uuid: $uuid})
+        DETACH DELETE n
+        """,
+            uuid=self.uuid,
+        )
+        logger.info(f'Deleted Node: {self.uuid}')
+        return result
     def __hash__(self):
         return hash(self.uuid)
@@ -90,6 +101,9 @@ class Node(BaseModel, ABC):
     @classmethod
     async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): ...
+    @classmethod
+    async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]): ...
 class EpisodicNode(Node):
     source: EpisodeType = Field(description='source type')
@@ -125,24 +139,37 @@ class EpisodicNode(Node):
         return result
-    async def delete(self, driver: AsyncDriver):
-        result = await driver.execute_query(
+    @classmethod
+    async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
+        records, _, _ = await driver.execute_query(
             """
-        MATCH (n:Episodic {uuid: $uuid})
-        DETACH DELETE n
+        MATCH (e:Episodic {uuid: $uuid})
+            RETURN e.content AS content,
+            e.created_at AS created_at,
+            e.valid_at AS valid_at,
+            e.uuid AS uuid,
+            e.name AS name,
+            e.group_id AS group_id,
+            e.source_description AS source_description,
+            e.source AS source
         """,
-            uuid=self.uuid,
+            uuid=uuid,
         )
-        logger.info(f'Deleted Node: {self.uuid}')
+        episodes = [get_episodic_node_from_record(record) for record in records]
-        return result
+        logger.info(f'Found Node: {uuid}')
+        if len(episodes) == 0:
+            raise NodeNotFoundError(uuid)
+        return episodes[0]
     @classmethod
-    async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
+    async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
         records, _, _ = await driver.execute_query(
             """
-        MATCH (e:Episodic {uuid: $uuid})
+        MATCH (e:Episodic) WHERE e.uuid IN $uuids
             RETURN e.content AS content,
             e.created_at AS created_at,
             e.valid_at AS valid_at,
@@ -152,14 +179,14 @@ class EpisodicNode(Node):
             e.source_description AS source_description,
             e.source AS source
         """,
-            uuid=uuid,
+            uuids=uuids,
         )
         episodes = [get_episodic_node_from_record(record) for record in records]
-        logger.info(f'Found Node: {uuid}')
+        logger.info(f'Found Nodes: {uuids}')
-        return episodes[0]
+        return episodes
 class EntityNode(Node):
@@ -194,24 +221,88 @@ class EntityNode(Node):
         return result
-    async def delete(self, driver: AsyncDriver):
-        result = await driver.execute_query(
+    @classmethod
+    async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
+        records, _, _ = await driver.execute_query(
             """
         MATCH (n:Entity {uuid: $uuid})
-        DETACH DELETE n
+        RETURN
+            n.uuid As uuid,
+            n.name AS name,
+            n.name_embedding AS name_embedding,
+            n.group_id AS group_id
+            n.created_at AS created_at,
+            n.summary AS summary
+        """,
+            uuid=uuid,
+        )
+        nodes = [get_entity_node_from_record(record) for record in records]
+        logger.info(f'Found Node: {uuid}')
+        return nodes[0]
+    @classmethod
+    async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
+        records, _, _ = await driver.execute_query(
+            """
+        MATCH (n:Entity) WHERE n.uuid IN $uuids
+        RETURN
+            n.uuid As uuid,
+            n.name AS name,
+            n.name_embedding AS name_embedding,
+            n.group_id AS group_id,
+            n.created_at AS created_at,
+            n.summary AS summary
         """,
+            uuids=uuids,
+        )
+        nodes = [get_entity_node_from_record(record) for record in records]
+        logger.info(f'Found Nodes: {uuids}')
+        return nodes
+class CommunityNode(Node):
+    name_embedding: list[float] | None = Field(default=None, description='embedding of the name')
+    summary: str = Field(description='region summary of member nodes', default_factory=str)
+    async def save(self, driver: AsyncDriver):
+        result = await driver.execute_query(
+            """
+        MERGE (n:Community {uuid: $uuid})
+        SET n = {uuid: $uuid, name: $name, name_embedding: $name_embedding, group_id: $group_id, summary: $summary, created_at: $created_at}
+        RETURN n.uuid AS uuid""",
             uuid=self.uuid,
+            name=self.name,
+            group_id=self.group_id,
+            summary=self.summary,
+            name_embedding=self.name_embedding,
+            created_at=self.created_at,
         )
-        logger.info(f'Deleted Node: {self.uuid}')
+        logger.info(f'Saved Node to neo4j: {self.uuid}')
         return result
+    async def generate_name_embedding(self, embedder, model='text-embedding-3-small'):
+        start = time()
+        text = self.name.replace('\n', ' ')
+        embedding = (await embedder.create(input=[text], model=model)).data[0].embedding
+        self.name_embedding = embedding[:EMBEDDING_DIM]
+        end = time()
+        logger.info(f'embedded {text} in {end - start} ms')
+        return embedding
     @classmethod
     async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
         records, _, _ = await driver.execute_query(
             """
-        MATCH (n:Entity {uuid: $uuid})
+        MATCH (n:Community {uuid: $uuid})
         RETURN
             n.uuid As uuid,
             n.name AS name,
@@ -223,12 +314,34 @@ class EntityNode(Node):
             uuid=uuid,
         )
-        nodes = [get_entity_node_from_record(record) for record in records]
+        nodes = [get_community_node_from_record(record) for record in records]
         logger.info(f'Found Node: {uuid}')
         return nodes[0]
+    @classmethod
+    async def get_by_uuids(cls, driver: AsyncDriver, uuids: list[str]):
+        records, _, _ = await driver.execute_query(
+            """
+        MATCH (n:Community) WHERE n.uuid IN $uuids
+        RETURN
+            n.uuid As uuid,
+            n.name AS name,
+            n.name_embedding AS name_embedding,
+            n.group_id AS group_id
+            n.created_at AS created_at,
+            n.summary AS summary
+        """,
+            uuids=uuids,
+        )
+        nodes = [get_community_node_from_record(record) for record in records]
+        logger.info(f'Found Nodes: {uuids}')
+        return nodes
 # Node helpers
 def get_episodic_node_from_record(record: Any) -> EpisodicNode:
@@ -254,3 +367,14 @@ def get_entity_node_from_record(record: Any) -> EntityNode:
         created_at=record['created_at'].to_native(),
         summary=record['summary'],
     )
+def get_community_node_from_record(record: Any) -> CommunityNode:
+    return CommunityNode(
+        uuid=record['uuid'],
+        name=record['name'],
+        group_id=record['group_id'],
+        name_embedding=record['name_embedding'],
+        created_at=record['created_at'].to_native(),
+        summary=record['summary'],
+    )

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/prompts/extract_nodes.py RENAMED Viewed

@@ -24,12 +24,14 @@ class Prompt(Protocol):
     v1: PromptVersion
     v2: PromptVersion
     extract_json: PromptVersion
+    extract_text: PromptVersion
 class Versions(TypedDict):
     v1: PromptFunction
     v2: PromptFunction
     extract_json: PromptFunction
+    extract_text: PromptFunction
 def v1(context: dict[str, Any]) -> list[Message]:
@@ -144,4 +146,44 @@ Respond with a JSON object in the following format:
     ]
-versions: Versions = {'v1': v1, 'v2': v2, 'extract_json': extract_json}
+def extract_text(context: dict[str, Any]) -> list[Message]:
+    sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
+    user_prompt = f"""
+Given the following conversation, extract entity nodes from the CURRENT MESSAGE that are explicitly or implicitly mentioned:
+Conversation:
+{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
+<CURRENT MESSAGE>
+{context["episode_content"]}
+Guidelines:
+2. Extract significant entities, concepts, or actors mentioned in the conversation.
+3. Provide concise but informative summaries for each extracted node.
+4. Avoid creating nodes for relationships or actions.
+5. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
+6. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
+Respond with a JSON object in the following format:
+{{
+    "extracted_nodes": [
+        {{
+            "name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
+            "labels": ["Entity", "OptionalAdditionalLabel"],
+            "summary": "Brief summary of the node's role or significance"
+        }}
+    ]
+}}
+"""
+    return [
+        Message(role='system', content=sys_prompt),
+        Message(role='user', content=user_prompt),
+    ]
+versions: Versions = {
+    'v1': v1,
+    'v2': v2,
+    'extract_json': extract_json,
+    'extract_text': extract_text,
+}

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/prompts/lib.py RENAMED Viewed

@@ -71,6 +71,9 @@ from .invalidate_edges import (
     versions as invalidate_edges_versions,
 )
 from .models import Message, PromptFunction
+from .summarize_nodes import Prompt as SummarizeNodesPrompt
+from .summarize_nodes import Versions as SummarizeNodesVersions
+from .summarize_nodes import versions as summarize_nodes_versions
 class PromptLibrary(Protocol):
@@ -80,6 +83,7 @@ class PromptLibrary(Protocol):
     dedupe_edges: DedupeEdgesPrompt
     invalidate_edges: InvalidateEdgesPrompt
     extract_edge_dates: ExtractEdgeDatesPrompt
+    summarize_nodes: SummarizeNodesPrompt
 class PromptLibraryImpl(TypedDict):
@@ -89,6 +93,7 @@ class PromptLibraryImpl(TypedDict):
     dedupe_edges: DedupeEdgesVersions
     invalidate_edges: InvalidateEdgesVersions
     extract_edge_dates: ExtractEdgeDatesVersions
+    summarize_nodes: SummarizeNodesVersions
 class VersionWrapper:
@@ -118,5 +123,6 @@ PROMPT_LIBRARY_IMPL: PromptLibraryImpl = {
     'dedupe_edges': dedupe_edges_versions,
     'invalidate_edges': invalidate_edges_versions,
     'extract_edge_dates': extract_edge_dates_versions,
+    'summarize_nodes': summarize_nodes_versions,
 }
 prompt_library: PromptLibrary = PromptLibraryWrapper(PROMPT_LIBRARY_IMPL)  # type: ignore[assignment]

graphiti_core-0.3.0/graphiti_core/prompts/summarize_nodes.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""
+Copyright 2024, Zep Software, Inc.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import json
+from typing import Any, Protocol, TypedDict
+from .models import Message, PromptFunction, PromptVersion
+class Prompt(Protocol):
+    summarize_pair: PromptVersion
+    summary_description: PromptVersion
+class Versions(TypedDict):
+    summarize_pair: PromptFunction
+    summary_description: PromptFunction
+def summarize_pair(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that combines summaries.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Synthesize the information from the following two summaries into a single succinct summary.
+        Summaries:
+        {json.dumps(context['node_summaries'], indent=2)}
+        Respond with a JSON object in the following format:
+            {{
+                "summary": "Summary containing the important information from both summaries"
+            }}
+        """,
+        ),
+    ]
+def summary_description(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that describes provided contents in a single sentence.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Create a short one sentence description of the summary that explains what kind of information is summarized.
+        Summary:
+        {json.dumps(context['summary'], indent=2)}
+        Respond with a JSON object in the following format:
+            {{
+                "description": "One sentence description of the provided summary"
+            }}
+        """,
+        ),
+    ]
+versions: Versions = {'summarize_pair': summarize_pair, 'summary_description': summary_description}

graphiti_core-0.3.0/graphiti_core/py.typed ADDED Viewed

@@ -0,0 +1 @@
+# This file is intentionally left empty to indicate that the package is typed.

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/search/search_utils.py RENAMED Viewed

@@ -496,34 +496,39 @@ async def node_distance_reranker(
     sorted_uuids = rrf(results)
     scores: dict[str, float] = {}
-    for uuid in sorted_uuids:
-        # Find the shortest path to center node
-        records, _, _ = await driver.execute_query(
-            """
+    # Find the shortest path to center node
+    query = Query("""
         MATCH (source:Entity)-[r:RELATES_TO {uuid: $edge_uuid}]->(target:Entity)
-        MATCH p = SHORTEST 1 (center:Entity)-[:RELATES_TO*1..10]->(n:Entity)
-        WHERE center.uuid = $center_uuid AND n.uuid IN [source.uuid, target.uuid]
-        RETURN min(length(p)) AS score, source.uuid AS source_uuid, target.uuid AS target_uuid
-        """,
-            edge_uuid=uuid,
-            center_uuid=center_node_uuid,
-        )
-        distance = 0.01
+        MATCH p = SHORTEST 1 (center:Entity {uuid: $center_uuid})-[:RELATES_TO]-+(n:Entity {uuid: source.uuid})
+        RETURN length(p) AS score, source.uuid AS source_uuid, target.uuid AS target_uuid
+        """)
-        for record in records:
-            if (
-                record['source_uuid'] == center_node_uuid
-                or record['target_uuid'] == center_node_uuid
-            ):
-                continue
-            distance = record['score']
+    path_results = await asyncio.gather(
+        *[
+            driver.execute_query(
+                query,
+                edge_uuid=uuid,
+                center_uuid=center_node_uuid,
+            )
+            for uuid in sorted_uuids
+        ]
+    )
+    for uuid, result in zip(sorted_uuids, path_results):
+        records = result[0]
+        record = records[0] if len(records) > 0 else None
+        distance: float = record['score'] if record is not None else float('inf')
+        if record is not None and (
+            record['source_uuid'] == center_node_uuid or record['target_uuid'] == center_node_uuid
+        ):
+            distance = 0
         if uuid in scores:
-            scores[uuid] = min(1 / distance, scores[uuid])
+            scores[uuid] = min(distance, scores[uuid])
         else:
-            scores[uuid] = 1 / distance
+            scores[uuid] = distance
     # rerank on shortest distance
-    sorted_uuids.sort(reverse=True, key=lambda cur_uuid: scores[cur_uuid])
+    sorted_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])
     return sorted_uuids

graphiti_core-0.3.0/graphiti_core/utils/maintenance/community_operations.py ADDED Viewed

@@ -0,0 +1,155 @@
+import asyncio
+import logging
+from collections import defaultdict
+from datetime import datetime
+from neo4j import AsyncDriver
+from graphiti_core.edges import CommunityEdge
+from graphiti_core.llm_client import LLMClient
+from graphiti_core.nodes import CommunityNode, EntityNode
+from graphiti_core.prompts import prompt_library
+from graphiti_core.utils.maintenance.edge_operations import build_community_edges
+logger = logging.getLogger(__name__)
+async def build_community_projection(driver: AsyncDriver) -> str:
+    """Create the GDS graph projection used for community detection.
+    Projects `Entity` nodes and `RELATES_TO` relationships (undirected, with a
+    `weight` property aggregated as a COUNT) into a catalog graph named
+    "communities".
+    Args:
+        driver: Neo4j async driver used to run the projection query.
+    Returns:
+        The graph name yielded by `gds.graph.project` for the first record.
+    """
+    projection_query = """
+    CALL gds.graph.project("communities", "Entity",
+        {RELATES_TO: {
+            type: "RELATES_TO",
+            orientation: "UNDIRECTED",
+            properties: {weight: {property: "*", aggregation: "COUNT"}}
+        }}
+    )
+    YIELD graphName AS graph, nodeProjection AS nodes, relationshipProjection AS edges
+    """
+    rows, _, _ = await driver.execute_query(projection_query)
+    first_row = rows[0]
+    return first_row['graph']
+async def destroy_projection(driver: AsyncDriver, projection_name: str):
+    """Drop a GDS graph projection from the catalog.
+    Args:
+        driver: Neo4j async driver used to run the drop query.
+        projection_name: Name of the projection passed to `gds.graph.drop`.
+    """
+    drop_query = """
+    CALL gds.graph.drop($projection_name)
+    """
+    await driver.execute_query(drop_query, projection_name=projection_name)
+async def get_community_clusters(
+    driver: AsyncDriver, projection_name: str
+) -> list[list[EntityNode]]:
+    """Run Leiden community detection on a projection and load each cluster's nodes.
+    Args:
+        driver: Neo4j async driver used to run the query and load nodes.
+        projection_name: Name of the GDS graph projection to cluster.
+    Returns:
+        One list of `EntityNode` objects per detected community, in the order
+        the community ids were first seen in the query results.
+    """
+    # Pass the projection name as a query parameter instead of hard-coding
+    # "communities", so the `projection_name` argument is actually honoured.
+    records, _, _ = await driver.execute_query(
+        """
+    CALL gds.leiden.stream($projection_name)
+    YIELD nodeId, communityId
+    RETURN gds.util.asNode(nodeId).uuid AS entity_uuid, communityId
+    """,
+        projection_name=projection_name,
+    )
+    # Group entity uuids by the community id assigned by Leiden.
+    community_map: dict[int, list[str]] = defaultdict(list)
+    for record in records:
+        community_map[record['communityId']].append(record['entity_uuid'])
+    # Fetch the nodes of every community concurrently.
+    community_clusters: list[list[EntityNode]] = list(
+        await asyncio.gather(
+            *[EntityNode.get_by_uuids(driver, cluster) for cluster in community_map.values()]
+        )
+    )
+    return community_clusters
+async def summarize_pair(llm_client: LLMClient, summary_pair: tuple[str, str]) -> str:
+    """Ask the LLM to merge two node summaries into a single summary.
+    Args:
+        llm_client: Client used to generate the response.
+        summary_pair: The two summaries to combine.
+    Returns:
+        The `summary` field of the LLM response, or '' when it is absent.
+    """
+    # Each summary is wrapped in its own dict for the prompt context.
+    node_summaries = [{'summary': text} for text in summary_pair]
+    prompt = prompt_library.summarize_nodes.summarize_pair({'node_summaries': node_summaries})
+    response = await llm_client.generate_response(prompt)
+    return response.get('summary', '')
+async def generate_summary_description(llm_client: LLMClient, summary: str) -> str:
+    """Ask the LLM for a description of the given summary.
+    Args:
+        llm_client: Client used to generate the response.
+        summary: The summary text to describe.
+    Returns:
+        The `description` field of the LLM response, or '' when it is absent.
+    """
+    prompt = prompt_library.summarize_nodes.summary_description({'summary': summary})
+    response = await llm_client.generate_response(prompt)
+    return response.get('description', '')
+async def build_community(
+    llm_client: LLMClient, community_cluster: list[EntityNode]
+) -> tuple[CommunityNode, list[CommunityEdge]]:
+    """Build a community node, and its member edges, for one cluster of entities.
+    The entity summaries are reduced round by round: each round pairs the first
+    half of the summaries with the second half and merges every pair through
+    the LLM, until a single summary remains. That summary is then described by
+    the LLM to produce the community's name.
+    Args:
+        llm_client: Client used for pair summarization and naming.
+        community_cluster: Entities belonging to the community; the first
+            entity's `group_id` is used for the community node.
+    Returns:
+        The new `CommunityNode` and the edges linking it to each cluster member.
+    """
+    summaries = [entity.summary for entity in community_cluster]
+    while len(summaries) > 1:
+        # With an odd count, hold the last summary back and carry it into the
+        # next round unmerged.
+        leftover: str | None = None
+        if len(summaries) % 2 == 1:
+            leftover = summaries.pop()
+        half = len(summaries) // 2
+        pair_tasks = [
+            summarize_pair(llm_client, (str(left), str(right)))
+            for left, right in zip(summaries[:half], summaries[half:])
+        ]
+        merged: list[str] = list(await asyncio.gather(*pair_tasks))
+        if leftover is not None:
+            merged.append(leftover)
+        summaries = merged
+    summary = summaries[0]
+    name = await generate_summary_description(llm_client, summary)
+    # One timestamp shared by the node and all of its edges.
+    now = datetime.now()
+    community_node = CommunityNode(
+        name=name,
+        group_id=community_cluster[0].group_id,
+        labels=['Community'],
+        created_at=now,
+        summary=summary,
+    )
+    community_edges = build_community_edges(community_cluster, community_node, now)
+    logger.info((community_node, community_edges))
+    return community_node, community_edges
+async def build_communities(
+    driver: AsyncDriver, llm_client: LLMClient
+) -> tuple[list[CommunityNode], list[CommunityEdge]]:
+    """Detect communities in the entity graph and build their nodes and edges.
+    Creates a graph projection, clusters it, builds one community per cluster
+    concurrently, and drops the projection again.
+    Args:
+        driver: Neo4j async driver used for projection and clustering queries.
+        llm_client: Client used to summarize and name each community.
+    Returns:
+        All community nodes, and the flattened list of all community edges.
+    """
+    projection = await build_community_projection(driver)
+    try:
+        community_clusters = await get_community_clusters(driver, projection)
+        communities: list[tuple[CommunityNode, list[CommunityEdge]]] = list(
+            await asyncio.gather(
+                *[build_community(llm_client, cluster) for cluster in community_clusters]
+            )
+        )
+    finally:
+        # Always drop the projection, even when clustering or summarization
+        # fails; otherwise it is left behind in the graph catalog.
+        await destroy_projection(driver, projection)
+    community_nodes: list[CommunityNode] = []
+    community_edges: list[CommunityEdge] = []
+    for community_node, member_edges in communities:
+        community_nodes.append(community_node)
+        community_edges.extend(member_edges)
+    return community_nodes, community_edges
+async def remove_communities(driver: AsyncDriver):
+    """Delete every `Community` node together with its relationships.
+    Args:
+        driver: Neo4j async driver used to run the delete query.
+    """
+    delete_query = """
+    MATCH (c:Community)
+    DETACH DELETE c
+    """
+    await driver.execute_query(delete_query)

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/utils/maintenance/edge_operations.py RENAMED Viewed

@@ -20,9 +20,9 @@ from datetime import datetime
 from time import time
 from typing import List
-from graphiti_core.edges import EntityEdge, EpisodicEdge
+from graphiti_core.edges import CommunityEdge, EntityEdge, EpisodicEdge
 from graphiti_core.llm_client import LLMClient
-from graphiti_core.nodes import EntityNode, EpisodicNode
+from graphiti_core.nodes import CommunityNode, EntityNode, EpisodicNode
 from graphiti_core.prompts import prompt_library
 from graphiti_core.utils.maintenance.temporal_operations import (
     extract_edge_dates,
@@ -50,6 +50,24 @@ def build_episodic_edges(
     return edges
+def build_community_edges(
+    entity_nodes: List[EntityNode],
+    community_node: CommunityNode,
+    created_at: datetime,
+) -> List[CommunityEdge]:
+    """Create one edge from the community node to each of the given entities.
+    Args:
+        entity_nodes: Entities that belong to the community.
+        community_node: Community used as the source of every edge; its
+            `group_id` is copied onto each edge.
+        created_at: Creation timestamp stored on every edge.
+    Returns:
+        The edges, in the same order as `entity_nodes`.
+    """
+    edges: List[CommunityEdge] = []
+    for member in entity_nodes:
+        edges.append(
+            CommunityEdge(
+                source_node_uuid=community_node.uuid,
+                target_node_uuid=member.uuid,
+                created_at=created_at,
+                group_id=community_node.group_id,
+            )
+        )
+    return edges
 async def extract_edges(
     llm_client: LLMClient,
     episode: EpisodicNode,

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/utils/maintenance/graph_data_operations.py RENAMED Viewed

@@ -32,8 +32,10 @@ async def build_indices_and_constraints(driver: AsyncDriver):
     range_indices: list[LiteralString] = [
         'CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)',
         'CREATE INDEX episode_uuid IF NOT EXISTS FOR (n:Episodic) ON (n.uuid)',
+        'CREATE INDEX community_uuid IF NOT EXISTS FOR (n:Community) ON (n.uuid)',
         'CREATE INDEX relation_uuid IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.uuid)',
         'CREATE INDEX mention_uuid IF NOT EXISTS FOR ()-[e:MENTIONS]-() ON (e.uuid)',
+        'CREATE INDEX has_member_uuid IF NOT EXISTS FOR ()-[e:HAS_MEMBER]-() ON (e.uuid)',
         'CREATE INDEX entity_group_id IF NOT EXISTS FOR (n:Entity) ON (n.group_id)',
         'CREATE INDEX episode_group_id IF NOT EXISTS FOR (n:Episodic) ON (n.group_id)',
         'CREATE INDEX relation_group_id IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.group_id)',
@@ -51,6 +53,7 @@ async def build_indices_and_constraints(driver: AsyncDriver):
     fulltext_indices: list[LiteralString] = [
         'CREATE FULLTEXT INDEX name_and_summary IF NOT EXISTS FOR (n:Entity) ON EACH [n.name, n.summary]',
+        'CREATE FULLTEXT INDEX community_name IF NOT EXISTS FOR (n:Community) ON EACH [n.name]',
         'CREATE FULLTEXT INDEX name_and_fact IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON EACH [e.name, e.fact]',
     ]
@@ -71,6 +74,14 @@ async def build_indices_and_constraints(driver: AsyncDriver):
          `vector.similarity_function`: 'cosine'
         }}
         """,
+        """
+        CREATE VECTOR INDEX community_name_embedding IF NOT EXISTS
+        FOR (n:Community) ON (n.name_embedding)
+        OPTIONS {indexConfig: {
+         `vector.dimensions`: 1024,
+         `vector.similarity_function`: 'cosine'
+        }}
+        """,
     ]
     index_queries: list[LiteralString] = range_indices + fulltext_indices + vector_indices

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/graphiti_core/utils/maintenance/node_operations.py RENAMED Viewed

@@ -48,6 +48,29 @@ async def extract_message_nodes(
     return extracted_node_data
+async def extract_text_nodes(
+    llm_client: LLMClient, episode: EpisodicNode, previous_episodes: list[EpisodicNode]
+) -> list[dict[str, Any]]:
+    """Extract node data from a text episode via the LLM.
+    Args:
+        llm_client: Client used to generate the response.
+        episode: Episode whose content and `valid_at` timestamp are sent to the LLM.
+        previous_episodes: Earlier episodes included in the prompt context.
+    Returns:
+        The `extracted_nodes` field of the LLM response, or [] when it is absent.
+    """
+    # Prepare context for LLM
+    context: dict[str, Any] = {
+        'episode_content': episode.content,
+        'episode_timestamp': episode.valid_at.isoformat(),
+    }
+    context['previous_episodes'] = [
+        {'content': prior.content, 'timestamp': prior.valid_at.isoformat()}
+        for prior in previous_episodes
+    ]
+    prompt = prompt_library.extract_nodes.extract_text(context)
+    response = await llm_client.generate_response(prompt)
+    return response.get('extracted_nodes', [])
 async def extract_json_nodes(
     llm_client: LLMClient,
     episode: EpisodicNode,
@@ -73,8 +96,10 @@ async def extract_nodes(
 ) -> list[EntityNode]:
     start = time()
     extracted_node_data: list[dict[str, Any]] = []
-    if episode.source in [EpisodeType.message, EpisodeType.text]:
+    if episode.source == EpisodeType.message:
         extracted_node_data = await extract_message_nodes(llm_client, episode, previous_episodes)
+    elif episode.source == EpisodeType.text:
+        extracted_node_data = await extract_text_nodes(llm_client, episode, previous_episodes)
     elif episode.source == EpisodeType.json:
         extracted_node_data = await extract_json_nodes(llm_client, episode)

{graphiti_core-0.2.3 → graphiti_core-0.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "graphiti-core"
-version = "0.2.3"
+version = "0.3.0"
 description = "A temporal graph building library"
 authors = [
     "Paul Paliychuk <paul@getzep.com>",
@@ -19,7 +19,7 @@ neo4j = "^5.23.0"
 diskcache = "^5.6.3"
 openai = "^1.38.0"
 tenacity = "<9.0.0"
-numpy = "^2.1.1"
+numpy = ">=1.0.0"
 [tool.poetry.dev-dependencies]
 pytest = "^8.3.2"
@@ -31,7 +31,7 @@ ruff = "^0.6.2"
 [tool.poetry.group.dev.dependencies]
 pydantic = "^2.8.2"
 mypy = "^1.11.1"
-groq = ">=0.9,<0.11"
+groq = ">=0.9,<0.12"
 anthropic = "^0.34.1"
 ipykernel = "^6.29.5"
 jupyterlab = "^4.2.4"

graphiti_core-0.2.3/graphiti_core/llm_client/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-from .client import LLMClient
-from .config import LLMConfig
-from .openai_client import OpenAIClient
-__all__ = ['LLMClient', 'OpenAIClient', 'LLMConfig']