PyPI - graphiti-core - Versions diffs - 0.3.16__tar.gz → 0.3.17__tar.gz - Mend

graphiti-core 0.3.16.tar.gz → 0.3.17.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graphiti-core might be problematic. Click here for more details.

Files changed (57) hide show

{graphiti_core-0.3.16 → graphiti_core-0.3.17}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphiti-core
-Version: 0.3.16
+Version: 0.3.17
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
@@ -17,7 +17,6 @@ Requires-Dist: numpy (>=1.0.0)
 Requires-Dist: openai (>=1.50.2,<2.0.0)
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
 Requires-Dist: tenacity (<9.0.0)
-Requires-Dist: voyageai (>=0.2.3,<0.3.0)
 Description-Content-Type: text/markdown
 <div align="center">

graphiti_core-0.3.17/graphiti_core/cross_encoder/bge_reranker_client.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""
+Copyright 2024, Zep Software, Inc.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import asyncio
+from typing import List, Tuple
+from sentence_transformers import CrossEncoder
+from graphiti_core.cross_encoder.client import CrossEncoderClient
+class BGERerankerClient(CrossEncoderClient):
+    """
+    Cross-encoder reranker that scores (query, passage) pairs with the
+    'BAAI/bge-reranker-v2-m3' sentence-transformers CrossEncoder.
+    """
+    def __init__(self):
+        # Instantiate the CrossEncoder for the named checkpoint.
+        self.model = CrossEncoder('BAAI/bge-reranker-v2-m3')
+    async def rank(self, query: str, passages: List[str]) -> List[Tuple[str, float]]:
+        """
+        Score every passage against the query.
+        Args:
+            query (str): The query string.
+            passages (List[str]): The passages to score.
+        Returns:
+            List[Tuple[str, float]]: (passage, score) pairs ordered from highest to lowest score.
+                An empty passage list yields an empty result without calling the model.
+        """
+        if not passages:
+            return []
+        pairs = [[query, text] for text in passages]
+        # predict() is synchronous, so hand it to the loop's default executor
+        # rather than blocking the event loop.
+        raw_scores = await asyncio.get_running_loop().run_in_executor(
+            None, self.model.predict, pairs
+        )
+        scored = [(text, float(value)) for text, value in zip(passages, raw_scores)]
+        scored.sort(key=lambda item: item[1], reverse=True)
+        return scored

graphiti_core-0.3.17/graphiti_core/cross_encoder/client.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""
+Copyright 2024, Zep Software, Inc.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+class CrossEncoderClient(ABC):
+    """
+    Abstract interface for cross-encoder rerankers.
+    Concrete subclasses score passages against a query, so that different
+    cross-encoder back ends can be swapped without changing the caller.
+    """
+    @abstractmethod
+    async def rank(self, query: str, passages: List[str]) -> List[Tuple[str, float]]:
+        """
+        Order passages by how relevant they are to the query.
+        Args:
+            query (str): The query string.
+            passages (List[str]): The candidate passages.
+        Returns:
+            List[Tuple[str, float]]: (passage, score) pairs, most relevant first.
+        """
+        ...

graphiti_core-0.3.17/graphiti_core/cross_encoder/openai_reranker_client.py ADDED Viewed

@@ -0,0 +1,113 @@
+"""
+Copyright 2024, Zep Software, Inc.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import asyncio
+import logging
+from typing import Any
+import openai
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+from ..llm_client import LLMConfig, RateLimitError
+from ..prompts import Message
+from .client import CrossEncoderClient
+logger = logging.getLogger(__name__)
+DEFAULT_MODEL = 'gpt-4o-mini'
+# Pydantic schema holding a single boolean verdict.
+# NOTE(review): no other line of this module, as shown here, references this class —
+# confirm whether it is still needed.
+class BooleanClassifier(BaseModel):
+    isTrue: bool
+class OpenAIRerankerClient(CrossEncoderClient):
+    """
+    Cross-encoder that asks an OpenAI chat model a True/False relevance question
+    for each passage and derives the score from the logprobs of the one-token answer.
+    """
+    def __init__(self, config: LLMConfig | None = None):
+        """
+        Initialize the OpenAIRerankerClient with the provided configuration.
+        Args:
+            config (LLMConfig | None): The configuration for the LLM client. Only the API key
+                and base URL are read here; a default LLMConfig is used when omitted.
+        """
+        if config is None:
+            config = LLMConfig()
+        self.config = config
+        self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
+    @staticmethod
+    def _relevance_score(top_logprobs: Any) -> float:
+        """
+        Turn the top logprobs of the single answer token into a probability that the answer is "True".
+        Returns exactly one value per response so scores stay aligned with their passages:
+        P("True") when that token is present, 1 - P("False") when only "False" is present,
+        and 0.0 when neither is present (including when no logprobs were returned).
+        """
+        from math import exp
+        probabilities: dict[str, float] = {}
+        for candidate in top_logprobs:
+            # Normalise so that e.g. ' True' and 'true' are treated alike.
+            answer = candidate.token.strip().lower()
+            probabilities.setdefault(answer, exp(candidate.logprob))
+        if 'true' in probabilities:
+            return probabilities['true']
+        if 'false' in probabilities:
+            return 1.0 - probabilities['false']
+        return 0.0
+    async def rank(self, query: str, passages: list[str]) -> list[tuple[str, float]]:
+        """
+        Rank the given passages based on their relevance to the query.
+        Args:
+            query (str): The query string.
+            passages (list[str]): A list of passages to rank.
+        Returns:
+            list[tuple[str, float]]: One (passage, score) tuple per input passage, sorted in
+                descending order of score, where score is the estimated probability of "True".
+        Raises:
+            RateLimitError: If the OpenAI API reports a rate limit error.
+        """
+        if not passages:
+            return []
+        openai_messages_list: Any = [
+            [
+                Message(
+                    role='system',
+                    content='You are an expert tasked with determining whether the passage is relevant to the query',
+                ),
+                Message(
+                    role='user',
+                    content=f"""
+                           Respond with "True" if PASSAGE is relevant to QUERY and "False" otherwise.
+                           <PASSAGE>
+                           {passage}
+                           </PASSAGE>
+                           <QUERY>
+                           {query}
+                           </QUERY>
+                           """,
+                ),
+            ]
+            for passage in passages
+        ]
+        try:
+            # One single-token completion per passage, issued concurrently.
+            # NOTE(review): the logit_bias keys are presumably the token ids of "True"/"False"
+            # for DEFAULT_MODEL's tokenizer — confirm before changing the model.
+            responses = await asyncio.gather(
+                *[
+                    self.client.chat.completions.create(
+                        model=DEFAULT_MODEL,
+                        messages=openai_messages,
+                        temperature=0,
+                        max_tokens=1,
+                        logit_bias={'6432': 1, '7983': 1},
+                        logprobs=True,
+                        top_logprobs=2,
+                    )
+                    for openai_messages in openai_messages_list
+                ]
+            )
+            responses_top_logprobs = [
+                response.choices[0].logprobs.content[0].top_logprobs
+                if response.choices[0].logprobs is not None
+                and response.choices[0].logprobs.content is not None
+                else []
+                for response in responses
+            ]
+            # Exactly one score per response keeps zip() below aligned with passages.
+            scores: list[float] = [
+                self._relevance_score(top_logprobs) for top_logprobs in responses_top_logprobs
+            ]
+            results = [(passage, score) for passage, score in zip(passages, scores)]
+            results.sort(reverse=True, key=lambda x: x[1])
+            return results
+        except openai.RateLimitError as e:
+            raise RateLimitError from e
+        except Exception as e:
+            logger.error(f'Error in generating LLM response: {e}')
+            raise

{graphiti_core-0.3.16 → graphiti_core-0.3.17}/graphiti_core/graphiti.py RENAMED Viewed

@@ -23,8 +23,11 @@ from dotenv import load_dotenv
 from neo4j import AsyncGraphDatabase
 from pydantic import BaseModel
+from graphiti_core.cross_encoder.client import CrossEncoderClient
+from graphiti_core.cross_encoder.openai_reranker_client import OpenAIRerankerClient
 from graphiti_core.edges import EntityEdge, EpisodicEdge
 from graphiti_core.embedder import EmbedderClient, OpenAIEmbedder
+from graphiti_core.helpers import DEFAULT_DATABASE
 from graphiti_core.llm_client import LLMClient, OpenAIClient
 from graphiti_core.nodes import CommunityNode, EntityNode, EpisodeType, EpisodicNode
 from graphiti_core.search.search import SearchConfig, search
@@ -92,6 +95,7 @@ class Graphiti:
         password: str,
         llm_client: LLMClient | None = None,
         embedder: EmbedderClient | None = None,
+        cross_encoder: CrossEncoderClient | None = None,
         store_raw_episode_content: bool = True,
     ):
         """
@@ -131,7 +135,7 @@ class Graphiti:
         Graphiti if you're using the default OpenAIClient.
         """
         self.driver = AsyncGraphDatabase.driver(uri, auth=(user, password))
-        self.database = 'neo4j'
+        self.database = DEFAULT_DATABASE
         self.store_raw_episode_content = store_raw_episode_content
         if llm_client:
             self.llm_client = llm_client
@@ -141,6 +145,10 @@ class Graphiti:
             self.embedder = embedder
         else:
             self.embedder = OpenAIEmbedder()
+        if cross_encoder:
+            self.cross_encoder = cross_encoder
+        else:
+            self.cross_encoder = OpenAIRerankerClient()
     async def close(self):
         """
@@ -648,6 +656,7 @@ class Graphiti:
             await search(
                 self.driver,
                 self.embedder,
+                self.cross_encoder,
                 query,
                 group_ids,
                 search_config,
@@ -663,8 +672,18 @@ class Graphiti:
         config: SearchConfig,
         group_ids: list[str] | None = None,
         center_node_uuid: str | None = None,
+        bfs_origin_node_uuids: list[str] | None = None,
     ) -> SearchResults:
-        return await search(self.driver, self.embedder, query, group_ids, config, center_node_uuid)
+        return await search(
+            self.driver,
+            self.embedder,
+            self.cross_encoder,
+            query,
+            group_ids,
+            config,
+            center_node_uuid,
+            bfs_origin_node_uuids,
+        )
     async def get_nodes_by_query(
         self,
@@ -716,7 +735,13 @@ class Graphiti:
         nodes = (
             await search(
-                self.driver, self.embedder, query, group_ids, search_config, center_node_uuid
+                self.driver,
+                self.embedder,
+                self.cross_encoder,
+                query,
+                group_ids,
+                search_config,
+                center_node_uuid,
             )
         ).nodes
         return nodes

{graphiti_core-0.3.16 → graphiti_core-0.3.17}/graphiti_core/search/search.py RENAMED Viewed

@@ -21,6 +21,7 @@ from time import time
 from neo4j import AsyncDriver
+from graphiti_core.cross_encoder.client import CrossEncoderClient
 from graphiti_core.edges import EntityEdge
 from graphiti_core.embedder import EmbedderClient
 from graphiti_core.errors import SearchRerankerError
@@ -39,6 +40,7 @@ from graphiti_core.search.search_config import (
 from graphiti_core.search.search_utils import (
     community_fulltext_search,
     community_similarity_search,
+    edge_bfs_search,
     edge_fulltext_search,
     edge_similarity_search,
     episode_mentions_reranker,
@@ -55,40 +57,49 @@ logger = logging.getLogger(__name__)
 async def search(
     driver: AsyncDriver,
     embedder: EmbedderClient,
+    cross_encoder: CrossEncoderClient,
     query: str,
     group_ids: list[str] | None,
     config: SearchConfig,
     center_node_uuid: str | None = None,
+    bfs_origin_node_uuids: list[str] | None = None,
 ) -> SearchResults:
     start = time()
-    query = query.replace('\n', ' ')
+    query_vector = await embedder.create(input=[query.replace('\n', ' ')])
     # if group_ids is empty, set it to None
     group_ids = group_ids if group_ids else None
     edges, nodes, communities = await asyncio.gather(
         edge_search(
             driver,
-            embedder,
+            cross_encoder,
             query,
+            query_vector,
             group_ids,
             config.edge_config,
             center_node_uuid,
+            bfs_origin_node_uuids,
             config.limit,
         ),
         node_search(
             driver,
-            embedder,
+            cross_encoder,
             query,
+            query_vector,
             group_ids,
             config.node_config,
             center_node_uuid,
+            bfs_origin_node_uuids,
             config.limit,
         ),
         community_search(
             driver,
-            embedder,
+            cross_encoder,
             query,
+            query_vector,
             group_ids,
             config.community_config,
+            bfs_origin_node_uuids,
             config.limit,
         ),
     )
@@ -99,27 +110,27 @@ async def search(
         communities=communities,
     )
-    end = time()
+    latency = (time() - start) * 1000
-    logger.info(f'search returned context for query {query} in {(end - start) * 1000} ms')
+    logger.debug(f'search returned context for query {query} in {latency} ms')
     return results
 async def edge_search(
     driver: AsyncDriver,
-    embedder: EmbedderClient,
+    cross_encoder: CrossEncoderClient,
     query: str,
+    query_vector: list[float],
     group_ids: list[str] | None,
     config: EdgeSearchConfig | None,
     center_node_uuid: str | None = None,
+    bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
 ) -> list[EntityEdge]:
     if config is None:
         return []
-    query_vector = await embedder.create(input=[query])
     search_results: list[list[EntityEdge]] = list(
         await asyncio.gather(
             *[
@@ -127,6 +138,7 @@ async def edge_search(
                 edge_similarity_search(
                     driver, query_vector, None, None, group_ids, 2 * limit, config.sim_min_score
                 ),
+                edge_bfs_search(driver, bfs_origin_node_uuids, config.bfs_max_depth),
             ]
         )
     )
@@ -147,6 +159,10 @@ async def edge_search(
         reranked_uuids = maximal_marginal_relevance(
             query_vector, search_result_uuids_and_vectors, config.mmr_lambda
         )
+    elif config.reranker == EdgeReranker.cross_encoder:
+        fact_to_uuid_map = {edge.fact: edge.uuid for result in search_results for edge in result}
+        reranked_facts = await cross_encoder.rank(query, list(fact_to_uuid_map.keys()))
+        reranked_uuids = [fact_to_uuid_map[fact] for fact, _ in reranked_facts]
     elif config.reranker == EdgeReranker.node_distance:
         if center_node_uuid is None:
             raise SearchRerankerError('No center node provided for Node Distance reranker')
@@ -177,18 +193,18 @@ async def edge_search(
 async def node_search(
     driver: AsyncDriver,
-    embedder: EmbedderClient,
+    cross_encoder: CrossEncoderClient,
     query: str,
+    query_vector: list[float],
     group_ids: list[str] | None,
     config: NodeSearchConfig | None,
     center_node_uuid: str | None = None,
+    bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
 ) -> list[EntityNode]:
     if config is None:
         return []
-    query_vector = await embedder.create(input=[query])
     search_results: list[list[EntityNode]] = list(
         await asyncio.gather(
             *[
@@ -215,6 +231,12 @@ async def node_search(
         reranked_uuids = maximal_marginal_relevance(
             query_vector, search_result_uuids_and_vectors, config.mmr_lambda
         )
+    elif config.reranker == NodeReranker.cross_encoder:
+        summary_to_uuid_map = {
+            node.summary: node.uuid for result in search_results for node in result
+        }
+        reranked_summaries = await cross_encoder.rank(query, list(summary_to_uuid_map.keys()))
+        reranked_uuids = [summary_to_uuid_map[fact] for fact, _ in reranked_summaries]
     elif config.reranker == NodeReranker.episode_mentions:
         reranked_uuids = await episode_mentions_reranker(driver, search_result_uuids)
     elif config.reranker == NodeReranker.node_distance:
@@ -231,17 +253,17 @@ async def node_search(
 async def community_search(
     driver: AsyncDriver,
-    embedder: EmbedderClient,
+    cross_encoder: CrossEncoderClient,
     query: str,
+    query_vector: list[float],
     group_ids: list[str] | None,
     config: CommunitySearchConfig | None,
+    bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
 ) -> list[CommunityNode]:
     if config is None:
         return []
-    query_vector = await embedder.create(input=[query])
     search_results: list[list[CommunityNode]] = list(
         await asyncio.gather(
             *[
@@ -273,6 +295,12 @@ async def community_search(
         reranked_uuids = maximal_marginal_relevance(
             query_vector, search_result_uuids_and_vectors, config.mmr_lambda
         )
+    elif config.reranker == CommunityReranker.cross_encoder:
+        summary_to_uuid_map = {
+            node.summary: node.uuid for result in search_results for node in result
+        }
+        reranked_summaries = await cross_encoder.rank(query, list(summary_to_uuid_map.keys()))
+        reranked_uuids = [summary_to_uuid_map[fact] for fact, _ in reranked_summaries]
     reranked_communities = [community_uuid_map[uuid] for uuid in reranked_uuids]

{graphiti_core-0.3.16 → graphiti_core-0.3.17}/graphiti_core/search/search_config.py RENAMED Viewed

@@ -20,7 +20,11 @@ from pydantic import BaseModel, Field
 from graphiti_core.edges import EntityEdge
 from graphiti_core.nodes import CommunityNode, EntityNode
-from graphiti_core.search.search_utils import DEFAULT_MIN_SCORE, DEFAULT_MMR_LAMBDA
+from graphiti_core.search.search_utils import (
+    DEFAULT_MIN_SCORE,
+    DEFAULT_MMR_LAMBDA,
+    MAX_SEARCH_DEPTH,
+)
 DEFAULT_SEARCH_LIMIT = 10
@@ -28,11 +32,13 @@ DEFAULT_SEARCH_LIMIT = 10
 class EdgeSearchMethod(Enum):
     cosine_similarity = 'cosine_similarity'
     bm25 = 'bm25'
+    bfs = 'breadth_first_search'
 class NodeSearchMethod(Enum):
     cosine_similarity = 'cosine_similarity'
     bm25 = 'bm25'
+    bfs = 'breadth_first_search'
 class CommunitySearchMethod(Enum):
@@ -45,6 +51,7 @@ class EdgeReranker(Enum):
     node_distance = 'node_distance'
     episode_mentions = 'episode_mentions'
     mmr = 'mmr'
+    cross_encoder = 'cross_encoder'
 class NodeReranker(Enum):
@@ -52,11 +59,13 @@ class NodeReranker(Enum):
     node_distance = 'node_distance'
     episode_mentions = 'episode_mentions'
     mmr = 'mmr'
+    cross_encoder = 'cross_encoder'
 class CommunityReranker(Enum):
     rrf = 'reciprocal_rank_fusion'
     mmr = 'mmr'
+    cross_encoder = 'cross_encoder'
 class EdgeSearchConfig(BaseModel):
@@ -64,6 +73,7 @@ class EdgeSearchConfig(BaseModel):
     reranker: EdgeReranker = Field(default=EdgeReranker.rrf)
     sim_min_score: float = Field(default=DEFAULT_MIN_SCORE)
     mmr_lambda: float = Field(default=DEFAULT_MMR_LAMBDA)
+    bfs_max_depth: int = Field(default=MAX_SEARCH_DEPTH)
 class NodeSearchConfig(BaseModel):
@@ -71,6 +81,7 @@ class NodeSearchConfig(BaseModel):
     reranker: NodeReranker = Field(default=NodeReranker.rrf)
     sim_min_score: float = Field(default=DEFAULT_MIN_SCORE)
     mmr_lambda: float = Field(default=DEFAULT_MMR_LAMBDA)
+    bfs_max_depth: int = Field(default=MAX_SEARCH_DEPTH)
 class CommunitySearchConfig(BaseModel):
@@ -78,6 +89,7 @@ class CommunitySearchConfig(BaseModel):
     reranker: CommunityReranker = Field(default=CommunityReranker.rrf)
     sim_min_score: float = Field(default=DEFAULT_MIN_SCORE)
     mmr_lambda: float = Field(default=DEFAULT_MMR_LAMBDA)
+    bfs_max_depth: int = Field(default=MAX_SEARCH_DEPTH)
 class SearchConfig(BaseModel):

{graphiti_core-0.3.16 → graphiti_core-0.3.17}/graphiti_core/search/search_config_recipes.py RENAMED Viewed

@@ -48,14 +48,41 @@ COMBINED_HYBRID_SEARCH_MMR = SearchConfig(
     edge_config=EdgeSearchConfig(
         search_methods=[EdgeSearchMethod.bm25, EdgeSearchMethod.cosine_similarity],
         reranker=EdgeReranker.mmr,
+        mmr_lambda=1,
     ),
     node_config=NodeSearchConfig(
         search_methods=[NodeSearchMethod.bm25, NodeSearchMethod.cosine_similarity],
         reranker=NodeReranker.mmr,
+        mmr_lambda=1,
     ),
     community_config=CommunitySearchConfig(
         search_methods=[CommunitySearchMethod.bm25, CommunitySearchMethod.cosine_similarity],
         reranker=CommunityReranker.mmr,
+        mmr_lambda=1,
+    ),
+)
+# Performs a full-text search, similarity search, and (for edges and nodes only) bfs, with cross_encoder reranking over edges, nodes, and communities
+COMBINED_HYBRID_SEARCH_CROSS_ENCODER = SearchConfig(
+    edge_config=EdgeSearchConfig(
+        search_methods=[
+            EdgeSearchMethod.bm25,
+            EdgeSearchMethod.cosine_similarity,
+            EdgeSearchMethod.bfs,
+        ],
+        reranker=EdgeReranker.cross_encoder,
+    ),
+    node_config=NodeSearchConfig(
+        search_methods=[
+            NodeSearchMethod.bm25,
+            NodeSearchMethod.cosine_similarity,
+            NodeSearchMethod.bfs,
+        ],
+        reranker=NodeReranker.cross_encoder,
+    ),
+    community_config=CommunitySearchConfig(
+        search_methods=[CommunitySearchMethod.bm25, CommunitySearchMethod.cosine_similarity],
+        reranker=CommunityReranker.cross_encoder,
     ),
 )
@@ -81,7 +108,6 @@ EDGE_HYBRID_SEARCH_NODE_DISTANCE = SearchConfig(
         search_methods=[EdgeSearchMethod.bm25, EdgeSearchMethod.cosine_similarity],
         reranker=EdgeReranker.node_distance,
     ),
-    limit=30,
 )
 # performs a hybrid search over edges with episode mention reranking

graphiti-core 0.3.16__tar.gz → 0.3.17__tar.gz

Potentially problematic release.

graphiti-core 0.3.16.tar.gz → 0.3.17.tar.gz