graphiti-core 0.11.4__py3-none-any.whl → 0.11.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of graphiti-core might be problematic.
graphiti_core/edges.py CHANGED
@@ -46,7 +46,6 @@ ENTITY_EDGE_RETURN: LiteralString = """
         e.name AS name,
         e.group_id AS group_id,
         e.fact AS fact,
-        e.fact_embedding AS fact_embedding,
         e.episodes AS episodes,
         e.expired_at AS expired_at,
         e.valid_at AS valid_at,
@@ -222,6 +221,20 @@ class EntityEdge(Edge):
 
         return self.fact_embedding
 
+    async def load_fact_embedding(self, driver: AsyncDriver):
+        query: LiteralString = """
+            MATCH (n:Entity)-[e:RELATES_TO {uuid: $uuid}]->(m:Entity)
+            RETURN e.fact_embedding AS fact_embedding
+        """
+        records, _, _ = await driver.execute_query(
+            query, uuid=self.uuid, database_=DEFAULT_DATABASE, routing_='r'
+        )
+
+        if len(records) == 0:
+            raise EdgeNotFoundError(self.uuid)
+
+        self.fact_embedding = records[0]['fact_embedding']
+
     async def save(self, driver: AsyncDriver):
         result = await driver.execute_query(
             ENTITY_EDGE_SAVE,
@@ -321,8 +334,8 @@ class EntityEdge(Edge):
     async def get_by_node_uuid(cls, driver: AsyncDriver, node_uuid: str):
         query: LiteralString = (
             """
-            MATCH (n:Entity {uuid: $node_uuid})-[e:RELATES_TO]-(m:Entity)
-            """
+        MATCH (n:Entity {uuid: $node_uuid})-[e:RELATES_TO]-(m:Entity)
+        """
             + ENTITY_EDGE_RETURN
         )
         records, _, _ = await driver.execute_query(
@@ -452,7 +465,6 @@ def get_entity_edge_from_record(record: Any) -> EntityEdge:
         name=record['name'],
         group_id=record['group_id'],
         episodes=record['episodes'],
-        fact_embedding=record['fact_embedding'],
         created_at=record['created_at'].to_native(),
         expired_at=parse_db_date(record['expired_at']),
         valid_at=parse_db_date(record['valid_at']),
@@ -471,6 +483,8 @@ def get_community_edge_from_record(record: Any):
 
 
 async def create_entity_edge_embeddings(embedder: EmbedderClient, edges: list[EntityEdge]):
+    if len(edges) == 0:
+        return
     fact_embeddings = await embedder.create_batch([edge.fact for edge in edges])
     for edge, fact_embedding in zip(edges, fact_embeddings, strict=True):
         edge.fact_embedding = fact_embedding
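
Note: with fact_embedding removed from ENTITY_EDGE_RETURN, edges read back from the graph no longer carry their vectors, and the new load_fact_embedding fetches one on demand. A minimal usage sketch; the driver setup and edge_uuid are illustrative assumptions, not part of this diff:

edge = await EntityEdge.get_by_uuid(driver, edge_uuid)  # fact_embedding now comes back as None
await edge.load_fact_embedding(driver)  # extra read-routed query; raises EdgeNotFoundError if the edge is gone
vector = edge.fact_embedding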
graphiti_core/graphiti.py CHANGED
@@ -41,6 +41,7 @@ from graphiti_core.search.search_config_recipes import (
 from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.search.search_utils import (
     RELEVANT_SCHEMA_LIMIT,
+    get_edge_invalidation_candidates,
     get_mentioned_nodes,
     get_relevant_edges,
 )
@@ -62,9 +63,8 @@ from graphiti_core.utils.maintenance.community_operations import (
 )
 from graphiti_core.utils.maintenance.edge_operations import (
     build_episodic_edges,
-    dedupe_extracted_edge,
     extract_edges,
-    resolve_edge_contradictions,
+    resolve_extracted_edge,
     resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
@@ -77,7 +77,6 @@ from graphiti_core.utils.maintenance.node_operations import (
     extract_nodes,
     resolve_extracted_nodes,
 )
-from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
 from graphiti_core.utils.ontology_utils.entity_types_utils import validate_entity_types
 
 logger = logging.getLogger(__name__)
@@ -380,6 +379,7 @@ class Graphiti:
                 resolve_extracted_edges(
                     self.clients,
                     edges,
+                    episode,
                 ),
                 extract_attributes_from_nodes(
                     self.clients, nodes, episode, previous_episodes, entity_types
@@ -396,7 +396,7 @@ class Graphiti:
             episode.content = ''
 
         await add_nodes_and_edges_bulk(
-            self.driver, [episode], episodic_edges, hydrated_nodes, entity_edges
+            self.driver, [episode], episodic_edges, hydrated_nodes, entity_edges, self.embedder
         )
 
         # Update any communities
@@ -680,15 +680,17 @@ class Graphiti:
 
         updated_edge = resolve_edge_pointers([edge], uuid_map)[0]
 
-        related_edges = await get_relevant_edges(self.driver, [updated_edge], SearchFilters(), 0.8)
-
-        resolved_edge = await dedupe_extracted_edge(self.llm_client, updated_edge, related_edges[0])
+        related_edges = (await get_relevant_edges(self.driver, [updated_edge], SearchFilters()))[0]
+        existing_edges = (
+            await get_edge_invalidation_candidates(self.driver, [updated_edge], SearchFilters())
+        )[0]
 
-        contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges[0])
-        invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
+        resolved_edge, invalidated_edges = await resolve_extracted_edge(
+            self.llm_client, updated_edge, related_edges, existing_edges
+        )
 
         await add_nodes_and_edges_bulk(
-            self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges
+            self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges, self.embedder
         )
 
     async def remove_episode(self, episode_uuid: str):
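
Note: add_triplet's old three-step pipeline (dedupe_extracted_edge, then get_edge_contradictions and resolve_edge_contradictions) collapses into one resolve_extracted_edge call that returns both the resolved edge and any edges it invalidates, and the embedder is now threaded into add_nodes_and_edges_bulk, presumably so embeddings stripped from the read paths can be recreated at save time. A sketch of the new call shape, with surrounding setup elided:

# resolve_extracted_edge pairs duplicate resolution with contradiction detection in one LLM pass
resolved_edge, invalidated_edges = await resolve_extracted_edge(
    llm_client,      # drives the new resolve_edge prompt
    extracted_edge,  # candidate EntityEdge
    related_edges,   # duplicate candidates from get_relevant_edges
    existing_edges,  # invalidation candidates from get_edge_invalidation_candidates
)
await add_nodes_and_edges_bulk(driver, [], [], nodes, [resolved_edge] + invalidated_edges, embedder)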
graphiti_core/helpers.py CHANGED
@@ -22,6 +22,7 @@ from datetime import datetime
 import numpy as np
 from dotenv import load_dotenv
 from neo4j import time as neo4j_time
+from numpy._typing import NDArray
 from typing_extensions import LiteralString
 
 load_dotenv()
@@ -78,20 +79,17 @@ def lucene_sanitize(query: str) -> str:
     return sanitized
 
 
-def normalize_l2(embedding: list[float]):
+def normalize_l2(embedding: list[float]) -> NDArray:
     embedding_array = np.array(embedding)
-    if embedding_array.ndim == 1:
-        norm = np.linalg.norm(embedding_array)
-        if norm == 0:
-            return [0.0] * len(embedding)
-        return (embedding_array / norm).tolist()
-    else:
-        norm = np.linalg.norm(embedding_array, 2, axis=1, keepdims=True)
-        return (np.where(norm == 0, embedding_array, embedding_array / norm)).tolist()
+    norm = np.linalg.norm(embedding_array, 2, axis=0, keepdims=True)
+    return np.where(norm == 0, embedding_array, embedding_array / norm)
 
 
 # Use this instead of asyncio.gather() to bound coroutines
-async def semaphore_gather(*coroutines: Coroutine, max_coroutines: int = SEMAPHORE_LIMIT):
+async def semaphore_gather(
+    *coroutines: Coroutine,
+    max_coroutines: int = SEMAPHORE_LIMIT,
+):
     semaphore = asyncio.Semaphore(max_coroutines)
 
     async def _wrap_coroutine(coroutine):
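
Note: normalize_l2 now assumes a 1-D input, returns an NDArray rather than a Python list, and drops the old 2-D batch branch. A quick behavioral check under those assumptions (values illustrative):

import numpy as np

emb = normalize_l2([3.0, 4.0])
assert isinstance(emb, np.ndarray)    # callers now receive an array, not a list
assert np.allclose(emb, [0.6, 0.8])   # scaled to unit L2 norm
zero = normalize_l2([0.0, 0.0])
assert np.allclose(zero, [0.0, 0.0])  # zero vectors pass through unchanged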
graphiti_core/llm_client/anthropic_client.py CHANGED
@@ -139,15 +139,11 @@ class AnthropicClient(LLMClient):
             A list containing a single tool definition for use with the Anthropic API.
         """
         if response_model is not None:
-            # temporary debug log
-            logger.info(f'Creating tool for response_model: {response_model}')
             # Use the response_model to define the tool
             model_schema = response_model.model_json_schema()
             tool_name = response_model.__name__
             description = model_schema.get('description', f'Extract {tool_name} information')
         else:
-            # temporary debug log
-            logger.info('Creating generic JSON output tool')
             # Create a generic JSON output tool
             tool_name = 'generic_json_output'
             description = 'Output data in JSON format'
@@ -205,8 +201,6 @@ class AnthropicClient(LLMClient):
         try:
             # Create the appropriate tool based on whether response_model is provided
             tools, tool_choice = self._create_tool(response_model)
-            # temporary debug log
-            logger.info(f'using model: {self.model} with max_tokens: {self.max_tokens}')
             result = await self.client.messages.create(
                 system=system_message.content,
                 max_tokens=max_creation_tokens,
@@ -227,13 +221,6 @@ class AnthropicClient(LLMClient):
             return tool_args
 
         # If we didn't get a proper tool_use response, try to extract from text
-        # logger.debug(
-        #     f'Did not get a tool_use response, trying to extract json from text. Result: {result.content}'
-        # )
-        # temporary debug log
-        logger.info(
-            f'Did not get a tool_use response, trying to extract json from text. Result: {result.content}'
-        )
         for content_item in result.content:
             if content_item.type == 'text':
                 return self._extract_json_from_text(content_item.text)
graphiti_core/nodes.py CHANGED
@@ -42,7 +42,6 @@ ENTITY_NODE_RETURN: LiteralString = """
     RETURN
         n.uuid As uuid,
         n.name AS name,
-        n.name_embedding AS name_embedding,
        n.group_id AS group_id,
        n.created_at AS created_at,
        n.summary AS summary,
@@ -305,6 +304,20 @@ class EntityNode(Node):
 
         return self.name_embedding
 
+    async def load_name_embedding(self, driver: AsyncDriver):
+        query: LiteralString = """
+            MATCH (n:Entity {uuid: $uuid})
+            RETURN n.name_embedding AS name_embedding
+        """
+        records, _, _ = await driver.execute_query(
+            query, uuid=self.uuid, database_=DEFAULT_DATABASE, routing_='r'
+        )
+
+        if len(records) == 0:
+            raise NodeNotFoundError(self.uuid)
+
+        self.name_embedding = records[0]['name_embedding']
+
     async def save(self, driver: AsyncDriver):
         entity_data: dict[str, Any] = {
             'uuid': self.uuid,
@@ -332,8 +345,8 @@ class EntityNode(Node):
     async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
         query = (
             """
-            MATCH (n:Entity {uuid: $uuid})
-            """
+        MATCH (n:Entity {uuid: $uuid})
+        """
             + ENTITY_NODE_RETURN
         )
         records, _, _ = await driver.execute_query(
@@ -428,6 +441,20 @@ class CommunityNode(Node):
 
         return self.name_embedding
 
+    async def load_name_embedding(self, driver: AsyncDriver):
+        query: LiteralString = """
+            MATCH (c:Community {uuid: $uuid})
+            RETURN c.name_embedding AS name_embedding
+        """
+        records, _, _ = await driver.execute_query(
+            query, uuid=self.uuid, database_=DEFAULT_DATABASE, routing_='r'
+        )
+
+        if len(records) == 0:
+            raise NodeNotFoundError(self.uuid)
+
+        self.name_embedding = records[0]['name_embedding']
+
     @classmethod
     async def get_by_uuid(cls, driver: AsyncDriver, uuid: str):
         records, _, _ = await driver.execute_query(
@@ -436,7 +463,6 @@ class CommunityNode(Node):
         RETURN
             n.uuid As uuid,
             n.name AS name,
-            n.name_embedding AS name_embedding,
            n.group_id AS group_id,
            n.created_at AS created_at,
            n.summary AS summary
@@ -461,7 +487,6 @@ class CommunityNode(Node):
         RETURN
             n.uuid As uuid,
             n.name AS name,
-            n.name_embedding AS name_embedding,
            n.group_id AS group_id,
            n.created_at AS created_at,
            n.summary AS summary
@@ -495,7 +520,6 @@ class CommunityNode(Node):
         RETURN
             n.uuid As uuid,
             n.name AS name,
-            n.name_embedding AS name_embedding,
            n.group_id AS group_id,
            n.created_at AS created_at,
            n.summary AS summary
@@ -534,7 +558,6 @@ def get_entity_node_from_record(record: Any) -> EntityNode:
         uuid=record['uuid'],
         name=record['name'],
         group_id=record['group_id'],
-        name_embedding=record['name_embedding'],
         labels=record['labels'],
         created_at=record['created_at'].to_native(),
         summary=record['summary'],
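
Note: name embeddings mirror the fact-embedding change above: they are excluded from every RETURN clause and hydrated on demand via load_name_embedding. Bulk hydration with the reworked semaphore_gather might look like this sketch (driver and nodes are illustrative assumptions):

# bound concurrency while loading vectors for many nodes
await semaphore_gather(*[node.load_name_embedding(driver) for node in nodes])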
graphiti_core/prompts/dedupe_edges.py CHANGED
@@ -27,6 +27,10 @@ class EdgeDuplicate(BaseModel):
         ...,
         description='id of the duplicate fact. If no duplicate facts are found, default to -1.',
     )
+    contradicted_facts: list[int] = Field(
+        ...,
+        description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
+    )
 
 
 class UniqueFact(BaseModel):
@@ -41,11 +45,13 @@ class UniqueFacts(BaseModel):
 class Prompt(Protocol):
     edge: PromptVersion
     edge_list: PromptVersion
+    resolve_edge: PromptVersion
 
 
 class Versions(TypedDict):
     edge: PromptFunction
     edge_list: PromptFunction
+    resolve_edge: PromptFunction
 
 
 def edge(context: dict[str, Any]) -> list[Message]:
@@ -106,4 +112,41 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]
 
 
-versions: Versions = {'edge': edge, 'edge_list': edge_list}
+def resolve_edge(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates facts from fact lists and determines which existing '
+            'facts are contradicted by the new fact.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <NEW FACT>
+        {context['new_edge']}
+        </NEW FACT>
+
+        <EXISTING FACTS>
+        {context['existing_edges']}
+        </EXISTING FACTS>
+        <FACT INVALIDATION CANDIDATES>
+        {context['edge_invalidation_candidates']}
+        </FACT INVALIDATION CANDIDATES>
+
+
+        Task:
+        If the NEW FACT represents the same factual information as any fact in EXISTING FACTS, return the idx of the duplicate fact.
+        If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
+
+        Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
+        Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
+        If there are no contradicted facts, return an empty list.
+
+        Guidelines:
+        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
+        """,
+        ),
+    ]
+
+
+versions: Versions = {'edge': edge, 'edge_list': edge_list, 'resolve_edge': resolve_edge}
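
Note: resolve_edge is built to be parsed into the extended EdgeDuplicate model above, so a single LLM call now answers both questions (which fact this duplicates, and which facts it contradicts). A parsing sketch; the duplicate-id field name is inferred from its description in this hunk, and the payload is invented for illustration:

raw = {'duplicate_fact_id': -1, 'contradicted_facts': [0, 2]}  # hypothetical structured output
resolution = EdgeDuplicate(**raw)
if resolution.duplicate_fact_id == -1:
    pass  # not a duplicate; keep the new edge
for i in resolution.contradicted_facts:
    pass  # invalidate the existing fact at idx i (e.g. set expired_at)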
graphiti_core/prompts/dedupe_nodes.py CHANGED
@@ -23,28 +23,38 @@ from .models import Message, PromptFunction, PromptVersion
 
 
 class NodeDuplicate(BaseModel):
-    duplicate_node_id: int = Field(
+    id: int = Field(..., description='integer id of the entity')
+    duplicate_idx: int = Field(
         ...,
-        description='id of the duplicate node. If no duplicate nodes are found, default to -1.',
+        description='idx of the duplicate node. If no duplicate nodes are found, default to -1.',
     )
-    name: str = Field(..., description='Name of the entity.')
+    name: str = Field(
+        ...,
+        description='Name of the entity. Should be the most complete and descriptive name possible.',
+    )
+
+
+class NodeResolutions(BaseModel):
+    entity_resolutions: list[NodeDuplicate] = Field(..., description='List of resolved nodes')
 
 
 class Prompt(Protocol):
     node: PromptVersion
     node_list: PromptVersion
+    nodes: PromptVersion
 
 
 class Versions(TypedDict):
     node: PromptFunction
     node_list: PromptFunction
+    nodes: PromptFunction
 
 
 def node(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
-            content='You are a helpful assistant that de-duplicates entities from entity lists.',
+            content='You are a helpful assistant that determines whether or not a NEW ENTITY is a duplicate of any EXISTING ENTITIES.',
         ),
         Message(
             role='user',
@@ -69,19 +79,82 @@ def node(context: dict[str, Any]) -> list[Message]:
         Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
         is a duplicate entity of one of the EXISTING ENTITIES.
 
-        The ENTITY TYPE DESCRIPTION gives more insight into what the entity type means for the NEW ENTITY.
+        Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
+
+        Do NOT mark entities as duplicates if:
+        - They are related but distinct.
+        - They have similar names or purposes but refer to separate instances or concepts.
 
         Task:
         If the NEW ENTITY represents a duplicate entity of any entity in EXISTING ENTITIES, set duplicate_entity_id to the
-        id of the EXISTING ENTITY that is the duplicate. If the NEW ENTITY is not a duplicate of any of the EXISTING ENTITIES,
+        id of the EXISTING ENTITY that is the duplicate.
+
+        If the NEW ENTITY is not a duplicate of any of the EXISTING ENTITIES,
         duplicate_entity_id should be set to -1.
 
-        Also return the most complete name for the entity.
+        Also return the name that best describes the NEW ENTITY (whether it is the name of the NEW ENTITY, a node it
+        is a duplicate of, or a combination of the two).
+        """,
+        ),
+    ]
 
-        Guidelines:
-        1. Entities with the same name should be considered duplicates
-        2. Duplicate entities may refer to the same real-world entity even if names differ. Use context clues from the MESSAGES
-        to determine if the NEW ENTITY represents a duplicate entity of one of the EXISTING ENTITIES.
+
+def nodes(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
+            'of existing entities.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <PREVIOUS MESSAGES>
+        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context['episode_content']}
+        </CURRENT MESSAGE>
+
+
+        Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
+        Each entity in ENTITIES is represented as a JSON object with the following structure:
+        {{
+            id: integer id of the entity,
+            name: "name of the entity",
+            entity_type: "ontological classification of the entity",
+            entity_type_description: "Description of what the entity type represents",
+            duplication_candidates: [
+                {{
+                    idx: integer index of the candidate entity,
+                    name: "name of the candidate entity",
+                    entity_type: "ontological classification of the candidate entity",
+                    ...<additional attributes>
+                }}
+            ]
+        }}
+
+        <ENTITIES>
+        {json.dumps(context['extracted_nodes'], indent=2)}
+        </ENTITIES>
+
+        For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
+
+        Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
+
+        Do NOT mark entities as duplicates if:
+        - They are related but distinct.
+        - They have similar names or purposes but refer to separate instances or concepts.
+
+        Task:
+        Your response will be a list called entity_resolutions which contains one entry for each entity.
+
+        For each entity, return the id of the entity as id, the name of the entity as name, and the duplicate_idx
+        as an integer.
+
+        - If an entity is a duplicate of one of its duplication_candidates, return the idx of the candidate it is a
+        duplicate of.
+        - If an entity is not a duplicate of one of its duplication candidates, return the -1 as the duplication_idx
         """,
         ),
     ]
@@ -124,4 +197,4 @@ def node_list(context: dict[str, Any]) -> list[Message]:
     ]
 
 
-versions: Versions = {'node': node, 'node_list': node_list}
+versions: Versions = {'node': node, 'node_list': node_list, 'nodes': nodes}
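
Note: the batch nodes prompt pairs with the new NodeResolutions model, resolving every extracted entity in one call instead of one call per node. A minimal parse sketch (payload and the merge_or_create helper are invented for illustration):

raw = {
    'entity_resolutions': [
        {'id': 0, 'duplicate_idx': -1, 'name': 'Acme Corporation'},  # genuinely new entity
        {'id': 1, 'duplicate_idx': 2, 'name': 'Jane Doe'},  # duplicate of candidate at idx 2
    ]
}
resolutions = NodeResolutions(**raw)
for r in resolutions.entity_resolutions:
    merge_or_create(r.id, r.duplicate_idx, r.name)  # hypothetical helper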
graphiti_core/prompts/extract_nodes.py CHANGED
@@ -256,7 +256,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
         1. Do not hallucinate entity property values if they cannot be found in the current context.
         2. Only use the provided MESSAGES and ENTITY to set attribute values.
         3. The summary attribute represents a summary of the ENTITY, and should be updated with new information about the Entity from the MESSAGES.
-            Summaries must be no longer than 500 words.
+            Summaries must be no longer than 250 words.
 
         <ENTITY>
         {context['node']}
graphiti_core/prompts/invalidate_edges.py CHANGED
@@ -24,7 +24,7 @@ from .models import Message, PromptFunction, PromptVersion
 class InvalidatedEdges(BaseModel):
     contradicted_facts: list[int] = Field(
         ...,
-        description='List of ids of facts that be should invalidated. If no facts should be invalidated, the list should be empty.',
+        description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
     )
 
 
graphiti_core/prompts/summarize_nodes.py CHANGED
@@ -25,7 +25,7 @@ from .models import Message, PromptFunction, PromptVersion
 class Summary(BaseModel):
     summary: str = Field(
         ...,
-        description='Summary containing the important information about the entity. Under 500 words',
+        description='Summary containing the important information about the entity. Under 250 words',
     )
 
 
@@ -56,7 +56,7 @@ def summarize_pair(context: dict[str, Any]) -> list[Message]:
         content=f"""
         Synthesize the information from the following two summaries into a single succinct summary.
 
-        Summaries must be under 500 words.
+        Summaries must be under 250 words.
 
         Summaries:
         {json.dumps(context['node_summaries'], indent=2)}
@@ -82,7 +82,7 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
 
         Given the above MESSAGES and the following ENTITY name, create a summary for the ENTITY. Your summary must only use
         information from the provided MESSAGES. Your summary should also only contain information relevant to the
-        provided ENTITY. Summaries must be under 500 words.
+        provided ENTITY. Summaries must be under 250 words.
 
         In addition, extract any values for the provided entity properties based on their descriptions.
         If the value of the entity property cannot be found in the current context, set the value of the property to the Python value None.
@@ -117,7 +117,7 @@ def summary_description(context: dict[str, Any]) -> list[Message]:
         role='user',
         content=f"""
         Create a short one sentence description of the summary that explains what kind of information is summarized.
-        Summaries must be under 500 words.
+        Summaries must be under 250 words.
 
         Summary:
         {json.dumps(context['summary'], indent=2)}
graphiti_core/search/search.py CHANGED
@@ -50,6 +50,9 @@ from graphiti_core.search.search_utils import (
     edge_similarity_search,
     episode_fulltext_search,
     episode_mentions_reranker,
+    get_embeddings_for_communities,
+    get_embeddings_for_edges,
+    get_embeddings_for_nodes,
     maximal_marginal_relevance,
     node_bfs_search,
     node_distance_reranker,
@@ -209,23 +212,17 @@ async def edge_search(
 
         reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
     elif config.reranker == EdgeReranker.mmr:
-        search_result_uuids_and_vectors = [
-            (edge.uuid, edge.fact_embedding if edge.fact_embedding is not None else [0.0] * 1024)
-            for result in search_results
-            for edge in result
-        ]
+        search_result_uuids_and_vectors = await get_embeddings_for_edges(
+            driver, list(edge_uuid_map.values())
+        )
         reranked_uuids = maximal_marginal_relevance(
             query_vector,
             search_result_uuids_and_vectors,
             config.mmr_lambda,
+            reranker_min_score,
         )
     elif config.reranker == EdgeReranker.cross_encoder:
-        search_result_uuids = [[edge.uuid for edge in result] for result in search_results]
-
-        rrf_result_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
-        rrf_edges = [edge_uuid_map[uuid] for uuid in rrf_result_uuids][:limit]
-
-        fact_to_uuid_map = {edge.fact: edge.uuid for edge in rrf_edges}
+        fact_to_uuid_map = {edge.fact: edge.uuid for edge in list(edge_uuid_map.values())[:limit]}
         reranked_facts = await cross_encoder.rank(query, list(fact_to_uuid_map.keys()))
         reranked_uuids = [
             fact_to_uuid_map[fact] for fact, score in reranked_facts if score >= reranker_min_score
@@ -308,27 +305,23 @@ async def node_search(
     if config.reranker == NodeReranker.rrf:
         reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
     elif config.reranker == NodeReranker.mmr:
-        search_result_uuids_and_vectors = [
-            (node.uuid, node.name_embedding if node.name_embedding is not None else [0.0] * 1024)
-            for result in search_results
-            for node in result
-        ]
+        search_result_uuids_and_vectors = await get_embeddings_for_nodes(
+            driver, list(node_uuid_map.values())
+        )
+
         reranked_uuids = maximal_marginal_relevance(
             query_vector,
             search_result_uuids_and_vectors,
             config.mmr_lambda,
+            reranker_min_score,
         )
     elif config.reranker == NodeReranker.cross_encoder:
-        # use rrf as a preliminary reranker
-        rrf_result_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
-        rrf_results = [node_uuid_map[uuid] for uuid in rrf_result_uuids][:limit]
-
-        summary_to_uuid_map = {node.summary: node.uuid for node in rrf_results}
+        name_to_uuid_map = {node.name: node.uuid for node in list(node_uuid_map.values())}
 
-        reranked_summaries = await cross_encoder.rank(query, list(summary_to_uuid_map.keys()))
+        reranked_node_names = await cross_encoder.rank(query, list(name_to_uuid_map.keys()))
         reranked_uuids = [
-            summary_to_uuid_map[fact]
-            for fact, score in reranked_summaries
+            name_to_uuid_map[name]
+            for name, score in reranked_node_names
             if score >= reranker_min_score
         ]
     elif config.reranker == NodeReranker.episode_mentions:
@@ -431,28 +424,18 @@ async def community_search(
     if config.reranker == CommunityReranker.rrf:
         reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
     elif config.reranker == CommunityReranker.mmr:
-        search_result_uuids_and_vectors = [
-            (
-                community.uuid,
-                community.name_embedding if community.name_embedding is not None else [0.0] * 1024,
-            )
-            for result in search_results
-            for community in result
-        ]
+        search_result_uuids_and_vectors = await get_embeddings_for_communities(
+            driver, list(community_uuid_map.values())
+        )
+
         reranked_uuids = maximal_marginal_relevance(
-            query_vector,
-            search_result_uuids_and_vectors,
-            config.mmr_lambda,
+            query_vector, search_result_uuids_and_vectors, config.mmr_lambda, reranker_min_score
         )
     elif config.reranker == CommunityReranker.cross_encoder:
-        summary_to_uuid_map = {
-            node.summary: node.uuid for result in search_results for node in result
-        }
-        reranked_summaries = await cross_encoder.rank(query, list(summary_to_uuid_map.keys()))
+        name_to_uuid_map = {node.name: node.uuid for result in search_results for node in result}
+        reranked_nodes = await cross_encoder.rank(query, list(name_to_uuid_map.keys()))
         reranked_uuids = [
-            summary_to_uuid_map[fact]
-            for fact, score in reranked_summaries
-            if score >= reranker_min_score
+            name_to_uuid_map[name] for name, score in reranked_nodes if score >= reranker_min_score
         ]
 
     reranked_communities = [community_uuid_map[uuid] for uuid in reranked_uuids]
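
Note: across all three search paths, MMR candidate vectors are now fetched from the database via the new get_embeddings_for_* helpers (since result objects no longer carry embeddings), and maximal_marginal_relevance takes the reranker's min_score as a fourth argument. A sketch of the edge path; the (uuid, vector) return shape of get_embeddings_for_edges is inferred from how its result feeds the reranker:

uuids_and_vectors = await get_embeddings_for_edges(driver, list(edge_uuid_map.values()))
reranked_uuids = maximal_marginal_relevance(
    query_vector,        # embedding of the search query
    uuids_and_vectors,   # candidate (uuid, fact_embedding) pairs
    config.mmr_lambda,   # relevance vs. diversity trade-off
    reranker_min_score,  # new: floor on the MMR score for kept candidates
)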