PyPI - graphiti-core - Versions diffs - 0.11.6rc9__tar.gz → 0.12.0rc1__tar.gz - Mend

graphiti-core 0.11.6rc9 (tar.gz) → 0.12.0rc1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graphiti-core might be problematic. See the release details linked from the original page for more information.

Files changed (66)

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: graphiti-core
-Version: 0.11.6rc9
+Version: 0.12.0rc1
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/edges.py RENAMED Viewed

@@ -49,7 +49,9 @@ ENTITY_EDGE_RETURN: LiteralString = """
             e.episodes AS episodes,
             e.expired_at AS expired_at,
             e.valid_at AS valid_at,
-            e.invalid_at AS invalid_at"""
+            e.invalid_at AS invalid_at,
+            properties(e) AS attributes
+            """
 class Edge(BaseModel, ABC):
@@ -209,6 +211,9 @@ class EntityEdge(Edge):
     invalid_at: datetime | None = Field(
         default=None, description='datetime of when the fact stopped being true'
     )
+    attributes: dict[str, Any] = Field(
+        default={}, description='Additional attributes of the edge. Dependent on edge name'
+    )
     async def generate_embedding(self, embedder: EmbedderClient):
         start = time()
@@ -236,20 +241,26 @@ class EntityEdge(Edge):
         self.fact_embedding = records[0]['fact_embedding']
     async def save(self, driver: AsyncDriver):
+        edge_data: dict[str, Any] = {
+            'source_uuid': self.source_node_uuid,
+            'target_uuid': self.target_node_uuid,
+            'uuid': self.uuid,
+            'name': self.name,
+            'group_id': self.group_id,
+            'fact': self.fact,
+            'fact_embedding': self.fact_embedding,
+            'episodes': self.episodes,
+            'created_at': self.created_at,
+            'expired_at': self.expired_at,
+            'valid_at': self.valid_at,
+            'invalid_at': self.invalid_at,
+        }
+        edge_data.update(self.attributes or {})
         result = await driver.execute_query(
             ENTITY_EDGE_SAVE,
-            source_uuid=self.source_node_uuid,
-            target_uuid=self.target_node_uuid,
-            uuid=self.uuid,
-            name=self.name,
-            group_id=self.group_id,
-            fact=self.fact,
-            fact_embedding=self.fact_embedding,
-            episodes=self.episodes,
-            created_at=self.created_at,
-            expired_at=self.expired_at,
-            valid_at=self.valid_at,
-            invalid_at=self.invalid_at,
+            edge_data=edge_data,
             database_=DEFAULT_DATABASE,
         )
@@ -334,8 +345,8 @@ class EntityEdge(Edge):
     async def get_by_node_uuid(cls, driver: AsyncDriver, node_uuid: str):
         query: LiteralString = (
             """
-                                        MATCH (n:Entity {uuid: $node_uuid})-[e:RELATES_TO]-(m:Entity)
-                                        """
+                                            MATCH (n:Entity {uuid: $node_uuid})-[e:RELATES_TO]-(m:Entity)
+                                            """
             + ENTITY_EDGE_RETURN
         )
         records, _, _ = await driver.execute_query(
@@ -457,7 +468,7 @@ def get_episodic_edge_from_record(record: Any) -> EpisodicEdge:
 def get_entity_edge_from_record(record: Any) -> EntityEdge:
-    return EntityEdge(
+    edge = EntityEdge(
         uuid=record['uuid'],
         source_node_uuid=record['source_node_uuid'],
         target_node_uuid=record['target_node_uuid'],
@@ -469,8 +480,23 @@ def get_entity_edge_from_record(record: Any) -> EntityEdge:
         expired_at=parse_db_date(record['expired_at']),
         valid_at=parse_db_date(record['valid_at']),
         invalid_at=parse_db_date(record['invalid_at']),
+        attributes=record['attributes'],
     )
+    edge.attributes.pop('uuid', None)
+    edge.attributes.pop('source_node_uuid', None)
+    edge.attributes.pop('target_node_uuid', None)
+    edge.attributes.pop('fact', None)
+    edge.attributes.pop('name', None)
+    edge.attributes.pop('group_id', None)
+    edge.attributes.pop('episodes', None)
+    edge.attributes.pop('created_at', None)
+    edge.attributes.pop('expired_at', None)
+    edge.attributes.pop('valid_at', None)
+    edge.attributes.pop('invalid_at', None)
+    return edge
 def get_community_edge_from_record(record: Any):
     return CommunityEdge(

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/embedder/gemini.py RENAMED Viewed

@@ -61,18 +61,29 @@ class GeminiEmbedder(EmbedderClient):
         # Generate embeddings
         result = await self.client.aio.models.embed_content(
             model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
-            contents=[input_data],
+            contents=[input_data],  # type: ignore[arg-type]  # mypy fails on broad union type
             config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
         )
+        if not result.embeddings or len(result.embeddings) == 0 or not result.embeddings[0].values:
+            raise ValueError('No embeddings returned from Gemini API in create()')
         return result.embeddings[0].values
     async def create_batch(self, input_data_list: list[str]) -> list[list[float]]:
         # Generate embeddings
         result = await self.client.aio.models.embed_content(
             model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
-            contents=input_data_list,
+            contents=input_data_list,  # type: ignore[arg-type]  # mypy fails on broad union type
             config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
         )
-        return [embedding.values for embedding in result.embeddings]
+        if not result.embeddings or len(result.embeddings) == 0:
+            raise Exception('No embeddings returned')
+        embeddings = []
+        for embedding in result.embeddings:
+            if not embedding.values:
+                raise ValueError('Empty embedding values returned')
+            embeddings.append(embedding.values)
+        return embeddings

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/graphiti.py RENAMED Viewed

@@ -41,6 +41,7 @@ from graphiti_core.search.search_config_recipes import (
 from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.search.search_utils import (
     RELEVANT_SCHEMA_LIMIT,
+    get_edge_invalidation_candidates,
     get_mentioned_nodes,
     get_relevant_edges,
 )
@@ -62,9 +63,8 @@ from graphiti_core.utils.maintenance.community_operations import (
 )
 from graphiti_core.utils.maintenance.edge_operations import (
     build_episodic_edges,
-    dedupe_extracted_edge,
     extract_edges,
-    resolve_edge_contradictions,
+    resolve_extracted_edge,
     resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
@@ -77,7 +77,6 @@ from graphiti_core.utils.maintenance.node_operations import (
     extract_nodes,
     resolve_extracted_nodes,
 )
-from graphiti_core.utils.maintenance.temporal_operations import get_edge_contradictions
 from graphiti_core.utils.ontology_utils.entity_types_utils import validate_entity_types
 logger = logging.getLogger(__name__)
@@ -274,6 +273,8 @@ class Graphiti:
         update_communities: bool = False,
         entity_types: dict[str, BaseModel] | None = None,
         previous_episode_uuids: list[str] | None = None,
+        edge_types: dict[str, BaseModel] | None = None,
+        edge_type_map: dict[tuple[str, str], list[str]] | None = None,
     ) -> AddEpisodeResults:
         """
         Process an episode and update the graph.
@@ -356,6 +357,13 @@ class Graphiti:
                 )
             )
+            # Create default edge type map
+            edge_type_map_default = (
+                {('Entity', 'Entity'): list(edge_types.keys())}
+                if edge_types is not None
+                else {('Entity', 'Entity'): []}
+            )
             # Extract entities as nodes
             extracted_nodes = await extract_nodes(
@@ -371,7 +379,9 @@ class Graphiti:
                     previous_episodes,
                     entity_types,
                 ),
-                extract_edges(self.clients, episode, extracted_nodes, previous_episodes, group_id),
+                extract_edges(
+                    self.clients, episode, extracted_nodes, previous_episodes, group_id, edge_types
+                ),
             )
             edges = resolve_edge_pointers(extracted_edges, uuid_map)
@@ -381,6 +391,9 @@ class Graphiti:
                     self.clients,
                     edges,
                     episode,
+                    nodes,
+                    edge_types or {},
+                    edge_type_map or edge_type_map_default,
                 ),
                 extract_attributes_from_nodes(
                     self.clients, nodes, episode, previous_episodes, entity_types
@@ -681,17 +694,27 @@ class Graphiti:
         updated_edge = resolve_edge_pointers([edge], uuid_map)[0]
-        related_edges = await get_relevant_edges(self.driver, [updated_edge], SearchFilters(), 0.8)
+        related_edges = (await get_relevant_edges(self.driver, [updated_edge], SearchFilters()))[0]
+        existing_edges = (
+            await get_edge_invalidation_candidates(self.driver, [updated_edge], SearchFilters())
+        )[0]
-        resolved_edge = await dedupe_extracted_edge(
+        resolved_edge, invalidated_edges = await resolve_extracted_edge(
             self.llm_client,
             updated_edge,
-            related_edges[0],
+            related_edges,
+            existing_edges,
+            EpisodicNode(
+                name='',
+                source=EpisodeType.text,
+                source_description='',
+                content='',
+                valid_at=edge.valid_at or utc_now(),
+                entity_edges=[],
+                group_id=edge.group_id,
+            ),
         )
-        contradicting_edges = await get_edge_contradictions(self.llm_client, edge, related_edges[0])
-        invalidated_edges = resolve_edge_contradictions(resolved_edge, contradicting_edges)
         await add_nodes_and_edges_bulk(
             self.driver, [], [], resolved_nodes, [resolved_edge] + invalidated_edges, self.embedder
         )

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/helpers.py RENAMED Viewed

@@ -18,7 +18,6 @@ import asyncio
 import os
 from collections.abc import Coroutine
 from datetime import datetime
-from typing import Any
 import numpy as np
 from dotenv import load_dotenv

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/llm_client/gemini_client.py RENAMED Viewed

@@ -139,13 +139,16 @@ class GeminiClient(LLMClient):
             # Generate content using the simple string approach
             response = await self.client.aio.models.generate_content(
                 model=self.model or DEFAULT_MODEL,
-                contents=gemini_messages,
+                contents=gemini_messages,  # type: ignore[arg-type]  # mypy fails on broad union type
                 config=generation_config,
             )
             # If this was a structured output request, parse the response into the Pydantic model
             if response_model is not None:
                 try:
+                    if not response.text:
+                        raise ValueError('No response text')
                     validated_model = response_model.model_validate(json.loads(response.text))
                     # Return as a dictionary for API consistency

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/models/edges/edge_db_queries.py RENAMED Viewed

@@ -34,8 +34,7 @@ ENTITY_EDGE_SAVE = """
         MATCH (source:Entity {uuid: $source_uuid})
         MATCH (target:Entity {uuid: $target_uuid})
         MERGE (source)-[r:RELATES_TO {uuid: $uuid}]->(target)
-        SET r = {uuid: $uuid, name: $name, group_id: $group_id, fact: $fact, episodes: $episodes,
-        created_at: $created_at, expired_at: $expired_at, valid_at: $valid_at, invalid_at: $invalid_at}
+        SET r = $edge_data
         WITH r CALL db.create.setRelationshipVectorProperty(r, "fact_embedding", $fact_embedding)
         RETURN r.uuid AS uuid"""
@@ -44,8 +43,7 @@ ENTITY_EDGE_SAVE_BULK = """
     MATCH (source:Entity {uuid: edge.source_node_uuid})
     MATCH (target:Entity {uuid: edge.target_node_uuid})
     MERGE (source)-[r:RELATES_TO {uuid: edge.uuid}]->(target)
-    SET r = {uuid: edge.uuid, name: edge.name, group_id: edge.group_id, fact: edge.fact, episodes: edge.episodes,
-    created_at: edge.created_at, expired_at: edge.expired_at, valid_at: edge.valid_at, invalid_at: edge.invalid_at}
+    SET r = edge
     WITH r, edge CALL db.create.setRelationshipVectorProperty(r, "fact_embedding", edge.fact_embedding)
     RETURN edge.uuid AS uuid
 """

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/prompts/dedupe_edges.py RENAMED Viewed

@@ -27,6 +27,11 @@ class EdgeDuplicate(BaseModel):
         ...,
         description='id of the duplicate fact. If no duplicate facts are found, default to -1.',
     )
+    contradicted_facts: list[int] = Field(
+        ...,
+        description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
+    )
+    fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')
 class UniqueFact(BaseModel):
@@ -41,11 +46,13 @@ class UniqueFacts(BaseModel):
 class Prompt(Protocol):
     edge: PromptVersion
     edge_list: PromptVersion
+    resolve_edge: PromptVersion
 class Versions(TypedDict):
     edge: PromptFunction
     edge_list: PromptFunction
+    resolve_edge: PromptFunction
 def edge(context: dict[str, Any]) -> list[Message]:
@@ -106,4 +113,48 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]
-versions: Versions = {'edge': edge, 'edge_list': edge_list}
+def resolve_edge(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates facts from fact lists and determines which existing '
+            'facts are contradicted by the new fact.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <NEW FACT>
+        {context['new_edge']}
+        </NEW FACT>
+        <EXISTING FACTS>
+        {context['existing_edges']}
+        </EXISTING FACTS>
+        <FACT INVALIDATION CANDIDATES>
+        {context['edge_invalidation_candidates']}
+        </FACT INVALIDATION CANDIDATES>
+        <FACT TYPES>
+        {context['edge_types']}
+        </FACT TYPES>
+        Task:
+        If the NEW FACT represents the same factual information as any fact in EXISTING FACTS, return the idx of the duplicate fact.
+        If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
+        Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
+        Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
+        Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
+        Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
+        If there are no contradicted facts, return an empty list.
+        Guidelines:
+        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
+        """,
+        ),
+    ]
+versions: Versions = {'edge': edge, 'edge_list': edge_list, 'resolve_edge': resolve_edge}

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/prompts/dedupe_nodes.py RENAMED Viewed

@@ -23,21 +23,31 @@ from .models import Message, PromptFunction, PromptVersion
 class NodeDuplicate(BaseModel):
-    duplicate_node_id: int = Field(
+    id: int = Field(..., description='integer id of the entity')
+    duplicate_idx: int = Field(
         ...,
-        description='id of the duplicate node. If no duplicate nodes are found, default to -1.',
+        description='idx of the duplicate node. If no duplicate nodes are found, default to -1.',
     )
-    name: str = Field(..., description='Name of the entity.')
+    name: str = Field(
+        ...,
+        description='Name of the entity. Should be the most complete and descriptive name possible.',
+    )
+class NodeResolutions(BaseModel):
+    entity_resolutions: list[NodeDuplicate] = Field(..., description='List of resolved nodes')
 class Prompt(Protocol):
     node: PromptVersion
     node_list: PromptVersion
+    nodes: PromptVersion
 class Versions(TypedDict):
     node: PromptFunction
     node_list: PromptFunction
+    nodes: PromptFunction
 def node(context: dict[str, Any]) -> list[Message]:
@@ -89,6 +99,67 @@ def node(context: dict[str, Any]) -> list[Message]:
     ]
+def nodes(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
+            'of existing entities.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <PREVIOUS MESSAGES>
+        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        </PREVIOUS MESSAGES>
+        <CURRENT MESSAGE>
+        {context['episode_content']}
+        </CURRENT MESSAGE>
+        Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
+        Each entity in ENTITIES is represented as a JSON object with the following structure:
+        {{
+            id: integer id of the entity,
+            name: "name of the entity",
+            entity_type: "ontological classification of the entity",
+            entity_type_description: "Description of what the entity type represents",
+            duplication_candidates: [
+                {{
+                    idx: integer index of the candidate entity,
+                    name: "name of the candidate entity",
+                    entity_type: "ontological classification of the candidate entity",
+                    ...<additional attributes>
+                }}
+            ]
+        }}
+        <ENTITIES>
+        {json.dumps(context['extracted_nodes'], indent=2)}
+        </ENTITIES>
+        For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
+        Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
+        Do NOT mark entities as duplicates if:
+        - They are related but distinct.
+        - They have similar names or purposes but refer to separate instances or concepts.
+        Task:
+        Your response will be a list called entity_resolutions which contains one entry for each entity.
+        For each entity, return the id of the entity as id, the name of the entity as name, and the duplicate_idx
+        as an integer.
+        - If an entity is a duplicate of one of its duplication_candidates, return the idx of the candidate it is a
+        duplicate of.
+        - If an entity is not a duplicate of one of its duplication candidates, return the -1 as the duplication_idx
+        """,
+        ),
+    ]
 def node_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
@@ -126,4 +197,4 @@ def node_list(context: dict[str, Any]) -> list[Message]:
     ]
-versions: Versions = {'node': node, 'node_list': node_list}
+versions: Versions = {'node': node, 'node_list': node_list, 'nodes': nodes}

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/prompts/extract_edges.py RENAMED Viewed

@@ -48,11 +48,13 @@ class MissingFacts(BaseModel):
 class Prompt(Protocol):
     edge: PromptVersion
     reflexion: PromptVersion
+    extract_attributes: PromptVersion
 class Versions(TypedDict):
     edge: PromptFunction
     reflexion: PromptFunction
+    extract_attributes: PromptFunction
 def edge(context: dict[str, Any]) -> list[Message]:
@@ -82,12 +84,18 @@ def edge(context: dict[str, Any]) -> list[Message]:
 {context['reference_time']}  # ISO 8601 (UTC); used to resolve relative time mentions
 </REFERENCE_TIME>
+<FACT TYPES>
+{context['edge_types']}
+</FACT TYPES>
 # TASK
 Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
 Only extract facts that:
 - involve two DISTINCT ENTITIES from the ENTITIES list,
 - are clearly stated or unambiguously implied in the CURRENT MESSAGE,
-- and can be represented as edges in a knowledge graph.
+    and can be represented as edges in a knowledge graph.
+- The FACT TYPES provide a list of the most important types of facts, make sure to extract any facts that
+    could be classified into one of the provided fact types
 You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
@@ -145,4 +153,40 @@ determine if any facts haven't been extracted.
     ]
-versions: Versions = {'edge': edge, 'reflexion': reflexion}
+def extract_attributes(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that extracts fact properties from the provided text.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        <MESSAGE>
+        {json.dumps(context['episode_content'], indent=2)}
+        </MESSAGE>
+        <REFERENCE TIME>
+        {context['reference_time']}
+        </REFERENCE TIME>
+        Given the above MESSAGE, its REFERENCE TIME, and the following FACT, update any of its attributes based on the information provided
+        in MESSAGE. Use the provided attribute descriptions to better understand how each attribute should be determined.
+        Guidelines:
+        1. Do not hallucinate entity property values if they cannot be found in the current context.
+        2. Only use the provided MESSAGES and FACT to set attribute values.
+        <FACT>
+        {context['fact']}
+        </FACT>
+        """,
+        ),
+    ]
+versions: Versions = {
+    'edge': edge,
+    'reflexion': reflexion,
+    'extract_attributes': extract_attributes,
+}

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/prompts/invalidate_edges.py RENAMED Viewed

@@ -24,7 +24,7 @@ from .models import Message, PromptFunction, PromptVersion
 class InvalidatedEdges(BaseModel):
     contradicted_facts: list[int] = Field(
         ...,
-        description='List of ids of facts that be should invalidated. If no facts should be invalidated, the list should be empty.',
+        description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
     )

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/search/search_utils.py RENAMED Viewed

@@ -174,7 +174,8 @@ async def edge_fulltext_search(
                      r.episodes AS episodes,
                      r.expired_at AS expired_at,
                      r.valid_at AS valid_at,
-                     r.invalid_at AS invalid_at
+                     r.invalid_at AS invalid_at,
+                     properties(r) AS attributes
                  ORDER BY score DESC LIMIT $limit
                  """
     )
@@ -243,7 +244,8 @@ async def edge_similarity_search(
                     r.episodes AS episodes,
                     r.expired_at AS expired_at,
                     r.valid_at AS valid_at,
-                    r.invalid_at AS invalid_at
+                    r.invalid_at AS invalid_at,
+                    properties(r) AS attributes
                 ORDER BY score DESC
                 LIMIT $limit
         """
@@ -301,7 +303,8 @@ async def edge_bfs_search(
                     r.episodes AS episodes,
                     r.expired_at AS expired_at,
                     r.valid_at AS valid_at,
-                    r.invalid_at AS invalid_at
+                    r.invalid_at AS invalid_at,
+                    properties(r) AS attributes
                 LIMIT $limit
         """
     )
@@ -337,10 +340,10 @@ async def node_fulltext_search(
     query = (
         """
-                                                                                CALL db.index.fulltext.queryNodes("node_name_and_summary", $query, {limit: $limit})
-                                                                                YIELD node AS n, score
-                                                                                WHERE n:Entity
-                                                                                """
+                                                                                        CALL db.index.fulltext.queryNodes("node_name_and_summary", $query, {limit: $limit})
+                                                                                        YIELD node AS n, score
+                                                                                        WHERE n:Entity
+                                                                                        """
         + filter_query
         + ENTITY_NODE_RETURN
         + """
@@ -771,7 +774,8 @@ async def get_relevant_edges(
                     episodes: e.episodes,
                     expired_at: e.expired_at,
                     valid_at: e.valid_at,
-                    invalid_at: e.invalid_at
+                    invalid_at: e.invalid_at,
+                    attributes: properties(e)
                 })[..$limit] AS matches
         """
     )
@@ -837,7 +841,8 @@ async def get_edge_invalidation_candidates(
                     episodes: e.episodes,
                     expired_at: e.expired_at,
                     valid_at: e.valid_at,
-                    invalid_at: e.invalid_at
+                    invalid_at: e.invalid_at,
+                    attributes: properties(e)
                 })[..$limit] AS matches
         """
     )

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/utils/bulk_utils.py RENAMED Viewed

@@ -137,16 +137,34 @@ async def add_nodes_and_edges_bulk_tx(
         entity_data['labels'] = list(set(node.labels + ['Entity']))
         nodes.append(entity_data)
+    edges: list[dict[str, Any]] = []
     for edge in entity_edges:
         if edge.fact_embedding is None:
             await edge.generate_embedding(embedder)
+        edge_data: dict[str, Any] = {
+            'uuid': edge.uuid,
+            'source_node_uuid': edge.source_node_uuid,
+            'target_node_uuid': edge.target_node_uuid,
+            'name': edge.name,
+            'fact': edge.fact,
+            'fact_embedding': edge.fact_embedding,
+            'group_id': edge.group_id,
+            'episodes': edge.episodes,
+            'created_at': edge.created_at,
+            'expired_at': edge.expired_at,
+            'valid_at': edge.valid_at,
+            'invalid_at': edge.invalid_at,
+        }
+        edge_data.update(edge.attributes or {})
+        edges.append(edge_data)
     await tx.run(EPISODIC_NODE_SAVE_BULK, episodes=episodes)
     await tx.run(ENTITY_NODE_SAVE_BULK, nodes=nodes)
     await tx.run(
         EPISODIC_EDGE_SAVE_BULK, episodic_edges=[edge.model_dump() for edge in episodic_edges]
     )
-    await tx.run(ENTITY_EDGE_SAVE_BULK, entity_edges=[edge.model_dump() for edge in entity_edges])
+    await tx.run(ENTITY_EDGE_SAVE_BULK, entity_edges=edges)
 async def extract_nodes_and_edges_bulk(

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/utils/maintenance/edge_operations.py RENAMED Viewed

@@ -18,6 +18,8 @@ import logging
 from datetime import datetime
 from time import time
+from pydantic import BaseModel
 from graphiti_core.edges import (
     CommunityEdge,
     EntityEdge,
@@ -35,9 +37,6 @@ from graphiti_core.prompts.extract_edges import ExtractedEdges, MissingFacts
 from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.search.search_utils import get_edge_invalidation_candidates, get_relevant_edges
 from graphiti_core.utils.datetime_utils import ensure_utc, utc_now
-from graphiti_core.utils.maintenance.temporal_operations import (
-    get_edge_contradictions,
-)
 logger = logging.getLogger(__name__)
@@ -86,6 +85,7 @@ async def extract_edges(
     nodes: list[EntityNode],
     previous_episodes: list[EpisodicNode],
     group_id: str = '',
+    edge_types: dict[str, BaseModel] | None = None,
 ) -> list[EntityEdge]:
     start = time()
@@ -94,12 +94,25 @@ async def extract_edges(
     node_uuids_by_name_map = {node.name: node.uuid for node in nodes}
+    edge_types_context = (
+        [
+            {
+                'fact_type_name': type_name,
+                'fact_type_description': type_model.__doc__,
+            }
+            for type_name, type_model in edge_types.items()
+        ]
+        if edge_types is not None
+        else []
+    )
     # Prepare context for LLM
     context = {
         'episode_content': episode.content,
         'nodes': [node.name for node in nodes],
         'previous_episodes': [ep.content for ep in previous_episodes],
         'reference_time': episode.valid_at,
+        'edge_types': edge_types_context,
         'custom_prompt': '',
     }
@@ -236,6 +249,9 @@ async def resolve_extracted_edges(
     clients: GraphitiClients,
     extracted_edges: list[EntityEdge],
     episode: EpisodicNode,
+    entities: list[EntityNode],
+    edge_types: dict[str, BaseModel],
+    edge_type_map: dict[tuple[str, str], list[str]],
 ) -> tuple[list[EntityEdge], list[EntityEdge]]:
     driver = clients.driver
     llm_client = clients.llm_client
@@ -245,7 +261,7 @@ async def resolve_extracted_edges(
     search_results: tuple[list[list[EntityEdge]], list[list[EntityEdge]]] = await semaphore_gather(
         get_relevant_edges(driver, extracted_edges, SearchFilters()),
-        get_edge_invalidation_candidates(driver, extracted_edges, SearchFilters()),
+        get_edge_invalidation_candidates(driver, extracted_edges, SearchFilters(), 0.2),
     )
     related_edges_lists, edge_invalidation_candidates = search_results
@@ -254,15 +270,50 @@ async def resolve_extracted_edges(
         f'Related edges lists: {[(e.name, e.uuid) for edges_lst in related_edges_lists for e in edges_lst]}'
     )
+    # Build entity hash table
+    uuid_entity_map: dict[str, EntityNode] = {entity.uuid: entity for entity in entities}
+    # Determine which edge types are relevant for each edge
+    edge_types_lst: list[dict[str, BaseModel]] = []
+    for extracted_edge in extracted_edges:
+        source_node_labels = uuid_entity_map[extracted_edge.source_node_uuid].labels
+        target_node_labels = uuid_entity_map[extracted_edge.target_node_uuid].labels
+        label_tuples = [
+            (source_label, target_label)
+            for source_label in source_node_labels
+            for target_label in target_node_labels
+        ]
+        extracted_edge_types = {}
+        for label_tuple in label_tuples:
+            type_names = edge_type_map.get(label_tuple, [])
+            for type_name in type_names:
+                type_model = edge_types.get(type_name)
+                if type_model is None:
+                    continue
+                extracted_edge_types[type_name] = type_model
+        edge_types_lst.append(extracted_edge_types)
     # resolve edges with related edges in the graph and find invalidation candidates
     results: list[tuple[EntityEdge, list[EntityEdge]]] = list(
         await semaphore_gather(
             *[
                 resolve_extracted_edge(
-                    llm_client, extracted_edge, related_edges, existing_edges, episode
+                    llm_client,
+                    extracted_edge,
+                    related_edges,
+                    existing_edges,
+                    episode,
+                    extracted_edge_types,
                 )
-                for extracted_edge, related_edges, existing_edges in zip(
-                    extracted_edges, related_edges_lists, edge_invalidation_candidates, strict=True
+                for extracted_edge, related_edges, existing_edges, extracted_edge_types in zip(
+                    extracted_edges,
+                    related_edges_lists,
+                    edge_invalidation_candidates,
+                    edge_types_lst,
+                    strict=True,
                 )
             ]
         )
@@ -326,10 +377,86 @@ async def resolve_extracted_edge(
     related_edges: list[EntityEdge],
     existing_edges: list[EntityEdge],
     episode: EpisodicNode,
+    edge_types: dict[str, BaseModel] | None = None,
 ) -> tuple[EntityEdge, list[EntityEdge]]:
-    resolved_edge, invalidation_candidates = await semaphore_gather(
-        dedupe_extracted_edge(llm_client, extracted_edge, related_edges, episode),
-        get_edge_contradictions(llm_client, extracted_edge, existing_edges),
+    if len(related_edges) == 0 and len(existing_edges) == 0:
+        return extracted_edge, []
+    start = time()
+    # Prepare context for LLM
+    related_edges_context = [
+        {'id': edge.uuid, 'fact': edge.fact} for i, edge in enumerate(related_edges)
+    ]
+    invalidation_edge_candidates_context = [
+        {'id': i, 'fact': existing_edge.fact} for i, existing_edge in enumerate(existing_edges)
+    ]
+    edge_types_context = (
+        [
+            {
+                'fact_type_id': i,
+                'fact_type_name': type_name,
+                'fact_type_description': type_model.__doc__,
+            }
+            for i, (type_name, type_model) in enumerate(edge_types.items())
+        ]
+        if edge_types is not None
+        else []
+    )
+    context = {
+        'existing_edges': related_edges_context,
+        'new_edge': extracted_edge.fact,
+        'edge_invalidation_candidates': invalidation_edge_candidates_context,
+        'edge_types': edge_types_context,
+    }
+    llm_response = await llm_client.generate_response(
+        prompt_library.dedupe_edges.resolve_edge(context),
+        response_model=EdgeDuplicate,
+        model_size=ModelSize.small,
+    )
+    duplicate_fact_id: int = llm_response.get('duplicate_fact_id', -1)
+    resolved_edge = (
+        related_edges[duplicate_fact_id]
+        if 0 <= duplicate_fact_id < len(related_edges)
+        else extracted_edge
+    )
+    if duplicate_fact_id >= 0 and episode is not None:
+        resolved_edge.episodes.append(episode.uuid)
+    contradicted_facts: list[int] = llm_response.get('contradicted_facts', [])
+    invalidation_candidates: list[EntityEdge] = [existing_edges[i] for i in contradicted_facts]
+    fact_type: str = str(llm_response.get('fact_type'))
+    if fact_type.upper() != 'DEFAULT' and edge_types is not None:
+        resolved_edge.name = fact_type
+        edge_attributes_context = {
+            'message': episode.content,
+            'reference_time': episode.valid_at,
+            'fact': resolved_edge.fact,
+        }
+        edge_model = edge_types.get(fact_type)
+        edge_attributes_response = await llm_client.generate_response(
+            prompt_library.extract_edges.extract_attributes(edge_attributes_context),
+            response_model=edge_model,  # type: ignore
+            model_size=ModelSize.small,
+        )
+        resolved_edge.attributes = edge_attributes_response
+    end = time()
+    logger.debug(
+        f'Resolved Edge: {extracted_edge.name} is {resolved_edge.name}, in {(end - start) * 1000} ms'
     )
     now = utc_now()

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/graphiti_core/utils/maintenance/node_operations.py RENAMED Viewed

@@ -29,7 +29,7 @@ from graphiti_core.llm_client import LLMClient
 from graphiti_core.llm_client.config import ModelSize
 from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode, create_entity_node_embeddings
 from graphiti_core.prompts import prompt_library
-from graphiti_core.prompts.dedupe_nodes import NodeDuplicate
+from graphiti_core.prompts.dedupe_nodes import NodeDuplicate, NodeResolutions
 from graphiti_core.prompts.extract_nodes import (
     ExtractedEntities,
     ExtractedEntity,
@@ -243,28 +243,65 @@ async def resolve_extracted_nodes(
     existing_nodes_lists: list[list[EntityNode]] = [result.nodes for result in search_results]
-    resolved_nodes: list[EntityNode] = await semaphore_gather(
-        *[
-            resolve_extracted_node(
-                llm_client,
-                extracted_node,
-                existing_nodes,
-                episode,
-                previous_episodes,
-                entity_types.get(
-                    next((item for item in extracted_node.labels if item != 'Entity'), '')
-                )
-                if entity_types is not None
-                else None,
-            )
-            for extracted_node, existing_nodes in zip(
-                extracted_nodes, existing_nodes_lists, strict=True
-            )
-        ]
+    entity_types_dict: dict[str, BaseModel] = entity_types if entity_types is not None else {}
+    # Prepare context for LLM
+    extracted_nodes_context = [
+        {
+            'id': i,
+            'name': node.name,
+            'entity_type': node.labels,
+            'entity_type_description': entity_types_dict.get(
+                next((item for item in node.labels if item != 'Entity'), '')
+            ).__doc__
+            or 'Default Entity Type',
+            'duplication_candidates': [
+                {
+                    **{
+                        'idx': j,
+                        'name': candidate.name,
+                        'entity_types': candidate.labels,
+                    },
+                    **candidate.attributes,
+                }
+                for j, candidate in enumerate(existing_nodes_lists[i])
+            ],
+        }
+        for i, node in enumerate(extracted_nodes)
+    ]
+    context = {
+        'extracted_nodes': extracted_nodes_context,
+        'episode_content': episode.content if episode is not None else '',
+        'previous_episodes': [ep.content for ep in previous_episodes]
+        if previous_episodes is not None
+        else [],
+    }
+    llm_response = await llm_client.generate_response(
+        prompt_library.dedupe_nodes.nodes(context),
+        response_model=NodeResolutions,
     )
+    node_resolutions: list = llm_response.get('entity_resolutions', [])
+    resolved_nodes: list[EntityNode] = []
     uuid_map: dict[str, str] = {}
-    for extracted_node, resolved_node in zip(extracted_nodes, resolved_nodes, strict=True):
+    for resolution in node_resolutions:
+        resolution_id = resolution.get('id', -1)
+        duplicate_idx = resolution.get('duplicate_idx', -1)
+        extracted_node = extracted_nodes[resolution_id]
+        resolved_node = (
+            existing_nodes_lists[resolution_id][duplicate_idx]
+            if 0 <= duplicate_idx < len(existing_nodes_lists[resolution_id])
+            else extracted_node
+        )
+        resolved_node.name = resolution.get('name')
+        resolved_nodes.append(resolved_node)
         uuid_map[extracted_node.uuid] = resolved_node.uuid
     logger.debug(f'Resolved nodes: {[(n.name, n.uuid) for n in resolved_nodes]}')
@@ -410,6 +447,7 @@ async def extract_attributes_from_node(
     llm_response = await llm_client.generate_response(
         prompt_library.extract_nodes.extract_attributes(summary_context),
         response_model=entity_attributes_model,
+        model_size=ModelSize.small,
     )
     node.summary = llm_response.get('summary', node.summary)

{graphiti_core-0.11.6rc9 → graphiti_core-0.12.0rc1}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "graphiti-core"
 description = "A temporal graph building library"
-version = "0.11.6pre9"
+version = "0.12.0pre1"
 authors = [
     { "name" = "Paul Paliychuk", "email" = "paul@getzep.com" },
     { "name" = "Preston Rasmussen", "email" = "preston@getzep.com" },