graphiti-core 0.6.1__py3-none-any.whl → 0.7.1__py3-none-any.whl

graphiti_core/graphiti.py CHANGED
@@ -262,6 +262,7 @@ class Graphiti:
         group_id: str = '',
         uuid: str | None = None,
         update_communities: bool = False,
+        entity_types: dict[str, BaseModel] | None = None,
     ) -> AddEpisodeResults:
         """
         Process an episode and update the graph.
@@ -336,7 +337,9 @@ class Graphiti:

             # Extract entities as nodes

-            extracted_nodes = await extract_nodes(self.llm_client, episode, previous_episodes)
+            extracted_nodes = await extract_nodes(
+                self.llm_client, episode, previous_episodes, entity_types
+            )
             logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')

             # Calculate Embeddings
@@ -348,7 +351,10 @@ class Graphiti:
             # Find relevant nodes already in the graph
             existing_nodes_lists: list[list[EntityNode]] = list(
                 await semaphore_gather(
-                    *[get_relevant_nodes(self.driver, [node]) for node in extracted_nodes]
+                    *[
+                        get_relevant_nodes(self.driver, SearchFilters(), [node])
+                        for node in extracted_nodes
+                    ]
                 )
             )

@@ -362,6 +368,7 @@ class Graphiti:
                     existing_nodes_lists,
                     episode,
                     previous_episodes,
+                    entity_types,
                 ),
                 extract_edges(
                     self.llm_client, episode, extracted_nodes, previous_episodes, group_id
@@ -728,8 +735,8 @@ class Graphiti:
             self.llm_client,
             [source_node, target_node],
             [
-                await get_relevant_nodes(self.driver, [source_node]),
-                await get_relevant_nodes(self.driver, [target_node]),
+                await get_relevant_nodes(self.driver, SearchFilters(), [source_node]),
+                await get_relevant_nodes(self.driver, SearchFilters(), [target_node]),
             ],
         )

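The new entity_types parameter on add_episode lets callers supply custom entity types as Pydantic models; extraction then classifies each node against those types and stores their fields as node attributes. A minimal sketch of the call, assuming an already-configured Graphiti client; the Person model and episode values are illustrative, and the remaining add_episode arguments are assumed unchanged from 0.6.x:

from datetime import datetime, timezone

from pydantic import BaseModel, Field


class Person(BaseModel):
    occupation: str | None = Field(None, description='Occupation of the person')


async def ingest(graphiti):
    # entity_types maps a label name to the Pydantic model describing its attributes
    await graphiti.add_episode(
        name='conversation-1',
        episode_body='Alice mentioned that she works as a nurse.',
        source_description='chat transcript',
        reference_time=datetime.now(timezone.utc),
        entity_types={'Person': Person},
    )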
graphiti_core/models/nodes/node_db_queries.py CHANGED
@@ -31,14 +31,16 @@ EPISODIC_NODE_SAVE_BULK = """

 ENTITY_NODE_SAVE = """
     MERGE (n:Entity {uuid: $uuid})
-    SET n = {uuid: $uuid, name: $name, group_id: $group_id, summary: $summary, created_at: $created_at}
+    SET n:$($labels)
+    SET n = $entity_data
     WITH n CALL db.create.setNodeVectorProperty(n, "name_embedding", $name_embedding)
     RETURN n.uuid AS uuid"""

 ENTITY_NODE_SAVE_BULK = """
     UNWIND $nodes AS node
     MERGE (n:Entity {uuid: node.uuid})
-    SET n = {uuid: node.uuid, name: node.name, group_id: node.group_id, summary: node.summary, created_at: node.created_at}
+    SET n:$(node.labels)
+    SET n = node
     WITH n, node CALL db.create.setNodeVectorProperty(n, "name_embedding", node.name_embedding)
     RETURN n.uuid AS uuid
 """
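Both queries now use Cypher's dynamic-label syntax (SET n:$($labels), available in newer Neo4j 5.x releases) and assign one whole property map instead of enumerating properties. An illustrative sketch of the parameters the updated save path sends for ENTITY_NODE_SAVE (see the nodes.py changes below); all values are made up:

params = {
    'labels': ['Preference', 'Entity'],  # consumed by SET n:$($labels)
    'entity_data': {                     # consumed by SET n = $entity_data
        'uuid': 'node-uuid-1',
        'name': 'jazz',
        'group_id': 'group-1',
        'summary': 'A music preference mentioned by the user.',
        'created_at': '2024-11-01T00:00:00+00:00',
        'name_embedding': [0.1, 0.2, 0.3],
        'strength': 'strong',            # custom attribute flattened into the same map
    },
}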
graphiti_core/nodes.py CHANGED
@@ -255,6 +255,9 @@ class EpisodicNode(Node):
 class EntityNode(Node):
     name_embedding: list[float] | None = Field(default=None, description='embedding of the name')
     summary: str = Field(description='regional summary of surrounding edges', default_factory=str)
+    attributes: dict[str, Any] = Field(
+        default={}, description='Additional attributes of the node. Dependent on node labels'
+    )

     async def generate_name_embedding(self, embedder: EmbedderClient):
         start = time()
@@ -266,14 +269,21 @@ class EntityNode(Node):
         return self.name_embedding

     async def save(self, driver: AsyncDriver):
+        entity_data: dict[str, Any] = {
+            'uuid': self.uuid,
+            'name': self.name,
+            'name_embedding': self.name_embedding,
+            'group_id': self.group_id,
+            'summary': self.summary,
+            'created_at': self.created_at,
+        }
+
+        entity_data.update(self.attributes or {})
+
         result = await driver.execute_query(
             ENTITY_NODE_SAVE,
-            uuid=self.uuid,
-            name=self.name,
-            group_id=self.group_id,
-            summary=self.summary,
-            name_embedding=self.name_embedding,
-            created_at=self.created_at,
+            labels=self.labels + ['Entity'],
+            entity_data=entity_data,
             database_=DEFAULT_DATABASE,
         )

@@ -292,7 +302,9 @@ class EntityNode(Node):
             n.name_embedding AS name_embedding,
             n.group_id AS group_id,
             n.created_at AS created_at,
-            n.summary AS summary
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
         """,
         uuid=uuid,
         database_=DEFAULT_DATABASE,
@@ -317,7 +329,9 @@ class EntityNode(Node):
             n.name_embedding AS name_embedding,
             n.group_id AS group_id,
             n.created_at AS created_at,
-            n.summary AS summary
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
         """,
         uuids=uuids,
         database_=DEFAULT_DATABASE,
@@ -351,7 +365,9 @@ class EntityNode(Node):
             n.name_embedding AS name_embedding,
             n.group_id AS group_id,
             n.created_at AS created_at,
-            n.summary AS summary
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
         ORDER BY n.uuid DESC
         """
         + limit_query,
@@ -503,9 +519,10 @@ def get_entity_node_from_record(record: Any) -> EntityNode:
         name=record['name'],
         group_id=record['group_id'],
         name_embedding=record['name_embedding'],
-        labels=['Entity'],
+        labels=record['labels'],
         created_at=record['created_at'].to_native(),
         summary=record['summary'],
+        attributes=record['attributes'],
     )


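Taken together, the nodes.py changes round-trip custom types: labels and flattened attributes are written on save and read back through labels(n) and properties(n). A small sketch using the new fields, with illustrative values; the commented save call assumes an open Neo4j AsyncDriver:

from graphiti_core.nodes import EntityNode
from graphiti_core.utils.datetime_utils import utc_now

node = EntityNode(
    name='Alice',
    group_id='group-1',
    labels=['Entity', 'Person'],
    summary='',
    created_at=utc_now(),
    attributes={'occupation': 'nurse'},
)
# await node.save(driver)  # writes the labels plus the flattened attribute map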
graphiti_core/prompts/extract_nodes.py CHANGED
@@ -30,11 +30,19 @@ class MissedEntities(BaseModel):
     missed_entities: list[str] = Field(..., description="Names of entities that weren't extracted")


+class EntityClassification(BaseModel):
+    entity_classification: str = Field(
+        ...,
+        description='Dictionary of entity classifications. Key is the entity name and value is the entity type',
+    )
+
+
 class Prompt(Protocol):
     extract_message: PromptVersion
     extract_json: PromptVersion
     extract_text: PromptVersion
     reflexion: PromptVersion
+    classify_nodes: PromptVersion


 class Versions(TypedDict):
@@ -42,6 +50,7 @@ class Versions(TypedDict):
     extract_json: PromptFunction
     extract_text: PromptFunction
     reflexion: PromptFunction
+    classify_nodes: PromptFunction


 def extract_message(context: dict[str, Any]) -> list[Message]:
@@ -66,6 +75,7 @@ Guidelines:
 4. DO NOT create nodes for temporal information like dates, times or years (these will be added to edges later).
 5. Be as explicit as possible in your node names, using full names.
 6. DO NOT extract entities mentioned only in PREVIOUS MESSAGES, those messages are only to provide context.
+7. Extract preferences as their own nodes
 """
     return [
         Message(role='system', content=sys_prompt),
@@ -109,7 +119,7 @@ def extract_text(context: dict[str, Any]) -> list[Message]:

 {context['custom_prompt']}

-Given the following text, extract entity nodes from the TEXT that are explicitly or implicitly mentioned:
+Given the above text, extract entity nodes from the TEXT that are explicitly or implicitly mentioned:

 Guidelines:
 1. Extract significant entities, concepts, or actors mentioned in the conversation.
@@ -147,9 +157,41 @@ extracted.
     ]


+def classify_nodes(context: dict[str, Any]) -> list[Message]:
+    sys_prompt = """You are an AI assistant that classifies entity nodes given the context from which they were extracted"""
+
+    user_prompt = f"""
+    <PREVIOUS MESSAGES>
+    {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+    </PREVIOUS MESSAGES>
+    <CURRENT MESSAGE>
+    {context["episode_content"]}
+    </CURRENT MESSAGE>
+
+    <EXTRACTED ENTITIES>
+    {context['extracted_entities']}
+    </EXTRACTED ENTITIES>
+
+    <ENTITY TYPES>
+    {context['entity_types']}
+    </ENTITY TYPES>
+
+    Given the above conversation, extracted entities, and provided entity types, classify the extracted entities.
+
+    Guidelines:
+    1. Each entity must have exactly one type
+    2. If none of the provided entity types accurately classify an extracted node, the type should be set to None
+    """
+    return [
+        Message(role='system', content=sys_prompt),
+        Message(role='user', content=user_prompt),
+    ]
+
+
 versions: Versions = {
     'extract_message': extract_message,
     'extract_json': extract_json,
     'extract_text': extract_text,
     'reflexion': reflexion,
+    'classify_nodes': classify_nodes,
 }
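The context keys the new classify_nodes prompt reads, shown with made-up values; node_operations.py (below) builds the same dict and calls it through prompt_library:

from graphiti_core.prompts import prompt_library

context = {
    'episode_content': 'Alice said she prefers jazz over pop.',
    'previous_episodes': ['Alice introduced herself.'],
    'extracted_entities': ['Alice', 'jazz', 'pop'],
    'entity_types': ['Person', 'Preference'],
}
messages = prompt_library.extract_nodes.classify_nodes(context)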
graphiti_core/prompts/summarize_nodes.py CHANGED
@@ -24,7 +24,8 @@ from .models import Message, PromptFunction, PromptVersion

 class Summary(BaseModel):
     summary: str = Field(
-        ..., description='Summary containing the important information from both summaries'
+        ...,
+        description='Summary containing the important information about the entity. Under 500 words',
     )


@@ -68,7 +69,7 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
-            content='You are a helpful assistant that combines summaries with new conversation context.',
+            content='You are a helpful assistant that extracts entity properties from the provided text.',
         ),
         Message(
             role='user',
@@ -79,18 +80,23 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
         {json.dumps(context['episode_content'], indent=2)}
         </MESSAGES>

-        Given the above MESSAGES and the following ENTITY name and ENTITY CONTEXT, create a summary for the ENTITY. Your summary must only use
-        information from the provided MESSAGES and from the ENTITY CONTEXT. Your summary should also only contain information relevant to the
-        provided ENTITY.
+        Given the above MESSAGES and the following ENTITY name, create a summary for the ENTITY. Your summary must only use
+        information from the provided MESSAGES. Your summary should also only contain information relevant to the
+        provided ENTITY. Summaries must be under 500 words.

-        Summaries must be under 500 words.
+        In addition, extract any values for the provided entity properties based on their descriptions.

         <ENTITY>
         {context['node_name']}
         </ENTITY>
+
         <ENTITY CONTEXT>
         {context['node_summary']}
         </ENTITY CONTEXT>
+
+        <ATTRIBUTES>
+        {json.dumps(context['attributes'], indent=2)}
+        </ATTRIBUTES>
         """,
         ),
     ]
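summarize_context now does double duty: it still produces the summary, and it also asks for values of the listed attribute names. An illustrative context with the new 'attributes' key (values made up):

from graphiti_core.prompts import prompt_library

context = {
    'episode_content': ['Alice said she works night shifts as a nurse.'],
    'node_name': 'Alice',
    'node_summary': '',
    'attributes': ['occupation'],  # field names collected from the matching entity types
}
messages = prompt_library.summarize_nodes.summarize_context(context)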
graphiti_core/search/search.py CHANGED
@@ -100,6 +100,7 @@ async def search(
             query_vector,
             group_ids,
             config.node_config,
+            search_filter,
             center_node_uuid,
             bfs_origin_node_uuids,
             config.limit,
@@ -233,6 +234,7 @@ async def node_search(
     query_vector: list[float],
     group_ids: list[str] | None,
     config: NodeSearchConfig | None,
+    search_filter: SearchFilters,
     center_node_uuid: str | None = None,
     bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
@@ -243,11 +245,13 @@
     search_results: list[list[EntityNode]] = list(
         await semaphore_gather(
             *[
-                node_fulltext_search(driver, query, group_ids, 2 * limit),
+                node_fulltext_search(driver, query, search_filter, group_ids, 2 * limit),
                 node_similarity_search(
-                    driver, query_vector, group_ids, 2 * limit, config.sim_min_score
+                    driver, query_vector, search_filter, group_ids, 2 * limit, config.sim_min_score
+                ),
+                node_bfs_search(
+                    driver, bfs_origin_node_uuids, search_filter, config.bfs_max_depth, 2 * limit
                 ),
-                node_bfs_search(driver, bfs_origin_node_uuids, config.bfs_max_depth, 2 * limit),
             ]
         )
     )
@@ -255,7 +259,9 @@
     if NodeSearchMethod.bfs in config.search_methods and bfs_origin_node_uuids is None:
         origin_node_uuids = [node.uuid for result in search_results for node in result]
         search_results.append(
-            await node_bfs_search(driver, origin_node_uuids, config.bfs_max_depth, 2 * limit)
+            await node_bfs_search(
+                driver, origin_node_uuids, search_filter, config.bfs_max_depth, 2 * limit
+            )
        )

     search_result_uuids = [[node.uuid for node in result] for result in search_results]
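node_search now threads one SearchFilters through every sub-search, and the parameter is required, sitting between config and center_node_uuid. A sketch of a call site, assuming driver, query, query_vector, and config already exist; keyword arguments are used so the exact positional order does not matter:

from graphiti_core.search.search import node_search
from graphiti_core.search.search_filters import SearchFilters

async def filtered_node_search(driver, query, query_vector, config):
    # restrict all three sub-searches (fulltext, similarity, BFS) to Person nodes
    return await node_search(
        driver,
        query,
        query_vector,
        group_ids=None,
        config=config,
        search_filter=SearchFilters(node_labels=['Person']),
    )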
graphiti_core/search/search_filters.py CHANGED
@@ -39,18 +39,37 @@ class DateFilter(BaseModel):


 class SearchFilters(BaseModel):
+    node_labels: list[str] | None = Field(
+        default=None, description='List of node labels to filter on'
+    )
     valid_at: list[list[DateFilter]] | None = Field(default=None)
     invalid_at: list[list[DateFilter]] | None = Field(default=None)
     created_at: list[list[DateFilter]] | None = Field(default=None)
     expired_at: list[list[DateFilter]] | None = Field(default=None)


-def search_filter_query_constructor(filters: SearchFilters) -> tuple[LiteralString, dict[str, Any]]:
+def node_search_filter_query_constructor(
+    filters: SearchFilters,
+) -> tuple[LiteralString, dict[str, Any]]:
+    filter_query: LiteralString = ''
+    filter_params: dict[str, Any] = {}
+
+    if filters.node_labels is not None:
+        node_labels = ':'.join(filters.node_labels)
+        node_label_filter = ' AND n:' + node_labels
+        filter_query += node_label_filter
+
+    return filter_query, filter_params
+
+
+def edge_search_filter_query_constructor(
+    filters: SearchFilters,
+) -> tuple[LiteralString, dict[str, Any]]:
     filter_query: LiteralString = ''
     filter_params: dict[str, Any] = {}

     if filters.valid_at is not None:
-        valid_at_filter = 'AND ('
+        valid_at_filter = ' AND ('
         for i, or_list in enumerate(filters.valid_at):
             for j, date_filter in enumerate(or_list):
                 filter_params['valid_at_' + str(j)] = date_filter.date
@@ -75,7 +94,7 @@ def search_filter_query_constructor(filters: SearchFilters) -> tuple[LiteralStri
         filter_query += valid_at_filter

     if filters.invalid_at is not None:
-        invalid_at_filter = 'AND ('
+        invalid_at_filter = ' AND ('
         for i, or_list in enumerate(filters.invalid_at):
             for j, date_filter in enumerate(or_list):
                 filter_params['invalid_at_' + str(j)] = date_filter.date
@@ -100,7 +119,7 @@ def search_filter_query_constructor(filters: SearchFilters) -> tuple[LiteralStri
         filter_query += invalid_at_filter

     if filters.created_at is not None:
-        created_at_filter = 'AND ('
+        created_at_filter = ' AND ('
        for i, or_list in enumerate(filters.created_at):
             for j, date_filter in enumerate(or_list):
                 filter_params['created_at_' + str(j)] = date_filter.date
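The net effect of the split: the old date-filter logic moves to edge_search_filter_query_constructor unchanged apart from the leading space added to the 'AND (' literals, while node-label filtering is new. What the node constructor produces, straight from the code above:

from graphiti_core.search.search_filters import (
    SearchFilters,
    node_search_filter_query_constructor,
)

filters = SearchFilters(node_labels=['Person', 'Preference'])
filter_query, filter_params = node_search_filter_query_constructor(filters)
assert filter_query == ' AND n:Person:Preference'  # labels joined with ':', node must carry all of them
assert filter_params == {}  # the node filter never adds query parameters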
graphiti_core/search/search_utils.py CHANGED
@@ -38,7 +38,11 @@ from graphiti_core.nodes import (
     get_community_node_from_record,
     get_entity_node_from_record,
 )
-from graphiti_core.search.search_filters import SearchFilters, search_filter_query_constructor
+from graphiti_core.search.search_filters import (
+    SearchFilters,
+    edge_search_filter_query_constructor,
+    node_search_filter_query_constructor,
+)

 logger = logging.getLogger(__name__)

@@ -97,7 +101,9 @@ async def get_mentioned_nodes(
             n.name AS name,
             n.name_embedding AS name_embedding,
             n.created_at AS created_at,
-            n.summary AS summary
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
         """,
         uuids=episode_uuids,
         database_=DEFAULT_DATABASE,
@@ -146,7 +152,7 @@ async def edge_fulltext_search(
     if fuzzy_query == '':
         return []

-    filter_query, filter_params = search_filter_query_constructor(search_filter)
+    filter_query, filter_params = edge_search_filter_query_constructor(search_filter)

     cypher_query = Query(
         """
@@ -205,7 +211,7 @@ async def edge_similarity_search(

     query_params: dict[str, Any] = {}

-    filter_query, filter_params = search_filter_query_constructor(search_filter)
+    filter_query, filter_params = edge_search_filter_query_constructor(search_filter)
     query_params.update(filter_params)

     group_filter_query: LiteralString = ''
@@ -223,8 +229,8 @@

     query: LiteralString = (
         """
-    MATCH (n:Entity)-[r:RELATES_TO]->(m:Entity)
-    """
+        MATCH (n:Entity)-[r:RELATES_TO]->(m:Entity)
+        """
         + group_filter_query
         + filter_query
         + """\nWITH DISTINCT r, vector.similarity.cosine(r.fact_embedding, $search_vector) AS score
@@ -276,7 +282,7 @@ async def edge_bfs_search(
     if bfs_origin_node_uuids is None:
         return []

-    filter_query, filter_params = search_filter_query_constructor(search_filter)
+    filter_query, filter_params = edge_search_filter_query_constructor(search_filter)

     query = Query(
         """
@@ -323,6 +329,7 @@ async def edge_bfs_search(
 async def node_fulltext_search(
     driver: AsyncDriver,
     query: str,
+    search_filter: SearchFilters,
     group_ids: list[str] | None = None,
     limit=RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityNode]:
@@ -331,20 +338,30 @@ async def node_fulltext_search(
     if fuzzy_query == '':
         return []

+    filter_query, filter_params = node_search_filter_query_constructor(search_filter)
+
     records, _, _ = await driver.execute_query(
         """
         CALL db.index.fulltext.queryNodes("node_name_and_summary", $query, {limit: $limit})
-        YIELD node AS n, score
+        YIELD node AS node, score
+        MATCH (n:Entity)
+        WHERE n.uuid = node.uuid
+        """
+        + filter_query
+        + """
         RETURN
             n.uuid AS uuid,
             n.group_id AS group_id,
             n.name AS name,
             n.name_embedding AS name_embedding,
             n.created_at AS created_at,
-            n.summary AS summary
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
         ORDER BY score DESC
         LIMIT $limit
         """,
+        filter_params,
         query=fuzzy_query,
         group_ids=group_ids,
         limit=limit,
@@ -359,6 +376,7 @@ async def node_fulltext_search(
 async def node_similarity_search(
     driver: AsyncDriver,
     search_vector: list[float],
+    search_filter: SearchFilters,
     group_ids: list[str] | None = None,
     limit=RELEVANT_SCHEMA_LIMIT,
     min_score: float = DEFAULT_MIN_SCORE,
@@ -375,12 +393,16 @@ async def node_similarity_search(
         group_filter_query += 'WHERE n.group_id IN $group_ids'
         query_params['group_ids'] = group_ids

+    filter_query, filter_params = node_search_filter_query_constructor(search_filter)
+    query_params.update(filter_params)
+
     records, _, _ = await driver.execute_query(
         runtime_query
         + """
         MATCH (n:Entity)
         """
         + group_filter_query
+        + filter_query
         + """
         WITH n, vector.similarity.cosine(n.name_embedding, $search_vector) AS score
         WHERE score > $min_score
@@ -390,7 +412,9 @@ async def node_similarity_search(
             n.name AS name,
             n.name_embedding AS name_embedding,
             n.created_at AS created_at,
-            n.summary AS summary
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
         ORDER BY score DESC
         LIMIT $limit
         """,
@@ -410,6 +434,7 @@ async def node_similarity_search(
 async def node_bfs_search(
     driver: AsyncDriver,
     bfs_origin_node_uuids: list[str] | None,
+    search_filter: SearchFilters,
     bfs_max_depth: int,
     limit: int,
 ) -> list[EntityNode]:
@@ -417,19 +442,28 @@ async def node_bfs_search(
     if bfs_origin_node_uuids is None:
         return []

+    filter_query, filter_params = node_search_filter_query_constructor(search_filter)
+
     records, _, _ = await driver.execute_query(
         """
         UNWIND $bfs_origin_node_uuids AS origin_uuid
         MATCH (origin:Entity|Episodic {uuid: origin_uuid})-[:RELATES_TO|MENTIONS]->{1,3}(n:Entity)
-        RETURN DISTINCT
-            n.uuid As uuid,
-            n.group_id AS group_id,
-            n.name AS name,
-            n.name_embedding AS name_embedding,
-            n.created_at AS created_at,
-            n.summary AS summary
-        LIMIT $limit
-        """,
+        WHERE n.group_id = origin.group_id
+        """
+        + filter_query
+        + """
+        RETURN DISTINCT
+            n.uuid As uuid,
+            n.group_id AS group_id,
+            n.name AS name,
+            n.name_embedding AS name_embedding,
+            n.created_at AS created_at,
+            n.summary AS summary,
+            labels(n) AS labels,
+            properties(n) AS attributes
+        LIMIT $limit
+        """,
+        filter_params,
         bfs_origin_node_uuids=bfs_origin_node_uuids,
         depth=bfs_max_depth,
         limit=limit,
@@ -531,6 +565,7 @@ async def hybrid_node_search(
     queries: list[str],
     embeddings: list[list[float]],
     driver: AsyncDriver,
+    search_filter: SearchFilters,
     group_ids: list[str] | None = None,
     limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityNode]:
@@ -575,8 +610,14 @@ async def hybrid_node_search(
     start = time()
     results: list[list[EntityNode]] = list(
         await semaphore_gather(
-            *[node_fulltext_search(driver, q, group_ids, 2 * limit) for q in queries],
-            *[node_similarity_search(driver, e, group_ids, 2 * limit) for e in embeddings],
+            *[
+                node_fulltext_search(driver, q, search_filter, group_ids, 2 * limit)
+                for q in queries
+            ],
+            *[
+                node_similarity_search(driver, e, search_filter, group_ids, 2 * limit)
+                for e in embeddings
+            ],
         )
     )

@@ -596,6 +637,7 @@

 async def get_relevant_nodes(
     driver: AsyncDriver,
+    search_filter: SearchFilters,
     nodes: list[EntityNode],
 ) -> list[EntityNode]:
     """
@@ -627,6 +669,7 @@ async def get_relevant_nodes(
         [node.name for node in nodes],
         [node.name_embedding for node in nodes if node.name_embedding is not None],
         driver,
+        search_filter,
         [node.group_id for node in nodes],
     )

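Every node-search helper in this file now takes the SearchFilters explicitly; an empty SearchFilters() reproduces the old unfiltered behaviour, which is exactly what graphiti.py and bulk_utils.py pass at their call sites. A sketch, assuming an open AsyncDriver and a list of extracted EntityNode objects:

from graphiti_core.search.search_filters import SearchFilters
from graphiti_core.search.search_utils import get_relevant_nodes

async def find_duplicate_candidates(driver, nodes):
    # no node_labels set, so no ' AND n:...' clause is added to the queries
    return await get_relevant_nodes(driver, SearchFilters(), nodes)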
graphiti_core/utils/bulk_utils.py CHANGED
@@ -23,6 +23,7 @@ from math import ceil
 from neo4j import AsyncDriver, AsyncManagedTransaction
 from numpy import dot, sqrt
 from pydantic import BaseModel
+from typing_extensions import Any

 from graphiti_core.edges import Edge, EntityEdge, EpisodicEdge
 from graphiti_core.helpers import semaphore_gather
@@ -36,6 +37,7 @@ from graphiti_core.models.nodes.node_db_queries import (
     EPISODIC_NODE_SAVE_BULK,
 )
 from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
+from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.search.search_utils import get_relevant_edges, get_relevant_nodes
 from graphiti_core.utils.datetime_utils import utc_now
 from graphiti_core.utils.maintenance.edge_operations import (
@@ -109,8 +111,23 @@ async def add_nodes_and_edges_bulk_tx(
     episodes = [dict(episode) for episode in episodic_nodes]
     for episode in episodes:
         episode['source'] = str(episode['source'].value)
+    nodes: list[dict[str, Any]] = []
+    for node in entity_nodes:
+        entity_data: dict[str, Any] = {
+            'uuid': node.uuid,
+            'name': node.name,
+            'name_embedding': node.name_embedding,
+            'group_id': node.group_id,
+            'summary': node.summary,
+            'created_at': node.created_at,
+        }
+
+        entity_data.update(node.attributes or {})
+        entity_data['labels'] = list(set(node.labels + ['Entity']))
+        nodes.append(entity_data)
+
     await tx.run(EPISODIC_NODE_SAVE_BULK, episodes=episodes)
-    await tx.run(ENTITY_NODE_SAVE_BULK, nodes=[dict(entity) for entity in entity_nodes])
+    await tx.run(ENTITY_NODE_SAVE_BULK, nodes=nodes)
     await tx.run(EPISODIC_EDGE_SAVE_BULK, episodic_edges=[dict(edge) for edge in episodic_edges])
     await tx.run(ENTITY_EDGE_SAVE_BULK, entity_edges=[dict(edge) for edge in entity_edges])

@@ -172,7 +189,7 @@ async def dedupe_nodes_bulk(

     existing_nodes_chunks: list[list[EntityNode]] = list(
         await semaphore_gather(
-            *[get_relevant_nodes(driver, node_chunk) for node_chunk in node_chunks]
+            *[get_relevant_nodes(driver, SearchFilters(), node_chunk) for node_chunk in node_chunks]
         )
     )

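Unlike the single-node save, the bulk path carries the labels inside each node dict, where SET n:$(node.labels) picks them up, and list(set(...)) keeps 'Entity' from being duplicated. Illustrative shape of one entry in the $nodes list (values made up):

bulk_node = {
    'uuid': 'node-uuid-1',
    'name': 'Alice',
    'name_embedding': [0.1, 0.2, 0.3],
    'group_id': 'group-1',
    'summary': 'A nurse mentioned in the conversation.',
    'created_at': '2024-11-01T00:00:00+00:00',
    'occupation': 'nurse',           # flattened custom attribute
    'labels': ['Person', 'Entity'],  # deduplicated via list(set(node.labels + ['Entity']))
}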
graphiti_core/utils/maintenance/node_operations.py CHANGED
@@ -14,15 +14,19 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

+import ast
 import logging
 from time import time

+import pydantic
+from pydantic import BaseModel
+
 from graphiti_core.helpers import MAX_REFLEXION_ITERATIONS, semaphore_gather
 from graphiti_core.llm_client import LLMClient
 from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
 from graphiti_core.prompts import prompt_library
 from graphiti_core.prompts.dedupe_nodes import NodeDuplicate
-from graphiti_core.prompts.extract_nodes import ExtractedNodes, MissedEntities
+from graphiti_core.prompts.extract_nodes import EntityClassification, ExtractedNodes, MissedEntities
 from graphiti_core.prompts.summarize_nodes import Summary
 from graphiti_core.utils.datetime_utils import utc_now

@@ -114,6 +118,7 @@ async def extract_nodes(
     llm_client: LLMClient,
     episode: EpisodicNode,
     previous_episodes: list[EpisodicNode],
+    entity_types: dict[str, BaseModel] | None = None,
 ) -> list[EntityNode]:
     start = time()
     extracted_node_names: list[str] = []
@@ -144,15 +149,35 @@ async def extract_nodes(
     for entity in missing_entities:
         custom_prompt += f'\n{entity},'

+    node_classification_context = {
+        'episode_content': episode.content,
+        'previous_episodes': [ep.content for ep in previous_episodes],
+        'extracted_entities': extracted_node_names,
+        'entity_types': entity_types.keys() if entity_types is not None else [],
+    }
+
+    node_classifications: dict[str, str | None] = {}
+
+    if entity_types is not None:
+        llm_response = await llm_client.generate_response(
+            prompt_library.extract_nodes.classify_nodes(node_classification_context),
+            response_model=EntityClassification,
+        )
+        response_string = llm_response.get('entity_classification', '{}')
+        node_classifications.update(ast.literal_eval(response_string))
+
     end = time()
     logger.debug(f'Extracted new nodes: {extracted_node_names} in {(end - start) * 1000} ms')
     # Convert the extracted data into EntityNode objects
     new_nodes = []
     for name in extracted_node_names:
+        entity_type = node_classifications.get(name)
+        labels = ['Entity'] if entity_type is None else ['Entity', entity_type]
+
         new_node = EntityNode(
             name=name,
             group_id=episode.group_id,
-            labels=['Entity'],
+            labels=labels,
             summary='',
             created_at=utc_now(),
         )
@@ -218,6 +243,7 @@ async def resolve_extracted_nodes(
     existing_nodes_lists: list[list[EntityNode]],
     episode: EpisodicNode | None = None,
     previous_episodes: list[EpisodicNode] | None = None,
+    entity_types: dict[str, BaseModel] | None = None,
 ) -> tuple[list[EntityNode], dict[str, str]]:
     uuid_map: dict[str, str] = {}
     resolved_nodes: list[EntityNode] = []
@@ -225,7 +251,12 @@ async def resolve_extracted_nodes(
         await semaphore_gather(
             *[
                 resolve_extracted_node(
-                    llm_client, extracted_node, existing_nodes, episode, previous_episodes
+                    llm_client,
+                    extracted_node,
+                    existing_nodes,
+                    episode,
+                    previous_episodes,
+                    entity_types,
                 )
                 for extracted_node, existing_nodes in zip(extracted_nodes, existing_nodes_lists)
             ]
@@ -245,6 +276,7 @@ async def resolve_extracted_node(
     existing_nodes: list[EntityNode],
     episode: EpisodicNode | None = None,
     previous_episodes: list[EpisodicNode] | None = None,
+    entity_types: dict[str, BaseModel] | None = None,
 ) -> tuple[EntityNode, dict[str, str]]:
     start = time()

@@ -273,19 +305,39 @@
         'previous_episodes': [ep.content for ep in previous_episodes]
         if previous_episodes is not None
         else [],
+        'attributes': [],
     }

-    llm_response, node_summary_response = await semaphore_gather(
+    entity_type_classes: tuple[BaseModel, ...] = tuple()
+    if entity_types is not None:  # type: ignore
+        entity_type_classes = entity_type_classes + tuple(
+            filter(
+                lambda x: x is not None,  # type: ignore
+                [entity_types.get(entity_type) for entity_type in extracted_node.labels],  # type: ignore
+            )
+        )
+
+    for entity_type in entity_type_classes:
+        for field_name in entity_type.model_fields:
+            summary_context.get('attributes', []).append(field_name)  # type: ignore
+
+    entity_attributes_model = pydantic.create_model(  # type: ignore
+        'EntityAttributes',
+        __base__=entity_type_classes + (Summary,),  # type: ignore
+    )
+
+    llm_response, node_attributes_response = await semaphore_gather(
         llm_client.generate_response(
             prompt_library.dedupe_nodes.node(context), response_model=NodeDuplicate
         ),
         llm_client.generate_response(
             prompt_library.summarize_nodes.summarize_context(summary_context),
-            response_model=Summary,
+            response_model=entity_attributes_model,
         ),
     )

-    extracted_node.summary = node_summary_response.get('summary', '')
+    extracted_node.summary = node_attributes_response.get('summary', '')
+    extracted_node.attributes.update(node_attributes_response)

     is_duplicate: bool = llm_response.get('is_duplicate', False)
     uuid: str | None = llm_response.get('uuid', None)
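The dynamic response model is the core trick here: the entity-type models matching the node's labels are combined with Summary through pydantic.create_model, so one LLM call returns the summary and the typed attributes together. A standalone sketch of that combination; Person is a hypothetical entity type, and the real code builds the base tuple from entity_types and extracted_node.labels:

import pydantic
from pydantic import BaseModel, Field


class Summary(BaseModel):
    summary: str = Field(..., description='Summary of the entity. Under 500 words')


class Person(BaseModel):
    occupation: str | None = Field(None, description='Occupation of the person')


# combine the bases into one response model, as resolve_extracted_node does
EntityAttributes = pydantic.create_model(
    'EntityAttributes',
    __base__=(Person, Summary),
)

print(sorted(EntityAttributes.model_fields))  # ['occupation', 'summary']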
graphiti_core-0.6.1.dist-info/METADATA → graphiti_core-0.7.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphiti-core
-Version: 0.6.1
+Version: 0.7.1
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
graphiti_core-0.6.1.dist-info/RECORD → graphiti_core-0.7.1.dist-info/RECORD CHANGED
@@ -9,7 +9,7 @@ graphiti_core/embedder/client.py,sha256=HKIlpPLnzFT81jurPkry6z8F8nxfZVfejdcfxHVU
 graphiti_core/embedder/openai.py,sha256=FzEM9rtSDK1wTb4iYKjNjjdFf8BEBTDxG2vM_E-5W-8,1621
 graphiti_core/embedder/voyage.py,sha256=7kqrLG75J3Q6cdA2Nlx1JSYtpk2141ckdl3OtDDw0vU,1882
 graphiti_core/errors.py,sha256=ddHrHGQxhwkVAtSph4AV84UoOlgwZufMczXPwB7uqPo,1795
-graphiti_core/graphiti.py,sha256=IaQ2xUM3Z1BG7ByJpznRAdg3FWtcOuIOq9YkY_JfiLE,28974
+graphiti_core/graphiti.py,sha256=BfsR_JF89_bX0D9PJ2Q2IHQrnph9hd4I7-ayGvvZxpU,29231
 graphiti_core/helpers.py,sha256=z7ApOgrm_J7hk5FN_XPAwkKyopEY943BgHjDJbSXr2s,2869
 graphiti_core/llm_client/__init__.py,sha256=PA80TSMeX-sUXITXEAxMDEt3gtfZgcJrGJUcyds1mSo,207
 graphiti_core/llm_client/anthropic_client.py,sha256=RlD6e49XvMJsTKU0krpq46gPSFm6-hfLkkq4Sfx27BE,2574
@@ -24,38 +24,38 @@ graphiti_core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 graphiti_core/models/edges/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 graphiti_core/models/edges/edge_db_queries.py,sha256=2UoLkmazO-FJYqjc3g0LuL-pyjekzQxxed_XHVv_HZE,2671
 graphiti_core/models/nodes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-graphiti_core/models/nodes/node_db_queries.py,sha256=I0top_N23FN0U5ZbypaS5IXvtfx2zgJmKUCT_7mpUdo,2257
-graphiti_core/nodes.py,sha256=_ExaTj2HU-xDczbls4aFcLdpc8zwPZUZ8JgVOrBiEdw,16098
+graphiti_core/models/nodes/node_db_queries.py,sha256=f4_UT6XL8UDt4_CO9YIHeI8pvpw_vrutA9SYrgi6QCU,2121
+graphiti_core/nodes.py,sha256=dKllAYBvNy6uCDxvacvNoVHiEm-wJm_cIK3KKTahVkM,16709
 graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
 graphiti_core/prompts/dedupe_edges.py,sha256=EuX8ngeItBzrlMBOgeHrpExzxIFHD2aoDyaX1ZniF6I,3556
 graphiti_core/prompts/dedupe_nodes.py,sha256=mqvNATL-4Vo33vaxUEZfOq6hXXOiL-ftY0zcx2G-82I,4624
 graphiti_core/prompts/eval.py,sha256=csW494kKBMvWSm2SYLIRuGgNghhwNR3YwGn3veo3g_Y,3691
 graphiti_core/prompts/extract_edge_dates.py,sha256=td2yx2wnX-nLioMa0mtla3WcRyO71_wSjemT79YZGQ0,4096
 graphiti_core/prompts/extract_edges.py,sha256=vyEdW7JAPOT_eLWUi6nRmxbvucyVoyoYX2SxXfknRUg,3467
-graphiti_core/prompts/extract_nodes.py,sha256=JXLHeL1VcFo0auGf2roVnoWu1CyZJDWxBCu6BXE9fUQ,5289
+graphiti_core/prompts/extract_nodes.py,sha256=-01MpcVd9drtmMDIpQkkzZe8YwVhedmdbZq7UNGfo24,6651
 graphiti_core/prompts/invalidate_edges.py,sha256=DV2mEyIhhjc0hdKEMFLQMeG0FiUCkv_X0ctCliYjQ2c,3577
 graphiti_core/prompts/lib.py,sha256=oxhlpGEgV15VOLEZiwirxmIJBIdfzfiyL58iyzFDskE,4254
 graphiti_core/prompts/models.py,sha256=cvx_Bv5RMFUD_5IUawYrbpOKLPHogai7_bm7YXrSz84,867
 graphiti_core/prompts/prompt_helpers.py,sha256=-9TABwIcIQUVHcNANx6wIZd-FT2DgYKyGTfx4IGYq2I,64
-graphiti_core/prompts/summarize_nodes.py,sha256=XOJykwT7LtzWk2bRquFgv4tRAU3JOkkNkWBg-mkYOKc,3593
+graphiti_core/prompts/summarize_nodes.py,sha256=ONDZdkvC7-RPaKx2geWSVjNaJAsHxRisV8tiU2ukw4k,3781
 graphiti_core/py.typed,sha256=vlmmzQOt7bmeQl9L3XJP4W6Ry0iiELepnOrinKz5KQg,79
 graphiti_core/search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-graphiti_core/search/search.py,sha256=4DaeP5aRT7ZOByDO3H5UK0edxfwQ4mzAOdFjnjwaDJs,12454
+graphiti_core/search/search.py,sha256=DX-tcIa0SiKI2HX-b_WdjGE74A8RLWQor4p90dJluUA,12643
 graphiti_core/search/search_config.py,sha256=UZN8jFA4pBlw2O5N1cuhVRBdTwMLR9N3Oyo6sQ4MDVw,3117
 graphiti_core/search/search_config_recipes.py,sha256=yUqiLnn9vFg39M8eVwjVKfBCL_ptGrfDMQ47m_Blb0g,6885
-graphiti_core/search/search_filters.py,sha256=_E_Od3hUoZm6H2UVCcxhfS34AqGF2lNx0NJPCw0gAQs,5333
-graphiti_core/search/search_utils.py,sha256=GwF7tsvjKgVXtv6q4lXA1tZn1_0izy6rHNwL8d0cYU4,24348
+graphiti_core/search/search_filters.py,sha256=4MJmCXD-blMc71xB4F9K4a72qidDCigADQ_ztdG15kc,5884
+graphiti_core/search/search_utils.py,sha256=i9qTBOZOiwnuiUNlIw9OoTYIrooBrM2unPwylGVNVq8,25657
 graphiti_core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-graphiti_core/utils/bulk_utils.py,sha256=FYal4tSspGVohNsnDoyW_YjMiscySuYPuQLPSwVCy24,14110
+graphiti_core/utils/bulk_utils.py,sha256=P4LKO46Yle4tBdNcQ3hDHcSQFaR8UBLfoL-z1M2Wua0,14690
 graphiti_core/utils/datetime_utils.py,sha256=Ti-2tnrDFRzBsbfblzsHybsM3jaDLP4-VT2t0VhpIzU,1357
 graphiti_core/utils/maintenance/__init__.py,sha256=TRY3wWWu5kn3Oahk_KKhltrWnh0NACw0FskjqF6OtlA,314
 graphiti_core/utils/maintenance/community_operations.py,sha256=gIw1M5HGgc2c3TXag5ygPPpAv5WsG-yoC8Lhmfr6FMs,10011
 graphiti_core/utils/maintenance/edge_operations.py,sha256=tNw56vN586JYZMgie6RLRTiHZ680-kWzDIxW8ucL6SU,12780
 graphiti_core/utils/maintenance/graph_data_operations.py,sha256=qds9ALk9PhpQs1CNZTZGpi70mqJ93Y2KhIh9X2r8MUI,6533
-graphiti_core/utils/maintenance/node_operations.py,sha256=lrlp27clVhWrxy2BxofTjIISZpwqNG12evHO5wNwOY8,12084
+graphiti_core/utils/maintenance/node_operations.py,sha256=gihbPEBH6StLQCSd9wSu582d4Owaw3l5JLR1IBDrnVs,14137
 graphiti_core/utils/maintenance/temporal_operations.py,sha256=RdNtubCyYhOVrvcOIq2WppHls1Q-BEjtsN8r38l-Rtc,3691
 graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-graphiti_core-0.6.1.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
-graphiti_core-0.6.1.dist-info/METADATA,sha256=T7rqCclsf8c92WTRWiYXFzWpQR36gy3whh_w-uXWjvA,10242
-graphiti_core-0.6.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-graphiti_core-0.6.1.dist-info/RECORD,,
+graphiti_core-0.7.1.dist-info/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
+graphiti_core-0.7.1.dist-info/METADATA,sha256=7jGgBXFuCT17KdyQVeSWAN1R1KQrBSd5Up92tqR30-c,10242
+graphiti_core-0.7.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+graphiti_core-0.7.1.dist-info/RECORD,,