graphiti-core 0.1.0-py3-none-any.whl → 0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of graphiti-core has been flagged as possibly problematic.

@@ -1,11 +1,11 @@
 import asyncio
 import logging
 import re
-import typing
 from collections import defaultdict
 from time import time
+from typing import Any

-from neo4j import AsyncDriver
+from neo4j import AsyncDriver, Query

 from graphiti_core.edges import EntityEdge
 from graphiti_core.helpers import parse_db_date
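The new Query import supports the rewritten search functions below, which build their Cypher as neo4j Query objects instead of bare strings. As a minimal sketch (not package code, and assuming a recent 5.x Python driver where Query(text, metadata=None, timeout=None) is accepted by execute_query), a Query wraps the statement text and can carry per-query settings such as a timeout:

import asyncio

from neo4j import AsyncGraphDatabase, Query

async def ping(uri: str, auth: tuple[str, str]) -> None:
    async with AsyncGraphDatabase.driver(uri, auth=auth) as driver:
        q = Query('RETURN 1 AS x', timeout=5.0)  # timeout is optional
        # execute_query returns (records, summary, keys), as used throughout this diff
        records, _, _ = await driver.execute_query(q)
        print(records[0]['x'])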
@@ -66,12 +66,12 @@ async def bfs(node_ids: list[str], driver: AsyncDriver):
             r.expired_at AS expired_at,
             r.valid_at AS valid_at,
             r.invalid_at AS invalid_at
-
+
         """,
         node_ids=node_ids,
     )

-    context: dict[str, typing.Any] = {}
+    context: dict[str, Any] = {}

     for record in records:
         n_uuid = record['source_node_uuid']
@@ -96,14 +96,17 @@ async def bfs(node_ids: list[str], driver: AsyncDriver):


 async def edge_similarity_search(
-    search_vector: list[float], driver: AsyncDriver, limit=RELEVANT_SCHEMA_LIMIT
+    driver: AsyncDriver,
+    search_vector: list[float],
+    source_node_uuid: str | None,
+    target_node_uuid: str | None,
+    limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityEdge]:
     # vector similarity search over embedded facts
-    records, _, _ = await driver.execute_query(
-        """
+    query = Query("""
         CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
-        YIELD relationship AS r, score
-        MATCH (n)-[r:RELATES_TO]->(m)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
         RETURN
             r.uuid AS uuid,
             n.uuid AS source_node_uuid,
@@ -117,8 +120,71 @@ async def edge_similarity_search(
             r.valid_at AS valid_at,
             r.invalid_at AS invalid_at
         ORDER BY score DESC
-        """,
+        """)
+
+    if source_node_uuid is None and target_node_uuid is None:
+        query = Query("""
+        CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
+        RETURN
+            r.uuid AS uuid,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            r.created_at AS created_at,
+            r.name AS name,
+            r.fact AS fact,
+            r.fact_embedding AS fact_embedding,
+            r.episodes AS episodes,
+            r.expired_at AS expired_at,
+            r.valid_at AS valid_at,
+            r.invalid_at AS invalid_at
+        ORDER BY score DESC
+        """)
+    elif source_node_uuid is None:
+        query = Query("""
+        CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
+        RETURN
+            r.uuid AS uuid,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            r.created_at AS created_at,
+            r.name AS name,
+            r.fact AS fact,
+            r.fact_embedding AS fact_embedding,
+            r.episodes AS episodes,
+            r.expired_at AS expired_at,
+            r.valid_at AS valid_at,
+            r.invalid_at AS invalid_at
+        ORDER BY score DESC
+        """)
+    elif target_node_uuid is None:
+        query = Query("""
+        CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity)
+        RETURN
+            r.uuid AS uuid,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            r.created_at AS created_at,
+            r.name AS name,
+            r.fact AS fact,
+            r.fact_embedding AS fact_embedding,
+            r.episodes AS episodes,
+            r.expired_at AS expired_at,
+            r.valid_at AS valid_at,
+            r.invalid_at AS invalid_at
+        ORDER BY score DESC
+        """)
+
+    records, _, _ = await driver.execute_query(
+        query,
         search_vector=search_vector,
+        source_uuid=source_node_uuid,
+        target_uuid=target_node_uuid,
         limit=limit,
     )

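The four near-identical Query variants above differ only in their MATCH pattern, depending on which of the optional source/target UUID filters are provided. A hypothetical, more compact way to express the same branching (not the package's code; build_match is an illustrative helper) would compose just the MATCH clause and keep the shared CALL/RETURN/ORDER BY text in one place:

# Hypothetical helper: derive the MATCH pattern from the optional UUID filters.
def build_match(source_uuid: str | None, target_uuid: str | None) -> str:
    n = '(n:Entity {uuid: $source_uuid})' if source_uuid is not None else '(n:Entity)'
    m = '(m:Entity {uuid: $target_uuid})' if target_uuid is not None else '(m:Entity)'
    return f'MATCH {n}-[r {{uuid: rel.uuid}}]-{m}'

print(build_match(None, 'abc'))
# MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})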
@@ -145,7 +211,7 @@ async def edge_similarity_search(


 async def entity_similarity_search(
-    search_vector: list[float], driver: AsyncDriver, limit=RELEVANT_SCHEMA_LIMIT
+    search_vector: list[float], driver: AsyncDriver, limit=RELEVANT_SCHEMA_LIMIT
 ) -> list[EntityNode]:
     # vector similarity search over entity names
     records, _, _ = await driver.execute_query(
@@ -155,6 +221,7 @@ async def entity_similarity_search(
         RETURN
             n.uuid As uuid,
             n.name AS name,
+            n.name_embeddings AS name_embedding,
             n.created_at AS created_at,
             n.summary AS summary
         ORDER BY score DESC
@@ -169,6 +236,7 @@ async def entity_similarity_search(
             EntityNode(
                 uuid=record['uuid'],
                 name=record['name'],
+                name_embedding=record['name_embedding'],
                 labels=['Entity'],
                 created_at=record['created_at'].to_native(),
                 summary=record['summary'],
@@ -179,7 +247,7 @@ async def entity_similarity_search(


 async def entity_fulltext_search(
-    query: str, driver: AsyncDriver, limit=RELEVANT_SCHEMA_LIMIT
+    query: str, driver: AsyncDriver, limit=RELEVANT_SCHEMA_LIMIT
 ) -> list[EntityNode]:
     # BM25 search to get top nodes
     fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~'
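For reference, the fuzzy_query line above strips punctuation and appends Lucene's `~` fuzzy-match operator before the string reaches the fulltext index. A small standalone illustration:

import re

query = "Alice's e-mail, please?"
fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~'
print(fuzzy_query)  # -> 'Alices email please~'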
@@ -187,8 +255,9 @@ async def entity_fulltext_search(
         """
         CALL db.index.fulltext.queryNodes("name_and_summary", $query) YIELD node, score
         RETURN
-            node.uuid As uuid,
+            node.uuid AS uuid,
             node.name AS name,
+            node.name_embeddings AS name_embedding,
             node.created_at AS created_at,
             node.summary AS summary
         ORDER BY score DESC
@@ -204,6 +273,7 @@ async def entity_fulltext_search(
             EntityNode(
                 uuid=record['uuid'],
                 name=record['name'],
+                name_embedding=record['name_embedding'],
                 labels=['Entity'],
                 created_at=record['created_at'].to_native(),
                 summary=record['summary'],
@@ -214,17 +284,18 @@ async def entity_fulltext_search(


 async def edge_fulltext_search(
-    query: str, driver: AsyncDriver, limit=RELEVANT_SCHEMA_LIMIT
+    driver: AsyncDriver,
+    query: str,
+    source_node_uuid: str | None,
+    target_node_uuid: str | None,
+    limit=RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityEdge]:
     # fulltext search over facts
-    fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~'
-
-    records, _, _ = await driver.execute_query(
-        """
-        CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
-        YIELD relationship AS r, score
-        MATCH (n:Entity)-[r]->(m:Entity)
-        RETURN
+    cypher_query = Query("""
+        CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
+        RETURN
             r.uuid AS uuid,
             n.uuid AS source_node_uuid,
             m.uuid AS target_node_uuid,
@@ -237,8 +308,73 @@ async def edge_fulltext_search(
             r.valid_at AS valid_at,
             r.invalid_at AS invalid_at
         ORDER BY score DESC LIMIT $limit
-        """,
+        """)
+
+    if source_node_uuid is None and target_node_uuid is None:
+        cypher_query = Query("""
+        CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity)
+        RETURN
+            r.uuid AS uuid,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            r.created_at AS created_at,
+            r.name AS name,
+            r.fact AS fact,
+            r.fact_embedding AS fact_embedding,
+            r.episodes AS episodes,
+            r.expired_at AS expired_at,
+            r.valid_at AS valid_at,
+            r.invalid_at AS invalid_at
+        ORDER BY score DESC LIMIT $limit
+        """)
+    elif source_node_uuid is None:
+        cypher_query = Query("""
+        CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid})
+        RETURN
+            r.uuid AS uuid,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            r.created_at AS created_at,
+            r.name AS name,
+            r.fact AS fact,
+            r.fact_embedding AS fact_embedding,
+            r.episodes AS episodes,
+            r.expired_at AS expired_at,
+            r.valid_at AS valid_at,
+            r.invalid_at AS invalid_at
+        ORDER BY score DESC LIMIT $limit
+        """)
+    elif target_node_uuid is None:
+        cypher_query = Query("""
+        CALL db.index.fulltext.queryRelationships("name_and_fact", $query)
+        YIELD relationship AS rel, score
+        MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity)
+        RETURN
+            r.uuid AS uuid,
+            n.uuid AS source_node_uuid,
+            m.uuid AS target_node_uuid,
+            r.created_at AS created_at,
+            r.name AS name,
+            r.fact AS fact,
+            r.fact_embedding AS fact_embedding,
+            r.episodes AS episodes,
+            r.expired_at AS expired_at,
+            r.valid_at AS valid_at,
+            r.invalid_at AS invalid_at
+        ORDER BY score DESC LIMIT $limit
+        """)
+
+    fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~'
+
+    records, _, _ = await driver.execute_query(
+        cypher_query,
         query=fuzzy_query,
+        source_uuid=source_node_uuid,
+        target_uuid=target_node_uuid,
         limit=limit,
     )

@@ -265,16 +401,16 @@ async def edge_fulltext_search(


 async def hybrid_node_search(
-    queries: list[str],
-    embeddings: list[list[float]],
-    driver: AsyncDriver,
-    limit: int | None = None,
+    queries: list[str],
+    embeddings: list[list[float]],
+    driver: AsyncDriver,
+    limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityNode]:
     """
     Perform a hybrid search for nodes using both text queries and embeddings.

     This method combines fulltext search and vector similarity search to find
-    relevant nodes in the graph database.
+    relevant nodes in the graph database. It uses a rrf reranker.

     Parameters
     ----------
@@ -307,33 +443,31 @@ async def hybrid_node_search(
     """

     start = time()
-    relevant_nodes: list[EntityNode] = []
-    relevant_node_uuids = set()

-    results = await asyncio.gather(
-        *[entity_fulltext_search(q, driver, 2 * (limit or RELEVANT_SCHEMA_LIMIT)) for q in queries],
-        *[
-            entity_similarity_search(e, driver, 2 * (limit or RELEVANT_SCHEMA_LIMIT))
-            for e in embeddings
-        ],
+    results: list[list[EntityNode]] = list(
+        await asyncio.gather(
+            *[entity_fulltext_search(q, driver, 2 * limit) for q in queries],
+            *[entity_similarity_search(e, driver, 2 * limit) for e in embeddings],
+        )
     )

-    for result in results:
-        for node in result:
-            if node.uuid in relevant_node_uuids:
-                continue
+    node_uuid_map: dict[str, EntityNode] = {
+        node.uuid: node for result in results for node in result
+    }
+    result_uuids = [[node.uuid for node in result] for result in results]
+
+    ranked_uuids = rrf(result_uuids)

-            relevant_node_uuids.add(node.uuid)
-            relevant_nodes.append(node)
+    relevant_nodes: list[EntityNode] = [node_uuid_map[uuid] for uuid in ranked_uuids]

     end = time()
-    logger.info(f'Found relevant nodes: {relevant_node_uuids} in {(end - start) * 1000} ms')
+    logger.info(f'Found relevant nodes: {ranked_uuids} in {(end - start) * 1000} ms')
     return relevant_nodes


 async def get_relevant_nodes(
-    nodes: list[EntityNode],
-    driver: AsyncDriver,
+    nodes: list[EntityNode],
+    driver: AsyncDriver,
 ) -> list[EntityNode]:
     """
     Retrieve relevant nodes based on the provided list of EntityNodes.
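hybrid_node_search now delegates deduplication and ordering to the rrf reranker (reciprocal rank fusion). Its body is not shown in this hunk, but a minimal sketch consistent with the rrf(results: list[list[str]], rank_const=1) signature that appears further down would be:

from collections import defaultdict

# Minimal reciprocal-rank-fusion sketch (the package's actual body may differ):
# each result list votes for its uuids with weight 1 / (rank + rank_const),
# and uuids are returned ordered by their fused score.
def rrf(results: list[list[str]], rank_const=1) -> list[str]:
    scores: dict[str, float] = defaultdict(float)
    for result in results:
        for rank, uuid in enumerate(result):
            scores[uuid] += 1 / (rank + rank_const)
    return [uuid for uuid, _ in sorted(scores.items(), key=lambda kv: kv[1], reverse=True)]

Because each uuid appears exactly once in the fused ranking, the old manual relevant_node_uuids deduplication set becomes unnecessary.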
@@ -369,8 +503,11 @@ async def get_relevant_nodes(


 async def get_relevant_edges(
-    edges: list[EntityEdge],
-    driver: AsyncDriver,
+    driver: AsyncDriver,
+    edges: list[EntityEdge],
+    source_node_uuid: str | None,
+    target_node_uuid: str | None,
+    limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityEdge]:
     start = time()
     relevant_edges: list[EntityEdge] = []
@@ -378,11 +515,16 @@ async def get_relevant_edges(

     results = await asyncio.gather(
         *[
-            edge_similarity_search(edge.fact_embedding, driver)
+            edge_similarity_search(
+                driver, edge.fact_embedding, source_node_uuid, target_node_uuid, limit
+            )
             for edge in edges
             if edge.fact_embedding is not None
         ],
-        *[edge_fulltext_search(edge.fact, driver) for edge in edges],
+        *[
+            edge_fulltext_search(driver, edge.fact, source_node_uuid, target_node_uuid, limit)
+            for edge in edges
+        ],
     )

     for result in results:
@@ -415,18 +557,18 @@ def rrf(results: list[list[str]], rank_const=1) -> list[str]:


 async def node_distance_reranker(
-    driver: AsyncDriver, results: list[list[str]], center_node_uuid: str
+    driver: AsyncDriver, results: list[list[str]], center_node_uuid: str
 ) -> list[str]:
     # use rrf as a preliminary ranker
     sorted_uuids = rrf(results)
     scores: dict[str, float] = {}

     for uuid in sorted_uuids:
-        # Find shortest path to center node
+        # Find the shortest path to center node
        records, _, _ = await driver.execute_query(
            """
            MATCH (source:Entity)-[r:RELATES_TO {uuid: $edge_uuid}]->(target:Entity)
-           MATCH p = SHORTEST 1 (center:Entity)-[:RELATES_TO]-+(n:Entity)
+           MATCH p = SHORTEST 1 (center:Entity)-[:RELATES_TO*1..10]->(n:Entity)
            WHERE center.uuid = $center_uuid AND n.uuid IN [source.uuid, target.uuid]
            RETURN min(length(p)) AS score, source.uuid AS source_uuid, target.uuid AS target_uuid
            """,
@@ -437,8 +579,8 @@ async def node_distance_reranker(

        for record in records:
            if (
-               record['source_uuid'] == center_node_uuid
-               or record['target_uuid'] == center_node_uuid
+               record['source_uuid'] == center_node_uuid
+               or record['target_uuid'] == center_node_uuid
            ):
                continue
            distance = record['score']
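The reranker skips records where one endpoint is the center node itself and keeps the shortest-path length as the edge's distance. The rest of the function falls outside these hunks, but a hedged sketch of the final ordering step, assuming scores maps edge uuid to distance, is:

# Sketch only: order edge uuids by graph distance to the center node,
# leaving edges with no recorded distance at the end.
def rank_by_distance(sorted_uuids: list[str], scores: dict[str, float]) -> list[str]:
    return sorted(sorted_uuids, key=lambda uuid: scores.get(uuid, float('inf')))

The hunks that follow are from a second module in the wheel, the bulk-ingestion utilities that define dedupe_nodes_bulk and related helpers, rather than the search module above.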
@@ -15,11 +15,13 @@ limitations under the License.
 """

 import asyncio
+import logging
 import typing
 from datetime import datetime
+from math import ceil

 from neo4j import AsyncDriver
-from numpy import dot
+from numpy import dot, sqrt
 from pydantic import BaseModel

 from graphiti_core.edges import Edge, EntityEdge, EpisodicEdge
@@ -39,8 +41,12 @@ from graphiti_core.utils.maintenance.node_operations import (
     dedupe_node_list,
     extract_nodes,
 )
+from graphiti_core.utils.maintenance.temporal_operations import extract_edge_dates
+from graphiti_core.utils.utils import chunk_edges_by_nodes

-CHUNK_SIZE = 15
+logger = logging.getLogger(__name__)
+
+CHUNK_SIZE = 10


 class RawEpisode(BaseModel):
@@ -114,27 +120,58 @@ async def dedupe_nodes_bulk(

     compressed_nodes, compressed_map = await compress_nodes(llm_client, nodes, uuid_map)

-    existing_nodes = await get_relevant_nodes(compressed_nodes, driver)
+    node_chunks = [nodes[i : i + CHUNK_SIZE] for i in range(0, len(nodes), CHUNK_SIZE)]

-    nodes, partial_uuid_map, _ = await dedupe_extracted_nodes(
-        llm_client, compressed_nodes, existing_nodes
+    existing_nodes_chunks: list[list[EntityNode]] = list(
+        await asyncio.gather(
+            *[get_relevant_nodes(node_chunk, driver) for node_chunk in node_chunks]
+        )
     )

-    compressed_map.update(partial_uuid_map)
+    results: list[tuple[list[EntityNode], dict[str, str]]] = list(
+        await asyncio.gather(
+            *[
+                dedupe_extracted_nodes(llm_client, node_chunk, existing_nodes_chunks[i])
+                for i, node_chunk in enumerate(node_chunks)
+            ]
+        )
+    )

-    return nodes, compressed_map
+    final_nodes: list[EntityNode] = []
+    for result in results:
+        final_nodes.extend(result[0])
+        partial_uuid_map = result[1]
+        compressed_map.update(partial_uuid_map)
+
+    return final_nodes, compressed_map


 async def dedupe_edges_bulk(
     driver: AsyncDriver, llm_client: LLMClient, extracted_edges: list[EntityEdge]
 ) -> list[EntityEdge]:
-    # Compress edges
+    # First compress edges
     compressed_edges = await compress_edges(llm_client, extracted_edges)

-    existing_edges = await get_relevant_edges(compressed_edges, driver)
+    edge_chunks = [
+        compressed_edges[i : i + CHUNK_SIZE] for i in range(0, len(compressed_edges), CHUNK_SIZE)
+    ]

-    edges = await dedupe_extracted_edges(llm_client, compressed_edges, existing_edges)
+    relevant_edges_chunks: list[list[EntityEdge]] = list(
+        await asyncio.gather(
+            *[get_relevant_edges(driver, edge_chunk, None, None) for edge_chunk in edge_chunks]
+        )
+    )
+
+    resolved_edge_chunks: list[list[EntityEdge]] = list(
+        await asyncio.gather(
+            *[
+                dedupe_extracted_edges(llm_client, edge_chunk, relevant_edges_chunks[i])
+                for i, edge_chunk in enumerate(edge_chunks)
+            ]
+        )
+    )

+    edges = [edge for edge_chunk in resolved_edge_chunks for edge in edge_chunk]
     return edges

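Both bulk functions now follow the same pattern: slice the input into CHUNK_SIZE batches and fan the per-chunk work out with asyncio.gather so the LLM and database calls run concurrently. A self-contained toy version of the pattern (hypothetical names, not package code):

import asyncio

CHUNK_SIZE = 10

async def process_chunk(chunk: list[str]) -> list[str]:
    await asyncio.sleep(0)  # stand-in for an LLM or database round trip
    return [item.upper() for item in chunk]

async def process_all(items: list[str]) -> list[str]:
    chunks = [items[i : i + CHUNK_SIZE] for i in range(0, len(items), CHUNK_SIZE)]
    # one task per chunk; gather preserves chunk order
    results = await asyncio.gather(*[process_chunk(chunk) for chunk in chunks])
    return [item for chunk_result in results for item in chunk_result]

print(asyncio.run(process_all([f'item-{i}' for i in range(25)]))[:3])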
@@ -154,13 +191,58 @@ def node_name_match(nodes: list[EntityNode]) -> tuple[list[EntityNode], dict[str, str]]:
 async def compress_nodes(
     llm_client: LLMClient, nodes: list[EntityNode], uuid_map: dict[str, str]
 ) -> tuple[list[EntityNode], dict[str, str]]:
+    # We want to first compress the nodes by deduplicating nodes across each of the episodes added in bulk
     if len(nodes) == 0:
         return nodes, uuid_map

-    anchor = nodes[0]
-    nodes.sort(key=lambda node: dot(anchor.name_embedding or [], node.name_embedding or []))
+    # Our approach involves us deduplicating chunks of nodes in parallel.
+    # We want n chunks of size n so that n ** 2 == len(nodes).
+    # We want chunk sizes to be at least 10 for optimizing LLM processing time
+    chunk_size = max(int(sqrt(len(nodes))), CHUNK_SIZE)

-    node_chunks = [nodes[i : i + CHUNK_SIZE] for i in range(0, len(nodes), CHUNK_SIZE)]
+    # First calculate similarity scores between nodes
+    similarity_scores: list[tuple[int, int, float]] = [
+        (i, j, dot(n.name_embedding or [], m.name_embedding or []))
+        for i, n in enumerate(nodes)
+        for j, m in enumerate(nodes[:i])
+    ]
+
+    # We now sort by semantic similarity
+    similarity_scores.sort(key=lambda score_tuple: score_tuple[2])
+
+    # initialize our chunks based on chunk size
+    node_chunks: list[list[EntityNode]] = [[] for _ in range(ceil(len(nodes) / chunk_size))]
+
+    # Draft the most similar nodes into the same chunk
+    while len(similarity_scores) > 0:
+        i, j, _ = similarity_scores.pop()
+        # determine if any of the nodes have already been drafted into a chunk
+        n = nodes[i]
+        m = nodes[j]
+        # make sure the shortest chunks get preference
+        node_chunks.sort(reverse=True, key=lambda chunk: len(chunk))
+
+        n_chunk = max([i if n in chunk else -1 for i, chunk in enumerate(node_chunks)])
+        m_chunk = max([i if m in chunk else -1 for i, chunk in enumerate(node_chunks)])
+
+        # both nodes already in a chunk
+        if n_chunk > -1 and m_chunk > -1:
+            continue
+
+        # n has a chunk and that chunk is not full
+        elif n_chunk > -1 and len(node_chunks[n_chunk]) < chunk_size:
+            # put m in the same chunk as n
+            node_chunks[n_chunk].append(m)
+
+        # m has a chunk and that chunk is not full
+        elif m_chunk > -1 and len(node_chunks[m_chunk]) < chunk_size:
+            # put n in the same chunk as m
+            node_chunks[m_chunk].append(n)
+
+        # neither node has a chunk or the chunk is full
+        else:
+            # add both nodes to the shortest chunk
+            node_chunks[-1].extend([n, m])

     results = await asyncio.gather(*[dedupe_node_list(llm_client, chunk) for chunk in node_chunks])

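The drafting loop above greedily pulls the most similar pair off the sorted score list and tries to place both nodes in the same chunk. A standalone toy version (integers standing in for EntityNodes; all names hypothetical) that mirrors the same placement rules:

from math import ceil

# Toy mirror of the drafting rules above; sims holds (i, j, similarity) triples.
def draft_chunks(items: list[int], sims: list[tuple[int, int, float]], chunk_size: int) -> list[list[int]]:
    sims = sorted(sims, key=lambda t: t[2])  # ascending, so pop() yields the most similar pair
    chunks: list[list[int]] = [[] for _ in range(ceil(len(items) / chunk_size))]
    while sims:
        i, j, _ = sims.pop()
        n, m = items[i], items[j]
        chunks.sort(reverse=True, key=len)  # longest first, so chunks[-1] is the shortest
        n_at = max(k if n in c else -1 for k, c in enumerate(chunks))
        m_at = max(k if m in c else -1 for k, c in enumerate(chunks))
        if n_at > -1 and m_at > -1:
            continue  # both already placed
        elif n_at > -1 and len(chunks[n_at]) < chunk_size:
            chunks[n_at].append(m)  # m joins n's chunk
        elif m_at > -1 and len(chunks[m_at]) < chunk_size:
            chunks[m_at].append(n)  # n joins m's chunk
        else:
            chunks[-1].extend([n, m])  # start the pair in the shortest chunk
    return chunks

Note that sorting node_chunks in reverse by length puts the longest chunks first, so the shortest chunk is always at the end when a brand-new pair is placed.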
@@ -181,13 +263,9 @@ async def compress_nodes(
 async def compress_edges(llm_client: LLMClient, edges: list[EntityEdge]) -> list[EntityEdge]:
     if len(edges) == 0:
         return edges
-
-    anchor = edges[0]
-    edges.sort(
-        key=lambda embedding: dot(anchor.fact_embedding or [], embedding.fact_embedding or [])
-    )
-
-    edge_chunks = [edges[i : i + CHUNK_SIZE] for i in range(0, len(edges), CHUNK_SIZE)]
+    # We only want to dedupe edges that are between the same pair of nodes
+    # We build a map of the edges based on their source and target nodes.
+    edge_chunks = chunk_edges_by_nodes(edges)

     results = await asyncio.gather(*[dedupe_edge_list(llm_client, chunk) for chunk in edge_chunks])

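chunk_edges_by_nodes is imported from graphiti_core.utils.utils and its body is not part of this diff. Going by the comment above, a plausible reconstruction (hypothetical; the real helper may differ) groups edges by their source/target pair:

from collections import defaultdict

from graphiti_core.edges import EntityEdge

# Hypothetical sketch of chunk_edges_by_nodes: bucket edges by node pair so that
# only edges between the same two nodes are deduplicated against each other.
def chunk_edges_by_nodes(edges: list[EntityEdge]) -> list[list[EntityEdge]]:
    buckets: dict[tuple[str, str], list[EntityEdge]] = defaultdict(list)
    for edge in edges:
        buckets[(edge.source_node_uuid, edge.target_node_uuid)].append(edge)
    return list(buckets.values())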
@@ -225,3 +303,43 @@ def resolve_edge_pointers(edges: list[E], uuid_map: dict[str, str]):
         edge.target_node_uuid = uuid_map.get(target_uuid, target_uuid)

     return edges
+
+
+async def extract_edge_dates_bulk(
+    llm_client: LLMClient,
+    extracted_edges: list[EntityEdge],
+    episode_pairs: list[tuple[EpisodicNode, list[EpisodicNode]]],
+) -> list[EntityEdge]:
+    edges: list[EntityEdge] = []
+    # confirm that all of our edges have at least one episode
+    for edge in extracted_edges:
+        if edge.episodes is not None and len(edge.episodes) > 0:
+            edges.append(edge)
+
+    episode_uuid_map: dict[str, tuple[EpisodicNode, list[EpisodicNode]]] = {
+        episode.uuid: (episode, previous_episodes) for episode, previous_episodes in episode_pairs
+    }
+
+    results = await asyncio.gather(
+        *[
+            extract_edge_dates(
+                llm_client,
+                edge,
+                episode_uuid_map[edge.episodes[0]][0],  # type: ignore
+                episode_uuid_map[edge.episodes[0]][1],  # type: ignore
+            )
+            for edge in edges
+        ]
+    )
+
+    for i, result in enumerate(results):
+        valid_at = result[0]
+        invalid_at = result[1]
+        edge = edges[i]
+
+        edge.valid_at = valid_at
+        edge.invalid_at = invalid_at
+        if edge.invalid_at:
+            edge.expired_at = datetime.now()
+
+    return edges
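A hedged example of how the new extract_edge_dates_bulk might be called; episodes, histories, llm_client, and extracted_edges are placeholders for values the ingestion pipeline already holds, not names from this diff:

# Inside an async ingestion routine: pair each episode with the episodes that
# preceded it, then resolve valid_at/invalid_at for every extracted edge.
episode_pairs = [(episode, previous) for episode, previous in zip(episodes, histories)]
dated_edges = await extract_edge_dates_bulk(llm_client, extracted_edges, episode_pairs)

As the loop above shows, any edge that comes back with an invalid_at date is also stamped with expired_at = datetime.now().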