graphiti-core 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -41,6 +41,10 @@ DEFAULT_MODEL = 'gemini-2.5-flash-lite-preview-06-17'
 
 
 class GeminiRerankerClient(CrossEncoderClient):
+    """
+    Google Gemini Reranker Client
+    """
+
     def __init__(
         self,
         config: LLMConfig | None = None,
@@ -57,7 +61,6 @@ class GeminiRerankerClient(CrossEncoderClient):
             config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
             client (genai.Client | None): An optional async client instance to use. If not provided, a new genai.Client is created.
         """
-
         if config is None:
             config = LLMConfig()
 
@@ -47,15 +47,27 @@ class GeminiEmbedder(EmbedderClient):
     Google Gemini Embedder Client
     """
 
-    def __init__(self, config: GeminiEmbedderConfig | None = None):
+    def __init__(
+        self,
+        config: GeminiEmbedderConfig | None = None,
+        client: 'genai.Client | None' = None,
+    ):
+        """
+        Initialize the GeminiEmbedder with the provided configuration and client.
+
+        Args:
+            config (GeminiEmbedderConfig | None): The configuration for the GeminiEmbedder, including API key, model, base URL, temperature, and max tokens.
+            client (genai.Client | None): An optional async client instance to use. If not provided, a new genai.Client is created.
+        """
         if config is None:
             config = GeminiEmbedderConfig()
+
         self.config = config
 
-        # Configure the Gemini API
-        self.client = genai.Client(
-            api_key=config.api_key,
-        )
+        if client is None:
+            self.client = genai.Client(api_key=config.api_key)
+        else:
+            self.client = client
 
     async def create(
         self, input_data: str | list[str] | Iterable[int] | Iterable[Iterable[int]]
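
The embedder can now reuse an existing google-genai client instead of always constructing its own. A minimal sketch of the new injection point (the import path and API key handling are assumptions, not shown in this diff):

from google import genai

from graphiti_core.embedder.gemini import GeminiEmbedder  # assumed import path

# Reuse one async-capable client; when omitted, GeminiEmbedder builds its own from config.api_key.
shared_client = genai.Client(api_key='YOUR_GEMINI_API_KEY')
embedder = GeminiEmbedder(client=shared_client)  # config defaults to GeminiEmbedderConfig()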
graphiti_core/graphiti.py CHANGED
@@ -57,7 +57,6 @@ from graphiti_core.utils.bulk_utils import (
     add_nodes_and_edges_bulk,
     dedupe_edges_bulk,
     dedupe_nodes_bulk,
-    extract_edge_dates_bulk,
     extract_nodes_and_edges_bulk,
     resolve_edge_pointers,
     retrieve_previous_episodes_bulk,
@@ -508,7 +507,7 @@ class Graphiti:
 
         entity_edges = resolved_edges + invalidated_edges + duplicate_of_edges
 
-        episodic_edges = build_episodic_edges(nodes, episode, now)
+        episodic_edges = build_episodic_edges(nodes, episode.uuid, now)
 
         episode.entity_edges = [edge.uuid for edge in entity_edges]
 
@@ -536,8 +535,16 @@ class Graphiti:
         except Exception as e:
             raise e
 
-    #### WIP: USE AT YOUR OWN RISK ####
-    async def add_episode_bulk(self, bulk_episodes: list[RawEpisode], group_id: str = ''):
+    ##### EXPERIMENTAL #####
+    async def add_episode_bulk(
+        self,
+        bulk_episodes: list[RawEpisode],
+        group_id: str = '',
+        entity_types: dict[str, BaseModel] | None = None,
+        excluded_entity_types: list[str] | None = None,
+        edge_types: dict[str, BaseModel] | None = None,
+        edge_type_map: dict[tuple[str, str], list[str]] | None = None,
+    ):
         """
         Process multiple episodes in bulk and update the graph.
 
@@ -580,8 +587,17 @@ class Graphiti:
 
         validate_group_id(group_id)
 
+        # Create default edge type map
+        edge_type_map_default = (
+            {('Entity', 'Entity'): list(edge_types.keys())}
+            if edge_types is not None
+            else {('Entity', 'Entity'): []}
+        )
+
         episodes = [
-            EpisodicNode(
+            await EpisodicNode.get_by_uuid(self.driver, episode.uuid)
+            if episode.uuid is not None
+            else EpisodicNode(
                 name=episode.name,
                 labels=[],
                 source=episode.source,
@@ -594,68 +610,106 @@ class Graphiti:
             for episode in bulk_episodes
         ]
 
-        # Save all the episodes
-        await semaphore_gather(
-            *[episode.save(self.driver) for episode in episodes],
-            max_coroutines=self.max_coroutines,
+        episodes_by_uuid: dict[str, EpisodicNode] = {
+            episode.uuid: episode for episode in episodes
+        }
+
+        # Save all episodes
+        await add_nodes_and_edges_bulk(
+            driver=self.driver,
+            episodic_nodes=episodes,
+            episodic_edges=[],
+            entity_nodes=[],
+            entity_edges=[],
+            embedder=self.embedder,
         )
 
         # Get previous episode context for each episode
-        episode_pairs = await retrieve_previous_episodes_bulk(self.driver, episodes)
+        episode_context = await retrieve_previous_episodes_bulk(self.driver, episodes)
 
-        # Extract all nodes and edges
-        (
-            extracted_nodes,
-            extracted_edges,
-            episodic_edges,
-        ) = await extract_nodes_and_edges_bulk(self.clients, episode_pairs, None, None)
-
-        # Generate embeddings
-        await semaphore_gather(
-            *[node.generate_name_embedding(self.embedder) for node in extracted_nodes],
-            *[edge.generate_embedding(self.embedder) for edge in extracted_edges],
-            max_coroutines=self.max_coroutines,
+        # Extract all nodes and edges for each episode
+        extracted_nodes_bulk, extracted_edges_bulk = await extract_nodes_and_edges_bulk(
+            self.clients,
+            episode_context,
+            edge_type_map=edge_type_map or edge_type_map_default,
+            edge_types=edge_types,
+            entity_types=entity_types,
+            excluded_entity_types=excluded_entity_types,
         )
 
-        # Dedupe extracted nodes, compress extracted edges
-        (nodes, uuid_map), extracted_edges_timestamped = await semaphore_gather(
-            dedupe_nodes_bulk(self.driver, self.llm_client, extracted_nodes),
-            extract_edge_dates_bulk(self.llm_client, extracted_edges, episode_pairs),
-            max_coroutines=self.max_coroutines,
+        # Dedupe extracted nodes in memory
+        nodes_by_episode, uuid_map = await dedupe_nodes_bulk(
+            self.clients, extracted_nodes_bulk, episode_context, entity_types
         )
 
-        # save nodes to KG
-        await semaphore_gather(
-            *[node.save(self.driver) for node in nodes],
-            max_coroutines=self.max_coroutines,
-        )
+        episodic_edges: list[EpisodicEdge] = []
+        for episode_uuid, nodes in nodes_by_episode.items():
+            episodic_edges.extend(build_episodic_edges(nodes, episode_uuid, now))
 
         # re-map edge pointers so that they don't point to discard dupe nodes
-        extracted_edges_with_resolved_pointers: list[EntityEdge] = resolve_edge_pointers(
-            extracted_edges_timestamped, uuid_map
-        )
-        episodic_edges_with_resolved_pointers: list[EpisodicEdge] = resolve_edge_pointers(
-            episodic_edges, uuid_map
-        )
+        extracted_edges_bulk_updated: list[list[EntityEdge]] = [
+            resolve_edge_pointers(edges, uuid_map) for edges in extracted_edges_bulk
+        ]
 
-        # save episodic edges to KG
-        await semaphore_gather(
-            *[edge.save(self.driver) for edge in episodic_edges_with_resolved_pointers],
-            max_coroutines=self.max_coroutines,
+        # Dedupe extracted edges in memory
+        edges_by_episode = await dedupe_edges_bulk(
+            self.clients,
+            extracted_edges_bulk_updated,
+            episode_context,
+            [],
+            edge_types or {},
+            edge_type_map or edge_type_map_default,
         )
 
-        # Dedupe extracted edges
-        edges = await dedupe_edges_bulk(
-            self.driver, self.llm_client, extracted_edges_with_resolved_pointers
+        # Extract node attributes
+        nodes_by_uuid: dict[str, EntityNode] = {
+            node.uuid: node for nodes in nodes_by_episode.values() for node in nodes
+        }
+
+        extract_attributes_params: list[tuple[EntityNode, list[EpisodicNode]]] = []
+        for node in nodes_by_uuid.values():
+            episode_uuids: list[str] = []
+            for episode_uuid, mentioned_nodes in nodes_by_episode.items():
+                for mentioned_node in mentioned_nodes:
+                    if node.uuid == mentioned_node.uuid:
+                        episode_uuids.append(episode_uuid)
+                        break
+
+            episode_mentions: list[EpisodicNode] = [
+                episodes_by_uuid[episode_uuid] for episode_uuid in episode_uuids
+            ]
+            episode_mentions.sort(key=lambda x: x.valid_at, reverse=True)
+
+            extract_attributes_params.append((node, episode_mentions))
+
+        new_hydrated_nodes: list[list[EntityNode]] = await semaphore_gather(
+            *[
+                extract_attributes_from_nodes(
+                    self.clients,
+                    [params[0]],
+                    params[1][0],
+                    params[1][0:],
+                    entity_types,
+                )
+                for params in extract_attributes_params
+            ]
         )
-        logger.debug(f'extracted edge length: {len(edges)}')
 
-        # invalidate edges
+        hydrated_nodes = [node for nodes in new_hydrated_nodes for node in nodes]
 
-        # save edges to KG
-        await semaphore_gather(
-            *[edge.save(self.driver) for edge in edges],
-            max_coroutines=self.max_coroutines,
+        # TODO: Resolve nodes and edges against the existing graph
+        edges_by_uuid: dict[str, EntityEdge] = {
+            edge.uuid: edge for edges in edges_by_episode.values() for edge in edges
+        }
+
+        # save data to KG
+        await add_nodes_and_edges_bulk(
+            self.driver,
+            episodes,
+            episodic_edges,
+            hydrated_nodes,
+            list(edges_by_uuid.values()),
+            self.embedder,
         )
 
         end = time()
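
The experimental bulk path now accepts the same typing hooks as the single-episode path. A usage sketch under the assumption that RawEpisode keeps its existing fields; the Person/WorksAt models, group id, and episode content are invented for illustration:

from datetime import datetime, timezone

from pydantic import BaseModel, Field

from graphiti_core import Graphiti
from graphiti_core.nodes import EpisodeType
from graphiti_core.utils.bulk_utils import RawEpisode


class Person(BaseModel):
    """Hypothetical custom entity type."""
    occupation: str | None = Field(None, description='Occupation of the person')


class WorksAt(BaseModel):
    """Hypothetical custom edge type."""


async def ingest(graphiti: Graphiti) -> None:
    episodes = [
        RawEpisode(
            name='episode-1',
            content='Alice joined Acme Corp as a data engineer.',
            source=EpisodeType.text,
            source_description='example text',
            reference_time=datetime.now(timezone.utc),
        )
    ]
    # New in 0.16.0: entity/edge type information flows through the bulk path.
    await graphiti.add_episode_bulk(
        episodes,
        group_id='demo-group',
        entity_types={'Person': Person},
        edge_types={'WorksAt': WorksAt},
        edge_type_map={('Person', 'Entity'): ['WorksAt']},
    )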
@@ -828,7 +882,7 @@ class Graphiti:
             await get_edge_invalidation_candidates(self.driver, [updated_edge], SearchFilters())
         )[0]
 
-        resolved_edge, invalidated_edges = await resolve_extracted_edge(
+        resolved_edge, invalidated_edges, _ = await resolve_extracted_edge(
            self.llm_client,
            updated_edge,
            related_edges,
@@ -76,6 +76,7 @@ class GeminiClient(LLMClient):
         cache: bool = False,
         max_tokens: int = DEFAULT_MAX_TOKENS,
         thinking_config: types.ThinkingConfig | None = None,
+        client: 'genai.Client | None' = None,
     ):
         """
         Initialize the GeminiClient with the provided configuration, cache setting, and optional thinking config.
@@ -85,7 +86,7 @@ class GeminiClient(LLMClient):
             cache (bool): Whether to use caching for responses. Defaults to False.
             thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
                 Only use with models that support thinking (gemini-2.5+). Defaults to None.
-
+            client (genai.Client | None): An optional async client instance to use. If not provided, a new genai.Client is created.
         """
         if config is None:
             config = LLMConfig()
@@ -93,10 +94,12 @@ class GeminiClient(LLMClient):
         super().__init__(config, cache)
 
         self.model = config.model
-        # Configure the Gemini API
-        self.client = genai.Client(
-            api_key=config.api_key,
-        )
+
+        if client is None:
+            self.client = genai.Client(api_key=config.api_key)
+        else:
+            self.client = client
+
         self.max_tokens = max_tokens
         self.thinking_config = thinking_config
 
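
With the same parameter on GeminiClient, and on GeminiRerankerClient (whose docstring above documents it), one google-genai client can back every Gemini-based component; the embedder shown earlier accepts it as well. A minimal sketch, with import paths assumed from the package layout:

from google import genai

from graphiti_core.cross_encoder.gemini_reranker_client import GeminiRerankerClient  # assumed path
from graphiti_core.llm_client.gemini_client import GeminiClient  # assumed path

# One client, one connection pool and API key, shared by LLM calls and reranking.
shared = genai.Client(api_key='YOUR_GEMINI_API_KEY')

llm = GeminiClient(client=shared)
reranker = GeminiRerankerClient(client=shared)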
@@ -23,9 +23,9 @@ from .models import Message, PromptFunction, PromptVersion
 
 
 class EdgeDuplicate(BaseModel):
-    duplicate_fact_id: int = Field(
+    duplicate_facts: list[int] = Field(
         ...,
-        description='id of the duplicate fact. If no duplicate facts are found, default to -1.',
+        description='List of ids of any duplicate facts. If no duplicate facts are found, default to empty list.',
     )
     contradicted_facts: list[int] = Field(
         ...,
@@ -75,8 +75,9 @@ def edge(context: dict[str, Any]) -> list[Message]:
        </NEW EDGE>
 
        Task:
-       If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact.
-       If the NEW EDGE is not a duplicate of any of the EXISTING EDGES, return -1.
+       If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
+       as part of the list of duplicate_facts.
+       If the NEW EDGE is not a duplicate of any of the EXISTING EDGES, return an empty list.
 
        Guidelines:
        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
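
A minimal sketch of what this schema change means downstream: 0.15.x reported a single duplicate_fact_id (-1 for none), while 0.16.0's duplicate_facts carries zero or more ids. The model below copies the field from the diff; the example values are invented:

from pydantic import BaseModel, Field


class EdgeDuplicate(BaseModel):
    duplicate_facts: list[int] = Field(
        ...,
        description='List of ids of any duplicate facts. If no duplicate facts are found, default to empty list.',
    )


no_dupes = EdgeDuplicate(duplicate_facts=[])    # was duplicate_fact_id = -1
multi = EdgeDuplicate(duplicate_facts=[2, 5])   # a new edge can now match several existing facts

for fact_id in multi.duplicate_facts:
    print(f'new edge duplicates existing fact {fact_id}')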
@@ -32,9 +32,9 @@ class NodeDuplicate(BaseModel):
         ...,
         description='Name of the entity. Should be the most complete and descriptive name of the entity. Do not include any JSON formatting in the Entity name such as {}.',
     )
-    additional_duplicates: list[int] = Field(
+    duplicates: list[int] = Field(
         ...,
-        description='idx of additional duplicate entities. Use this list if the entity has multiple duplicates among existing entities.',
+        description='idx of all duplicate entities.',
     )
 
 
@@ -94,7 +94,7 @@ def node(context: dict[str, Any]) -> list[Message]:
        1. Compare `new_entity` against each item in `existing_entities`.
        2. If it refers to the same real‐world object or concept, collect its index.
        3. Let `duplicate_idx` = the *first* collected index, or –1 if none.
-       4. Let `additional_duplicates` = the list of *any other* collected indices (empty list if none).
+       4. Let `duplicates` = the list of *all* collected indices (empty list if none).
 
        Also return the full name of the NEW ENTITY (whether it is the name of the NEW ENTITY, a node it
        is a duplicate of, or a combination of the two).
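
For the node prompt the shift is analogous: duplicate_idx still names the first match, but duplicates now lists every match instead of only the extras. A tiny sketch with invented indices:

collected = [3, 7, 9]  # hypothetical indices the model judged to be duplicates

# 0.15.x shape: first index plus the remainder in additional_duplicates
duplicate_idx = collected[0] if collected else -1
additional_duplicates = collected[1:]
all_old = ([duplicate_idx] if duplicate_idx != -1 else []) + additional_duplicates

# 0.16.0 shape: duplicates already holds every collected index
duplicates = collected

assert all_old == duplicates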