graphiti-core 0.13.2__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic.
- graphiti_core/cross_encoder/__init__.py +2 -1
- graphiti_core/cross_encoder/gemini_reranker_client.py +146 -0
- graphiti_core/driver/__init__.py +4 -1
- graphiti_core/driver/falkordb_driver.py +47 -21
- graphiti_core/driver/neo4j_driver.py +5 -3
- graphiti_core/embedder/voyage.py +1 -1
- graphiti_core/graphiti.py +79 -5
- graphiti_core/helpers.py +38 -2
- graphiti_core/llm_client/gemini_client.py +135 -23
- graphiti_core/nodes.py +12 -2
- graphiti_core/search/search_filters.py +4 -5
- graphiti_core/search/search_utils.py +2 -8
- graphiti_core/telemetry/__init__.py +9 -0
- graphiti_core/telemetry/telemetry.py +117 -0
- graphiti_core/utils/bulk_utils.py +5 -2
- graphiti_core/utils/maintenance/community_operations.py +1 -1
- graphiti_core/utils/maintenance/edge_operations.py +1 -1
- graphiti_core/utils/maintenance/graph_data_operations.py +3 -5
- graphiti_core/utils/maintenance/node_operations.py +6 -0
- {graphiti_core-0.13.2.dist-info → graphiti_core-0.15.0.dist-info}/METADATA +167 -52
- {graphiti_core-0.13.2.dist-info → graphiti_core-0.15.0.dist-info}/RECORD +28 -25
- {graphiti_core-0.13.2.dist-info → graphiti_core-0.15.0.dist-info}/WHEEL +1 -1
- {graphiti_core-0.13.2.dist-info → graphiti_core-0.15.0.dist-info/licenses}/LICENSE +0 -0
graphiti_core/cross_encoder/__init__.py
CHANGED
@@ -15,6 +15,7 @@ limitations under the License.
 """
 
 from .client import CrossEncoderClient
+from .gemini_reranker_client import GeminiRerankerClient
 from .openai_reranker_client import OpenAIRerankerClient
 
-__all__ = ['CrossEncoderClient', 'OpenAIRerankerClient']
+__all__ = ['CrossEncoderClient', 'GeminiRerankerClient', 'OpenAIRerankerClient']

graphiti_core/cross_encoder/gemini_reranker_client.py
ADDED
@@ -0,0 +1,146 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import re
+
+from google import genai  # type: ignore
+from google.genai import types  # type: ignore
+
+from ..helpers import semaphore_gather
+from ..llm_client import LLMConfig, RateLimitError
+from .client import CrossEncoderClient
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_MODEL = 'gemini-2.5-flash-lite-preview-06-17'
+
+
+class GeminiRerankerClient(CrossEncoderClient):
+    def __init__(
+        self,
+        config: LLMConfig | None = None,
+        client: genai.Client | None = None,
+    ):
+        """
+        Initialize the GeminiRerankerClient with the provided configuration and client.
+
+        The Gemini Developer API does not yet support logprobs. Unlike the OpenAI reranker,
+        this reranker uses the Gemini API to perform direct relevance scoring of passages.
+        Each passage is scored individually on a 0-100 scale.
+
+        Args:
+            config (LLMConfig | None): The configuration for the LLM client, including API key, model, base URL, temperature, and max tokens.
+            client (genai.Client | None): An optional async client instance to use. If not provided, a new genai.Client is created.
+        """
+        if config is None:
+            config = LLMConfig()
+
+        self.config = config
+        if client is None:
+            self.client = genai.Client(api_key=config.api_key)
+        else:
+            self.client = client
+
+    async def rank(self, query: str, passages: list[str]) -> list[tuple[str, float]]:
+        """
+        Rank passages based on their relevance to the query using direct scoring.
+
+        Each passage is scored individually on a 0-100 scale, then normalized to [0,1].
+        """
+        if len(passages) <= 1:
+            return [(passage, 1.0) for passage in passages]
+
+        # Generate scoring prompts for each passage
+        scoring_prompts = []
+        for passage in passages:
+            prompt = f"""Rate how well this passage answers or relates to the query. Use a scale from 0 to 100.
+
+Query: {query}
+
+Passage: {passage}
+
+Provide only a number between 0 and 100 (no explanation, just the number):"""
+
+            scoring_prompts.append(
+                [
+                    types.Content(
+                        role='user',
+                        parts=[types.Part.from_text(text=prompt)],
+                    ),
+                ]
+            )
+
+        try:
+            # Execute all scoring requests concurrently - O(n) API calls
+            responses = await semaphore_gather(
+                *[
+                    self.client.aio.models.generate_content(
+                        model=self.config.model or DEFAULT_MODEL,
+                        contents=prompt_messages,  # type: ignore
+                        config=types.GenerateContentConfig(
+                            system_instruction='You are an expert at rating passage relevance. Respond with only a number from 0-100.',
+                            temperature=0.0,
+                            max_output_tokens=3,
+                        ),
+                    )
+                    for prompt_messages in scoring_prompts
+                ]
+            )
+
+            # Extract scores and create results
+            results = []
+            for passage, response in zip(passages, responses, strict=True):
+                try:
+                    if hasattr(response, 'text') and response.text:
+                        # Extract numeric score from response
+                        score_text = response.text.strip()
+                        # Handle cases where model might return non-numeric text
+                        score_match = re.search(r'\b(\d{1,3})\b', score_text)
+                        if score_match:
+                            score = float(score_match.group(1))
+                            # Normalize to [0, 1] range and clamp to valid range
+                            normalized_score = max(0.0, min(1.0, score / 100.0))
+                            results.append((passage, normalized_score))
+                        else:
+                            logger.warning(
+                                f'Could not extract numeric score from response: {score_text}'
+                            )
+                            results.append((passage, 0.0))
+                    else:
+                        logger.warning('Empty response from Gemini for passage scoring')
+                        results.append((passage, 0.0))
+                except (ValueError, AttributeError) as e:
+                    logger.warning(f'Error parsing score from Gemini response: {e}')
+                    results.append((passage, 0.0))
+
+            # Sort by score in descending order (highest relevance first)
+            results.sort(reverse=True, key=lambda x: x[1])
+            return results
+
+        except Exception as e:
+            # Check if it's a rate limit error based on Gemini API error codes
+            error_message = str(e).lower()
+            if (
+                'rate limit' in error_message
+                or 'quota' in error_message
+                or 'resource_exhausted' in error_message
+                or '429' in str(e)
+            ):
+                raise RateLimitError from e
+
+            logger.error(f'Error in generating LLM response: {e}')
+            raise
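For orientation, here is a hedged usage sketch of the new reranker. The API key, query, and passages are placeholders, it assumes the optional `google-genai` dependency is installed, and it only exercises the constructor and `rank` method shown in the diff above.

```python
# Illustrative only: exercises GeminiRerankerClient as introduced in 0.15.0.
import asyncio

from graphiti_core.cross_encoder import GeminiRerankerClient
from graphiti_core.llm_client import LLMConfig


async def main() -> None:
    # One Gemini call per passage; scores are normalized to [0, 1] and sorted descending.
    reranker = GeminiRerankerClient(config=LLMConfig(api_key='YOUR_GEMINI_API_KEY'))
    ranked = await reranker.rank(
        query='What graph databases does Graphiti support?',
        passages=[
            'Graphiti builds temporally aware knowledge graphs.',
            'Graphiti supports Neo4j and, as of this release, FalkorDB as graph backends.',
        ],
    )
    for passage, score in ranked:
        print(f'{score:.2f}  {passage}')


asyncio.run(main())
```

Because each passage is scored with its own request, `rank` issues O(n) API calls; concurrency is capped by `semaphore_gather`, which honors the `SEMAPHORE_LIMIT` setting from `graphiti_core.helpers`.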

graphiti_core/driver/__init__.py
CHANGED
@@ -14,4 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-
+from falkordb import FalkorDB
+from neo4j import Neo4jDriver
+
+__all__ = ['Neo4jDriver', 'FalkorDB']

graphiti_core/driver/falkordb_driver.py
CHANGED
@@ -15,7 +15,6 @@ limitations under the License.
 """
 
 import logging
-from collections.abc import Coroutine
 from datetime import datetime
 from typing import Any
 
@@ -52,11 +51,11 @@ class FalkorDriverSession(GraphDriverSession):
         if isinstance(query, list):
             for cypher, params in query:
                 params = convert_datetimes_to_strings(params)
-                await self.graph.query(str(cypher), params)
+                await self.graph.query(str(cypher), params)  # type: ignore[reportUnknownArgumentType]
         else:
             params = dict(kwargs)
             params = convert_datetimes_to_strings(params)
-            await self.graph.query(str(query), params)
+            await self.graph.query(str(query), params)  # type: ignore[reportUnknownArgumentType]
         # Assuming `graph.query` is async (ideal); otherwise, wrap in executor
         return None
@@ -66,22 +65,30 @@ class FalkorDriver(GraphDriver):
 
     def __init__(
         self,
-
-
-
+        host: str = 'localhost',
+        port: int = 6379,
+        username: str | None = None,
+        password: str | None = None,
+        falkor_db: FalkorDB | None = None,
     ):
-
-
-        uri = f'{uri_parts[0]}://{user}:{password}@{uri_parts[1]}'
+        """
+        Initialize the FalkorDB driver.
 
-
-
-        )
+        FalkorDB is a multi-tenant graph database.
+        To connect, provide the host and port.
+        The default parameters assume a local (on-premises) FalkorDB instance.
+        """
+        super().__init__()
+        if falkor_db is not None:
+            # If a FalkorDB instance is provided, use it directly
+            self.client = falkor_db
+        else:
+            self.client = FalkorDB(host=host, port=port, username=username, password=password)
 
     def _get_graph(self, graph_name: str | None) -> FalkorGraph:
-        # FalkorDB requires a non-None database name for multi-tenant graphs; the default is
+        # FalkorDB requires a non-None database name for multi-tenant graphs; the default is DEFAULT_DATABASE
         if graph_name is None:
-            graph_name =
+            graph_name = DEFAULT_DATABASE
         return self.client.select_graph(graph_name)
 
     async def execute_query(self, cypher_query_, **kwargs: Any):
@@ -92,7 +99,7 @@ class FalkorDriver(GraphDriver):
         params = convert_datetimes_to_strings(dict(kwargs))
 
         try:
-            result = await graph.query(cypher_query_, params)
+            result = await graph.query(cypher_query_, params)  # type: ignore[reportUnknownArgumentType]
         except Exception as e:
             if 'already indexed' in str(e):
                 # check if index already exists
@@ -102,17 +109,36 @@ class FalkorDriver(GraphDriver):
            raise
 
        # Convert the result header to a list of strings
-        header = [h[1]
-
+        header = [h[1] for h in result.header]
+
+        # Convert FalkorDB's result format (list of lists) to the format expected by Graphiti (list of dicts)
+        records = []
+        for row in result.result_set:
+            record = {}
+            for i, field_name in enumerate(header):
+                if i < len(row):
+                    record[field_name] = row[i]
+                else:
+                    # If there are more fields in header than values in row, set to None
+                    record[field_name] = None
+            records.append(record)
+
+        return records, header, None
 
     def session(self, database: str | None) -> GraphDriverSession:
         return FalkorDriverSession(self._get_graph(database))
 
     async def close(self) -> None:
-
-
-
-
+        """Close the driver connection."""
+        if hasattr(self.client, 'aclose'):
+            await self.client.aclose()  # type: ignore[reportUnknownMemberType]
+        elif hasattr(self.client.connection, 'aclose'):
+            await self.client.connection.aclose()
+        elif hasattr(self.client.connection, 'close'):
+            await self.client.connection.close()
+
+    async def delete_all_indexes(self, database_: str = DEFAULT_DATABASE) -> None:
+        await self.execute_query(
             'CALL db.indexes() YIELD name DROP INDEX name',
             database_=database_,
         )
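The reworked `FalkorDriver.__init__` replaces the old URI string with explicit connection parameters and also accepts a pre-built client. A short sketch of the two construction paths follows; the hostnames and credentials are made-up placeholders.

```python
# Illustrative only: the two ways to construct the 0.15.0 FalkorDriver.
from falkordb import FalkorDB

from graphiti_core.driver.falkordb_driver import FalkorDriver

# Path 1: let the driver open its own connection (defaults target a local instance).
local_driver = FalkorDriver(host='localhost', port=6379)

# Path 2: hand over an existing FalkorDB client the caller has already configured.
falkor_db = FalkorDB(host='falkordb.example.com', port=6379, username='user', password='secret')
hosted_driver = FalkorDriver(falkor_db=falkor_db)
```

Note that `execute_query` now reshapes FalkorDB's list-of-lists result set into a list of dicts keyed by the header names and returns `(records, header, None)`, the format the rest of Graphiti expects.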

graphiti_core/driver/neo4j_driver.py
CHANGED
@@ -18,7 +18,7 @@ import logging
 from collections.abc import Coroutine
 from typing import Any
 
-from neo4j import AsyncGraphDatabase
+from neo4j import AsyncGraphDatabase, EagerResult
 from typing_extensions import LiteralString
 
 from graphiti_core.driver.driver import GraphDriver, GraphDriverSession
@@ -42,7 +42,7 @@ class Neo4jDriver(GraphDriver):
             auth=(user or '', password or ''),
         )
 
-    async def execute_query(self, cypher_query_: LiteralString, **kwargs: Any) ->
+    async def execute_query(self, cypher_query_: LiteralString, **kwargs: Any) -> EagerResult:
         params = kwargs.pop('params', None)
         result = await self.client.execute_query(cypher_query_, parameters_=params, **kwargs)
 
@@ -54,7 +54,9 @@ class Neo4jDriver(GraphDriver):
     async def close(self) -> None:
         return await self.client.close()
 
-    def delete_all_indexes(
+    def delete_all_indexes(
+        self, database_: str = DEFAULT_DATABASE
+    ) -> Coroutine[Any, Any, EagerResult]:
         return self.client.execute_query(
             'CALL db.indexes() YIELD name DROP INDEX name',
             database_=database_,
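The Neo4j driver changes are annotation-only, but they pin down the return types: `execute_query` is declared to resolve to a neo4j `EagerResult`, and `delete_all_indexes` to an un-awaited coroutine producing one. A small consumption sketch with placeholder connection details, assuming `execute_query` passes the `EagerResult` through as the new annotation indicates:

```python
# Illustrative only: consuming the now-typed Neo4jDriver results.
import asyncio

from neo4j import EagerResult

from graphiti_core.driver.neo4j_driver import Neo4jDriver


async def main() -> None:
    driver = Neo4jDriver('bolt://localhost:7687', 'neo4j', 'password')
    result: EagerResult = await driver.execute_query('RETURN 1 AS n')
    # EagerResult is a named tuple of (records, summary, keys).
    print(result.keys, [record.data() for record in result.records])
    await driver.close()


asyncio.run(main())
```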

graphiti_core/embedder/voyage.py
CHANGED
@@ -38,7 +38,7 @@ class VoyageAIEmbedder(EmbedderClient):
         if config is None:
             config = VoyageAIEmbedderConfig()
         self.config = config
-        self.client = voyageai.AsyncClient(api_key=config.api_key)
+        self.client = voyageai.AsyncClient(api_key=config.api_key)  # type: ignore[reportUnknownMemberType]
 
     async def create(
         self, input_data: str | list[str] | Iterable[int] | Iterable[Iterable[int]]

graphiti_core/graphiti.py
CHANGED
@@ -29,7 +29,12 @@ from graphiti_core.driver.neo4j_driver import Neo4jDriver
 from graphiti_core.edges import EntityEdge, EpisodicEdge
 from graphiti_core.embedder import EmbedderClient, OpenAIEmbedder
 from graphiti_core.graphiti_types import GraphitiClients
-from graphiti_core.helpers import
+from graphiti_core.helpers import (
+    DEFAULT_DATABASE,
+    semaphore_gather,
+    validate_excluded_entity_types,
+    validate_group_id,
+)
 from graphiti_core.llm_client import LLMClient, OpenAIClient
 from graphiti_core.nodes import CommunityNode, EntityNode, EpisodeType, EpisodicNode
 from graphiti_core.search.search import SearchConfig, search
@@ -46,6 +51,7 @@ from graphiti_core.search.search_utils import (
     get_mentioned_nodes,
     get_relevant_edges,
 )
+from graphiti_core.telemetry import capture_event
 from graphiti_core.utils.bulk_utils import (
     RawEpisode,
     add_nodes_and_edges_bulk,
@@ -95,7 +101,7 @@ class AddEpisodeResults(BaseModel):
 class Graphiti:
     def __init__(
         self,
-        uri: str,
+        uri: str | None = None,
         user: str | None = None,
         password: str | None = None,
         llm_client: LLMClient | None = None,
@@ -156,7 +162,12 @@ class Graphiti:
             Graphiti if you're using the default OpenAIClient.
         """
 
-
+        if graph_driver:
+            self.driver = graph_driver
+        else:
+            if uri is None:
+                raise ValueError("uri must be provided when graph_driver is None")
+            self.driver = Neo4jDriver(uri, user, password)
 
         self.database = DEFAULT_DATABASE
         self.store_raw_episode_content = store_raw_episode_content
@@ -181,6 +192,61 @@ class Graphiti:
             cross_encoder=self.cross_encoder,
         )
 
+        # Capture telemetry event
+        self._capture_initialization_telemetry()
+
+    def _capture_initialization_telemetry(self):
+        """Capture telemetry event for Graphiti initialization."""
+        try:
+            # Detect provider types from class names
+            llm_provider = self._get_provider_type(self.llm_client)
+            embedder_provider = self._get_provider_type(self.embedder)
+            reranker_provider = self._get_provider_type(self.cross_encoder)
+            database_provider = self._get_provider_type(self.driver)
+
+            properties = {
+                'llm_provider': llm_provider,
+                'embedder_provider': embedder_provider,
+                'reranker_provider': reranker_provider,
+                'database_provider': database_provider,
+            }
+
+            capture_event('graphiti_initialized', properties)
+        except Exception:
+            # Silently handle telemetry errors
+            pass
+
+    def _get_provider_type(self, client) -> str:
+        """Get provider type from client class name."""
+        if client is None:
+            return 'none'
+
+        class_name = client.__class__.__name__.lower()
+
+        # LLM providers
+        if 'openai' in class_name:
+            return 'openai'
+        elif 'azure' in class_name:
+            return 'azure'
+        elif 'anthropic' in class_name:
+            return 'anthropic'
+        elif 'crossencoder' in class_name:
+            return 'crossencoder'
+        elif 'gemini' in class_name:
+            return 'gemini'
+        elif 'groq' in class_name:
+            return 'groq'
+        # Database providers
+        elif 'neo4j' in class_name:
+            return 'neo4j'
+        elif 'falkor' in class_name:
+            return 'falkordb'
+        # Embedder providers
+        elif 'voyage' in class_name:
+            return 'voyage'
+        else:
+            return 'unknown'
+
     async def close(self):
         """
         Close the connection to the Neo4j database.
@@ -293,6 +359,7 @@
         uuid: str | None = None,
         update_communities: bool = False,
         entity_types: dict[str, BaseModel] | None = None,
+        excluded_entity_types: list[str] | None = None,
         previous_episode_uuids: list[str] | None = None,
         edge_types: dict[str, BaseModel] | None = None,
         edge_type_map: dict[tuple[str, str], list[str]] | None = None,
@@ -321,6 +388,12 @@
             Optional uuid of the episode.
         update_communities : bool
             Optional. Whether to update communities with new node information
+        entity_types : dict[str, BaseModel] | None
+            Optional. Dictionary mapping entity type names to their Pydantic model definitions.
+        excluded_entity_types : list[str] | None
+            Optional. List of entity type names to exclude from the graph. Entities classified
+            into these types will not be added to the graph. Can include 'Entity' to exclude
+            the default entity type.
         previous_episode_uuids : list[str] | None
             Optional. list of episode uuids to use as the previous episodes. If this is not provided,
             the most recent episodes by created_at date will be used.
@@ -351,6 +424,7 @@
         now = utc_now()
 
         validate_entity_types(entity_types)
+        validate_excluded_entity_types(excluded_entity_types, entity_types)
         validate_group_id(group_id)
 
         previous_episodes = (
@@ -389,7 +463,7 @@
             # Extract entities as nodes
 
             extracted_nodes = await extract_nodes(
-                self.clients, episode, previous_episodes, entity_types
+                self.clients, episode, previous_episodes, entity_types, excluded_entity_types
             )
 
             # Extract edges and resolve nodes
@@ -534,7 +608,7 @@
                 extracted_nodes,
                 extracted_edges,
                 episodic_edges,
-            ) = await extract_nodes_and_edges_bulk(self.clients, episode_pairs)
+            ) = await extract_nodes_and_edges_bulk(self.clients, episode_pairs, None, None)
 
             # Generate embeddings
             await semaphore_gather(
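Pulling the `graphiti.py` changes together, the sketch below shows the constructor with an explicit Neo4j URI (now optional when a `graph_driver` is injected instead) and an `add_episode` call using the newly documented `entity_types` plus the new `excluded_entity_types` filter. The connection details, the `Person` model, and the episode text are illustrative, and the default clients assume `OPENAI_API_KEY` is set.

```python
# Illustrative only: new 0.15.0 options on Graphiti() and add_episode().
import asyncio
from datetime import datetime, timezone

from pydantic import BaseModel

from graphiti_core import Graphiti
from graphiti_core.nodes import EpisodeType


class Person(BaseModel):
    """A human mentioned in an episode."""


async def main() -> None:
    # uri is now optional: pass graph_driver=<driver instance> instead, in which
    # case uri/user/password may be omitted.
    graphiti = Graphiti(uri='bolt://localhost:7687', user='neo4j', password='password')

    await graphiti.add_episode(
        name='intro',
        episode_body='Alice met Bob at the conference in Berlin.',
        source=EpisodeType.text,
        source_description='demo text',
        reference_time=datetime.now(timezone.utc),
        entity_types={'Person': Person},
        # New in 0.15.0: entities classified into these types are dropped;
        # 'Entity' excludes anything that falls back to the default type.
        excluded_entity_types=['Entity'],
    )
    await graphiti.close()


asyncio.run(main())
```

On construction, Graphiti now also fires a single `graphiti_initialized` telemetry event whose properties are just the provider names inferred from class names (LLM, embedder, reranker, database); any telemetry failure is swallowed silently.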

graphiti_core/helpers.py
CHANGED
@@ -19,18 +19,20 @@ import os
 import re
 from collections.abc import Coroutine
 from datetime import datetime
+from typing import Any
 
 import numpy as np
 from dotenv import load_dotenv
 from neo4j import time as neo4j_time
 from numpy._typing import NDArray
+from pydantic import BaseModel
 from typing_extensions import LiteralString
 
 from graphiti_core.errors import GroupIdValidationError
 
 load_dotenv()
 
-DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', '
+DEFAULT_DATABASE = os.getenv('DEFAULT_DATABASE', 'default_db')
 USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
 SEMAPHORE_LIMIT = int(os.getenv('SEMAPHORE_LIMIT', 20))
 MAX_REFLEXION_ITERATIONS = int(os.getenv('MAX_REFLEXION_ITERATIONS', 0))
@@ -98,7 +100,7 @@ def normalize_l2(embedding: list[float]) -> NDArray:
 async def semaphore_gather(
     *coroutines: Coroutine,
     max_coroutines: int | None = None,
-):
+) -> list[Any]:
     semaphore = asyncio.Semaphore(max_coroutines or SEMAPHORE_LIMIT)
 
     async def _wrap_coroutine(coroutine):
@@ -132,3 +134,37 @@ def validate_group_id(group_id: str) -> bool:
         raise GroupIdValidationError(group_id)
 
     return True
+
+
+def validate_excluded_entity_types(
+    excluded_entity_types: list[str] | None, entity_types: dict[str, BaseModel] | None = None
+) -> bool:
+    """
+    Validate that excluded entity types are valid type names.
+
+    Args:
+        excluded_entity_types: List of entity type names to exclude
+        entity_types: Dictionary of available custom entity types
+
+    Returns:
+        True if valid
+
+    Raises:
+        ValueError: If any excluded type names are invalid
+    """
+    if not excluded_entity_types:
+        return True
+
+    # Build set of available type names
+    available_types = {'Entity'}  # Default type is always available
+    if entity_types:
+        available_types.update(entity_types.keys())
+
+    # Check for invalid type names
+    invalid_types = set(excluded_entity_types) - available_types
+    if invalid_types:
+        raise ValueError(
+            f'Invalid excluded entity types: {sorted(invalid_types)}. Available types: {sorted(available_types)}'
+        )
+
+    return True