graphiti-core 0.13.2__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -17,19 +17,21 @@ limitations under the License.
  import json
  import logging
  import typing
+ from typing import ClassVar

  from google import genai # type: ignore
  from google.genai import types # type: ignore
  from pydantic import BaseModel

  from ..prompts.models import Message
- from .client import LLMClient
+ from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
  from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
  from .errors import RateLimitError

  logger = logging.getLogger(__name__)

- DEFAULT_MODEL = 'gemini-2.0-flash'
+ DEFAULT_MODEL = 'gemini-2.5-flash'
+ DEFAULT_SMALL_MODEL = 'models/gemini-2.5-flash-lite-preview-06-17'


  class GeminiClient(LLMClient):
@@ -43,27 +45,34 @@ class GeminiClient(LLMClient):
          model (str): The model name to use for generating responses.
          temperature (float): The temperature to use for generating responses.
          max_tokens (int): The maximum number of tokens to generate in a response.
-
+         thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
      Methods:
-         __init__(config: LLMConfig | None = None, cache: bool = False):
-             Initializes the GeminiClient with the provided configuration and cache setting.
+         __init__(config: LLMConfig | None = None, cache: bool = False, thinking_config: types.ThinkingConfig | None = None):
+             Initializes the GeminiClient with the provided configuration, cache setting, and optional thinking config.

          _generate_response(messages: list[Message]) -> dict[str, typing.Any]:
              Generates a response from the language model based on the provided messages.
      """

+     # Class-level constants
+     MAX_RETRIES: ClassVar[int] = 2
+
      def __init__(
          self,
          config: LLMConfig | None = None,
          cache: bool = False,
          max_tokens: int = DEFAULT_MAX_TOKENS,
+         thinking_config: types.ThinkingConfig | None = None,
      ):
          """
-         Initialize the GeminiClient with the provided configuration and cache setting.
+         Initialize the GeminiClient with the provided configuration, cache setting, and optional thinking config.

          Args:
              config (LLMConfig | None): The configuration for the LLM client, including API key, model, temperature, and max tokens.
              cache (bool): Whether to use caching for responses. Defaults to False.
+             thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
+                 Only use with models that support thinking (gemini-2.5+). Defaults to None.
+
          """
          if config is None:
              config = LLMConfig()
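
For context on how the new constructor parameter is meant to be used, here is a minimal construction sketch (not part of this diff). It assumes the google-genai SDK exposes types.ThinkingConfig with a thinking_budget field, and that the import paths graphiti_core.llm_client.config and graphiti_core.llm_client.gemini_client match this package's layout.

    # Hypothetical usage sketch; per the docstring above, thinking_config should
    # only be passed for models that support thinking (gemini-2.5+).
    from google.genai import types

    from graphiti_core.llm_client.config import LLMConfig
    from graphiti_core.llm_client.gemini_client import GeminiClient

    config = LLMConfig(api_key='your-google-api-key', model='gemini-2.5-flash')
    client = GeminiClient(
        config=config,
        thinking_config=types.ThinkingConfig(thinking_budget=1024),
    )
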
@@ -76,6 +85,50 @@ class GeminiClient(LLMClient):
              api_key=config.api_key,
          )
          self.max_tokens = max_tokens
+         self.thinking_config = thinking_config
+
+     def _check_safety_blocks(self, response) -> None:
+         """Check if response was blocked for safety reasons and raise appropriate exceptions."""
+         # Check if the response was blocked for safety reasons
+         if not (hasattr(response, 'candidates') and response.candidates):
+             return
+
+         candidate = response.candidates[0]
+         if not (hasattr(candidate, 'finish_reason') and candidate.finish_reason == 'SAFETY'):
+             return
+
+         # Content was blocked for safety reasons - collect safety details
+         safety_info = []
+         safety_ratings = getattr(candidate, 'safety_ratings', None)
+
+         if safety_ratings:
+             for rating in safety_ratings:
+                 if getattr(rating, 'blocked', False):
+                     category = getattr(rating, 'category', 'Unknown')
+                     probability = getattr(rating, 'probability', 'Unknown')
+                     safety_info.append(f'{category}: {probability}')
+
+         safety_details = (
+             ', '.join(safety_info) if safety_info else 'Content blocked for safety reasons'
+         )
+         raise Exception(f'Response blocked by Gemini safety filters: {safety_details}')
+
+     def _check_prompt_blocks(self, response) -> None:
+         """Check if prompt was blocked and raise appropriate exceptions."""
+         prompt_feedback = getattr(response, 'prompt_feedback', None)
+         if not prompt_feedback:
+             return
+
+         block_reason = getattr(prompt_feedback, 'block_reason', None)
+         if block_reason:
+             raise Exception(f'Prompt blocked by Gemini: {block_reason}')
+
+     def _get_model_for_size(self, model_size: ModelSize) -> str:
+         """Get the appropriate model name based on the requested size."""
+         if model_size == ModelSize.small:
+             return self.small_model or DEFAULT_SMALL_MODEL
+         else:
+             return self.model or DEFAULT_MODEL

      async def _generate_response(
          self,
@@ -91,17 +144,17 @@ class GeminiClient(LLMClient):
              messages (list[Message]): A list of messages to send to the language model.
              response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
              max_tokens (int): The maximum number of tokens to generate in the response.
+             model_size (ModelSize): The size of the model to use (small or medium).

          Returns:
              dict[str, typing.Any]: The response from the language model.

          Raises:
              RateLimitError: If the API rate limit is exceeded.
-             RefusalError: If the content is blocked by the model.
-             Exception: If there is an error generating the response.
+             Exception: If there is an error generating the response or content is blocked.
          """
          try:
-             gemini_messages: list[types.Content] = []
+             gemini_messages: typing.Any = []
              # If a response model is provided, add schema for structured output
              system_prompt = ''
              if response_model is not None:
@@ -127,6 +180,9 @@ class GeminiClient(LLMClient):
                      types.Content(role=m.role, parts=[types.Part.from_text(text=m.content)])
                  )

+             # Get the appropriate model for the requested size
+             model = self._get_model_for_size(model_size)
+
              # Create generation config
              generation_config = types.GenerateContentConfig(
                  temperature=self.temperature,
@@ -134,15 +190,20 @@ class GeminiClient(LLMClient):
                  response_mime_type='application/json' if response_model else None,
                  response_schema=response_model if response_model else None,
                  system_instruction=system_prompt,
+                 thinking_config=self.thinking_config,
              )

              # Generate content using the simple string approach
              response = await self.client.aio.models.generate_content(
-                 model=self.model or DEFAULT_MODEL,
-                 contents=gemini_messages, # type: ignore[arg-type] # mypy fails on broad union type
+                 model=model,
+                 contents=gemini_messages,
                  config=generation_config,
              )

+             # Check for safety and prompt blocks
+             self._check_safety_blocks(response)
+             self._check_prompt_blocks(response)
+
              # If this was a structured output request, parse the response into the Pydantic model
              if response_model is not None:
                  try:
@@ -160,9 +221,16 @@ class GeminiClient(LLMClient):
              return {'content': response.text}

          except Exception as e:
-             # Check if it's a rate limit error
-             if 'rate limit' in str(e).lower() or 'quota' in str(e).lower():
+             # Check if it's a rate limit error based on Gemini API error codes
+             error_message = str(e).lower()
+             if (
+                 'rate limit' in error_message
+                 or 'quota' in error_message
+                 or 'resource_exhausted' in error_message
+                 or '429' in str(e)
+             ):
                  raise RateLimitError from e
+
              logger.error(f'Error in generating LLM response: {e}')
              raise

@@ -174,13 +242,14 @@ class GeminiClient(LLMClient):
          model_size: ModelSize = ModelSize.medium,
      ) -> dict[str, typing.Any]:
          """
-         Generate a response from the Gemini language model.
-         This method overrides the parent class method to provide a direct implementation.
+         Generate a response from the Gemini language model with retry logic and error handling.
+         This method overrides the parent class method to provide a direct implementation with advanced retry logic.

          Args:
              messages (list[Message]): A list of messages to send to the language model.
              response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
-             max_tokens (int): The maximum number of tokens to generate in the response.
+             max_tokens (int | None): The maximum number of tokens to generate in the response.
+             model_size (ModelSize): The size of the model to use (small or medium).

          Returns:
              dict[str, typing.Any]: The response from the language model.
@@ -188,10 +257,53 @@ class GeminiClient(LLMClient):
          if max_tokens is None:
              max_tokens = self.max_tokens

-         # Call the internal _generate_response method
-         return await self._generate_response(
-             messages=messages,
-             response_model=response_model,
-             max_tokens=max_tokens,
-             model_size=model_size,
-         )
+         retry_count = 0
+         last_error = None
+
+         # Add multilingual extraction instructions
+         messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
+         while retry_count <= self.MAX_RETRIES:
+             try:
+                 response = await self._generate_response(
+                     messages=messages,
+                     response_model=response_model,
+                     max_tokens=max_tokens,
+                     model_size=model_size,
+                 )
+                 return response
+             except RateLimitError:
+                 # Rate limit errors should not trigger retries (fail fast)
+                 raise
+             except Exception as e:
+                 last_error = e
+
+                 # Check if this is a safety block - these typically shouldn't be retried
+                 if 'safety' in str(e).lower() or 'blocked' in str(e).lower():
+                     logger.warning(f'Content blocked by safety filters: {e}')
+                     raise
+
+                 # Don't retry if we've hit the max retries
+                 if retry_count >= self.MAX_RETRIES:
+                     logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
+                     raise
+
+                 retry_count += 1
+
+                 # Construct a detailed error message for the LLM
+                 error_context = (
+                     f'The previous response attempt was invalid. '
+                     f'Error type: {e.__class__.__name__}. '
+                     f'Error details: {str(e)}. '
+                     f'Please try again with a valid response, ensuring the output matches '
+                     f'the expected format and constraints.'
+                 )
+
+                 error_message = Message(role='user', content=error_context)
+                 messages.append(error_message)
+                 logger.warning(
+                     f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                 )
+
+         # If we somehow get here, raise the last error
+         raise last_error or Exception('Max retries exceeded with no specific error')
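
Taken together, the override now fails fast on RateLimitError and retries other failures up to MAX_RETRIES, appending a corrective user message before each retry. A hedged call sketch follows, assuming the client built in the earlier example and that this override is the public generate_response method inherited from LLMClient; the Summary schema and message text are made-up examples.

    # Illustrative only; not taken from this package's own call sites.
    import asyncio

    from pydantic import BaseModel

    from graphiti_core.prompts.models import Message


    class Summary(BaseModel):
        text: str


    async def main() -> None:
        messages = [
            Message(role='system', content='You summarize text.'),
            Message(role='user', content='Summarize: Graphiti builds temporal knowledge graphs.'),
        ]
        # RateLimitError propagates immediately; schema or validation failures are
        # retried with error feedback appended to the conversation.
        result = await client.generate_response(messages, response_model=Summary)
        print(result)


    asyncio.run(main())
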
graphiti_core/nodes.py CHANGED
@@ -540,10 +540,18 @@ class CommunityNode(Node):

  # Node helpers
  def get_episodic_node_from_record(record: Any) -> EpisodicNode:
+     created_at = parse_db_date(record['created_at'])
+     valid_at = parse_db_date(record['valid_at'])
+
+     if created_at is None:
+         raise ValueError(f"created_at cannot be None for episode {record.get('uuid', 'unknown')}")
+     if valid_at is None:
+         raise ValueError(f"valid_at cannot be None for episode {record.get('uuid', 'unknown')}")
+
      return EpisodicNode(
          content=record['content'],
-         created_at=parse_db_date(record['created_at']), # type: ignore
-         valid_at=parse_db_date(record['valid_at']), # type: ignore
+         created_at=created_at,
+         valid_at=valid_at,
          uuid=record['uuid'],
          group_id=record['group_id'],
          source=EpisodeType.from_str(record['source']),
@@ -586,6 +594,8 @@ def get_community_node_from_record(record: Any) -> CommunityNode:


  async def create_entity_node_embeddings(embedder: EmbedderClient, nodes: list[EntityNode]):
+     if not nodes: # Handle empty list case
+         return
      name_embeddings = await embedder.create_batch([node.name for node in nodes])
      for node, name_embedding in zip(nodes, name_embeddings, strict=True):
          node.name_embedding = name_embedding
@@ -19,7 +19,6 @@ from enum import Enum
  from typing import Any

  from pydantic import BaseModel, Field
- from typing_extensions import LiteralString


  class ComparisonOperator(Enum):
@@ -53,8 +52,8 @@ class SearchFilters(BaseModel):

  def node_search_filter_query_constructor(
      filters: SearchFilters,
- ) -> tuple[LiteralString, dict[str, Any]]:
-     filter_query: LiteralString = ''
+ ) -> tuple[str, dict[str, Any]]:
+     filter_query: str = ''
      filter_params: dict[str, Any] = {}

      if filters.node_labels is not None:
@@ -67,8 +66,8 @@ def node_search_filter_query_constructor(

  def edge_search_filter_query_constructor(
      filters: SearchFilters,
- ) -> tuple[LiteralString, dict[str, Any]]:
-     filter_query: LiteralString = ''
+ ) -> tuple[str, dict[str, Any]]:
+     filter_query: str = ''
      filter_params: dict[str, Any] = {}

      if filters.edge_types is not None:
@@ -67,7 +67,7 @@ def fulltext_query(query: str, group_ids: list[str] | None = None):
      )
      group_ids_filter = ''
      for f in group_ids_filter_list:
-         group_ids_filter += f if not group_ids_filter else f'OR {f}'
+         group_ids_filter += f if not group_ids_filter else f' OR {f}'

      group_ids_filter += ' AND ' if group_ids_filter else ''

@@ -278,9 +278,6 @@ async def edge_similarity_search(
          routing_='r',
      )

-     if driver.provider == 'falkordb':
-         records = [dict(zip(header, row, strict=True)) for row in records]
-
      edges = [get_entity_edge_from_record(record) for record in records]

      return edges
@@ -377,8 +374,6 @@ async def node_fulltext_search(
          database_=DEFAULT_DATABASE,
          routing_='r',
      )
-     if driver.provider == 'falkordb':
-         records = [dict(zip(header, row, strict=True)) for row in records]

      nodes = [get_entity_node_from_record(record) for record in records]

@@ -433,8 +428,7 @@ async def node_similarity_search(
          database_=DEFAULT_DATABASE,
          routing_='r',
      )
-     if driver.provider == 'falkordb':
-         records = [dict(zip(header, row, strict=True)) for row in records]
+
      nodes = [get_entity_node_from_record(record) for record in records]

      return nodes
@@ -0,0 +1,9 @@
+ """
+ Telemetry module for Graphiti.
+
+ This module provides anonymous usage analytics to help improve Graphiti.
+ """
+
+ from .telemetry import capture_event, is_telemetry_enabled
+
+ __all__ = ['capture_event', 'is_telemetry_enabled']
@@ -0,0 +1,117 @@
+ """
+ Telemetry client for Graphiti.
+
+ Collects anonymous usage statistics to help improve the product.
+ """
+
+ import contextlib
+ import os
+ import platform
+ import sys
+ import uuid
+ from pathlib import Path
+ from typing import Any
+
+ # PostHog configuration
+ # Note: This is a public API key intended for client-side use and safe to commit
+ # PostHog public keys are designed to be exposed in client applications
+ POSTHOG_API_KEY = 'phc_UG6EcfDbuXz92neb3rMlQFDY0csxgMqRcIPWESqnSmo'
+ POSTHOG_HOST = 'https://us.i.posthog.com'
+
+ # Environment variable to control telemetry
+ TELEMETRY_ENV_VAR = 'GRAPHITI_TELEMETRY_ENABLED'
+
+ # Cache directory for anonymous ID
+ CACHE_DIR = Path.home() / '.cache' / 'graphiti'
+ ANON_ID_FILE = CACHE_DIR / 'telemetry_anon_id'
+
+
+ def is_telemetry_enabled() -> bool:
+     """Check if telemetry is enabled."""
+     # Disable during pytest runs
+     if 'pytest' in sys.modules:
+         return False
+
+     # Check environment variable (default: enabled)
+     env_value = os.environ.get(TELEMETRY_ENV_VAR, 'true').lower()
+     return env_value in ('true', '1', 'yes', 'on')
+
+
+ def get_anonymous_id() -> str:
+     """Get or create anonymous user ID."""
+     try:
+         # Create cache directory if it doesn't exist
+         CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+         # Try to read existing ID
+         if ANON_ID_FILE.exists():
+             try:
+                 return ANON_ID_FILE.read_text().strip()
+             except Exception:
+                 pass
+
+         # Generate new ID
+         anon_id = str(uuid.uuid4())
+
+         # Save to file
+         with contextlib.suppress(Exception):
+             ANON_ID_FILE.write_text(anon_id)
+
+         return anon_id
+     except Exception:
+         return 'UNKNOWN'
+
+
+ def get_graphiti_version() -> str:
+     """Get Graphiti version."""
+     try:
+         # Try to get version from package metadata
+         import importlib.metadata
+
+         return importlib.metadata.version('graphiti-core')
+     except Exception:
+         return 'unknown'
+
+
+ def initialize_posthog():
+     """Initialize PostHog client."""
+     try:
+         import posthog
+
+         posthog.api_key = POSTHOG_API_KEY
+         posthog.host = POSTHOG_HOST
+         return posthog
+     except ImportError:
+         # PostHog not installed, silently disable telemetry
+         return None
+     except Exception:
+         # Any other error, silently disable telemetry
+         return None
+
+
+ def capture_event(event_name: str, properties: dict[str, Any] | None = None) -> None:
+     """Capture a telemetry event."""
+     if not is_telemetry_enabled():
+         return
+
+     try:
+         posthog_client = initialize_posthog()
+         if posthog_client is None:
+             return
+
+         # Get anonymous ID
+         user_id = get_anonymous_id()
+
+         # Prepare event properties
+         event_properties = {
+             '$process_person_profile': False,
+             'graphiti_version': get_graphiti_version(),
+             'architecture': platform.machine(),
+             **(properties or {}),
+         }
+
+         # Capture the event
+         posthog_client.capture(distinct_id=user_id, event=event_name, properties=event_properties)
+     except Exception:
+         # Silently handle all telemetry errors to avoid disrupting the main application
+         pass
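
Since the new telemetry module is on by default outside pytest, the practical knob is the GRAPHITI_TELEMETRY_ENABLED environment variable read above. A small opt-out sketch, assuming the new module lands at graphiti_core.telemetry (the package path is not spelled out in this diff):

    # Any value outside ('true', '1', 'yes', 'on') disables telemetry; the same
    # effect is available from a shell via `export GRAPHITI_TELEMETRY_ENABLED=false`.
    import os

    os.environ['GRAPHITI_TELEMETRY_ENABLED'] = 'false'

    from graphiti_core.telemetry import capture_event, is_telemetry_enabled

    assert is_telemetry_enabled() is False
    capture_event('example_event', {'source': 'docs'})  # silently no-ops
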
@@ -177,11 +177,14 @@ async def add_nodes_and_edges_bulk_tx(


  async def extract_nodes_and_edges_bulk(
-     clients: GraphitiClients, episode_tuples: list[tuple[EpisodicNode, list[EpisodicNode]]]
+     clients: GraphitiClients,
+     episode_tuples: list[tuple[EpisodicNode, list[EpisodicNode]]],
+     entity_types: dict[str, BaseModel] | None = None,
+     excluded_entity_types: list[str] | None = None,
  ) -> tuple[list[EntityNode], list[EntityEdge], list[EpisodicEdge]]:
      extracted_nodes_bulk = await semaphore_gather(
          *[
-             extract_nodes(clients, episode, previous_episodes)
+             extract_nodes(clients, episode, previous_episodes, entity_types, excluded_entity_types)
              for episode, previous_episodes in episode_tuples
          ]
      )
@@ -40,7 +40,7 @@ async def get_community_clusters(
          database_=DEFAULT_DATABASE,
      )

-     group_ids = group_id_values[0]['group_ids']
+     group_ids = group_id_values[0]['group_ids'] if group_id_values else []

      for group_id in group_ids:
          projection: dict[str, list[Neighbor]] = {}
@@ -297,7 +297,7 @@ async def resolve_extracted_edges(
      embedder = clients.embedder
      await create_entity_edge_embeddings(embedder, extracted_edges)

-     search_results: tuple[list[list[EntityEdge]], list[list[EntityEdge]]] = await semaphore_gather(
+     search_results = await semaphore_gather(
          get_relevant_edges(driver, extracted_edges, SearchFilters()),
          get_edge_invalidation_candidates(driver, extracted_edges, SearchFilters(), 0.2),
      )
@@ -21,7 +21,7 @@ from typing_extensions import LiteralString

  from graphiti_core.driver.driver import GraphDriver
  from graphiti_core.graph_queries import get_fulltext_indices, get_range_indices
- from graphiti_core.helpers import DEFAULT_DATABASE, semaphore_gather
+ from graphiti_core.helpers import DEFAULT_DATABASE, parse_db_date, semaphore_gather
  from graphiti_core.nodes import EpisodeType, EpisodicNode

  EPISODE_WINDOW_LEN = 3
@@ -140,10 +140,8 @@ async def retrieve_episodes(
      episodes = [
          EpisodicNode(
              content=record['content'],
-             created_at=datetime.fromtimestamp(
-                 record['created_at'].to_native().timestamp(), timezone.utc
-             ),
-             valid_at=(record['valid_at'].to_native()),
+             created_at=parse_db_date(record['created_at']) or datetime.min.replace(tzinfo=timezone.utc),
+             valid_at=parse_db_date(record['valid_at']) or datetime.min.replace(tzinfo=timezone.utc),
              uuid=record['uuid'],
              group_id=record['group_id'],
              source=EpisodeType.from_str(record['source']),
@@ -71,6 +71,7 @@ async def extract_nodes(
      episode: EpisodicNode,
      previous_episodes: list[EpisodicNode],
      entity_types: dict[str, BaseModel] | None = None,
+     excluded_entity_types: list[str] | None = None,
  ) -> list[EntityNode]:
      start = time()
      llm_client = clients.llm_client
@@ -154,6 +155,11 @@ async def extract_nodes(
              'entity_type_name'
          )

+         # Check if this entity type should be excluded
+         if excluded_entity_types and entity_type_name in excluded_entity_types:
+             logger.debug(f'Excluding entity "{extracted_entity.name}" of type "{entity_type_name}"')
+             continue
+
          labels: list[str] = list({'Entity', str(entity_type_name)})

          new_node = EntityNode(
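
The exclusion rule itself is easy to reason about in isolation; here is a simplified stand-alone illustration of the same check, using hypothetical entity dicts rather than the library's own extraction loop:

    # Entities whose type appears in excluded_entity_types are skipped.
    extracted = [
        {'name': 'Alice', 'entity_type_name': 'Person'},
        {'name': 'Acme Corp', 'entity_type_name': 'Organization'},
    ]
    excluded_entity_types = ['Organization']

    kept = [
        e
        for e in extracted
        if not (excluded_entity_types and e['entity_type_name'] in excluded_entity_types)
    ]
    print([e['name'] for e in kept])  # ['Alice']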