graphiti-core 0.17.4__py3-none-any.whl → 0.25.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. graphiti_core/cross_encoder/gemini_reranker_client.py +1 -1
  2. graphiti_core/cross_encoder/openai_reranker_client.py +1 -1
  3. graphiti_core/decorators.py +110 -0
  4. graphiti_core/driver/driver.py +62 -2
  5. graphiti_core/driver/falkordb_driver.py +215 -23
  6. graphiti_core/driver/graph_operations/graph_operations.py +191 -0
  7. graphiti_core/driver/kuzu_driver.py +182 -0
  8. graphiti_core/driver/neo4j_driver.py +70 -8
  9. graphiti_core/driver/neptune_driver.py +305 -0
  10. graphiti_core/driver/search_interface/search_interface.py +89 -0
  11. graphiti_core/edges.py +264 -132
  12. graphiti_core/embedder/azure_openai.py +10 -3
  13. graphiti_core/embedder/client.py +2 -1
  14. graphiti_core/graph_queries.py +114 -101
  15. graphiti_core/graphiti.py +635 -260
  16. graphiti_core/graphiti_types.py +2 -0
  17. graphiti_core/helpers.py +37 -15
  18. graphiti_core/llm_client/anthropic_client.py +142 -52
  19. graphiti_core/llm_client/azure_openai_client.py +57 -19
  20. graphiti_core/llm_client/client.py +83 -21
  21. graphiti_core/llm_client/config.py +1 -1
  22. graphiti_core/llm_client/gemini_client.py +75 -57
  23. graphiti_core/llm_client/openai_base_client.py +92 -48
  24. graphiti_core/llm_client/openai_client.py +39 -9
  25. graphiti_core/llm_client/openai_generic_client.py +91 -56
  26. graphiti_core/models/edges/edge_db_queries.py +259 -35
  27. graphiti_core/models/nodes/node_db_queries.py +311 -32
  28. graphiti_core/nodes.py +388 -164
  29. graphiti_core/prompts/dedupe_edges.py +42 -31
  30. graphiti_core/prompts/dedupe_nodes.py +56 -39
  31. graphiti_core/prompts/eval.py +4 -4
  32. graphiti_core/prompts/extract_edges.py +24 -15
  33. graphiti_core/prompts/extract_nodes.py +76 -35
  34. graphiti_core/prompts/prompt_helpers.py +39 -0
  35. graphiti_core/prompts/snippets.py +29 -0
  36. graphiti_core/prompts/summarize_nodes.py +23 -25
  37. graphiti_core/search/search.py +154 -74
  38. graphiti_core/search/search_config.py +39 -4
  39. graphiti_core/search/search_filters.py +110 -31
  40. graphiti_core/search/search_helpers.py +5 -6
  41. graphiti_core/search/search_utils.py +1360 -473
  42. graphiti_core/tracer.py +193 -0
  43. graphiti_core/utils/bulk_utils.py +216 -90
  44. graphiti_core/utils/content_chunking.py +702 -0
  45. graphiti_core/utils/datetime_utils.py +13 -0
  46. graphiti_core/utils/maintenance/community_operations.py +62 -38
  47. graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
  48. graphiti_core/utils/maintenance/edge_operations.py +306 -156
  49. graphiti_core/utils/maintenance/graph_data_operations.py +44 -74
  50. graphiti_core/utils/maintenance/node_operations.py +466 -206
  51. graphiti_core/utils/maintenance/temporal_operations.py +11 -3
  52. graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
  53. graphiti_core/utils/text_utils.py +53 -0
  54. {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/METADATA +221 -87
  55. graphiti_core-0.25.3.dist-info/RECORD +87 -0
  56. {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/WHEEL +1 -1
  57. graphiti_core-0.17.4.dist-info/RECORD +0 -77
  58. /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
  59. {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/licenses/LICENSE +0 -0
graphiti_core/graphiti_types.py CHANGED
@@ -20,6 +20,7 @@ from graphiti_core.cross_encoder import CrossEncoderClient
  from graphiti_core.driver.driver import GraphDriver
  from graphiti_core.embedder import EmbedderClient
  from graphiti_core.llm_client import LLMClient
+ from graphiti_core.tracer import Tracer
 
 
  class GraphitiClients(BaseModel):
@@ -27,5 +28,6 @@ class GraphitiClients(BaseModel):
  llm_client: LLMClient
  embedder: EmbedderClient
  cross_encoder: CrossEncoderClient
+ tracer: Tracer
 
  model_config = ConfigDict(arbitrary_types_allowed=True)
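Reviewer note: GraphitiClients gains a required tracer field. A minimal sketch of constructing it directly, assuming a driver field alongside the ones shown in the hunk and that the other clients are already built (the actual wiring happens inside graphiti.py and may differ):

# Sketch only; NoOpTracer is the library's no-op default per the client.py changes below.
from graphiti_core.graphiti_types import GraphitiClients
from graphiti_core.tracer import NoOpTracer

clients = GraphitiClients(
    driver=driver,                # existing GraphDriver (assumed field)
    llm_client=llm_client,        # existing LLMClient
    embedder=embedder,            # existing EmbedderClient
    cross_encoder=cross_encoder,  # existing CrossEncoderClient
    tracer=NoOpTracer(),          # new field in 0.25.x
)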
graphiti_core/helpers.py CHANGED
@@ -26,30 +26,52 @@ from dotenv import load_dotenv
  from neo4j import time as neo4j_time
  from numpy._typing import NDArray
  from pydantic import BaseModel
- from typing_extensions import LiteralString
 
+ from graphiti_core.driver.driver import GraphProvider
  from graphiti_core.errors import GroupIdValidationError
 
  load_dotenv()
 
  USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
  SEMAPHORE_LIMIT = int(os.getenv('SEMAPHORE_LIMIT', 20))
- MAX_REFLEXION_ITERATIONS = int(os.getenv('MAX_REFLEXION_ITERATIONS', 0))
  DEFAULT_PAGE_LIMIT = 20
 
- RUNTIME_QUERY: LiteralString = (
- 'CYPHER runtime = parallel parallelRuntimeSupport=all\n' if USE_PARALLEL_RUNTIME else ''
- )
+ # Content chunking configuration for entity extraction
+ # Density-based chunking: only chunk high-density content (many entities per token)
+ # This targets the failure case (large entity-dense inputs) while preserving
+ # context for prose/narrative content
+ CHUNK_TOKEN_SIZE = int(os.getenv('CHUNK_TOKEN_SIZE', 3000))
+ CHUNK_OVERLAP_TOKENS = int(os.getenv('CHUNK_OVERLAP_TOKENS', 200))
+ # Minimum tokens before considering chunking - short content processes fine regardless of density
+ CHUNK_MIN_TOKENS = int(os.getenv('CHUNK_MIN_TOKENS', 1000))
+ # Entity density threshold: chunk if estimated density > this value
+ # For JSON: elements per 1000 tokens > threshold * 1000 (e.g., 0.15 = 150 elements/1000 tokens)
+ # For Text: capitalized words per 1000 tokens > threshold * 500 (e.g., 0.15 = 75 caps/1000 tokens)
+ # Higher values = more conservative (less chunking), targets P95+ density cases
+ # Examples that trigger chunking at 0.15: AWS cost data (12mo), bulk data imports, entity-dense JSON
+ # Examples that DON'T chunk at 0.15: meeting transcripts, news articles, documentation
+ CHUNK_DENSITY_THRESHOLD = float(os.getenv('CHUNK_DENSITY_THRESHOLD', 0.15))
 
 
- def parse_db_date(neo_date: neo4j_time.DateTime | str | None) -> datetime | None:
- return (
- neo_date.to_native()
- if isinstance(neo_date, neo4j_time.DateTime)
- else datetime.fromisoformat(neo_date)
- if neo_date
- else None
- )
+ def parse_db_date(input_date: neo4j_time.DateTime | str | None) -> datetime | None:
+ if isinstance(input_date, neo4j_time.DateTime):
+ return input_date.to_native()
+
+ if isinstance(input_date, str):
+ return datetime.fromisoformat(input_date)
+
+ return input_date
+
+
+ def get_default_group_id(provider: GraphProvider) -> str:
+ """
+ This function differentiates the default group id based on the database type.
+ For most databases, the default group id is an empty string, while there are database types that require a specific default group id.
+ """
+ if provider == GraphProvider.FALKORDB:
+ return '\\_'
+ else:
+ return ''
 
 
  def lucene_sanitize(query: str) -> str:
@@ -109,7 +131,7 @@ async def semaphore_gather(
  return await asyncio.gather(*(_wrap_coroutine(coroutine) for coroutine in coroutines))
 
 
- def validate_group_id(group_id: str) -> bool:
+ def validate_group_id(group_id: str | None) -> bool:
  """
  Validate that a group_id contains only ASCII alphanumeric characters, dashes, and underscores.
 
@@ -136,7 +158,7 @@ def validate_group_id(group_id: str) -> bool:
 
 
  def validate_excluded_entity_types(
- excluded_entity_types: list[str] | None, entity_types: dict[str, BaseModel] | None = None
+ excluded_entity_types: list[str] | None, entity_types: dict[str, type[BaseModel]] | None = None
  ) -> bool:
  """
  Validate that excluded entity types are valid type names.
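The chunking comments above amount to a decision rule. A rough sketch of the JSON branch with hypothetical helper names (the shipped logic lives in graphiti_core/utils/content_chunking.py and may differ in detail):

# Illustration of the documented thresholds, not the shipped implementation.
CHUNK_MIN_TOKENS = 1000
CHUNK_DENSITY_THRESHOLD = 0.15

def should_chunk_json(num_elements: int, num_tokens: int) -> bool:
    # Short content is never chunked, regardless of density.
    if num_tokens < CHUNK_MIN_TOKENS:
        return False
    # JSON rule from the comments: elements per 1000 tokens > threshold * 1000.
    elements_per_1000_tokens = num_elements / num_tokens * 1000
    return elements_per_1000_tokens > CHUNK_DENSITY_THRESHOLD * 1000

print(should_chunk_json(800, 4000))  # 200 elements/1000 tokens > 150 -> True (entity-dense JSON)
print(should_chunk_json(100, 4000))  # 25 elements/1000 tokens -> False (prose-like content)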
graphiti_core/llm_client/anthropic_client.py CHANGED
@@ -47,6 +47,9 @@ else:
  logger = logging.getLogger(__name__)
 
  AnthropicModel = Literal[
+ 'claude-sonnet-4-5-latest',
+ 'claude-sonnet-4-5-20250929',
+ 'claude-haiku-4-5-latest',
  'claude-3-7-sonnet-latest',
  'claude-3-7-sonnet-20250219',
  'claude-3-5-haiku-latest',
@@ -62,7 +65,39 @@ AnthropicModel = Literal[
  'claude-2.0',
  ]
 
- DEFAULT_MODEL: AnthropicModel = 'claude-3-7-sonnet-latest'
+ DEFAULT_MODEL: AnthropicModel = 'claude-haiku-4-5-latest'
+
+ # Maximum output tokens for different Anthropic models
+ # Based on official Anthropic documentation (as of 2025)
+ # Note: These represent standard limits without beta headers.
+ # Some models support higher limits with additional configuration (e.g., Claude 3.7 supports
+ # 128K with 'anthropic-beta: output-128k-2025-02-19' header, but this is not currently implemented).
+ ANTHROPIC_MODEL_MAX_TOKENS = {
+ # Claude 4.5 models - 64K tokens
+ 'claude-sonnet-4-5-latest': 65536,
+ 'claude-sonnet-4-5-20250929': 65536,
+ 'claude-haiku-4-5-latest': 65536,
+ # Claude 3.7 models - standard 64K tokens
+ 'claude-3-7-sonnet-latest': 65536,
+ 'claude-3-7-sonnet-20250219': 65536,
+ # Claude 3.5 models
+ 'claude-3-5-haiku-latest': 8192,
+ 'claude-3-5-haiku-20241022': 8192,
+ 'claude-3-5-sonnet-latest': 8192,
+ 'claude-3-5-sonnet-20241022': 8192,
+ 'claude-3-5-sonnet-20240620': 8192,
+ # Claude 3 models - 4K tokens
+ 'claude-3-opus-latest': 4096,
+ 'claude-3-opus-20240229': 4096,
+ 'claude-3-sonnet-20240229': 4096,
+ 'claude-3-haiku-20240307': 4096,
+ # Claude 2 models - 4K tokens
+ 'claude-2.1': 4096,
+ 'claude-2.0': 4096,
+ }
+
+ # Default max tokens for models not in the mapping
+ DEFAULT_ANTHROPIC_MAX_TOKENS = 8192
 
 
  class AnthropicClient(LLMClient):
@@ -177,6 +212,45 @@ class AnthropicClient(LLMClient):
  tool_choice_cast = typing.cast(ToolChoiceParam, tool_choice)
  return tool_list_cast, tool_choice_cast
 
+ def _get_max_tokens_for_model(self, model: str) -> int:
+ """Get the maximum output tokens for a specific Anthropic model.
+
+ Args:
+ model: The model name to look up
+
+ Returns:
+ int: The maximum output tokens for the model
+ """
+ return ANTHROPIC_MODEL_MAX_TOKENS.get(model, DEFAULT_ANTHROPIC_MAX_TOKENS)
+
+ def _resolve_max_tokens(self, requested_max_tokens: int | None, model: str) -> int:
+ """
+ Resolve the maximum output tokens to use based on precedence rules.
+
+ Precedence order (highest to lowest):
+ 1. Explicit max_tokens parameter passed to generate_response()
+ 2. Instance max_tokens set during client initialization
+ 3. Model-specific maximum tokens from ANTHROPIC_MODEL_MAX_TOKENS mapping
+ 4. DEFAULT_ANTHROPIC_MAX_TOKENS as final fallback
+
+ Args:
+ requested_max_tokens: The max_tokens parameter passed to generate_response()
+ model: The model name to look up model-specific limits
+
+ Returns:
+ int: The resolved maximum tokens to use
+ """
+ # 1. Use explicit parameter if provided
+ if requested_max_tokens is not None:
+ return requested_max_tokens
+
+ # 2. Use instance max_tokens if set during initialization
+ if self.max_tokens is not None:
+ return self.max_tokens
+
+ # 3. Use model-specific maximum or return DEFAULT_ANTHROPIC_MAX_TOKENS
+ return self._get_max_tokens_for_model(model)
+
  async def _generate_response(
  self,
  messages: list[Message],
@@ -204,12 +278,9 @@ class AnthropicClient(LLMClient):
  user_messages = [{'role': m.role, 'content': m.content} for m in messages[1:]]
  user_messages_cast = typing.cast(list[MessageParam], user_messages)
 
- # TODO: Replace hacky min finding solution after fixing hardcoded EXTRACT_EDGES_MAX_TOKENS = 16384 in
- # edge_operations.py. Throws errors with cheaper models that lower max_tokens.
- max_creation_tokens: int = min(
- max_tokens if max_tokens is not None else self.config.max_tokens,
- DEFAULT_MAX_TOKENS,
- )
+ # Resolve max_tokens dynamically based on the model's capabilities
+ # This allows different models to use their full output capacity
+ max_creation_tokens: int = self._resolve_max_tokens(max_tokens, self.model)
 
  try:
  # Create the appropriate tool based on whether response_model is provided
@@ -265,6 +336,8 @@ class AnthropicClient(LLMClient):
  response_model: type[BaseModel] | None = None,
  max_tokens: int | None = None,
  model_size: ModelSize = ModelSize.medium,
+ group_id: str | None = None,
+ prompt_name: str | None = None,
  ) -> dict[str, typing.Any]:
  """
  Generate a response from the LLM.
@@ -285,55 +358,72 @@ class AnthropicClient(LLMClient):
  if max_tokens is None:
  max_tokens = self.max_tokens
 
- retry_count = 0
- max_retries = 2
- last_error: Exception | None = None
-
- while retry_count <= max_retries:
- try:
- response = await self._generate_response(
- messages, response_model, max_tokens, model_size
- )
-
- # If we have a response_model, attempt to validate the response
- if response_model is not None:
- # Validate the response against the response_model
- model_instance = response_model(**response)
- return model_instance.model_dump()
-
- # If no validation needed, return the response
- return response
+ # Wrap entire operation in tracing span
+ with self.tracer.start_span('llm.generate') as span:
+ attributes = {
+ 'llm.provider': 'anthropic',
+ 'model.size': model_size.value,
+ 'max_tokens': max_tokens,
+ }
+ if prompt_name:
+ attributes['prompt.name'] = prompt_name
+ span.add_attributes(attributes)
+
+ retry_count = 0
+ max_retries = 2
+ last_error: Exception | None = None
+
+ while retry_count <= max_retries:
+ try:
+ response = await self._generate_response(
+ messages, response_model, max_tokens, model_size
+ )
 
- except (RateLimitError, RefusalError):
- # These errors should not trigger retries
- raise
- except Exception as e:
- last_error = e
+ # If we have a response_model, attempt to validate the response
+ if response_model is not None:
+ # Validate the response against the response_model
+ model_instance = response_model(**response)
+ return model_instance.model_dump()
+
+ # If no validation needed, return the response
+ return response
+
+ except (RateLimitError, RefusalError):
+ # These errors should not trigger retries
+ span.set_status('error', str(last_error))
+ raise
+ except Exception as e:
+ last_error = e
+
+ if retry_count >= max_retries:
+ if isinstance(e, ValidationError):
+ logger.error(
+ f'Validation error after {retry_count}/{max_retries} attempts: {e}'
+ )
+ else:
+ logger.error(f'Max retries ({max_retries}) exceeded. Last error: {e}')
+ span.set_status('error', str(e))
+ span.record_exception(e)
+ raise e
 
- if retry_count >= max_retries:
  if isinstance(e, ValidationError):
- logger.error(
- f'Validation error after {retry_count}/{max_retries} attempts: {e}'
- )
+ response_model_cast = typing.cast(type[BaseModel], response_model)
+ error_context = f'The previous response was invalid. Please provide a valid {response_model_cast.__name__} object. Error: {e}'
  else:
- logger.error(f'Max retries ({max_retries}) exceeded. Last error: {e}')
- raise e
+ error_context = (
+ f'The previous response attempt was invalid. '
+ f'Error type: {e.__class__.__name__}. '
+ f'Error details: {str(e)}. '
+ f'Please try again with a valid response.'
+ )
 
- if isinstance(e, ValidationError):
- response_model_cast = typing.cast(type[BaseModel], response_model)
- error_context = f'The previous response was invalid. Please provide a valid {response_model_cast.__name__} object. Error: {e}'
- else:
- error_context = (
- f'The previous response attempt was invalid. '
- f'Error type: {e.__class__.__name__}. '
- f'Error details: {str(e)}. '
- f'Please try again with a valid response.'
+ # Common retry logic
+ retry_count += 1
+ messages.append(Message(role='user', content=error_context))
+ logger.warning(
+ f'Retrying after error (attempt {retry_count}/{max_retries}): {e}'
  )
 
- # Common retry logic
- retry_count += 1
- messages.append(Message(role='user', content=error_context))
- logger.warning(f'Retrying after error (attempt {retry_count}/{max_retries}): {e}')
-
- # If we somehow get here, raise the last error
- raise last_error or Exception('Max retries exceeded with no specific error')
+ # If we somehow get here, raise the last error
+ span.set_status('error', str(last_error))
+ raise last_error or Exception('Max retries exceeded with no specific error')
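The new _resolve_max_tokens precedence is easy to check in isolation. A standalone sketch mirroring the documented order (explicit argument, then instance setting, then the per-model table, then the default); illustrative rather than the client's own code path:

# Names copied from the diff; table trimmed for brevity.
ANTHROPIC_MODEL_MAX_TOKENS = {'claude-haiku-4-5-latest': 65536, 'claude-3-5-haiku-latest': 8192}
DEFAULT_ANTHROPIC_MAX_TOKENS = 8192

def resolve_max_tokens(requested: int | None, instance_max: int | None, model: str) -> int:
    if requested is not None:      # 1. explicit generate_response() argument
        return requested
    if instance_max is not None:   # 2. value set at client construction
        return instance_max
    # 3./4. model-specific limit, falling back to the default
    return ANTHROPIC_MODEL_MAX_TOKENS.get(model, DEFAULT_ANTHROPIC_MAX_TOKENS)

print(resolve_max_tokens(None, None, 'claude-haiku-4-5-latest'))  # 65536
print(resolve_max_tokens(4096, None, 'claude-haiku-4-5-latest'))  # 4096
print(resolve_max_tokens(None, None, 'some-unlisted-model'))      # 8192 fallback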
graphiti_core/llm_client/azure_openai_client.py CHANGED
@@ -17,7 +17,7 @@ limitations under the License.
  import logging
  from typing import ClassVar
 
- from openai import AsyncAzureOpenAI
+ from openai import AsyncAzureOpenAI, AsyncOpenAI
  from openai.types.chat import ChatCompletionMessageParam
  from pydantic import BaseModel
 
@@ -28,18 +28,29 @@ logger = logging.getLogger(__name__)
 
 
  class AzureOpenAILLMClient(BaseOpenAIClient):
- """Wrapper class for AsyncAzureOpenAI that implements the LLMClient interface."""
+ """Wrapper class for Azure OpenAI that implements the LLMClient interface.
+
+ Supports both AsyncAzureOpenAI and AsyncOpenAI (with Azure v1 API endpoint).
+ """
 
  # Class-level constants
  MAX_RETRIES: ClassVar[int] = 2
 
  def __init__(
  self,
- azure_client: AsyncAzureOpenAI,
+ azure_client: AsyncAzureOpenAI | AsyncOpenAI,
  config: LLMConfig | None = None,
  max_tokens: int = DEFAULT_MAX_TOKENS,
+ reasoning: str | None = None,
+ verbosity: str | None = None,
  ):
- super().__init__(config, cache=False, max_tokens=max_tokens)
+ super().__init__(
+ config,
+ cache=False,
+ max_tokens=max_tokens,
+ reasoning=reasoning,
+ verbosity=verbosity,
+ )
  self.client = azure_client
 
  async def _create_structured_completion(
@@ -49,15 +60,29 @@ class AzureOpenAILLMClient(BaseOpenAIClient):
  temperature: float | None,
  max_tokens: int,
  response_model: type[BaseModel],
+ reasoning: str | None,
+ verbosity: str | None,
  ):
- """Create a structured completion using Azure OpenAI's beta parse API."""
- return await self.client.beta.chat.completions.parse(
- model=model,
- messages=messages,
- temperature=temperature,
- max_tokens=max_tokens,
- response_format=response_model, # type: ignore
- )
+ """Create a structured completion using Azure OpenAI's responses.parse API."""
+ supports_reasoning = self._supports_reasoning_features(model)
+ request_kwargs = {
+ 'model': model,
+ 'input': messages,
+ 'max_output_tokens': max_tokens,
+ 'text_format': response_model, # type: ignore
+ }
+
+ temperature_value = temperature if not supports_reasoning else None
+ if temperature_value is not None:
+ request_kwargs['temperature'] = temperature_value
+
+ if supports_reasoning and reasoning:
+ request_kwargs['reasoning'] = {'effort': reasoning} # type: ignore
+
+ if supports_reasoning and verbosity:
+ request_kwargs['text'] = {'verbosity': verbosity} # type: ignore
+
+ return await self.client.responses.parse(**request_kwargs)
 
  async def _create_completion(
  self,
@@ -68,10 +93,23 @@ class AzureOpenAILLMClient(BaseOpenAIClient):
  response_model: type[BaseModel] | None = None,
  ):
  """Create a regular completion with JSON format using Azure OpenAI."""
- return await self.client.chat.completions.create(
- model=model,
- messages=messages,
- temperature=temperature,
- max_tokens=max_tokens,
- response_format={'type': 'json_object'},
- )
+ supports_reasoning = self._supports_reasoning_features(model)
+
+ request_kwargs = {
+ 'model': model,
+ 'messages': messages,
+ 'max_tokens': max_tokens,
+ 'response_format': {'type': 'json_object'},
+ }
+
+ temperature_value = temperature if not supports_reasoning else None
+ if temperature_value is not None:
+ request_kwargs['temperature'] = temperature_value
+
+ return await self.client.chat.completions.create(**request_kwargs)
+
+ @staticmethod
+ def _supports_reasoning_features(model: str) -> bool:
+ """Return True when the Azure model supports reasoning/verbosity options."""
+ reasoning_prefixes = ('o1', 'o3', 'gpt-5')
+ return model.startswith(reasoning_prefixes)
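The _supports_reasoning_features gate decides whether temperature is dropped and reasoning/verbosity options are attached. A condensed, illustrative sketch of its effect on request kwargs (the real methods build these dicts inside the client):

def supports_reasoning_features(model: str) -> bool:
    return model.startswith(('o1', 'o3', 'gpt-5'))

def build_kwargs(model: str, temperature: float | None, reasoning: str | None) -> dict:
    kwargs: dict = {'model': model}
    if not supports_reasoning_features(model):
        if temperature is not None:
            kwargs['temperature'] = temperature      # classic models keep temperature
    elif reasoning:
        kwargs['reasoning'] = {'effort': reasoning}  # reasoning models take an effort hint instead
    return kwargs

print(build_kwargs('gpt-4o', 0.0, 'minimal'))      # {'model': 'gpt-4o', 'temperature': 0.0}
print(build_kwargs('gpt-5-mini', 0.0, 'minimal'))  # {'model': 'gpt-5-mini', 'reasoning': {'effort': 'minimal'}}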
graphiti_core/llm_client/client.py CHANGED
@@ -26,15 +26,34 @@ from pydantic import BaseModel
  from tenacity import retry, retry_if_exception, stop_after_attempt, wait_random_exponential
 
  from ..prompts.models import Message
+ from ..tracer import NoOpTracer, Tracer
  from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
  from .errors import RateLimitError
 
  DEFAULT_TEMPERATURE = 0
  DEFAULT_CACHE_DIR = './llm_cache'
 
- MULTILINGUAL_EXTRACTION_RESPONSES = (
- '\n\nAny extracted information should be returned in the same language as it was written in.'
- )
+
+ def get_extraction_language_instruction(group_id: str | None = None) -> str:
+ """Returns instruction for language extraction behavior.
+
+ Override this function to customize language extraction:
+ - Return empty string to disable multilingual instructions
+ - Return custom instructions for specific language requirements
+ - Use group_id to provide different instructions per group/partition
+
+ Args:
+ group_id: Optional partition identifier for the graph
+
+ Returns:
+ str: Language instruction to append to system messages
+ """
+ return (
+ '\n\nAny extracted information should be returned in the same language as it was written in. '
+ 'Only output non-English text when the user has written full sentences or phrases in that non-English language. '
+ 'Otherwise, output English.'
+ )
+
 
  logger = logging.getLogger(__name__)
 
@@ -60,11 +79,16 @@ class LLMClient(ABC):
  self.max_tokens = config.max_tokens
  self.cache_enabled = cache
  self.cache_dir = None
+ self.tracer: Tracer = NoOpTracer()
 
  # Only create the cache directory if caching is enabled
  if self.cache_enabled:
  self.cache_dir = Cache(DEFAULT_CACHE_DIR)
 
+ def set_tracer(self, tracer: Tracer) -> None:
+ """Set the tracer for this LLM client."""
+ self.tracer = tracer
+
  def _clean_input(self, input: str) -> str:
  """Clean input string of invalid unicode and control characters.
 
@@ -132,6 +156,8 @@ class LLMClient(ABC):
  response_model: type[BaseModel] | None = None,
  max_tokens: int | None = None,
  model_size: ModelSize = ModelSize.medium,
+ group_id: str | None = None,
+ prompt_name: str | None = None,
  ) -> dict[str, typing.Any]:
  if max_tokens is None:
  max_tokens = self.max_tokens
@@ -145,28 +171,64 @@ class LLMClient(ABC):
  )
 
  # Add multilingual extraction instructions
- messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
-
- if self.cache_enabled and self.cache_dir is not None:
- cache_key = self._get_cache_key(messages)
-
- cached_response = self.cache_dir.get(cache_key)
- if cached_response is not None:
- logger.debug(f'Cache hit for {cache_key}')
- return cached_response
+ messages[0].content += get_extraction_language_instruction(group_id)
 
  for message in messages:
  message.content = self._clean_input(message.content)
 
- response = await self._generate_response_with_retry(
- messages, response_model, max_tokens, model_size
- )
-
- if self.cache_enabled and self.cache_dir is not None:
- cache_key = self._get_cache_key(messages)
- self.cache_dir.set(cache_key, response)
-
- return response
+ # Wrap entire operation in tracing span
+ with self.tracer.start_span('llm.generate') as span:
+ attributes = {
+ 'llm.provider': self._get_provider_type(),
+ 'model.size': model_size.value,
+ 'max_tokens': max_tokens,
+ 'cache.enabled': self.cache_enabled,
+ }
+ if prompt_name:
+ attributes['prompt.name'] = prompt_name
+ span.add_attributes(attributes)
+
+ # Check cache first
+ if self.cache_enabled and self.cache_dir is not None:
+ cache_key = self._get_cache_key(messages)
+ cached_response = self.cache_dir.get(cache_key)
+ if cached_response is not None:
+ logger.debug(f'Cache hit for {cache_key}')
+ span.add_attributes({'cache.hit': True})
+ return cached_response
+
+ span.add_attributes({'cache.hit': False})
+
+ # Execute LLM call
+ try:
+ response = await self._generate_response_with_retry(
+ messages, response_model, max_tokens, model_size
+ )
+ except Exception as e:
+ span.set_status('error', str(e))
+ span.record_exception(e)
+ raise
+
+ # Cache response if enabled
+ if self.cache_enabled and self.cache_dir is not None:
+ cache_key = self._get_cache_key(messages)
+ self.cache_dir.set(cache_key, response)
+
+ return response
+
+ def _get_provider_type(self) -> str:
+ """Get provider type from class name."""
+ class_name = self.__class__.__name__.lower()
+ if 'openai' in class_name:
+ return 'openai'
+ elif 'anthropic' in class_name:
+ return 'anthropic'
+ elif 'gemini' in class_name:
+ return 'gemini'
+ elif 'groq' in class_name:
+ return 'groq'
+ else:
+ return 'unknown'
 
  def _get_failed_generation_log(self, messages: list[Message], output: str | None) -> str:
  """
graphiti_core/llm_client/config.py CHANGED
@@ -17,7 +17,7 @@ limitations under the License.
  import logging
  from enum import Enum
  DEFAULT_MAX_TOKENS = 8192
- DEFAULT_TEMPERATURE = 0
+ DEFAULT_TEMPERATURE = 1
 
 
  class ModelSize(Enum):