graphiti-core 0.17.1__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of graphiti-core might be problematic.

@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
+import logging
 from collections.abc import Iterable
 from typing import TYPE_CHECKING
 
@@ -34,7 +35,11 @@ from pydantic import Field
 
 from .client import EmbedderClient, EmbedderConfig
 
-DEFAULT_EMBEDDING_MODEL = 'embedding-001'
+logger = logging.getLogger(__name__)
+
+DEFAULT_EMBEDDING_MODEL = 'text-embedding-001' # gemini-embedding-001 or text-embedding-005
+
+DEFAULT_BATCH_SIZE = 100
 
 
 class GeminiEmbedderConfig(EmbedderConfig):
@@ -51,6 +56,7 @@ class GeminiEmbedder(EmbedderClient):
         self,
         config: GeminiEmbedderConfig | None = None,
         client: 'genai.Client | None' = None,
+        batch_size: int | None = None,
     ):
         """
         Initialize the GeminiEmbedder with the provided configuration and client.
@@ -58,6 +64,7 @@ class GeminiEmbedder(EmbedderClient):
         Args:
             config (GeminiEmbedderConfig | None): The configuration for the GeminiEmbedder, including API key, model, base URL, temperature, and max tokens.
             client (genai.Client | None): An optional async client instance to use. If not provided, a new genai.Client is created.
+            batch_size (int | None): An optional batch size to use. If not provided, the default batch size will be used.
         """
         if config is None:
             config = GeminiEmbedderConfig()
@@ -69,6 +76,15 @@ class GeminiEmbedder(EmbedderClient):
         else:
             self.client = client
 
+        if batch_size is None and self.config.embedding_model == 'gemini-embedding-001':
+            # Gemini API has a limit on the number of instances per request
+            #https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api
+            self.batch_size = 1
+        elif batch_size is None:
+            self.batch_size = DEFAULT_BATCH_SIZE
+        else:
+            self.batch_size = batch_size
+
     async def create(
         self, input_data: str | list[str] | Iterable[int] | Iterable[Iterable[int]]
     ) -> list[float]:
@@ -95,19 +111,67 @@ class GeminiEmbedder(EmbedderClient):
         return result.embeddings[0].values
 
     async def create_batch(self, input_data_list: list[str]) -> list[list[float]]:
-        # Generate embeddings
-        result = await self.client.aio.models.embed_content(
-            model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
-            contents=input_data_list, # type: ignore[arg-type] # mypy fails on broad union type
-            config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
-        )
-
-        if not result.embeddings or len(result.embeddings) == 0:
-            raise Exception('No embeddings returned')
-
-        embeddings = []
-        for embedding in result.embeddings:
-            if not embedding.values:
-                raise ValueError('Empty embedding values returned')
-            embeddings.append(embedding.values)
-        return embeddings
+        """
+        Create embeddings for a batch of input data using Google's Gemini embedding model.
+
+        This method handles batching to respect the Gemini API's limits on the number
+        of instances that can be processed in a single request.
+
+        Args:
+            input_data_list: A list of strings to create embeddings for.
+
+        Returns:
+            A list of embedding vectors (each vector is a list of floats).
+        """
+        if not input_data_list:
+            return []
+
+        batch_size = self.batch_size
+        all_embeddings = []
+
+        # Process inputs in batches
+        for i in range(0, len(input_data_list), batch_size):
+            batch = input_data_list[i:i + batch_size]
+
+            try:
+                # Generate embeddings for this batch
+                result = await self.client.aio.models.embed_content(
+                    model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
+                    contents=batch, # type: ignore[arg-type] # mypy fails on broad union type
+                    config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
+                )
+
+                if not result.embeddings or len(result.embeddings) == 0:
+                    raise Exception('No embeddings returned')
+
+                # Process embeddings from this batch
+                for embedding in result.embeddings:
+                    if not embedding.values:
+                        raise ValueError('Empty embedding values returned')
+                    all_embeddings.append(embedding.values)
+
+            except Exception as e:
+                # If batch processing fails, fall back to individual processing
+                logger.warning(f"Batch embedding failed for batch {i//batch_size + 1}, falling back to individual processing: {e}")
+
+                for item in batch:
+                    try:
+                        # Process each item individually
+                        result = await self.client.aio.models.embed_content(
+                            model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
+                            contents=[item], # type: ignore[arg-type] # mypy fails on broad union type
+                            config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
+                        )
+
+                        if not result.embeddings or len(result.embeddings) == 0:
+                            raise ValueError('No embeddings returned from Gemini API')
+                        if not result.embeddings[0].values:
+                            raise ValueError('Empty embedding values returned')
+
+                        all_embeddings.append(result.embeddings[0].values)
+
+                    except Exception as individual_error:
+                        logger.error(f"Failed to embed individual item: {individual_error}")
+                        raise individual_error
+
+        return all_embeddings
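
For orientation, a minimal usage sketch of the new batching path follows. It is not part of the diff: the import path is inferred from the file listing (graphiti_core/embedder/gemini.py), and the GeminiEmbedderConfig fields and API-key handling are assumptions.

import asyncio

from graphiti_core.embedder.gemini import GeminiEmbedder, GeminiEmbedderConfig


async def main() -> None:
    # Assumption: embedding_model and api_key are GeminiEmbedderConfig fields.
    # 'gemini-embedding-001' makes the constructor force batch_size = 1;
    # other models fall back to DEFAULT_BATCH_SIZE (100) unless overridden.
    embedder = GeminiEmbedder(
        config=GeminiEmbedderConfig(api_key='...', embedding_model='gemini-embedding-001'),
    )
    vectors = await embedder.create_batch(['first passage', 'second passage'])
    print(len(vectors), len(vectors[0]))


asyncio.run(main())
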
@@ -167,3 +167,18 @@ class LLMClient(ABC):
             self.cache_dir.set(cache_key, response)
 
         return response
+
+    def _get_failed_generation_log(self, messages: list[Message], output: str | None) -> str:
+        """
+        Log the full input messages, the raw output (if any), and the exception for debugging failed generations.
+        """
+        log = ""
+        log += f"Input messages: {json.dumps([m.model_dump() for m in messages], indent=2)}\n"
+        if output is not None:
+            if len(output) > 4000:
+                log += f"Raw output: {output[:2000]}... (truncated) ...{output[-2000:]}\n"
+            else:
+                log += f"Raw output: {output}\n"
+        else:
+            log += "No raw output available"
+        return log
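
The helper caps how much raw output ends up in the log. A standalone sketch of the same truncation rule (the truncate_for_log name is hypothetical, not from the package):

def truncate_for_log(output: str, limit: int = 4000, keep: int = 2000) -> str:
    # Outputs longer than `limit` are logged as head + tail only,
    # mirroring _get_failed_generation_log above.
    if len(output) > limit:
        return f'{output[:keep]}... (truncated) ...{output[-keep:]}'
    return output


assert len(truncate_for_log('x' * 10_000)) < 10_000  # middle 6,000 chars dropped
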
@@ -16,6 +16,7 @@ limitations under the License.
 
 import json
 import logging
+import re
 import typing
 from typing import TYPE_CHECKING, ClassVar
 
@@ -23,7 +24,7 @@ from pydantic import BaseModel
 
 from ..prompts.models import Message
 from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
-from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
+from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
 
 if TYPE_CHECKING:
@@ -44,7 +45,26 @@ else:
 logger = logging.getLogger(__name__)
 
 DEFAULT_MODEL = 'gemini-2.5-flash'
-DEFAULT_SMALL_MODEL = 'models/gemini-2.5-flash-lite-preview-06-17'
+DEFAULT_SMALL_MODEL = 'gemini-2.5-flash-lite-preview-06-17'
+
+# Maximum output tokens for different Gemini models
+GEMINI_MODEL_MAX_TOKENS = {
+    # Gemini 2.5 models
+    'gemini-2.5-pro': 65536,
+    'gemini-2.5-flash': 65536,
+    'gemini-2.5-flash-lite': 64000,
+    'models/gemini-2.5-flash-lite-preview-06-17': 64000,
+    # Gemini 2.0 models
+    'gemini-2.0-flash': 8192,
+    'gemini-2.0-flash-lite': 8192,
+    # Gemini 1.5 models
+    'gemini-1.5-pro': 8192,
+    'gemini-1.5-flash': 8192,
+    'gemini-1.5-flash-8b': 8192,
+}
+
+# Default max tokens for models not in the mapping
+DEFAULT_GEMINI_MAX_TOKENS = 8192
 
 
 class GeminiClient(LLMClient):
@@ -74,7 +94,7 @@ class GeminiClient(LLMClient):
         self,
         config: LLMConfig | None = None,
         cache: bool = False,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
         thinking_config: types.ThinkingConfig | None = None,
         client: 'genai.Client | None' = None,
     ):
@@ -146,11 +166,76 @@ class GeminiClient(LLMClient):
         else:
             return self.model or DEFAULT_MODEL
 
+    def _get_max_tokens_for_model(self, model: str) -> int:
+        """Get the maximum output tokens for a specific Gemini model."""
+        return GEMINI_MODEL_MAX_TOKENS.get(model, DEFAULT_GEMINI_MAX_TOKENS)
+
+    def _resolve_max_tokens(self, requested_max_tokens: int | None, model: str) -> int:
+        """
+        Resolve the maximum output tokens to use based on precedence rules.
+
+        Precedence order (highest to lowest):
+        1. Explicit max_tokens parameter passed to generate_response()
+        2. Instance max_tokens set during client initialization
+        3. Model-specific maximum tokens from GEMINI_MODEL_MAX_TOKENS mapping
+        4. DEFAULT_MAX_TOKENS as final fallback
+
+        Args:
+            requested_max_tokens: The max_tokens parameter passed to generate_response()
+            model: The model name to look up model-specific limits
+
+        Returns:
+            int: The resolved maximum tokens to use
+        """
+        # 1. Use explicit parameter if provided
+        if requested_max_tokens is not None:
+            return requested_max_tokens
+
+        # 2. Use instance max_tokens if set during initialization
+        if self.max_tokens is not None:
+            return self.max_tokens
+
+        # 3. Use model-specific maximum or return DEFAULT_GEMINI_MAX_TOKENS
+        return self._get_max_tokens_for_model(model)
+
+    def salvage_json(self, raw_output: str) -> dict[str, typing.Any] | None:
+        """
+        Attempt to salvage a JSON object if the raw output is truncated.
+
+        This is accomplished by looking for the last closing bracket for an array or object.
+        If found, it will try to load the JSON object from the raw output.
+        If the JSON object is not valid, it will return None.
+
+        Args:
+            raw_output (str): The raw output from the LLM.
+
+        Returns:
+            dict[str, typing.Any]: The salvaged JSON object.
+            None: If no salvage is possible.
+        """
+        if not raw_output:
+            return None
+        # Try to salvage a JSON array
+        array_match = re.search(r'\]\s*$', raw_output)
+        if array_match:
+            try:
+                return json.loads(raw_output[:array_match.end()])
+            except Exception:
+                pass
+        # Try to salvage a JSON object
+        obj_match = re.search(r'\}\s*$', raw_output)
+        if obj_match:
+            try:
+                return json.loads(raw_output[:obj_match.end()])
+            except Exception:
+                pass
+        return None
+
     async def _generate_response(
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
     ) -> dict[str, typing.Any]:
         """
@@ -159,7 +244,7 @@ class GeminiClient(LLMClient):
         Args:
             messages (list[Message]): A list of messages to send to the language model.
             response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
-            max_tokens (int): The maximum number of tokens to generate in the response.
+            max_tokens (int | None): The maximum number of tokens to generate in the response. If None, uses precedence rules.
             model_size (ModelSize): The size of the model to use (small or medium).
 
         Returns:
@@ -199,10 +284,13 @@ class GeminiClient(LLMClient):
             # Get the appropriate model for the requested size
             model = self._get_model_for_size(model_size)
 
+            # Resolve max_tokens using precedence rules (see _resolve_max_tokens for details)
+            resolved_max_tokens = self._resolve_max_tokens(max_tokens, model)
+
             # Create generation config
             generation_config = types.GenerateContentConfig(
                 temperature=self.temperature,
-                max_output_tokens=max_tokens or self.max_tokens,
+                max_output_tokens=resolved_max_tokens,
                 response_mime_type='application/json' if response_model else None,
                 response_schema=response_model if response_model else None,
                 system_instruction=system_prompt,
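
The value now feeding max_output_tokens follows the precedence documented in _resolve_max_tokens. A standalone sketch of that order (constants copied from the diff; the resolve_max_tokens function name is hypothetical):

GEMINI_MODEL_MAX_TOKENS = {'gemini-2.5-flash': 65536, 'gemini-2.0-flash': 8192}
DEFAULT_GEMINI_MAX_TOKENS = 8192


def resolve_max_tokens(requested: int | None, instance_default: int | None, model: str) -> int:
    if requested is not None:           # 1. explicit per-call value wins
        return requested
    if instance_default is not None:    # 2. then the value set at client construction
        return instance_default
    # 3. then the per-model ceiling, else the library-wide default
    return GEMINI_MODEL_MAX_TOKENS.get(model, DEFAULT_GEMINI_MAX_TOKENS)


print(resolve_max_tokens(None, None, 'gemini-2.5-flash'))   # 65536
print(resolve_max_tokens(1024, None, 'gemini-2.5-flash'))   # 1024
print(resolve_max_tokens(None, None, 'unknown-model'))      # 8192
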
@@ -216,6 +304,9 @@ class GeminiClient(LLMClient):
                 config=generation_config,
             )
 
+            # Always capture the raw output for debugging
+            raw_output = getattr(response, 'text', None)
+
             # Check for safety and prompt blocks
             self._check_safety_blocks(response)
             self._check_prompt_blocks(response)
@@ -223,18 +314,26 @@ class GeminiClient(LLMClient):
             # If this was a structured output request, parse the response into the Pydantic model
             if response_model is not None:
                 try:
-                    if not response.text:
+                    if not raw_output:
                         raise ValueError('No response text')
 
-                    validated_model = response_model.model_validate(json.loads(response.text))
+                    validated_model = response_model.model_validate(json.loads(raw_output))
 
                     # Return as a dictionary for API consistency
                     return validated_model.model_dump()
                 except Exception as e:
+                    if raw_output:
+                        logger.error("🦀 LLM generation failed parsing as JSON, will try to salvage.")
+                        logger.error(self._get_failed_generation_log(gemini_messages, raw_output))
+                        # Try to salvage
+                        salvaged = self.salvage_json(raw_output)
+                        if salvaged is not None:
+                            logger.warning("Salvaged partial JSON from truncated/malformed output.")
+                            return salvaged
                     raise Exception(f'Failed to parse structured response: {e}') from e
 
             # Otherwise, return the response text as a dictionary
-            return {'content': response.text}
+            return {'content': raw_output}
@@ -248,7 +347,7 @@ class GeminiClient(LLMClient):
                 raise RateLimitError from e
 
             logger.error(f'Error in generating LLM response: {e}')
-            raise
+            raise Exception from e
 
     async def generate_response(
         self,
@@ -270,16 +369,14 @@ class GeminiClient(LLMClient):
         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
-        if max_tokens is None:
-            max_tokens = self.max_tokens
-
         retry_count = 0
         last_error = None
+        last_output = None
 
         # Add multilingual extraction instructions
         messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
 
-        while retry_count <= self.MAX_RETRIES:
+        while retry_count < self.MAX_RETRIES:
             try:
                 response = await self._generate_response(
                     messages=messages,
@@ -287,22 +384,19 @@ class GeminiClient(LLMClient):
                     max_tokens=max_tokens,
                     model_size=model_size,
                 )
+                last_output = response.get('content') if isinstance(response, dict) and 'content' in response else None
                 return response
-            except RateLimitError:
+            except RateLimitError as e:
                 # Rate limit errors should not trigger retries (fail fast)
-                raise
+                raise e
             except Exception as e:
                 last_error = e
 
                 # Check if this is a safety block - these typically shouldn't be retried
-                if 'safety' in str(e).lower() or 'blocked' in str(e).lower():
+                error_text = str(e) or (str(e.__cause__) if e.__cause__ else '')
+                if 'safety' in error_text.lower() or 'blocked' in error_text.lower():
                     logger.warning(f'Content blocked by safety filters: {e}')
-                    raise
-
-                # Don't retry if we've hit the max retries
-                if retry_count >= self.MAX_RETRIES:
-                    logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
-                    raise
+                    raise Exception(f'Content blocked by safety filters: {e}') from e
 
                 retry_count += 1
 
@@ -321,5 +415,8 @@ class GeminiClient(LLMClient):
                     f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
                 )
 
-        # If we somehow get here, raise the last error
-        raise last_error or Exception('Max retries exceeded with no specific error')
+        # If we exit the loop without returning, all retries are exhausted
+        logger.error("🦀 LLM generation failed and retries are exhausted.")
+        logger.error(self._get_failed_generation_log(messages, last_output))
+        logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {last_error}')
+        raise last_error or Exception("Max retries exceeded")
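
Note the retry semantics: the loop condition changes from <= to <, so generate_response now makes at most MAX_RETRIES attempts, and exhaustion is handled after the loop, which logs the failed generation before re-raising the last error. A standalone sketch of that flow (hypothetical names, synchronous for brevity; the safety-block fast-fail is omitted):

MAX_RETRIES = 2


def call_with_retries(attempt_fn) -> str:
    retry_count = 0
    last_error: Exception | None = None
    while retry_count < MAX_RETRIES:     # at most MAX_RETRIES attempts
        try:
            return attempt_fn(retry_count)
        except Exception as e:           # broad catch mirrors the client above
            last_error = e
            retry_count += 1
    raise last_error or Exception('Max retries exceeded')


def flaky(attempt: int) -> str:
    if attempt == 0:
        raise RuntimeError('transient failure')
    return 'ok'


print(call_with_retries(flaky))  # first attempt fails, second succeeds -> 'ok'
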
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: graphiti-core
-Version: 0.17.1
+Version: 0.17.2
 Summary: A temporal graph building library
 Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
 Project-URL: Repository, https://github.com/getzep/graphiti
@@ -19,16 +19,16 @@ graphiti_core/driver/neo4j_driver.py,sha256=0MCAWAPay0LdcqrFSkY91GooUtrn1yX1CTKu
 graphiti_core/embedder/__init__.py,sha256=EL564ZuE-DZjcuKNUK_exMn_XHXm2LdO9fzdXePVKL4,179
 graphiti_core/embedder/azure_openai.py,sha256=OyomPwC1fIsddI-3n6g00kQFdQznZorBhHwkQKCLUok,2384
 graphiti_core/embedder/client.py,sha256=qEpSHceL_Gc4QQPJWIOnuNLemNuR_TYA4r28t2Vldbg,1115
-graphiti_core/embedder/gemini.py,sha256=0O3JCeeINRNF_jfrEPA-__YHpEHWPkXd7IYfsUMi-ng,4080
+graphiti_core/embedder/gemini.py,sha256=GdpnmRKunruLB4ViJMo6K-WEv8RqZvuLfgyKXtRcEMI,7218
 graphiti_core/embedder/openai.py,sha256=bIThUoLMeGlHG2-3VikzK6JZfOHKn4PKvUMx5sHxJy8,2192
 graphiti_core/embedder/voyage.py,sha256=oJHAZiNqjdEJOKgoKfGWcxK2-Ewqn5UB3vrBwIwP2u4,2546
 graphiti_core/llm_client/__init__.py,sha256=QgBWUiCeBp6YiA_xqyrDvJ9jIyy1hngH8g7FWahN3nw,776
 graphiti_core/llm_client/anthropic_client.py,sha256=xTFcrgMDK77BwnChBhYj51Jaa2mRNI850oJv2pKZI0A,12892
 graphiti_core/llm_client/azure_openai_client.py,sha256=ekERggAekbb7enes1RJqdRChf_mjaZTFXsnMbxO7azQ,2497
-graphiti_core/llm_client/client.py,sha256=v_w5TBbDJYYADCXSs2r287g5Ami2Urma-GGEbHSI_Jg,5826
+graphiti_core/llm_client/client.py,sha256=fgNnJgmoZN7v7PNoJGtt4MMdKkDNsmT9F2XOLKZOU38,6473
 graphiti_core/llm_client/config.py,sha256=90IgSBxZE_3nWdaEONVLUznI8lytPA7ZyexQz-_c55U,2560
 graphiti_core/llm_client/errors.py,sha256=pn6brRiLW60DAUIXJYKBT6MInrS4ueuH1hNLbn_JbQo,1243
-graphiti_core/llm_client/gemini_client.py,sha256=oyAOXc2ArPLulayoTRj2fjrKYP107WWs8LqM8574-vA,13434
+graphiti_core/llm_client/gemini_client.py,sha256=LKB6nktFMIn2fuRNRoGeBOmxlE3WuhB5sWI7yUwGGaA,17583
 graphiti_core/llm_client/groq_client.py,sha256=bYLE_cg1QEhugsJOXh4b1vPbxagKeMWqk48240GCzMs,2922
 graphiti_core/llm_client/openai_base_client.py,sha256=gfMcKPyLrylz_ouRdoenDWXyitmgfFZ17Zthbkq3Qs4,8126
 graphiti_core/llm_client/openai_client.py,sha256=ykBK94gxzE7iXux5rvOzVNA8q0Sqzq-8njPB75XcRe8,3240
@@ -71,7 +71,7 @@ graphiti_core/utils/maintenance/node_operations.py,sha256=4jMlmbB3zwK9KzIm2QXRxz
 graphiti_core/utils/maintenance/temporal_operations.py,sha256=mJkw9xLB4W2BsLfC5POr0r-PHWL9SIfNj_l_xu0B5ug,3410
 graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 graphiti_core/utils/ontology_utils/entity_types_utils.py,sha256=QJX5cG0GSSNF_Mm_yrldr69wjVAbN_MxLhOSznz85Hk,1279
-graphiti_core-0.17.1.dist-info/METADATA,sha256=U4lMqT6JebOB4Rcxvlym2Gb3f1U3RexyG4jU2mhY9UU,23791
-graphiti_core-0.17.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-graphiti_core-0.17.1.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
-graphiti_core-0.17.1.dist-info/RECORD,,
+graphiti_core-0.17.2.dist-info/METADATA,sha256=bqv7KcvfhQ5qp4F2BYrWK5ziw2uJzAA2rPxi-KlJTp8,23791
+graphiti_core-0.17.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+graphiti_core-0.17.2.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
+graphiti_core-0.17.2.dist-info/RECORD,,