graphiti-core 0.17.1__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of graphiti-core might be problematic.

@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
+import logging
 from collections.abc import Iterable
 from typing import TYPE_CHECKING
 
@@ -34,7 +35,11 @@ from pydantic import Field
 
 from .client import EmbedderClient, EmbedderConfig
 
-DEFAULT_EMBEDDING_MODEL = 'embedding-001'
+logger = logging.getLogger(__name__)
+
+DEFAULT_EMBEDDING_MODEL = 'text-embedding-001' # gemini-embedding-001 or text-embedding-005
+
+DEFAULT_BATCH_SIZE = 100
 
 
 class GeminiEmbedderConfig(EmbedderConfig):
@@ -51,6 +56,7 @@ class GeminiEmbedder(EmbedderClient):
         self,
         config: GeminiEmbedderConfig | None = None,
         client: 'genai.Client | None' = None,
+        batch_size: int | None = None,
     ):
         """
         Initialize the GeminiEmbedder with the provided configuration and client.
@@ -58,6 +64,7 @@ class GeminiEmbedder(EmbedderClient):
         Args:
             config (GeminiEmbedderConfig | None): The configuration for the GeminiEmbedder, including API key, model, base URL, temperature, and max tokens.
             client (genai.Client | None): An optional async client instance to use. If not provided, a new genai.Client is created.
+            batch_size (int | None): An optional batch size to use. If not provided, the default batch size will be used.
         """
         if config is None:
             config = GeminiEmbedderConfig()
@@ -69,6 +76,15 @@ class GeminiEmbedder(EmbedderClient):
         else:
             self.client = client
 
+        if batch_size is None and self.config.embedding_model == 'gemini-embedding-001':
+            # Gemini API has a limit on the number of instances per request
+            #https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api
+            self.batch_size = 1
+        elif batch_size is None:
+            self.batch_size = DEFAULT_BATCH_SIZE
+        else:
+            self.batch_size = batch_size
+
     async def create(
         self, input_data: str | list[str] | Iterable[int] | Iterable[Iterable[int]]
     ) -> list[float]:
@@ -95,19 +111,67 @@ class GeminiEmbedder(EmbedderClient):
         return result.embeddings[0].values
 
     async def create_batch(self, input_data_list: list[str]) -> list[list[float]]:
-        # Generate embeddings
-        result = await self.client.aio.models.embed_content(
-            model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
-            contents=input_data_list, # type: ignore[arg-type] # mypy fails on broad union type
-            config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
-        )
-
-        if not result.embeddings or len(result.embeddings) == 0:
-            raise Exception('No embeddings returned')
-
-        embeddings = []
-        for embedding in result.embeddings:
-            if not embedding.values:
-                raise ValueError('Empty embedding values returned')
-            embeddings.append(embedding.values)
-        return embeddings
+        """
+        Create embeddings for a batch of input data using Google's Gemini embedding model.
+
+        This method handles batching to respect the Gemini API's limits on the number
+        of instances that can be processed in a single request.
+
+        Args:
+            input_data_list: A list of strings to create embeddings for.
+
+        Returns:
+            A list of embedding vectors (each vector is a list of floats).
+        """
+        if not input_data_list:
+            return []
+
+        batch_size = self.batch_size
+        all_embeddings = []
+
+        # Process inputs in batches
+        for i in range(0, len(input_data_list), batch_size):
+            batch = input_data_list[i:i + batch_size]
+
+            try:
+                # Generate embeddings for this batch
+                result = await self.client.aio.models.embed_content(
+                    model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
+                    contents=batch, # type: ignore[arg-type] # mypy fails on broad union type
+                    config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
+                )
+
+                if not result.embeddings or len(result.embeddings) == 0:
+                    raise Exception('No embeddings returned')
+
+                # Process embeddings from this batch
+                for embedding in result.embeddings:
+                    if not embedding.values:
+                        raise ValueError('Empty embedding values returned')
+                    all_embeddings.append(embedding.values)
+
+            except Exception as e:
+                # If batch processing fails, fall back to individual processing
+                logger.warning(f"Batch embedding failed for batch {i//batch_size + 1}, falling back to individual processing: {e}")
+
+                for item in batch:
+                    try:
+                        # Process each item individually
+                        result = await self.client.aio.models.embed_content(
+                            model=self.config.embedding_model or DEFAULT_EMBEDDING_MODEL,
+                            contents=[item], # type: ignore[arg-type] # mypy fails on broad union type
+                            config=types.EmbedContentConfig(output_dimensionality=self.config.embedding_dim),
+                        )
+
+                        if not result.embeddings or len(result.embeddings) == 0:
+                            raise ValueError('No embeddings returned from Gemini API')
+                        if not result.embeddings[0].values:
+                            raise ValueError('Empty embedding values returned')
+
+                        all_embeddings.append(result.embeddings[0].values)
+
+                    except Exception as individual_error:
+                        logger.error(f"Failed to embed individual item: {individual_error}")
+                        raise individual_error
+
+        return all_embeddings
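
For orientation, a minimal usage sketch of the new batching path follows. It is not part of the diff: the import path is inferred from the file listing (graphiti_core/embedder/gemini.py), and the GeminiEmbedderConfig fields and API-key handling are assumptions.

import asyncio

from graphiti_core.embedder.gemini import GeminiEmbedder, GeminiEmbedderConfig


async def main() -> None:
    # Assumption: embedding_model and api_key are GeminiEmbedderConfig fields.
    # 'gemini-embedding-001' makes the constructor force batch_size = 1;
    # other models fall back to DEFAULT_BATCH_SIZE (100) unless overridden.
    embedder = GeminiEmbedder(
        config=GeminiEmbedderConfig(api_key='...', embedding_model='gemini-embedding-001'),
    )
    vectors = await embedder.create_batch(['first passage', 'second passage'])
    print(len(vectors), len(vectors[0]))


asyncio.run(main())
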
@@ -167,3 +167,18 @@ class LLMClient(ABC):
             self.cache_dir.set(cache_key, response)
 
         return response
+
+    def _get_failed_generation_log(self, messages: list[Message], output: str | None) -> str:
+        """
+        Log the full input messages, the raw output (if any), and the exception for debugging failed generations.
+        """
+        log = ""
+        log += f"Input messages: {json.dumps([m.model_dump() for m in messages], indent=2)}\n"
+        if output is not None:
+            if len(output) > 4000:
+                log += f"Raw output: {output[:2000]}... (truncated) ...{output[-2000:]}\n"
+            else:
+                log += f"Raw output: {output}\n"
+        else:
+            log += "No raw output available"
+        return log
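
The helper caps how much raw output ends up in the log. A standalone sketch of the same truncation rule (the truncate_for_log name is hypothetical, not from the package):

def truncate_for_log(output: str, limit: int = 4000, keep: int = 2000) -> str:
    # Outputs longer than `limit` are logged as head + tail only,
    # mirroring _get_failed_generation_log above.
    if len(output) > limit:
        return f'{output[:keep]}... (truncated) ...{output[-keep:]}'
    return output


assert len(truncate_for_log('x' * 10_000)) < 10_000  # middle 6,000 chars dropped
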
@@ -16,6 +16,7 @@ limitations under the License.
 
 import json
 import logging
+import re
 import typing
 from typing import TYPE_CHECKING, ClassVar
 
@@ -23,7 +24,7 @@ from pydantic import BaseModel
 
 from ..prompts.models import Message
 from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
-from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
+from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
 
 if TYPE_CHECKING:
@@ -44,7 +45,26 @@ else:
 logger = logging.getLogger(__name__)
 
 DEFAULT_MODEL = 'gemini-2.5-flash'
-DEFAULT_SMALL_MODEL = 'models/gemini-2.5-flash-lite-preview-06-17'
+DEFAULT_SMALL_MODEL = 'gemini-2.5-flash-lite-preview-06-17'
+
+# Maximum output tokens for different Gemini models
+GEMINI_MODEL_MAX_TOKENS = {
+    # Gemini 2.5 models
+    'gemini-2.5-pro': 65536,
+    'gemini-2.5-flash': 65536,
+    'gemini-2.5-flash-lite': 64000,
+    'models/gemini-2.5-flash-lite-preview-06-17': 64000,
+    # Gemini 2.0 models
+    'gemini-2.0-flash': 8192,
+    'gemini-2.0-flash-lite': 8192,
+    # Gemini 1.5 models
+    'gemini-1.5-pro': 8192,
+    'gemini-1.5-flash': 8192,
+    'gemini-1.5-flash-8b': 8192,
+}
+
+# Default max tokens for models not in the mapping
+DEFAULT_GEMINI_MAX_TOKENS = 8192
 
 
 class GeminiClient(LLMClient):
@@ -74,7 +94,7 @@ class GeminiClient(LLMClient):
         self,
         config: LLMConfig | None = None,
         cache: bool = False,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
         thinking_config: types.ThinkingConfig | None = None,
         client: 'genai.Client | None' = None,
     ):
@@ -146,11 +166,76 @@ class GeminiClient(LLMClient):
         else:
             return self.model or DEFAULT_MODEL
 
+    def _get_max_tokens_for_model(self, model: str) -> int:
+        """Get the maximum output tokens for a specific Gemini model."""
+        return GEMINI_MODEL_MAX_TOKENS.get(model, DEFAULT_GEMINI_MAX_TOKENS)
+
+    def _resolve_max_tokens(self, requested_max_tokens: int | None, model: str) -> int:
+        """
+        Resolve the maximum output tokens to use based on precedence rules.
+
+        Precedence order (highest to lowest):
+        1. Explicit max_tokens parameter passed to generate_response()
+        2. Instance max_tokens set during client initialization
+        3. Model-specific maximum tokens from GEMINI_MODEL_MAX_TOKENS mapping
+        4. DEFAULT_MAX_TOKENS as final fallback
+
+        Args:
+            requested_max_tokens: The max_tokens parameter passed to generate_response()
+            model: The model name to look up model-specific limits
+
+        Returns:
+            int: The resolved maximum tokens to use
+        """
+        # 1. Use explicit parameter if provided
+        if requested_max_tokens is not None:
+            return requested_max_tokens
+
+        # 2. Use instance max_tokens if set during initialization
+        if self.max_tokens is not None:
+            return self.max_tokens
+
+        # 3. Use model-specific maximum or return DEFAULT_GEMINI_MAX_TOKENS
+        return self._get_max_tokens_for_model(model)
+
+    def salvage_json(self, raw_output: str) -> dict[str, typing.Any] | None:
+        """
+        Attempt to salvage a JSON object if the raw output is truncated.
+
+        This is accomplished by looking for the last closing bracket for an array or object.
+        If found, it will try to load the JSON object from the raw output.
+        If the JSON object is not valid, it will return None.
+
+        Args:
+            raw_output (str): The raw output from the LLM.
+
+        Returns:
+            dict[str, typing.Any]: The salvaged JSON object.
+            None: If no salvage is possible.
+        """
+        if not raw_output:
+            return None
+        # Try to salvage a JSON array
+        array_match = re.search(r'\]\s*$', raw_output)
+        if array_match:
+            try:
+                return json.loads(raw_output[:array_match.end()])
+            except Exception:
+                pass
+        # Try to salvage a JSON object
+        obj_match = re.search(r'\}\s*$', raw_output)
+        if obj_match:
+            try:
+                return json.loads(raw_output[:obj_match.end()])
+            except Exception:
+                pass
+        return None
+
     async def _generate_response(
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
     ) -> dict[str, typing.Any]:
         """
@@ -159,7 +244,7 @@ class GeminiClient(LLMClient):
         Args:
             messages (list[Message]): A list of messages to send to the language model.
             response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
-            max_tokens (int): The maximum number of tokens to generate in the response.
+            max_tokens (int | None): The maximum number of tokens to generate in the response. If None, uses precedence rules.
             model_size (ModelSize): The size of the model to use (small or medium).
 
         Returns:
@@ -199,10 +284,13 @@ class GeminiClient(LLMClient):
             # Get the appropriate model for the requested size
             model = self._get_model_for_size(model_size)
 
+            # Resolve max_tokens using precedence rules (see _resolve_max_tokens for details)
+            resolved_max_tokens = self._resolve_max_tokens(max_tokens, model)
+
             # Create generation config
             generation_config = types.GenerateContentConfig(
                 temperature=self.temperature,
-                max_output_tokens=max_tokens or self.max_tokens,
+                max_output_tokens=resolved_max_tokens,
                 response_mime_type='application/json' if response_model else None,
                 response_schema=response_model if response_model else None,
                 system_instruction=system_prompt,
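
The value now feeding max_output_tokens follows the precedence documented in _resolve_max_tokens. A standalone sketch of that order (constants copied from the diff; the resolve_max_tokens function name is hypothetical):

GEMINI_MODEL_MAX_TOKENS = {'gemini-2.5-flash': 65536, 'gemini-2.0-flash': 8192}
DEFAULT_GEMINI_MAX_TOKENS = 8192


def resolve_max_tokens(requested: int | None, instance_default: int | None, model: str) -> int:
    if requested is not None:           # 1. explicit per-call value wins
        return requested
    if instance_default is not None:    # 2. then the value set at client construction
        return instance_default
    # 3. then the per-model ceiling, else the library-wide default
    return GEMINI_MODEL_MAX_TOKENS.get(model, DEFAULT_GEMINI_MAX_TOKENS)


print(resolve_max_tokens(None, None, 'gemini-2.5-flash'))   # 65536
print(resolve_max_tokens(1024, None, 'gemini-2.5-flash'))   # 1024
print(resolve_max_tokens(None, None, 'unknown-model'))      # 8192
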
@@ -216,6 +304,9 @@ class GeminiClient(LLMClient):
                 config=generation_config,
             )
 
+            # Always capture the raw output for debugging
+            raw_output = getattr(response, 'text', None)
+
             # Check for safety and prompt blocks
             self._check_safety_blocks(response)
             self._check_prompt_blocks(response)
@@ -223,18 +314,26 @@ class GeminiClient(LLMClient):
             # If this was a structured output request, parse the response into the Pydantic model
             if response_model is not None:
                 try:
-                    if not response.text:
+                    if not raw_output:
                         raise ValueError('No response text')
 
-                    validated_model = response_model.model_validate(json.loads(response.text))
+                    validated_model = response_model.model_validate(json.loads(raw_output))
 
                     # Return as a dictionary for API consistency
                     return validated_model.model_dump()
                 except Exception as e:
+                    if raw_output:
+                        logger.error("🦀 LLM generation failed parsing as JSON, will try to salvage.")
+                        logger.error(self._get_failed_generation_log(gemini_messages, raw_output))
+                        # Try to salvage
+                        salvaged = self.salvage_json(raw_output)
+                        if salvaged is not None:
+                            logger.warning("Salvaged partial JSON from truncated/malformed output.")
+                            return salvaged
                     raise Exception(f'Failed to parse structured response: {e}') from e
 
             # Otherwise, return the response text as a dictionary
-            return {'content': response.text}
+            return {'content': raw_output}
@@ -248,7 +347,7 @@ class GeminiClient(LLMClient):
                 raise RateLimitError from e
 
             logger.error(f'Error in generating LLM response: {e}')
-            raise
+            raise Exception from e
 
     async def generate_response(
         self,
@@ -270,16 +369,14 @@ class GeminiClient(LLMClient):
         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
-        if max_tokens is None:
-            max_tokens = self.max_tokens
-
         retry_count = 0
         last_error = None
+        last_output = None
 
         # Add multilingual extraction instructions
         messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
 
-        while retry_count <= self.MAX_RETRIES:
+        while retry_count < self.MAX_RETRIES:
             try:
                 response = await self._generate_response(
                     messages=messages,
@@ -287,22 +384,19 @@ class GeminiClient(LLMClient):
                     max_tokens=max_tokens,
                     model_size=model_size,
                 )
+                last_output = response.get('content') if isinstance(response, dict) and 'content' in response else None
                 return response
-            except RateLimitError:
+            except RateLimitError as e:
                 # Rate limit errors should not trigger retries (fail fast)
-                raise
+                raise e
             except Exception as e:
                 last_error = e
 
                 # Check if this is a safety block - these typically shouldn't be retried
-                if 'safety' in str(e).lower() or 'blocked' in str(e).lower():
+                error_text = str(e) or (str(e.__cause__) if e.__cause__ else '')
+                if 'safety' in error_text.lower() or 'blocked' in error_text.lower():
                     logger.warning(f'Content blocked by safety filters: {e}')
-                    raise
-
-                # Don't retry if we've hit the max retries
-                if retry_count >= self.MAX_RETRIES:
-                    logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
-                    raise
+                    raise Exception(f'Content blocked by safety filters: {e}') from e
 
                 retry_count += 1
 
@@ -321,5 +415,8 @@ class GeminiClient(LLMClient):
                     f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
                 )
 
-        # If we somehow get here, raise the last error
-        raise last_error or Exception('Max retries exceeded with no specific error')
+        # If we exit the loop without returning, all retries are exhausted
+        logger.error("🦀 LLM generation failed and retries are exhausted.")
+        logger.error(self._get_failed_generation_log(messages, last_output))
+        logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {last_error}')
+        raise last_error or Exception("Max retries exceeded")
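
Note the retry semantics: the loop condition changes from <= to <, so generate_response now makes at most MAX_RETRIES attempts, and exhaustion is handled after the loop, which logs the failed generation before re-raising the last error. A standalone sketch of that flow (hypothetical names, synchronous for brevity; the safety-block fast-fail is omitted):

MAX_RETRIES = 2


def call_with_retries(attempt_fn) -> str:
    retry_count = 0
    last_error: Exception | None = None
    while retry_count < MAX_RETRIES:     # at most MAX_RETRIES attempts
        try:
            return attempt_fn(retry_count)
        except Exception as e:           # broad catch mirrors the client above
            last_error = e
            retry_count += 1
    raise last_error or Exception('Max retries exceeded')


def flaky(attempt: int) -> str:
    if attempt == 0:
        raise RuntimeError('transient failure')
    return 'ok'


print(call_with_retries(flaky))  # first attempt fails, second succeeds -> 'ok'
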
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: graphiti-core
-Version: 0.17.1
+Version: 0.17.2
 Summary: A temporal graph building library
 Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
 Project-URL: Repository, https://github.com/getzep/graphiti
@@ -19,16 +19,16 @@ graphiti_core/driver/neo4j_driver.py,sha256=0MCAWAPay0LdcqrFSkY91GooUtrn1yX1CTKu
 graphiti_core/embedder/__init__.py,sha256=EL564ZuE-DZjcuKNUK_exMn_XHXm2LdO9fzdXePVKL4,179
 graphiti_core/embedder/azure_openai.py,sha256=OyomPwC1fIsddI-3n6g00kQFdQznZorBhHwkQKCLUok,2384
 graphiti_core/embedder/client.py,sha256=qEpSHceL_Gc4QQPJWIOnuNLemNuR_TYA4r28t2Vldbg,1115
-graphiti_core/embedder/gemini.py,sha256=0O3JCeeINRNF_jfrEPA-__YHpEHWPkXd7IYfsUMi-ng,4080
+graphiti_core/embedder/gemini.py,sha256=GdpnmRKunruLB4ViJMo6K-WEv8RqZvuLfgyKXtRcEMI,7218
 graphiti_core/embedder/openai.py,sha256=bIThUoLMeGlHG2-3VikzK6JZfOHKn4PKvUMx5sHxJy8,2192
 graphiti_core/embedder/voyage.py,sha256=oJHAZiNqjdEJOKgoKfGWcxK2-Ewqn5UB3vrBwIwP2u4,2546
 graphiti_core/llm_client/__init__.py,sha256=QgBWUiCeBp6YiA_xqyrDvJ9jIyy1hngH8g7FWahN3nw,776
 graphiti_core/llm_client/anthropic_client.py,sha256=xTFcrgMDK77BwnChBhYj51Jaa2mRNI850oJv2pKZI0A,12892
 graphiti_core/llm_client/azure_openai_client.py,sha256=ekERggAekbb7enes1RJqdRChf_mjaZTFXsnMbxO7azQ,2497
-graphiti_core/llm_client/client.py,sha256=v_w5TBbDJYYADCXSs2r287g5Ami2Urma-GGEbHSI_Jg,5826
+graphiti_core/llm_client/client.py,sha256=fgNnJgmoZN7v7PNoJGtt4MMdKkDNsmT9F2XOLKZOU38,6473
 graphiti_core/llm_client/config.py,sha256=90IgSBxZE_3nWdaEONVLUznI8lytPA7ZyexQz-_c55U,2560
 graphiti_core/llm_client/errors.py,sha256=pn6brRiLW60DAUIXJYKBT6MInrS4ueuH1hNLbn_JbQo,1243
-graphiti_core/llm_client/gemini_client.py,sha256=oyAOXc2ArPLulayoTRj2fjrKYP107WWs8LqM8574-vA,13434
+graphiti_core/llm_client/gemini_client.py,sha256=LKB6nktFMIn2fuRNRoGeBOmxlE3WuhB5sWI7yUwGGaA,17583
 graphiti_core/llm_client/groq_client.py,sha256=bYLE_cg1QEhugsJOXh4b1vPbxagKeMWqk48240GCzMs,2922
 graphiti_core/llm_client/openai_base_client.py,sha256=gfMcKPyLrylz_ouRdoenDWXyitmgfFZ17Zthbkq3Qs4,8126
 graphiti_core/llm_client/openai_client.py,sha256=ykBK94gxzE7iXux5rvOzVNA8q0Sqzq-8njPB75XcRe8,3240
@@ -71,7 +71,7 @@ graphiti_core/utils/maintenance/node_operations.py,sha256=4jMlmbB3zwK9KzIm2QXRxz
 graphiti_core/utils/maintenance/temporal_operations.py,sha256=mJkw9xLB4W2BsLfC5POr0r-PHWL9SIfNj_l_xu0B5ug,3410
 graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 graphiti_core/utils/ontology_utils/entity_types_utils.py,sha256=QJX5cG0GSSNF_Mm_yrldr69wjVAbN_MxLhOSznz85Hk,1279
-graphiti_core-0.17.1.dist-info/METADATA,sha256=U4lMqT6JebOB4Rcxvlym2Gb3f1U3RexyG4jU2mhY9UU,23791
-graphiti_core-0.17.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-graphiti_core-0.17.1.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
-graphiti_core-0.17.1.dist-info/RECORD,,
+graphiti_core-0.17.2.dist-info/METADATA,sha256=bqv7KcvfhQ5qp4F2BYrWK5ziw2uJzAA2rPxi-KlJTp8,23791
+graphiti_core-0.17.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+graphiti_core-0.17.2.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
+graphiti_core-0.17.2.dist-info/RECORD,,