graphiti-core 0.22.0rc2__py3-none-any.whl → 0.22.0rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graphiti-core has been flagged as potentially problematic by the registry.
- graphiti_core/graphiti.py +459 -326
- graphiti_core/graphiti_types.py +2 -0
- graphiti_core/llm_client/anthropic_client.py +63 -46
- graphiti_core/llm_client/client.py +60 -17
- graphiti_core/llm_client/gemini_client.py +69 -52
- graphiti_core/llm_client/openai_base_client.py +58 -41
- graphiti_core/llm_client/openai_generic_client.py +58 -41
- graphiti_core/prompts/extract_nodes.py +13 -13
- graphiti_core/tracer.py +193 -0
- graphiti_core/utils/maintenance/community_operations.py +4 -1
- graphiti_core/utils/maintenance/edge_operations.py +4 -0
- graphiti_core/utils/maintenance/node_operations.py +10 -2
- graphiti_core/utils/maintenance/temporal_operations.py +4 -1
- graphiti_core/utils/text_utils.py +53 -0
- {graphiti_core-0.22.0rc2.dist-info → graphiti_core-0.22.0rc4.dist-info}/METADATA +5 -1
- {graphiti_core-0.22.0rc2.dist-info → graphiti_core-0.22.0rc4.dist-info}/RECORD +18 -16
- {graphiti_core-0.22.0rc2.dist-info → graphiti_core-0.22.0rc4.dist-info}/WHEEL +0 -0
- {graphiti_core-0.22.0rc2.dist-info → graphiti_core-0.22.0rc4.dist-info}/licenses/LICENSE +0 -0
graphiti_core/graphiti_types.py
CHANGED

@@ -20,6 +20,7 @@ from graphiti_core.cross_encoder import CrossEncoderClient
 from graphiti_core.driver.driver import GraphDriver
 from graphiti_core.embedder import EmbedderClient
 from graphiti_core.llm_client import LLMClient
+from graphiti_core.tracer import Tracer


 class GraphitiClients(BaseModel):
@@ -27,5 +28,6 @@ class GraphitiClients(BaseModel):
     llm_client: LLMClient
     embedder: EmbedderClient
     cross_encoder: CrossEncoderClient
+    tracer: Tracer

     model_config = ConfigDict(arbitrary_types_allowed=True)
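The new graphiti_core/tracer.py module (193 added lines) is not rendered in this diff view. Based purely on the call sites in the client diffs below (start_span as a context manager, add_attributes, set_status, record_exception, and a NoOpTracer default), a minimal interface consistent with that usage might look like the following sketch. This is an inference for orientation, not the packaged implementation; all names and signatures beyond those visible in the diff are assumptions.

```python
# Hypothetical sketch of the tracer interface implied by the call sites below.
from __future__ import annotations

from abc import ABC, abstractmethod
from contextlib import AbstractContextManager, contextmanager
from typing import Any, Iterator


class TracerSpan(ABC):
    """What the LLM clients appear to need from a span object."""

    @abstractmethod
    def add_attributes(self, attributes: dict[str, Any]) -> None: ...

    @abstractmethod
    def set_status(self, status: str, description: str | None = None) -> None: ...

    @abstractmethod
    def record_exception(self, exception: BaseException) -> None: ...


class Tracer(ABC):
    """A span factory used as a context manager: `with tracer.start_span(name) as span`."""

    @abstractmethod
    def start_span(self, name: str) -> AbstractContextManager[TracerSpan]: ...


class NoOpSpan(TracerSpan):
    def add_attributes(self, attributes: dict[str, Any]) -> None:
        pass

    def set_status(self, status: str, description: str | None = None) -> None:
        pass

    def record_exception(self, exception: BaseException) -> None:
        pass


class NoOpTracer(Tracer):
    """Default tracer: spans are accepted and silently discarded."""

    @contextmanager
    def start_span(self, name: str) -> Iterator[TracerSpan]:
        yield NoOpSpan()
```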
graphiti_core/llm_client/anthropic_client.py
CHANGED

@@ -265,6 +265,8 @@ class AnthropicClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the LLM.
@@ -285,55 +287,70 @@ class AnthropicClient(LLMClient):
         if max_tokens is None:
             max_tokens = self.max_tokens

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'anthropic',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            max_retries = 2
+            last_error: Exception | None = None
+
+            while retry_count <= max_retries:
+                try:
+                    response = await self._generate_response(
+                        messages, response_model, max_tokens, model_size
+                    )

+                    # If we have a response_model, attempt to validate the response
+                    if response_model is not None:
+                        # Validate the response against the response_model
+                        model_instance = response_model(**response)
+                        return model_instance.model_dump()
+
+                    # If no validation needed, return the response
+                    return response
+
+                except (RateLimitError, RefusalError):
+                    # These errors should not trigger retries
+                    span.set_status('error', str(last_error))
+                    raise
+                except Exception as e:
+                    last_error = e
+
+                    if retry_count >= max_retries:
+                        if isinstance(e, ValidationError):
+                            logger.error(
+                                f'Validation error after {retry_count}/{max_retries} attempts: {e}'
+                            )
+                        else:
+                            logger.error(f'Max retries ({max_retries}) exceeded. Last error: {e}')
+                        span.set_status('error', str(e))
+                        span.record_exception(e)
+                        raise e

                     if isinstance(e, ValidationError):
+                        response_model_cast = typing.cast(type[BaseModel], response_model)
+                        error_context = f'The previous response was invalid. Please provide a valid {response_model_cast.__name__} object. Error: {e}'
                     else:
+                        error_context = (
+                            f'The previous response attempt was invalid. '
+                            f'Error type: {e.__class__.__name__}. '
+                            f'Error details: {str(e)}. '
+                            f'Please try again with a valid response.'
+                        )

+                    # Common retry logic
+                    retry_count += 1
+                    messages.append(Message(role='user', content=error_context))
+                    logger.warning(f'Retrying after error (attempt {retry_count}/{max_retries}): {e}')

+            # If we somehow get here, raise the last error
+            span.set_status('error', str(last_error))
+            raise last_error or Exception('Max retries exceeded with no specific error')
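The Anthropic retry loop above validates structured output with Pydantic and, on failure, appends the validation error back into the conversation so the next attempt can self-correct. Below is a small standalone illustration of that pattern with a dummy model call; `Pet`, `fake_llm_call`, and `generate_with_retry` are made-up names for this example only, not code from the package.

```python
# Standalone sketch of the validate-then-retry pattern used in the diff above.
from pydantic import BaseModel, ValidationError


class Pet(BaseModel):
    name: str
    age: int


def fake_llm_call(messages: list[dict]) -> dict:
    # First attempt returns a bad payload; once error feedback is present, a good one.
    if not any('previous response was invalid' in m['content'] for m in messages):
        return {'name': 'Mochi', 'age': 'three'}  # age is not an int -> ValidationError
    return {'name': 'Mochi', 'age': 3}


def generate_with_retry(messages: list[dict], max_retries: int = 2) -> dict:
    retry_count = 0
    while retry_count <= max_retries:
        raw = fake_llm_call(messages)
        try:
            return Pet(**raw).model_dump()  # same validation step as the diff
        except ValidationError as e:
            retry_count += 1
            if retry_count > max_retries:
                raise
            # Feed the error back so the next attempt can self-correct.
            messages.append(
                {'role': 'user', 'content': f'The previous response was invalid: {e}'}
            )
    raise RuntimeError('unreachable')


print(generate_with_retry([{'role': 'user', 'content': 'Return a pet as JSON'}]))
```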
graphiti_core/llm_client/client.py
CHANGED

@@ -26,6 +26,7 @@ from pydantic import BaseModel
 from tenacity import retry, retry_if_exception, stop_after_attempt, wait_random_exponential

 from ..prompts.models import Message
+from ..tracer import NoOpTracer, Tracer
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError

@@ -74,11 +75,16 @@ class LLMClient(ABC):
         self.max_tokens = config.max_tokens
         self.cache_enabled = cache
         self.cache_dir = None
+        self.tracer: Tracer = NoOpTracer()

         # Only create the cache directory if caching is enabled
         if self.cache_enabled:
             self.cache_dir = Cache(DEFAULT_CACHE_DIR)

+    def set_tracer(self, tracer: Tracer) -> None:
+        """Set the tracer for this LLM client."""
+        self.tracer = tracer
+
     def _clean_input(self, input: str) -> str:
         """Clean input string of invalid unicode and control characters.

@@ -147,6 +153,7 @@ class LLMClient(ABC):
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
         group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         if max_tokens is None:
             max_tokens = self.max_tokens
@@ -162,26 +169,62 @@ class LLMClient(ABC):
         # Add multilingual extraction instructions
         messages[0].content += get_extraction_language_instruction(group_id)

-        if self.cache_enabled and self.cache_dir is not None:
-            cache_key = self._get_cache_key(messages)
-
-            cached_response = self.cache_dir.get(cache_key)
-            if cached_response is not None:
-                logger.debug(f'Cache hit for {cache_key}')
-                return cached_response
-
         for message in messages:
             message.content = self._clean_input(message.content)

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': self._get_provider_type(),
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+                'cache.enabled': self.cache_enabled,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            # Check cache first
+            if self.cache_enabled and self.cache_dir is not None:
+                cache_key = self._get_cache_key(messages)
+                cached_response = self.cache_dir.get(cache_key)
+                if cached_response is not None:
+                    logger.debug(f'Cache hit for {cache_key}')
+                    span.add_attributes({'cache.hit': True})
+                    return cached_response
+
+            span.add_attributes({'cache.hit': False})
+
+            # Execute LLM call
+            try:
+                response = await self._generate_response_with_retry(
+                    messages, response_model, max_tokens, model_size
+                )
+            except Exception as e:
+                span.set_status('error', str(e))
+                span.record_exception(e)
+                raise
+
+            # Cache response if enabled
+            if self.cache_enabled and self.cache_dir is not None:
+                cache_key = self._get_cache_key(messages)
+                self.cache_dir.set(cache_key, response)
+
+            return response
+
+    def _get_provider_type(self) -> str:
+        """Get provider type from class name."""
+        class_name = self.__class__.__name__.lower()
+        if 'openai' in class_name:
+            return 'openai'
+        elif 'anthropic' in class_name:
+            return 'anthropic'
+        elif 'gemini' in class_name:
+            return 'gemini'
+        elif 'groq' in class_name:
+            return 'groq'
+        else:
+            return 'unknown'

     def _get_failed_generation_log(self, messages: list[Message], output: str | None) -> str:
         """
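Every LLM client now defaults to a NoOpTracer and can be given a real tracer through the new set_tracer() method shown above. The sketch below wires in a console-printing tracer to show what the span receives (the llm.provider, cache.enabled, cache.hit, and prompt.name attributes from the diff). ConsoleTracer and ConsoleSpan are invented for this example and assume the duck-typed interface sketched earlier; only set_tracer() and the NoOpTracer default come from the package.

```python
# Hypothetical wiring example for the new set_tracer() hook.
from contextlib import contextmanager
from typing import Any, Iterator


class ConsoleSpan:
    def __init__(self, name: str) -> None:
        self.name = name

    def add_attributes(self, attributes: dict[str, Any]) -> None:
        print(f'[{self.name}] attrs={attributes}')

    def set_status(self, status: str, description: str | None = None) -> None:
        print(f'[{self.name}] status={status} ({description})')

    def record_exception(self, exception: BaseException) -> None:
        print(f'[{self.name}] exception={exception!r}')


class ConsoleTracer:
    @contextmanager
    def start_span(self, name: str) -> Iterator[ConsoleSpan]:
        print(f'>> span start: {name}')
        try:
            yield ConsoleSpan(name)
        finally:
            print(f'<< span end: {name}')


# Usage sketch (client construction omitted):
# llm_client = <any graphiti_core LLMClient subclass>
# llm_client.set_tracer(ConsoleTracer())  # otherwise generate_response uses NoOpTracer()
```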
graphiti_core/llm_client/gemini_client.py
CHANGED

@@ -358,6 +358,7 @@ class GeminiClient(LLMClient):
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
         group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model with retry logic and error handling.
@@ -369,62 +370,78 @@ class GeminiClient(LLMClient):
             max_tokens (int | None): The maximum number of tokens to generate in the response.
             model_size (ModelSize): The size of the model to use (small or medium).
             group_id (str | None): Optional partition identifier for the graph.
+            prompt_name (str | None): Optional name of the prompt for tracing.

         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
-        retry_count = 0
-        last_error = None
-        last_output = None
-
         # Add multilingual extraction instructions
         messages[0].content += get_extraction_language_instruction(group_id)

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'gemini',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens or self.max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+            last_output = None
+
+            while retry_count < self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages=messages,
+                        response_model=response_model,
+                        max_tokens=max_tokens,
+                        model_size=model_size,
+                    )
+                    last_output = (
+                        response.get('content')
+                        if isinstance(response, dict) and 'content' in response
+                        else None
+                    )
+                    return response
+                except RateLimitError as e:
+                    # Rate limit errors should not trigger retries (fail fast)
+                    span.set_status('error', str(e))
+                    raise e
+                except Exception as e:
+                    last_error = e
+
+                    # Check if this is a safety block - these typically shouldn't be retried
+                    error_text = str(e) or (str(e.__cause__) if e.__cause__ else '')
+                    if 'safety' in error_text.lower() or 'blocked' in error_text.lower():
+                        logger.warning(f'Content blocked by safety filters: {e}')
+                        span.set_status('error', str(e))
+                        raise Exception(f'Content blocked by safety filters: {e}') from e
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we exit the loop without returning, all retries are exhausted
+            logger.error('🦀 LLM generation failed and retries are exhausted.')
+            logger.error(self._get_failed_generation_log(messages, last_output))
+            logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {last_error}')
+            span.set_status('error', str(last_error))
+            span.record_exception(last_error) if last_error else None
+            raise last_error or Exception('Max retries exceeded')
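The Gemini client now fails fast on safety blocks instead of retrying: it substring-matches the error text for 'safety' or 'blocked'. A tiny standalone illustration of that check follows; `is_safety_block` is a made-up helper name, but the test mirrors the diff.

```python
# Illustration of the fail-fast safety classification used above.
def is_safety_block(error: Exception) -> bool:
    error_text = str(error) or (str(error.__cause__) if error.__cause__ else '')
    return 'safety' in error_text.lower() or 'blocked' in error_text.lower()


assert is_safety_block(Exception('Response blocked by SAFETY settings'))
assert not is_safety_block(Exception('503 Service Unavailable'))
```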
graphiti_core/llm_client/openai_base_client.py
CHANGED

@@ -176,53 +176,70 @@ class BaseOpenAIClient(LLMClient):
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
         group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """Generate a response with retry logic and error handling."""
         if max_tokens is None:
             max_tokens = self.max_tokens

-        retry_count = 0
-        last_error = None
-
         # Add multilingual extraction instructions
         messages[0].content += get_extraction_language_instruction(group_id)

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'openai',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+
+            while retry_count <= self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages, response_model, max_tokens, model_size
+                    )
+                    return response
+                except (RateLimitError, RefusalError):
+                    # These errors should not trigger retries
+                    span.set_status('error', str(last_error))
                     raise
+                except (openai.APITimeoutError, openai.APIConnectionError, openai.InternalServerError):
+                    # Let OpenAI's client handle these retries
+                    span.set_status('error', str(last_error))
+                    raise
+                except Exception as e:
+                    last_error = e
+
+                    # Don't retry if we've hit the max retries
+                    if retry_count >= self.MAX_RETRIES:
+                        logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
+                        span.set_status('error', str(e))
+                        span.record_exception(e)
+                        raise
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we somehow get here, raise the last error
+            span.set_status('error', str(last_error))
+            raise last_error or Exception('Max retries exceeded with no specific error')
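The OpenAI base client now sorts failures into three classes: errors that should never be retried (RateLimitError, RefusalError), transport errors whose retries are delegated to the OpenAI SDK (APITimeoutError, APIConnectionError, InternalServerError), and everything else, which is retried with error feedback. The dependency-free sketch below shows that classification pattern in isolation; the placeholder exception classes stand in for the real ones.

```python
# Dependency-free sketch of the retry classification used above.
class RateLimitError(Exception): ...
class RefusalError(Exception): ...
class APITimeoutError(Exception): ...


NEVER_RETRY = (RateLimitError, RefusalError)   # surface immediately
SDK_HANDLED = (APITimeoutError,)               # the SDK's own retry policy already applied


def classify(error: Exception) -> str:
    if isinstance(error, NEVER_RETRY):
        return 'raise'
    if isinstance(error, SDK_HANDLED):
        return 'raise (SDK already retried)'
    return 'retry with error feedback'


print(classify(RefusalError('model refused')))   # raise
print(classify(APITimeoutError('timed out')))    # raise (SDK already retried)
print(classify(ValueError('bad JSON')))          # retry with error feedback
```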
graphiti_core/llm_client/openai_generic_client.py
CHANGED

@@ -121,13 +121,11 @@ class OpenAIGenericClient(LLMClient):
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
         group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         if max_tokens is None:
             max_tokens = self.max_tokens

-        retry_count = 0
-        last_error = None
-
         if response_model is not None:
             serialized_model = json.dumps(response_model.model_json_schema())
             messages[
@@ -139,42 +137,61 @@ class OpenAIGenericClient(LLMClient):
         # Add multilingual extraction instructions
         messages[0].content += get_extraction_language_instruction(group_id)

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'openai',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+
+            while retry_count <= self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages, response_model, max_tokens=max_tokens, model_size=model_size
+                    )
+                    return response
+                except (RateLimitError, RefusalError):
+                    # These errors should not trigger retries
+                    span.set_status('error', str(last_error))
                     raise
+                except (openai.APITimeoutError, openai.APIConnectionError, openai.InternalServerError):
+                    # Let OpenAI's client handle these retries
+                    span.set_status('error', str(last_error))
+                    raise
+                except Exception as e:
+                    last_error = e
+
+                    # Don't retry if we've hit the max retries
+                    if retry_count >= self.MAX_RETRIES:
+                        logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
+                        span.set_status('error', str(e))
+                        span.record_exception(e)
+                        raise
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we somehow get here, raise the last error
+            span.set_status('error', str(last_error))
+            raise last_error or Exception('Max retries exceeded with no specific error')
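All four clients now accept a prompt_name keyword whose only visible use in this diff is labeling the tracing span (the 'prompt.name' attribute). The sketch below shows how a caller might pass it, assuming the public entry point is LLMClient.generate_response (the method these hunks modify); the ExtractedSummary model, the summarize wrapper, and the 'summarize_episode' name are illustrative, not values from the package.

```python
# Hypothetical caller-side usage of the new prompt_name parameter.
from pydantic import BaseModel

from graphiti_core.llm_client import LLMClient
from graphiti_core.llm_client.config import ModelSize
from graphiti_core.prompts.models import Message


class ExtractedSummary(BaseModel):
    summary: str


async def summarize(client: LLMClient) -> dict:
    # prompt_name only labels the 'llm.generate' span; it does not change the request.
    return await client.generate_response(
        [Message(role='user', content='Summarize: Graphiti builds temporal knowledge graphs.')],
        response_model=ExtractedSummary,
        model_size=ModelSize.small,
        prompt_name='summarize_episode',  # illustrative name for tracing only
    )
```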