graphiti-core 0.21.0rc13__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphiti_core/driver/driver.py +4 -211
- graphiti_core/driver/falkordb_driver.py +31 -3
- graphiti_core/driver/graph_operations/graph_operations.py +195 -0
- graphiti_core/driver/neo4j_driver.py +0 -49
- graphiti_core/driver/neptune_driver.py +43 -26
- graphiti_core/driver/search_interface/__init__.py +0 -0
- graphiti_core/driver/search_interface/search_interface.py +89 -0
- graphiti_core/edges.py +11 -34
- graphiti_core/graphiti.py +459 -326
- graphiti_core/graphiti_types.py +2 -0
- graphiti_core/llm_client/anthropic_client.py +64 -45
- graphiti_core/llm_client/client.py +67 -19
- graphiti_core/llm_client/gemini_client.py +73 -54
- graphiti_core/llm_client/openai_base_client.py +65 -43
- graphiti_core/llm_client/openai_generic_client.py +65 -43
- graphiti_core/models/edges/edge_db_queries.py +1 -0
- graphiti_core/models/nodes/node_db_queries.py +1 -0
- graphiti_core/nodes.py +26 -99
- graphiti_core/prompts/dedupe_edges.py +4 -4
- graphiti_core/prompts/dedupe_nodes.py +10 -10
- graphiti_core/prompts/extract_edges.py +4 -4
- graphiti_core/prompts/extract_nodes.py +26 -28
- graphiti_core/prompts/prompt_helpers.py +18 -2
- graphiti_core/prompts/snippets.py +29 -0
- graphiti_core/prompts/summarize_nodes.py +22 -24
- graphiti_core/search/search_filters.py +0 -38
- graphiti_core/search/search_helpers.py +4 -4
- graphiti_core/search/search_utils.py +84 -220
- graphiti_core/tracer.py +193 -0
- graphiti_core/utils/bulk_utils.py +16 -28
- graphiti_core/utils/maintenance/community_operations.py +4 -1
- graphiti_core/utils/maintenance/edge_operations.py +26 -15
- graphiti_core/utils/maintenance/graph_data_operations.py +6 -25
- graphiti_core/utils/maintenance/node_operations.py +98 -51
- graphiti_core/utils/maintenance/temporal_operations.py +4 -1
- graphiti_core/utils/text_utils.py +53 -0
- {graphiti_core-0.21.0rc13.dist-info → graphiti_core-0.22.0.dist-info}/METADATA +7 -3
- {graphiti_core-0.21.0rc13.dist-info → graphiti_core-0.22.0.dist-info}/RECORD +41 -35
- /graphiti_core/{utils/maintenance/utils.py → driver/graph_operations/__init__.py} +0 -0
- {graphiti_core-0.21.0rc13.dist-info → graphiti_core-0.22.0.dist-info}/WHEEL +0 -0
- {graphiti_core-0.21.0rc13.dist-info → graphiti_core-0.22.0.dist-info}/licenses/LICENSE +0 -0
graphiti_core/graphiti_types.py
CHANGED
@@ -20,6 +20,7 @@ from graphiti_core.cross_encoder import CrossEncoderClient
 from graphiti_core.driver.driver import GraphDriver
 from graphiti_core.embedder import EmbedderClient
 from graphiti_core.llm_client import LLMClient
+from graphiti_core.tracer import Tracer


 class GraphitiClients(BaseModel):
@@ -27,5 +28,6 @@ class GraphitiClients(BaseModel):
     llm_client: LLMClient
     embedder: EmbedderClient
     cross_encoder: CrossEncoderClient
+    tracer: Tracer

     model_config = ConfigDict(arbitrary_types_allowed=True)
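The new `tracer: Tracer` field on `GraphitiClients`, together with the span calls that appear throughout the LLM client diffs below (`start_span`, `add_attributes`, `set_status`, `record_exception`), implies a small tracing facade. The new `graphiti_core/tracer.py` module (+193 lines) is not reproduced in this excerpt, so the following is only a minimal sketch of an interface compatible with those call sites; the actual module may differ.

# Illustrative sketch only: graphiti_core/tracer.py is not shown in this diff.
# Everything here is inferred from the call sites visible below; treat the
# class and method shapes as assumptions, not the package's actual API.
from abc import ABC, abstractmethod
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Any


class TracerSpan(ABC):
    """Span handle yielded by Tracer.start_span."""

    @abstractmethod
    def add_attributes(self, attributes: dict[str, Any]) -> None: ...

    @abstractmethod
    def set_status(self, status: str, description: str | None = None) -> None: ...

    @abstractmethod
    def record_exception(self, exception: BaseException) -> None: ...


class Tracer(ABC):
    """Tracing facade stored on GraphitiClients and each LLMClient."""

    @abstractmethod
    def start_span(self, name: str): ...


class NoOpSpan(TracerSpan):
    def add_attributes(self, attributes: dict[str, Any]) -> None:
        pass

    def set_status(self, status: str, description: str | None = None) -> None:
        pass

    def record_exception(self, exception: BaseException) -> None:
        pass


class NoOpTracer(Tracer):
    """Default tracer that records nothing; LLMClient falls back to this."""

    @contextmanager
    def start_span(self, name: str) -> Iterator[TracerSpan]:
        yield NoOpSpan()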
graphiti_core/llm_client/anthropic_client.py
CHANGED
@@ -265,6 +265,8 @@ class AnthropicClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the LLM.
@@ -285,55 +287,72 @@ class AnthropicClient(LLMClient):
         if max_tokens is None:
             max_tokens = self.max_tokens

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'anthropic',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            max_retries = 2
+            last_error: Exception | None = None
+
+            while retry_count <= max_retries:
+                try:
+                    response = await self._generate_response(
+                        messages, response_model, max_tokens, model_size
+                    )

+                    # If we have a response_model, attempt to validate the response
+                    if response_model is not None:
+                        # Validate the response against the response_model
+                        model_instance = response_model(**response)
+                        return model_instance.model_dump()
+
+                    # If no validation needed, return the response
+                    return response
+
+                except (RateLimitError, RefusalError):
+                    # These errors should not trigger retries
+                    span.set_status('error', str(last_error))
+                    raise
+                except Exception as e:
+                    last_error = e
+
+                    if retry_count >= max_retries:
+                        if isinstance(e, ValidationError):
+                            logger.error(
+                                f'Validation error after {retry_count}/{max_retries} attempts: {e}'
+                            )
+                        else:
+                            logger.error(f'Max retries ({max_retries}) exceeded. Last error: {e}')
+                        span.set_status('error', str(e))
+                        span.record_exception(e)
+                        raise e

-                if retry_count >= max_retries:
                     if isinstance(e, ValidationError):
-                        )
+                        response_model_cast = typing.cast(type[BaseModel], response_model)
+                        error_context = f'The previous response was invalid. Please provide a valid {response_model_cast.__name__} object. Error: {e}'
                     else:
+                        error_context = (
+                            f'The previous response attempt was invalid. '
+                            f'Error type: {e.__class__.__name__}. '
+                            f'Error details: {str(e)}. '
+                            f'Please try again with a valid response.'
+                        )

-                    f'The previous response attempt was invalid. '
-                    f'Error type: {e.__class__.__name__}. '
-                    f'Error details: {str(e)}. '
-                    f'Please try again with a valid response.'
+                    # Common retry logic
+                    retry_count += 1
+                    messages.append(Message(role='user', content=error_context))
+                    logger.warning(
+                        f'Retrying after error (attempt {retry_count}/{max_retries}): {e}'
                     )

-                logger.warning(f'Retrying after error (attempt {retry_count}/{max_retries}): {e}')
-        # If we somehow get here, raise the last error
-        raise last_error or Exception('Max retries exceeded with no specific error')
+            # If we somehow get here, raise the last error
+            span.set_status('error', str(last_error))
+            raise last_error or Exception('Max retries exceeded with no specific error')
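For callers, the visible API change in this client is the pair of optional keyword arguments. A hypothetical call site might look like the sketch below; `set_tracer`, `NoOpTracer`, `Message`, `group_id`, and `prompt_name` come directly from the diffs in this release, while the constructor arguments and values shown are illustrative assumptions.

# Hedged usage sketch. Only generate_response(group_id=..., prompt_name=...),
# set_tracer(), NoOpTracer, and Message appear in this diff; the constructor
# arguments and the example values below are assumptions.
import asyncio

from graphiti_core.llm_client.anthropic_client import AnthropicClient
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.prompts.models import Message
from graphiti_core.tracer import NoOpTracer


async def main() -> None:
    client = AnthropicClient(config=LLMConfig(api_key='sk-ant-...'))  # assumed signature
    client.set_tracer(NoOpTracer())  # swap in a real tracer to capture 'llm.generate' spans

    result = await client.generate_response(
        messages=[
            Message(role='system', content='Extract entities from the text.'),
            Message(role='user', content='Alice met Bob in Paris.'),
        ],
        group_id='tenant-42',  # new optional partition identifier (see the client.py changes)
        prompt_name='extract_nodes',  # recorded on the tracing span as prompt.name
    )
    print(result)


asyncio.run(main())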
graphiti_core/llm_client/client.py
CHANGED
@@ -26,6 +26,7 @@ from pydantic import BaseModel
 from tenacity import retry, retry_if_exception, stop_after_attempt, wait_random_exponential

 from ..prompts.models import Message
+from ..tracer import NoOpTracer, Tracer
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError

@@ -33,12 +34,16 @@ DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'


-def get_extraction_language_instruction() -> str:
+def get_extraction_language_instruction(group_id: str | None = None) -> str:
     """Returns instruction for language extraction behavior.

     Override this function to customize language extraction:
     - Return empty string to disable multilingual instructions
     - Return custom instructions for specific language requirements
+    - Use group_id to provide different instructions per group/partition
+
+    Args:
+        group_id: Optional partition identifier for the graph

     Returns:
         str: Language instruction to append to system messages
@@ -70,11 +75,16 @@ class LLMClient(ABC):
         self.max_tokens = config.max_tokens
         self.cache_enabled = cache
         self.cache_dir = None
+        self.tracer: Tracer = NoOpTracer()

         # Only create the cache directory if caching is enabled
         if self.cache_enabled:
             self.cache_dir = Cache(DEFAULT_CACHE_DIR)

+    def set_tracer(self, tracer: Tracer) -> None:
+        """Set the tracer for this LLM client."""
+        self.tracer = tracer
+
     def _clean_input(self, input: str) -> str:
         """Clean input string of invalid unicode and control characters.

@@ -142,6 +152,8 @@ class LLMClient(ABC):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         if max_tokens is None:
             max_tokens = self.max_tokens
@@ -155,28 +167,64 @@ class LLMClient(ABC):
         )

         # Add multilingual extraction instructions
-        messages[0].content += get_extraction_language_instruction()
-
-        if self.cache_enabled and self.cache_dir is not None:
-            cache_key = self._get_cache_key(messages)
-
-            cached_response = self.cache_dir.get(cache_key)
-            if cached_response is not None:
-                logger.debug(f'Cache hit for {cache_key}')
-                return cached_response
+        messages[0].content += get_extraction_language_instruction(group_id)

         for message in messages:
             message.content = self._clean_input(message.content)

+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': self._get_provider_type(),
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+                'cache.enabled': self.cache_enabled,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            # Check cache first
+            if self.cache_enabled and self.cache_dir is not None:
+                cache_key = self._get_cache_key(messages)
+                cached_response = self.cache_dir.get(cache_key)
+                if cached_response is not None:
+                    logger.debug(f'Cache hit for {cache_key}')
+                    span.add_attributes({'cache.hit': True})
+                    return cached_response
+
+            span.add_attributes({'cache.hit': False})
+
+            # Execute LLM call
+            try:
+                response = await self._generate_response_with_retry(
+                    messages, response_model, max_tokens, model_size
+                )
+            except Exception as e:
+                span.set_status('error', str(e))
+                span.record_exception(e)
+                raise
+
+            # Cache response if enabled
+            if self.cache_enabled and self.cache_dir is not None:
+                cache_key = self._get_cache_key(messages)
+                self.cache_dir.set(cache_key, response)
+
+            return response
+
+    def _get_provider_type(self) -> str:
+        """Get provider type from class name."""
+        class_name = self.__class__.__name__.lower()
+        if 'openai' in class_name:
+            return 'openai'
+        elif 'anthropic' in class_name:
+            return 'anthropic'
+        elif 'gemini' in class_name:
+            return 'gemini'
+        elif 'groq' in class_name:
+            return 'groq'
+        else:
+            return 'unknown'

     def _get_failed_generation_log(self, messages: list[Message], output: str | None) -> str:
         """
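The docstring above names `get_extraction_language_instruction` as an override point, and the new `group_id` parameter as a per-partition hook. One plausible way to use it is to replace the module-level function before building clients, as sketched below; whether the other client modules bind the name at import time (and would therefore need patching as well) is not visible in this diff, so treat the mechanics as an assumption.

# Sketch of the documented override hook, under the assumption that callers
# resolve get_extraction_language_instruction from graphiti_core.llm_client.client
# at call time. The group_id values here are made up for illustration.
import graphiti_core.llm_client.client as llm_client


def per_partition_language_instruction(group_id: str | None = None) -> str:
    if group_id == 'tenant-fr':
        # Force French output for one partition of the graph.
        return '\n\nAlways respond in French.'
    if group_id is None:
        # Disable the multilingual instruction entirely.
        return ''
    # Default: mirror the language of the source text.
    return '\n\nRespond in the same language as the provided text.'


llm_client.get_extraction_language_instruction = per_partition_language_instruction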
graphiti_core/llm_client/gemini_client.py
CHANGED
@@ -357,6 +357,8 @@ class GeminiClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model with retry logic and error handling.
@@ -367,62 +369,79 @@ class GeminiClient(LLMClient):
             response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
             max_tokens (int | None): The maximum number of tokens to generate in the response.
             model_size (ModelSize): The size of the model to use (small or medium).
+            group_id (str | None): Optional partition identifier for the graph.
+            prompt_name (str | None): Optional name of the prompt for tracing.

         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
-        retry_count = 0
-        last_error = None
-        last_output = None
-
         # Add multilingual extraction instructions
-        messages[0].content += get_extraction_language_instruction()
+        messages[0].content += get_extraction_language_instruction(group_id)
+
+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'gemini',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens or self.max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+            last_output = None
+
+            while retry_count < self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages=messages,
+                        response_model=response_model,
+                        max_tokens=max_tokens,
+                        model_size=model_size,
+                    )
+                    last_output = (
+                        response.get('content')
+                        if isinstance(response, dict) and 'content' in response
+                        else None
+                    )
+                    return response
+                except RateLimitError as e:
+                    # Rate limit errors should not trigger retries (fail fast)
+                    span.set_status('error', str(e))
+                    raise e
+                except Exception as e:
+                    last_error = e
+
+                    # Check if this is a safety block - these typically shouldn't be retried
+                    error_text = str(e) or (str(e.__cause__) if e.__cause__ else '')
+                    if 'safety' in error_text.lower() or 'blocked' in error_text.lower():
+                        logger.warning(f'Content blocked by safety filters: {e}')
+                        span.set_status('error', str(e))
+                        raise Exception(f'Content blocked by safety filters: {e}') from e
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we exit the loop without returning, all retries are exhausted
+            logger.error('🦀 LLM generation failed and retries are exhausted.')
+            logger.error(self._get_failed_generation_log(messages, last_output))
+            logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {last_error}')
+            span.set_status('error', str(last_error))
+            span.record_exception(last_error) if last_error else None
+            raise last_error or Exception('Max retries exceeded')
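The clients shown so far all report the same span lifecycle: attributes up front (`llm.provider`, `model.size`, `max_tokens`, optionally `prompt.name`), then `set_status('error', ...)` and `record_exception(...)` on terminal failures. A minimal tracer that just logs this lifecycle, suitable for passing to `set_tracer`, could look like the sketch below; it only duck-types against the call sites in these diffs, since the real `Tracer` base class is not reproduced here.

# Minimal logging-based tracer sketch. It duck-types against the call sites in
# the diffs above (start_span / add_attributes / set_status / record_exception);
# the real graphiti_core.tracer interface may require more than this.
import logging
import time
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Any

logger = logging.getLogger('graphiti.trace')


class LoggingSpan:
    def __init__(self, name: str) -> None:
        self.name = name
        self.attributes: dict[str, Any] = {}
        self.status = 'ok'

    def add_attributes(self, attributes: dict[str, Any]) -> None:
        self.attributes.update(attributes)

    def set_status(self, status: str, description: str | None = None) -> None:
        self.status = status
        if description:
            self.attributes['status.description'] = description

    def record_exception(self, exception: BaseException) -> None:
        self.attributes['exception.type'] = type(exception).__name__


class LoggingTracer:
    @contextmanager
    def start_span(self, name: str) -> Iterator[LoggingSpan]:
        span = LoggingSpan(name)
        started = time.perf_counter()
        try:
            yield span
        finally:
            elapsed_ms = (time.perf_counter() - started) * 1000
            logger.info('%s [%s] %.1fms %s', name, span.status, elapsed_ms, span.attributes)

An instance of this could be handed to any LLM client via `client.set_tracer(LoggingTracer())`; how the `tracer` field on `GraphitiClients` is wired through is part of the `graphiti.py` changes not reproduced in this excerpt.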
graphiti_core/llm_client/openai_base_client.py
CHANGED
@@ -175,53 +175,75 @@ class BaseOpenAIClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """Generate a response with retry logic and error handling."""
         if max_tokens is None:
             max_tokens = self.max_tokens

-        retry_count = 0
-        last_error = None
-
         # Add multilingual extraction instructions
-        messages[0].content += get_extraction_language_instruction()
+        messages[0].content += get_extraction_language_instruction(group_id)
+
+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'openai',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+
+            while retry_count <= self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages, response_model, max_tokens, model_size
+                    )
+                    return response
+                except (RateLimitError, RefusalError):
+                    # These errors should not trigger retries
+                    span.set_status('error', str(last_error))
                     raise
+                except (
+                    openai.APITimeoutError,
+                    openai.APIConnectionError,
+                    openai.InternalServerError,
+                ):
+                    # Let OpenAI's client handle these retries
+                    span.set_status('error', str(last_error))
+                    raise
+                except Exception as e:
+                    last_error = e
+
+                    # Don't retry if we've hit the max retries
+                    if retry_count >= self.MAX_RETRIES:
+                        logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
+                        span.set_status('error', str(e))
+                        span.record_exception(e)
+                        raise
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we somehow get here, raise the last error
+            span.set_status('error', str(last_error))
+            raise last_error or Exception('Max retries exceeded with no specific error')
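Because the span interface is so small, bridging it to OpenTelemetry is mostly an adapter exercise. The sketch below uses only standard opentelemetry-api calls (`get_tracer`, `start_as_current_span`, `set_attribute`, `set_status`, `record_exception`); the graphiti-facing method shapes are again assumptions inferred from the call sites in this diff.

# Hedged sketch of an OpenTelemetry-backed tracer. The OpenTelemetry calls are
# real opentelemetry-api functions; the graphiti-facing methods mirror the call
# sites seen above and are assumptions about the package's Tracer contract.
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Any

from opentelemetry import trace
from opentelemetry.trace import Span, Status, StatusCode


class OtelSpanAdapter:
    def __init__(self, span: Span) -> None:
        self._span = span

    def add_attributes(self, attributes: dict[str, Any]) -> None:
        for key, value in attributes.items():
            self._span.set_attribute(key, value)

    def set_status(self, status: str, description: str | None = None) -> None:
        code = StatusCode.ERROR if status == 'error' else StatusCode.OK
        self._span.set_status(Status(code, description))

    def record_exception(self, exception: BaseException) -> None:
        self._span.record_exception(exception)


class OtelTracer:
    def __init__(self) -> None:
        self._tracer = trace.get_tracer('graphiti_core')

    @contextmanager
    def start_span(self, name: str) -> Iterator[OtelSpanAdapter]:
        with self._tracer.start_as_current_span(name) as span:
            yield OtelSpanAdapter(span)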