graphiti-core 0.17.4__py3-none-any.whl → 0.25.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphiti_core/cross_encoder/gemini_reranker_client.py +1 -1
- graphiti_core/cross_encoder/openai_reranker_client.py +1 -1
- graphiti_core/decorators.py +110 -0
- graphiti_core/driver/driver.py +62 -2
- graphiti_core/driver/falkordb_driver.py +215 -23
- graphiti_core/driver/graph_operations/graph_operations.py +191 -0
- graphiti_core/driver/kuzu_driver.py +182 -0
- graphiti_core/driver/neo4j_driver.py +70 -8
- graphiti_core/driver/neptune_driver.py +305 -0
- graphiti_core/driver/search_interface/search_interface.py +89 -0
- graphiti_core/edges.py +264 -132
- graphiti_core/embedder/azure_openai.py +10 -3
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +114 -101
- graphiti_core/graphiti.py +635 -260
- graphiti_core/graphiti_types.py +2 -0
- graphiti_core/helpers.py +37 -15
- graphiti_core/llm_client/anthropic_client.py +142 -52
- graphiti_core/llm_client/azure_openai_client.py +57 -19
- graphiti_core/llm_client/client.py +83 -21
- graphiti_core/llm_client/config.py +1 -1
- graphiti_core/llm_client/gemini_client.py +75 -57
- graphiti_core/llm_client/openai_base_client.py +92 -48
- graphiti_core/llm_client/openai_client.py +39 -9
- graphiti_core/llm_client/openai_generic_client.py +91 -56
- graphiti_core/models/edges/edge_db_queries.py +259 -35
- graphiti_core/models/nodes/node_db_queries.py +311 -32
- graphiti_core/nodes.py +388 -164
- graphiti_core/prompts/dedupe_edges.py +42 -31
- graphiti_core/prompts/dedupe_nodes.py +56 -39
- graphiti_core/prompts/eval.py +4 -4
- graphiti_core/prompts/extract_edges.py +24 -15
- graphiti_core/prompts/extract_nodes.py +76 -35
- graphiti_core/prompts/prompt_helpers.py +39 -0
- graphiti_core/prompts/snippets.py +29 -0
- graphiti_core/prompts/summarize_nodes.py +23 -25
- graphiti_core/search/search.py +154 -74
- graphiti_core/search/search_config.py +39 -4
- graphiti_core/search/search_filters.py +110 -31
- graphiti_core/search/search_helpers.py +5 -6
- graphiti_core/search/search_utils.py +1360 -473
- graphiti_core/tracer.py +193 -0
- graphiti_core/utils/bulk_utils.py +216 -90
- graphiti_core/utils/content_chunking.py +702 -0
- graphiti_core/utils/datetime_utils.py +13 -0
- graphiti_core/utils/maintenance/community_operations.py +62 -38
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +306 -156
- graphiti_core/utils/maintenance/graph_data_operations.py +44 -74
- graphiti_core/utils/maintenance/node_operations.py +466 -206
- graphiti_core/utils/maintenance/temporal_operations.py +11 -3
- graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
- graphiti_core/utils/text_utils.py +53 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/METADATA +221 -87
- graphiti_core-0.25.3.dist-info/RECORD +87 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/WHEEL +1 -1
- graphiti_core-0.17.4.dist-info/RECORD +0 -77
- /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.25.3.dist-info}/licenses/LICENSE +0 -0
graphiti_core/llm_client/gemini_client.py:

@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
 from pydantic import BaseModel

 from ..prompts.models import Message
-from .client import
+from .client import LLMClient, get_extraction_language_instruction
 from .config import LLMConfig, ModelSize
 from .errors import RateLimitError

@@ -45,7 +45,7 @@ else:
 logger = logging.getLogger(__name__)

 DEFAULT_MODEL = 'gemini-2.5-flash'
-DEFAULT_SMALL_MODEL = 'gemini-2.5-flash-lite
+DEFAULT_SMALL_MODEL = 'gemini-2.5-flash-lite'

 # Maximum output tokens for different Gemini models
 GEMINI_MODEL_MAX_TOKENS = {
@@ -53,7 +53,6 @@ GEMINI_MODEL_MAX_TOKENS = {
     'gemini-2.5-pro': 65536,
     'gemini-2.5-flash': 65536,
     'gemini-2.5-flash-lite': 64000,
-    'models/gemini-2.5-flash-lite-preview-06-17': 64000,
     # Gemini 2.0 models
     'gemini-2.0-flash': 8192,
     'gemini-2.0-flash-lite': 8192,
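The new defaults only matter when no models are configured explicitly; callers can still pick their own models through `LLMConfig`. A minimal sketch, illustrative and not part of the diff, assuming `LLMConfig` keeps its existing `api_key`/`model`/`small_model` fields and that the google-genai dependency is installed:

```python
# Illustrative sketch: overriding the Gemini model defaults shown above.
# Field names below are assumed from graphiti's existing LLMConfig.
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.gemini_client import GeminiClient

config = LLMConfig(
    api_key='YOUR_GOOGLE_API_KEY',
    model='gemini-2.5-pro',               # used for ModelSize.medium calls
    small_model='gemini-2.5-flash-lite',  # used for ModelSize.small calls
)
client = GeminiClient(config=config)
```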
graphiti_core/llm_client/gemini_client.py (continued):

@@ -357,6 +356,8 @@ class GeminiClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model with retry logic and error handling.
@@ -367,62 +368,79 @@ class GeminiClient(LLMClient):
             response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
             max_tokens (int | None): The maximum number of tokens to generate in the response.
             model_size (ModelSize): The size of the model to use (small or medium).
+            group_id (str | None): Optional partition identifier for the graph.
+            prompt_name (str | None): Optional name of the prompt for tracing.

         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
-        retry_count = 0
-        last_error = None
-        last_output = None
-
         # Add multilingual extraction instructions
-        messages[0].content +=
-        … (old lines 380-428 not shown)
+        messages[0].content += get_extraction_language_instruction(group_id)
+
+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'gemini',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens or self.max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+            last_output = None
+
+            while retry_count < self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages=messages,
+                        response_model=response_model,
+                        max_tokens=max_tokens,
+                        model_size=model_size,
+                    )
+                    last_output = (
+                        response.get('content')
+                        if isinstance(response, dict) and 'content' in response
+                        else None
+                    )
+                    return response
+                except RateLimitError as e:
+                    # Rate limit errors should not trigger retries (fail fast)
+                    span.set_status('error', str(e))
+                    raise e
+                except Exception as e:
+                    last_error = e
+
+                    # Check if this is a safety block - these typically shouldn't be retried
+                    error_text = str(e) or (str(e.__cause__) if e.__cause__ else '')
+                    if 'safety' in error_text.lower() or 'blocked' in error_text.lower():
+                        logger.warning(f'Content blocked by safety filters: {e}')
+                        span.set_status('error', str(e))
+                        raise Exception(f'Content blocked by safety filters: {e}') from e
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we exit the loop without returning, all retries are exhausted
+            logger.error('🦀 LLM generation failed and retries are exhausted.')
+            logger.error(self._get_failed_generation_log(messages, last_output))
+            logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {last_error}')
+            span.set_status('error', str(last_error))
+            span.record_exception(last_error) if last_error else None
+            raise last_error or Exception('Max retries exceeded')
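For callers, the visible change in `generate_response` is the two new keyword arguments plus the tracing span around the retry loop. A usage sketch, illustrative and not from the package, assuming the method still takes a `list[Message]` first and that the google-genai dependency is configured:

```python
# Illustrative sketch of calling the updated GeminiClient.generate_response.
# group_id and prompt_name are the new optional arguments from this diff;
# everything else is assumed to match graphiti's existing client API.
import asyncio

from pydantic import BaseModel

from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.gemini_client import GeminiClient
from graphiti_core.prompts.models import Message


class ExtractedFacts(BaseModel):
    facts: list[str]


async def main() -> None:
    client = GeminiClient(config=LLMConfig(api_key='YOUR_GOOGLE_API_KEY'))
    result = await client.generate_response(
        messages=[
            Message(role='system', content='Extract facts from the user text.'),
            Message(role='user', content='Alice moved to Berlin in 2021.'),
        ],
        response_model=ExtractedFacts,
        group_id='tenant-a',          # forwarded to get_extraction_language_instruction
        prompt_name='extract_facts',  # recorded as prompt.name on the llm.generate span
    )
    print(result)


asyncio.run(main())
```

Per the diff, `group_id` is only used to select the extraction-language instruction appended to the first message, and `prompt_name` only feeds the span attributes.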
graphiti_core/llm_client/openai_base_client.py:

@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

 from ..prompts.models import Message
-from .client import
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError

@@ -33,6 +33,8 @@ logger = logging.getLogger(__name__)

 DEFAULT_MODEL = 'gpt-4.1-mini'
 DEFAULT_SMALL_MODEL = 'gpt-4.1-nano'
+DEFAULT_REASONING = 'minimal'
+DEFAULT_VERBOSITY = 'low'


 class BaseOpenAIClient(LLMClient):
@@ -51,6 +53,8 @@ class BaseOpenAIClient(LLMClient):
         config: LLMConfig | None = None,
         cache: bool = False,
         max_tokens: int = DEFAULT_MAX_TOKENS,
+        reasoning: str | None = DEFAULT_REASONING,
+        verbosity: str | None = DEFAULT_VERBOSITY,
     ):
         if cache:
             raise NotImplementedError('Caching is not implemented for OpenAI-based clients')
@@ -60,6 +64,8 @@ class BaseOpenAIClient(LLMClient):

         super().__init__(config, cache)
         self.max_tokens = max_tokens
+        self.reasoning = reasoning
+        self.verbosity = verbosity

     @abstractmethod
     async def _create_completion(
@@ -81,6 +87,8 @@ class BaseOpenAIClient(LLMClient):
         temperature: float | None,
         max_tokens: int,
         response_model: type[BaseModel],
+        reasoning: str | None,
+        verbosity: str | None,
     ) -> Any:
         """Create a structured completion using the specific client implementation."""
         pass
@@ -107,10 +115,10 @@ class BaseOpenAIClient(LLMClient):

     def _handle_structured_response(self, response: Any) -> dict[str, Any]:
         """Handle structured response parsing and validation."""
-        response_object = response.
+        response_object = response.output_text

-        if response_object
-            return
+        if response_object:
+            return json.loads(response_object)
         elif response_object.refusal:
             raise RefusalError(response_object.refusal)
         else:
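`_handle_structured_response` now reads `response.output_text` from the Responses API and decodes it with `json.loads`, rather than unpacking the old chat-completions parse object. A rough sketch of the consuming side, illustrative only; `ExtractedFacts` is a made-up model and the re-validation step is optional, not something the diff does:

```python
# Illustrative only: how a structured result produced by responses.parse()
# flows back to the caller. The client returns json.loads(response.output_text),
# so downstream code receives a plain dict it can re-validate if it wants to.
import json

from pydantic import BaseModel


class ExtractedFacts(BaseModel):  # hypothetical response_model
    facts: list[str]


def consume(response) -> dict:
    raw = response.output_text           # JSON text emitted by the model
    data = json.loads(raw)               # what _handle_structured_response returns
    ExtractedFacts.model_validate(data)  # optional re-validation downstream
    return data
```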
graphiti_core/llm_client/openai_base_client.py (continued):

@@ -140,6 +148,8 @@ class BaseOpenAIClient(LLMClient):
                 temperature=self.temperature,
                 max_tokens=max_tokens or self.max_tokens,
                 response_model=response_model,
+                reasoning=self.reasoning,
+                verbosity=self.verbosity,
             )
             return self._handle_structured_response(response)
         else:
@@ -155,8 +165,20 @@ class BaseOpenAIClient(LLMClient):
             raise Exception(f'Output length exceeded max tokens {self.max_tokens}: {e}') from e
         except openai.RateLimitError as e:
             raise RateLimitError from e
+        except openai.AuthenticationError as e:
+            logger.error(
+                f'OpenAI Authentication Error: {e}. Please verify your API key is correct.'
+            )
+            raise
         except Exception as e:
-            … (old line 159 not shown)
+            # Provide more context for connection errors
+            error_msg = str(e)
+            if 'Connection error' in error_msg or 'connection' in error_msg.lower():
+                logger.error(
+                    f'Connection error communicating with OpenAI API. Please check your network connection and API key. Error: {e}'
+                )
+            else:
+                logger.error(f'Error in generating LLM response: {e}')
             raise

     async def generate_response(
@@ -165,53 +187,75 @@ class BaseOpenAIClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
+        prompt_name: str | None = None,
     ) -> dict[str, typing.Any]:
         """Generate a response with retry logic and error handling."""
         if max_tokens is None:
             max_tokens = self.max_tokens

-        retry_count = 0
-        last_error = None
-
         # Add multilingual extraction instructions
-        messages[0].content +=
-        … (old lines 178-196 not shown)
+        messages[0].content += get_extraction_language_instruction(group_id)
+
+        # Wrap entire operation in tracing span
+        with self.tracer.start_span('llm.generate') as span:
+            attributes = {
+                'llm.provider': 'openai',
+                'model.size': model_size.value,
+                'max_tokens': max_tokens,
+            }
+            if prompt_name:
+                attributes['prompt.name'] = prompt_name
+            span.add_attributes(attributes)
+
+            retry_count = 0
+            last_error = None
+
+            while retry_count <= self.MAX_RETRIES:
+                try:
+                    response = await self._generate_response(
+                        messages, response_model, max_tokens, model_size
+                    )
+                    return response
+                except (RateLimitError, RefusalError):
+                    # These errors should not trigger retries
+                    span.set_status('error', str(last_error))
                     raise
-                    … (old lines 198-217 not shown)
+                except (
+                    openai.APITimeoutError,
+                    openai.APIConnectionError,
+                    openai.InternalServerError,
+                ):
+                    # Let OpenAI's client handle these retries
+                    span.set_status('error', str(last_error))
+                    raise
+                except Exception as e:
+                    last_error = e
+
+                    # Don't retry if we've hit the max retries
+                    if retry_count >= self.MAX_RETRIES:
+                        logger.error(f'Max retries ({self.MAX_RETRIES}) exceeded. Last error: {e}')
+                        span.set_status('error', str(e))
+                        span.record_exception(e)
+                        raise
+
+                    retry_count += 1
+
+                    # Construct a detailed error message for the LLM
+                    error_context = (
+                        f'The previous response attempt was invalid. '
+                        f'Error type: {e.__class__.__name__}. '
+                        f'Error details: {str(e)}. '
+                        f'Please try again with a valid response, ensuring the output matches '
+                        f'the expected format and constraints.'
+                    )
+
+                    error_message = Message(role='user', content=error_context)
+                    messages.append(error_message)
+                    logger.warning(
+                        f'Retrying after application error (attempt {retry_count}/{self.MAX_RETRIES}): {e}'
+                    )
+
+            # If we somehow get here, raise the last error
+            span.set_status('error', str(last_error))
+            raise last_error or Exception('Max retries exceeded with no specific error')
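Both clients now wrap generation in `self.tracer.start_span('llm.generate')`. The diff only shows the call sites; judging from them, and from the new `graphiti_core/tracer.py` in the file list, the span object needs roughly the surface below. A hedged sketch of a no-op implementation matching those calls, with names inferred from the usage above rather than copied from `tracer.py`:

```python
# Inferred from the call sites in the hunks above; the real graphiti_core.tracer
# module may differ. Shown only to document the span interface the clients rely on.
from contextlib import contextmanager
from typing import Any, Iterator


class NoOpSpan:
    def add_attributes(self, attributes: dict[str, Any]) -> None:
        pass  # e.g. {'llm.provider': 'openai', 'model.size': 'medium', ...}

    def set_status(self, status: str, description: str | None = None) -> None:
        pass  # called with ('error', str(exc)) before re-raising

    def record_exception(self, exc: BaseException) -> None:
        pass  # called when retries are exhausted


class NoOpTracer:
    @contextmanager
    def start_span(self, name: str) -> Iterator[NoOpSpan]:
        yield NoOpSpan()  # 'llm.generate' in the clients above
```

Whatever `graphiti_core/tracer.py` actually provides, these four methods are the only ones the two clients call in this diff.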
graphiti_core/llm_client/openai_client.py:

@@ -21,7 +21,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

 from .config import DEFAULT_MAX_TOKENS, LLMConfig
-from .openai_base_client import BaseOpenAIClient
+from .openai_base_client import DEFAULT_REASONING, DEFAULT_VERBOSITY, BaseOpenAIClient


 class OpenAIClient(BaseOpenAIClient):
@@ -41,6 +41,8 @@ class OpenAIClient(BaseOpenAIClient):
         cache: bool = False,
         client: typing.Any = None,
         max_tokens: int = DEFAULT_MAX_TOKENS,
+        reasoning: str = DEFAULT_REASONING,
+        verbosity: str = DEFAULT_VERBOSITY,
     ):
         """
         Initialize the OpenAIClient with the provided configuration, cache setting, and client.
@@ -50,7 +52,7 @@ class OpenAIClient(BaseOpenAIClient):
             cache (bool): Whether to use caching for responses. Defaults to False.
             client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
         """
-        super().__init__(config, cache, max_tokens)
+        super().__init__(config, cache, max_tokens, reasoning, verbosity)

         if config is None:
             config = LLMConfig()
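The new `reasoning` and `verbosity` knobs default to `'minimal'` and `'low'` and only take effect for reasoning models, as the next hunks show. A hedged construction sketch, assuming `LLMConfig` still exposes `api_key` and `model` as before:

```python
# Illustrative sketch: passing the new reasoning/verbosity arguments through
# OpenAIClient. Values other than the defaults ('minimal'/'low') are assumptions
# about what the Responses API accepts, not something this diff pins down.
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.openai_client import OpenAIClient

client = OpenAIClient(
    config=LLMConfig(api_key='YOUR_OPENAI_API_KEY', model='gpt-5-mini'),
    reasoning='medium',  # forwarded as reasoning={'effort': 'medium'} for gpt-5/o1/o3 models
    verbosity='low',     # forwarded as text={'verbosity': 'low'} for reasoning models
)
```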
graphiti_core/llm_client/openai_client.py (continued):

@@ -67,16 +69,37 @@ class OpenAIClient(BaseOpenAIClient):
         temperature: float | None,
         max_tokens: int,
         response_model: type[BaseModel],
+        reasoning: str | None = None,
+        verbosity: str | None = None,
     ):
         """Create a structured completion using OpenAI's beta parse API."""
-        … (old lines 72-74 not shown)
-            temperature=temperature,
-            max_tokens=max_tokens,
-            response_format=response_model, # type: ignore
+        # Reasoning models (gpt-5 family) don't support temperature
+        is_reasoning_model = (
+            model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
         )

+        request_kwargs = {
+            'model': model,
+            'input': messages, # type: ignore
+            'max_output_tokens': max_tokens,
+            'text_format': response_model, # type: ignore
+        }
+
+        temperature_value = temperature if not is_reasoning_model else None
+        if temperature_value is not None:
+            request_kwargs['temperature'] = temperature_value
+
+        # Only include reasoning and verbosity parameters for reasoning models
+        if is_reasoning_model and reasoning is not None:
+            request_kwargs['reasoning'] = {'effort': reasoning} # type: ignore
+
+        if is_reasoning_model and verbosity is not None:
+            request_kwargs['text'] = {'verbosity': verbosity} # type: ignore
+
+        response = await self.client.responses.parse(**request_kwargs)
+
+        return response
+
     async def _create_completion(
         self,
         model: str,
@@ -84,12 +107,19 @@ class OpenAIClient(BaseOpenAIClient):
         temperature: float | None,
         max_tokens: int,
         response_model: type[BaseModel] | None = None,
+        reasoning: str | None = None,
+        verbosity: str | None = None,
     ):
         """Create a regular completion with JSON format."""
+        # Reasoning models (gpt-5 family) don't support temperature
+        is_reasoning_model = (
+            model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
+        )
+
         return await self.client.chat.completions.create(
             model=model,
             messages=messages,
-            temperature=temperature,
+            temperature=temperature if not is_reasoning_model else None,
             max_tokens=max_tokens,
             response_format={'type': 'json_object'},
         )
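Putting the two hunks together: for a non-reasoning model the structured path sends `model`, `input`, `max_output_tokens`, `text_format`, and `temperature`, while a gpt-5/o1/o3 model drops `temperature` and gains `reasoning` and `text`. A small hedged sketch of that selection logic in isolation, using the same checks as the diff but with local names so it can run standalone:

```python
# Standalone re-statement of the kwarg selection shown in the diff, for clarity.
# It mirrors _create_structured_completion's branching; names here are local only.
def build_request_kwargs(
    model: str,
    temperature: float | None,
    reasoning: str | None = 'minimal',
    verbosity: str | None = 'low',
) -> dict:
    is_reasoning_model = (
        model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
    )
    kwargs: dict = {'model': model}
    if not is_reasoning_model and temperature is not None:
        kwargs['temperature'] = temperature
    if is_reasoning_model and reasoning is not None:
        kwargs['reasoning'] = {'effort': reasoning}
    if is_reasoning_model and verbosity is not None:
        kwargs['text'] = {'verbosity': verbosity}
    return kwargs


assert 'temperature' in build_request_kwargs('gpt-4.1-mini', 0.2)
assert 'reasoning' in build_request_kwargs('gpt-5-mini', 0.2)
assert 'temperature' not in build_request_kwargs('gpt-5-mini', 0.2)
```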