mojentic 0.7.4__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _examples/broker_examples.py +22 -3
- _examples/fetch_openai_models.py +104 -0
- _examples/openai_gateway_enhanced_demo.py +140 -0
- mojentic/llm/gateways/openai.py +164 -31
- mojentic/llm/gateways/openai_model_registry.py +351 -0
- mojentic/llm/gateways/openai_model_registry_spec.py +181 -0
- mojentic/llm/gateways/openai_temperature_handling_spec.py +245 -0
- {mojentic-0.7.4.dist-info → mojentic-0.8.1.dist-info}/METADATA +23 -5
- {mojentic-0.7.4.dist-info → mojentic-0.8.1.dist-info}/RECORD +12 -7
- {mojentic-0.7.4.dist-info → mojentic-0.8.1.dist-info}/WHEEL +0 -0
- {mojentic-0.7.4.dist-info → mojentic-0.8.1.dist-info}/licenses/LICENSE.md +0 -0
- {mojentic-0.7.4.dist-info → mojentic-0.8.1.dist-info}/top_level.txt +0 -0
_examples/broker_examples.py
CHANGED
@@ -14,7 +14,7 @@ from mojentic.llm.gateways.models import LLMMessage
 from mojentic.llm.tools.date_resolver import ResolveDateTool
 
 
-def openai_llm(model="gpt-
+def openai_llm(model="gpt-5"):
     api_key = os.getenv("OPENAI_API_KEY")
     gateway = OpenAIGateway(api_key)
     llm = LLMBroker(model=model, gateway=gateway)
@@ -60,7 +60,26 @@ check_structured_output(openai_llm(model="o4-mini"))
 check_tool_use(openai_llm(model="o4-mini"))
 check_image_analysis(openai_llm(model="gpt-4o"))
 
-check_simple_textgen(ollama_llm())
-check_structured_output(ollama_llm())
+# check_simple_textgen(ollama_llm())
+# check_structured_output(ollama_llm())
 check_tool_use(ollama_llm(model="qwen3:32b"))
 check_image_analysis(ollama_llm(model="gemma3:27b"))
+
+# Test all GPT-5 model variants to confirm they're all reasoning models
+print("\n=== Testing GPT-5 Model Variants ===")
+gpt5_models = [
+    "gpt-5",
+    "gpt-5-2025-08-07",
+    "gpt-5-chat-latest",
+    "gpt-5-mini",
+    "gpt-5-mini-2025-08-07",
+    "gpt-5-nano",
+    "gpt-5-nano-2025-08-07"
+]
+
+for model in gpt5_models:
+    print(f"\n--- Testing {model} ---")
+    try:
+        check_simple_textgen(openai_llm(model=model))
+    except Exception as e:
+        print(f"Error with {model}: {e}")
_examples/fetch_openai_models.py
ADDED
@@ -0,0 +1,104 @@
+"""
+Script to fetch current OpenAI models and update the registry with up-to-date model lists.
+"""
+
+import os
+from mojentic.llm.gateways.openai import OpenAIGateway
+
+def fetch_current_openai_models():
+    """Fetch the current list of OpenAI models."""
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        print("ERROR: OPENAI_API_KEY environment variable not set")
+        return None
+
+    try:
+        gateway = OpenAIGateway(api_key)
+        models = gateway.get_available_models()
+        return models
+    except Exception as e:
+        print(f"ERROR: Failed to fetch models from OpenAI API: {e}")
+        return None
+
+def categorize_models(models):
+    """Categorize models by type based on naming patterns."""
+    reasoning_models = []
+    chat_models = []
+    embedding_models = []
+    other_models = []
+
+    for model in models:
+        model_lower = model.lower()
+
+        # Reasoning models: o1, o3, o4, and gpt-5 series
+        if (any(pattern in model_lower for pattern in ['o1-', 'o3-', 'o4-', 'gpt-5']) or
+                model_lower in ['o1', 'o3', 'o4', 'gpt-5']):
+            reasoning_models.append(model)
+        elif 'embedding' in model_lower:
+            embedding_models.append(model)
+        elif any(pattern in model_lower for pattern in ['gpt-4', 'gpt-3.5']):
+            chat_models.append(model)
+        else:
+            other_models.append(model)
+
+    return {
+        'reasoning': sorted(reasoning_models),
+        'chat': sorted(chat_models),
+        'embedding': sorted(embedding_models),
+        'other': sorted(other_models)
+    }
+
+def print_model_lists(categorized_models):
+    """Print the categorized models in a format ready for the registry."""
+    print("=== Current OpenAI Models ===\n")
+
+    print("# Reasoning Models (o1, o3, o4, gpt-5 series)")
+    print("reasoning_models = [")
+    for model in categorized_models['reasoning']:
+        print(f'    "{model}",')
+    print("]\n")
+
+    print("# Chat Models (GPT-4 and GPT-4.1 series)")
+    print("gpt4_and_newer_models = [")
+    gpt4_and_newer = [m for m in categorized_models['chat'] if 'gpt-4' in m.lower()]
+    for model in gpt4_and_newer:
+        print(f'    "{model}",')
+    print("]\n")
+
+    print("# Chat Models (GPT-3.5 series)")
+    print("gpt35_models = [")
+    gpt35 = [m for m in categorized_models['chat'] if 'gpt-3.5' in m.lower()]
+    for model in gpt35:
+        print(f'    "{model}",')
+    print("]\n")
+
+    print("# Embedding Models")
+    print("embedding_models = [")
+    for model in categorized_models['embedding']:
+        print(f'    "{model}",')
+    print("]\n")
+
+    print("# Other Models (for reference)")
+    print("# other_models = [")
+    for model in categorized_models['other']:
+        print(f'#     "{model}",')
+    print("# ]\n")
+
+if __name__ == "__main__":
+    print("Fetching current OpenAI models...")
+    models = fetch_current_openai_models()
+
+    if models:
+        print(f"Found {len(models)} models\n")
+        categorized = categorize_models(models)
+        print_model_lists(categorized)
+
+        print("\n=== Summary ===")
+        print(f"Reasoning models: {len(categorized['reasoning'])}")
+        print(f"Chat models: {len(categorized['chat'])}")
+        print(f"Embedding models: {len(categorized['embedding'])}")
+        print(f"Other models: {len(categorized['other'])}")
+
+        print("\nCopy the model lists above and update the _initialize_default_models() method in openai_model_registry.py")
+    else:
+        print("Failed to fetch models. Please check your API key and try again.")
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Demonstration of the enhanced OpenAI gateway with model registry system.
|
|
3
|
+
|
|
4
|
+
This script shows how the new infrastructure automatically handles parameter adaptation
|
|
5
|
+
for reasoning models vs chat models, provides detailed logging, and offers better
|
|
6
|
+
error handling.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from mojentic.llm.gateways.openai import OpenAIGateway
|
|
11
|
+
from mojentic.llm.gateways.openai_model_registry import get_model_registry
|
|
12
|
+
from mojentic.llm.gateways.models import LLMMessage, MessageRole
|
|
13
|
+
|
|
14
|
+
def demonstrate_model_registry():
|
|
15
|
+
"""Demonstrate the model registry capabilities."""
|
|
16
|
+
print("=== Model Registry Demonstration ===")
|
|
17
|
+
|
|
18
|
+
registry = get_model_registry()
|
|
19
|
+
|
|
20
|
+
print("\n1. Registry contains default models:")
|
|
21
|
+
registered_models = registry.get_registered_models()
|
|
22
|
+
reasoning_models = [m for m in registered_models if registry.is_reasoning_model(m)]
|
|
23
|
+
chat_models = [m for m in registered_models if not registry.is_reasoning_model(m) and not m.startswith('text-')]
|
|
24
|
+
|
|
25
|
+
print(f" Reasoning models: {reasoning_models[:3]}...") # Show first 3
|
|
26
|
+
print(f" Chat models: {chat_models[:3]}...") # Show first 3
|
|
27
|
+
|
|
28
|
+
print("\n2. Model capability detection:")
|
|
29
|
+
for model in ["o1-mini", "gpt-4o"]:
|
|
30
|
+
capabilities = registry.get_model_capabilities(model)
|
|
31
|
+
token_param = capabilities.get_token_limit_param()
|
|
32
|
+
print(f" {model}: type={capabilities.model_type.value}, token_param={token_param}")
|
|
33
|
+
|
|
34
|
+
# Handle unknown model separately to show the warning works
|
|
35
|
+
print("\n3. Unknown model handling:")
|
|
36
|
+
print(" unknown-future-model: (will default to chat model with warning)")
|
|
37
|
+
capabilities = registry.get_model_capabilities("unknown-future-model")
|
|
38
|
+
token_param = capabilities.get_token_limit_param()
|
|
39
|
+
print(f" → Defaulted to: type={capabilities.model_type.value}, token_param={token_param}")
|
|
40
|
+
|
|
41
|
+
def demonstrate_parameter_adaptation():
|
|
42
|
+
"""Demonstrate parameter adaptation for different model types."""
|
|
43
|
+
print("\n=== Parameter Adaptation Demonstration ===")
|
|
44
|
+
|
|
45
|
+
# This would normally require an API key, but we're just showing the adaptation logic
|
|
46
|
+
gateway = OpenAIGateway("fake-key-for-demo")
|
|
47
|
+
|
|
48
|
+
print("\n1. Reasoning model parameter adaptation (o1-mini):")
|
|
49
|
+
original_args = {
|
|
50
|
+
'model': 'o1-mini',
|
|
51
|
+
'messages': [LLMMessage(role=MessageRole.User, content="Hello")],
|
|
52
|
+
'max_tokens': 1000,
|
|
53
|
+
'tools': [] # Tools will be removed for reasoning models
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
adapted_args = gateway._adapt_parameters_for_model('o1-mini', original_args)
|
|
57
|
+
print(f" Original: max_tokens={original_args.get('max_tokens')}, has_tools={'tools' in original_args}")
|
|
58
|
+
print(f" Adapted: max_completion_tokens={adapted_args.get('max_completion_tokens')}, has_tools={'tools' in adapted_args}")
|
|
59
|
+
|
|
60
|
+
print("\n2. Chat model parameter adaptation (gpt-4o):")
|
|
61
|
+
original_args = {
|
|
62
|
+
'model': 'gpt-4o',
|
|
63
|
+
'messages': [LLMMessage(role=MessageRole.User, content="Hello")],
|
|
64
|
+
'max_tokens': 1000,
|
|
65
|
+
'tools': []
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
adapted_args = gateway._adapt_parameters_for_model('gpt-4o', original_args)
|
|
69
|
+
print(f" Original: max_tokens={original_args.get('max_tokens')}, has_tools={'tools' in original_args}")
|
|
70
|
+
print(f" Adapted: max_tokens={adapted_args.get('max_tokens')}, has_tools={'tools' in adapted_args}")
|
|
71
|
+
|
|
72
|
+
def demonstrate_model_validation():
|
|
73
|
+
"""Demonstrate model parameter validation."""
|
|
74
|
+
print("\n=== Model Validation Demonstration ===")
|
|
75
|
+
|
|
76
|
+
gateway = OpenAIGateway("fake-key-for-demo")
|
|
77
|
+
|
|
78
|
+
print("\n1. Validating parameters for reasoning model:")
|
|
79
|
+
args = {
|
|
80
|
+
'model': 'o1-mini',
|
|
81
|
+
'messages': [LLMMessage(role=MessageRole.User, content="Hello")],
|
|
82
|
+
'max_tokens': 50000, # High token count - will show warning
|
|
83
|
+
'tools': [] # Tools for reasoning model - will show warning
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
try:
|
|
87
|
+
gateway._validate_model_parameters('o1-mini', args)
|
|
88
|
+
print(" Validation completed (check logs above for warnings)")
|
|
89
|
+
except Exception as e:
|
|
90
|
+
print(f" Validation error: {e}")
|
|
91
|
+
|
|
92
|
+
def demonstrate_registry_extensibility():
|
|
93
|
+
"""Demonstrate how to extend the registry with new models."""
|
|
94
|
+
print("\n=== Registry Extensibility Demonstration ===")
|
|
95
|
+
|
|
96
|
+
registry = get_model_registry()
|
|
97
|
+
|
|
98
|
+
print("\n1. Adding a new model to the registry:")
|
|
99
|
+
from mojentic.llm.gateways.openai_model_registry import ModelCapabilities, ModelType
|
|
100
|
+
|
|
101
|
+
new_capabilities = ModelCapabilities(
|
|
102
|
+
model_type=ModelType.REASONING,
|
|
103
|
+
supports_tools=True, # Hypothetical future reasoning model with tools
|
|
104
|
+
supports_streaming=True,
|
|
105
|
+
max_output_tokens=100000
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
registry.register_model("o5-preview", new_capabilities)
|
|
109
|
+
print(f" Registered o5-preview as reasoning model")
|
|
110
|
+
|
|
111
|
+
# Test the new model
|
|
112
|
+
capabilities = registry.get_model_capabilities("o5-preview")
|
|
113
|
+
print(f" o5-preview: type={capabilities.model_type.value}, supports_tools={capabilities.supports_tools}")
|
|
114
|
+
|
|
115
|
+
print("\n2. Adding a new pattern for model detection:")
|
|
116
|
+
registry.register_pattern("claude", ModelType.CHAT)
|
|
117
|
+
print(" Registered 'claude' pattern for chat models")
|
|
118
|
+
|
|
119
|
+
# Test pattern matching
|
|
120
|
+
capabilities = registry.get_model_capabilities("claude-3-opus")
|
|
121
|
+
print(f" claude-3-opus (inferred): type={capabilities.model_type.value}")
|
|
122
|
+
|
|
123
|
+
if __name__ == "__main__":
|
|
124
|
+
print("OpenAI Gateway Enhanced Infrastructure Demo")
|
|
125
|
+
print("=" * 50)
|
|
126
|
+
|
|
127
|
+
demonstrate_model_registry()
|
|
128
|
+
demonstrate_parameter_adaptation()
|
|
129
|
+
demonstrate_model_validation()
|
|
130
|
+
demonstrate_registry_extensibility()
|
|
131
|
+
|
|
132
|
+
print("\n" + "=" * 50)
|
|
133
|
+
print("Demo completed!")
|
|
134
|
+
print("\nKey Benefits of the New Infrastructure:")
|
|
135
|
+
print("✓ Registry-based model management (easy to extend)")
|
|
136
|
+
print("✓ Automatic parameter adaptation (max_tokens ↔ max_completion_tokens)")
|
|
137
|
+
print("✓ Enhanced logging for debugging")
|
|
138
|
+
print("✓ Parameter validation with helpful warnings")
|
|
139
|
+
print("✓ Pattern matching for unknown models")
|
|
140
|
+
print("✓ Comprehensive test coverage")
|
mojentic/llm/gateways/openai.py
CHANGED
@@ -1,15 +1,18 @@
 import json
 from itertools import islice
-from typing import Type, List, Iterable
+from typing import Type, List, Iterable, Optional
 
 import numpy as np
 import structlog
-from openai import OpenAI
+from openai import OpenAI, BadRequestError
+from pydantic import BaseModel
 
 from mojentic.llm.gateways.llm_gateway import LLMGateway
-from mojentic.llm.gateways.models import LLMToolCall, LLMGatewayResponse
+from mojentic.llm.gateways.models import LLMToolCall, LLMGatewayResponse, LLMMessage
 from mojentic.llm.gateways.openai_messages_adapter import adapt_messages_to_openai
+from mojentic.llm.gateways.openai_model_registry import get_model_registry, ModelType
 from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
+from mojentic.llm.tools.llm_tool import LLMTool
 
 logger = structlog.get_logger()
 
@@ -24,8 +27,9 @@ class OpenAIGateway(LLMGateway):
         The OpenAI API key to use.
     """
 
-    def __init__(self, api_key: str, base_url: str = None):
+    def __init__(self, api_key: str, base_url: Optional[str] = None):
        self.client = OpenAI(api_key=api_key, base_url=base_url)
+        self.model_registry = get_model_registry()
 
     def _is_reasoning_model(self, model: str) -> bool:
         """
@@ -41,21 +45,11 @@ class OpenAIGateway(LLMGateway):
         bool
             True if the model is a reasoning model, False if it's a chat model.
         """
-
-        reasoning_model_patterns = [
-            "o1-",
-            "o3-",
-            "o4-",
-            "o1",
-            "o3"
-        ]
-
-        model_lower = model.lower()
-        return any(pattern in model_lower for pattern in reasoning_model_patterns)
+        return self.model_registry.is_reasoning_model(model)
 
     def _adapt_parameters_for_model(self, model: str, args: dict) -> dict:
         """
-        Adapt parameters based on the model type.
+        Adapt parameters based on the model type and capabilities.
 
         Parameters
         ----------
@@ -70,26 +64,101 @@ class OpenAIGateway(LLMGateway):
             The adapted arguments with correct parameter names for the model type.
         """
         adapted_args = args.copy()
+        capabilities = self.model_registry.get_model_capabilities(model)
 
-
-
-
-
-
+        logger.debug("Adapting parameters for model",
+                     model=model,
+                     model_type=capabilities.model_type.value,
+                     supports_tools=capabilities.supports_tools,
+                     supports_streaming=capabilities.supports_streaming)
+
+        # Handle token limit parameter conversion
+        if 'max_tokens' in adapted_args:
+            token_param = capabilities.get_token_limit_param()
+            if token_param != 'max_tokens':
+                # Convert max_tokens to max_completion_tokens for reasoning models
+                adapted_args[token_param] = adapted_args.pop('max_tokens')
+                logger.info("Converted token limit parameter for model",
+                            model=model,
+                            from_param='max_tokens',
+                            to_param=token_param,
+                            value=adapted_args[token_param])
+
+        # Validate tool usage for models that don't support tools
+        if 'tools' in adapted_args and adapted_args['tools'] and not capabilities.supports_tools:
+            logger.warning("Model does not support tools, removing tool configuration",
+                           model=model,
+                           num_tools=len(adapted_args['tools']))
+            adapted_args['tools'] = None  # Set to None instead of removing the key
+
+        # Handle temperature restrictions for specific models
+        if 'temperature' in adapted_args:
+            temperature = adapted_args['temperature']
+
+            # Check if model supports temperature parameter at all
+            if capabilities.supported_temperatures == []:
+                # Model doesn't support temperature parameter at all - remove it
+                logger.warning("Model does not support temperature parameter, removing it",
+                               model=model,
+                               requested_temperature=temperature)
+                adapted_args.pop('temperature', None)
+            elif not capabilities.supports_temperature(temperature):
+                # Model supports temperature but not this specific value - use default
+                default_temp = 1.0
+                logger.warning("Model does not support requested temperature, using default",
+                               model=model,
+                               requested_temperature=temperature,
+                               default_temperature=default_temp,
+                               supported_temperatures=capabilities.supported_temperatures)
+                adapted_args['temperature'] = default_temp
 
         return adapted_args
 
-    def
+    def _validate_model_parameters(self, model: str, args: dict) -> None:
+        """
+        Validate that the parameters are compatible with the model.
+
+        Parameters
+        ----------
+        model : str
+            The model name.
+        args : dict
+            The arguments to validate.
+        """
+        capabilities = self.model_registry.get_model_capabilities(model)
+
+        # Warning for tools on reasoning models that don't support them
+        if (capabilities.model_type == ModelType.REASONING and
+                not capabilities.supports_tools and
+                'tools' in args and args['tools']):
+            logger.warning(
+                "Reasoning model may not support tools",
+                model=model,
+                num_tools=len(args['tools'])
+            )
+
+        # Validate token limits (check both possible parameter names)
+        token_value = args.get('max_tokens') or args.get('max_completion_tokens')
+        if token_value and capabilities.max_output_tokens:
+            if token_value > capabilities.max_output_tokens:
+                logger.warning(
+                    "Requested token limit exceeds model maximum",
+                    model=model,
+                    requested=token_value,
+                    max_allowed=capabilities.max_output_tokens
+                )
+
+    def complete(self, **kwargs) -> LLMGatewayResponse:
         """
         Complete the LLM request by delegating to the OpenAI service.
 
         Keyword Arguments
         ----------------
         model : str
-            The name of the model to use
+            The name of the model to use.
         messages : List[LLMMessage]
             A list of messages to send to the LLM.
-        object_model : Optional[BaseModel]
+        object_model : Optional[Type[BaseModel]]
             The model to use for validating the response.
         tools : Optional[List[LLMTool]]
             A list of tools to use with the LLM. If a tool call is requested, the tool will be called and the output
@@ -108,21 +177,61 @@ class OpenAIGateway(LLMGateway):
         LLMGatewayResponse
             The response from the OpenAI service.
         """
+        # Extract parameters from kwargs with defaults
+        model = kwargs.get('model')
+        messages = kwargs.get('messages')
+        object_model = kwargs.get('object_model', None)
+        tools = kwargs.get('tools', None)
+        temperature = kwargs.get('temperature', 1.0)
+        num_ctx = kwargs.get('num_ctx', 32768)
+        max_tokens = kwargs.get('max_tokens', 16384)
+        num_predict = kwargs.get('num_predict', -1)
+
+        if not model:
+            raise ValueError("'model' parameter is required")
+        if not messages:
+            raise ValueError("'messages' parameter is required")
+
+        # Convert parameters to dict for processing
+        args = {
+            'model': model,
+            'messages': messages,
+            'object_model': object_model,
+            'tools': tools,
+            'temperature': temperature,
+            'num_ctx': num_ctx,
+            'max_tokens': max_tokens,
+            'num_predict': num_predict
+        }
+
         # Adapt parameters based on model type
-
+        try:
+            adapted_args = self._adapt_parameters_for_model(model, args)
+        except Exception as e:
+            logger.error("Failed to adapt parameters for model",
+                         model=model,
+                         error=str(e))
+            raise
+
+        # Validate parameters after adaptation
+        self._validate_model_parameters(model, adapted_args)
 
         openai_args = {
             'model': adapted_args['model'],
             'messages': adapt_messages_to_openai(adapted_args['messages']),
         }
 
+        # Add temperature if specified
+        if 'temperature' in adapted_args:
+            openai_args['temperature'] = adapted_args['temperature']
+
         completion = self.client.chat.completions.create
 
-        if
+        if adapted_args['object_model'] is not None:
             completion = self.client.beta.chat.completions.parse
             openai_args['response_format'] = adapted_args['object_model']
 
-        if
+        if adapted_args.get('tools') is not None:
             openai_args['tools'] = [t.descriptor for t in adapted_args['tools']]
 
         # Handle both max_tokens (for chat models) and max_completion_tokens (for reasoning models)
@@ -131,18 +240,42 @@ class OpenAIGateway(LLMGateway):
         elif 'max_completion_tokens' in adapted_args:
             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']
 
-
+        logger.debug("Making OpenAI API call",
+                     model=openai_args['model'],
+                     has_tools='tools' in openai_args,
+                     has_object_model='response_format' in openai_args,
+                     token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')
+
+        try:
+            response = completion(**openai_args)
+        except BadRequestError as e:
+            # Enhanced error handling for parameter issues
+            if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
+                logger.error("Parameter error detected - model may require different token parameter",
+                             model=model,
+                             error=str(e),
+                             suggestion="This model may be a reasoning model requiring max_completion_tokens")
+            raise e
+        except Exception as e:
+            logger.error("OpenAI API call failed",
+                         model=model,
+                         error=str(e))
+            raise e
 
         object = None
         tool_calls: List[LLMToolCall] = []
 
-        if 'object_model'
+        if adapted_args.get('object_model') is not None:
             try:
                 response_content = response.choices[0].message.content
-
+                if response_content is not None:
+                    object = adapted_args['object_model'].model_validate_json(response_content)
+                else:
+                    logger.error("No response content available for object validation", object_model=adapted_args['object_model'])
             except Exception as e:
+                response_content = response.choices[0].message.content if response.choices else "No response content"
                 logger.error("Failed to validate model", error=str(e), response=response_content,
-                             object_model=
+                             object_model=adapted_args['object_model'])
 
         if response.choices[0].message.tool_calls is not None:
             for t in response.choices[0].message.tool_calls: