mojentic 0.8.4__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _examples/async_dispatcher_example.py +12 -4
- _examples/async_llm_example.py +1 -2
- _examples/broker_as_tool.py +42 -17
- _examples/broker_examples.py +5 -7
- _examples/broker_image_examples.py +1 -1
- _examples/characterize_ollama.py +3 -3
- _examples/characterize_openai.py +1 -1
- _examples/chat_session.py +2 -2
- _examples/chat_session_with_tool.py +2 -2
- _examples/coding_file_tool.py +16 -18
- _examples/current_datetime_tool_example.py +2 -2
- _examples/embeddings.py +1 -1
- _examples/ephemeral_task_manager_example.py +15 -11
- _examples/fetch_openai_models.py +10 -3
- _examples/file_deduplication.py +6 -6
- _examples/file_tool.py +5 -5
- _examples/image_analysis.py +2 -3
- _examples/image_broker.py +1 -1
- _examples/image_broker_splat.py +1 -1
- _examples/iterative_solver.py +3 -3
- _examples/model_characterization.py +2 -0
- _examples/openai_gateway_enhanced_demo.py +15 -5
- _examples/raw.py +1 -1
- _examples/react/agents/decisioning_agent.py +173 -15
- _examples/react/agents/summarization_agent.py +89 -0
- _examples/react/agents/thinking_agent.py +84 -14
- _examples/react/agents/tool_call_agent.py +83 -0
- _examples/react/formatters.py +38 -4
- _examples/react/models/base.py +60 -11
- _examples/react/models/events.py +76 -8
- _examples/react.py +71 -21
- _examples/recursive_agent.py +2 -2
- _examples/simple_llm.py +3 -3
- _examples/simple_llm_repl.py +1 -1
- _examples/simple_structured.py +1 -1
- _examples/simple_tool.py +2 -2
- _examples/solver_chat_session.py +5 -11
- _examples/streaming.py +36 -18
- _examples/tell_user_example.py +4 -4
- _examples/tracer_demo.py +18 -20
- _examples/tracer_qt_viewer.py +49 -46
- _examples/working_memory.py +1 -1
- mojentic/__init__.py +3 -3
- mojentic/agents/__init__.py +26 -8
- mojentic/agents/{agent_broker.py → agent_event_adapter.py} +3 -3
- mojentic/agents/async_aggregator_agent_spec.py +32 -33
- mojentic/agents/async_llm_agent.py +9 -5
- mojentic/agents/async_llm_agent_spec.py +21 -22
- mojentic/agents/base_async_agent.py +2 -2
- mojentic/agents/base_llm_agent.py +6 -2
- mojentic/agents/iterative_problem_solver.py +11 -5
- mojentic/agents/simple_recursive_agent.py +11 -10
- mojentic/agents/simple_recursive_agent_spec.py +423 -0
- mojentic/async_dispatcher.py +0 -1
- mojentic/async_dispatcher_spec.py +1 -1
- mojentic/context/__init__.py +0 -2
- mojentic/dispatcher.py +7 -8
- mojentic/llm/__init__.py +5 -5
- mojentic/llm/gateways/__init__.py +19 -18
- mojentic/llm/gateways/anthropic.py +1 -0
- mojentic/llm/gateways/anthropic_messages_adapter.py +0 -1
- mojentic/llm/gateways/llm_gateway.py +1 -1
- mojentic/llm/gateways/ollama.py +23 -18
- mojentic/llm/gateways/openai.py +243 -44
- mojentic/llm/gateways/openai_message_adapter_spec.py +3 -3
- mojentic/llm/gateways/openai_model_registry.py +7 -6
- mojentic/llm/gateways/openai_model_registry_spec.py +1 -2
- mojentic/llm/gateways/openai_temperature_handling_spec.py +2 -2
- mojentic/llm/llm_broker.py +162 -2
- mojentic/llm/llm_broker_spec.py +76 -2
- mojentic/llm/message_composers.py +6 -3
- mojentic/llm/message_composers_spec.py +5 -1
- mojentic/llm/registry/__init__.py +0 -3
- mojentic/llm/registry/populate_registry_from_ollama.py +2 -2
- mojentic/llm/tools/__init__.py +0 -9
- mojentic/llm/tools/ask_user_tool.py +11 -5
- mojentic/llm/tools/current_datetime.py +9 -6
- mojentic/llm/tools/date_resolver.py +10 -4
- mojentic/llm/tools/date_resolver_spec.py +0 -1
- mojentic/llm/tools/ephemeral_task_manager/append_task_tool.py +4 -1
- mojentic/llm/tools/ephemeral_task_manager/ephemeral_task_list.py +1 -1
- mojentic/llm/tools/ephemeral_task_manager/insert_task_after_tool.py +4 -1
- mojentic/llm/tools/ephemeral_task_manager/prepend_task_tool.py +5 -2
- mojentic/llm/tools/file_manager.py +131 -28
- mojentic/llm/tools/file_manager_spec.py +0 -3
- mojentic/llm/tools/llm_tool.py +1 -1
- mojentic/llm/tools/llm_tool_spec.py +0 -2
- mojentic/llm/tools/organic_web_search.py +4 -2
- mojentic/llm/tools/tell_user_tool.py +6 -2
- mojentic/llm/tools/tool_wrapper.py +2 -2
- mojentic/tracer/__init__.py +1 -10
- mojentic/tracer/event_store.py +7 -8
- mojentic/tracer/event_store_spec.py +1 -2
- mojentic/tracer/null_tracer.py +37 -43
- mojentic/tracer/tracer_events.py +8 -2
- mojentic/tracer/tracer_events_spec.py +6 -7
- mojentic/tracer/tracer_system.py +37 -36
- mojentic/tracer/tracer_system_spec.py +21 -6
- mojentic/utils/__init__.py +1 -1
- mojentic/utils/formatting.py +1 -0
- {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/METADATA +76 -27
- mojentic-1.0.0.dist-info/RECORD +149 -0
- mojentic-0.8.4.dist-info/RECORD +0 -146
- {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/WHEEL +0 -0
- {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/licenses/LICENSE.md +0 -0
- {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/top_level.txt +0 -0
mojentic/async_dispatcher.py
CHANGED
mojentic/context/__init__.py
CHANGED
mojentic/dispatcher.py
CHANGED
@@ -1,7 +1,6 @@
 import logging
 import threading
 from time import sleep
-from typing import Optional, Type
 from uuid import uuid4

 import structlog
@@ -18,7 +17,7 @@ class Dispatcher:
         self.event_queue = []
         self._stop_event = threading.Event()
         self._thread = threading.Thread(target=self._dispatch_events)
-
+
         # Use null_tracer if no tracer is provided
         from mojentic.tracer import null_tracer
         self.tracer = tracer or null_tracer
@@ -49,16 +48,16 @@ class Dispatcher:
         events = []
         for agent in agents:
             logger.debug(f"Sending event to agent {agent}")
-
+
             # Record agent interaction in tracer system
             self.tracer.record_agent_interaction(
                 from_agent=str(event.source),
                 to_agent=str(type(agent)),
-
-
-
-
-
+                event_type=str(type(event).__name__),
+                event_id=event.correlation_id,
+                source=type(self)
+            )
+
             # Process the event through the agent
             received_events = agent.receive_event(event)
             logger.debug(f"Agent {agent} returned {len(events)} events")
mojentic/llm/__init__.py
CHANGED
@@ -3,13 +3,13 @@ Mojentic LLM module for interacting with Large Language Models.
 """

 # Main LLM components
-from .llm_broker import LLMBroker
-from .chat_session import ChatSession
-from .message_composers import MessageBuilder, FileTypeSensor
-from .registry.llm_registry import LLMRegistry
+from .llm_broker import LLMBroker  # noqa: F401
+from .chat_session import ChatSession  # noqa: F401
+from .message_composers import MessageBuilder, FileTypeSensor  # noqa: F401
+from .registry.llm_registry import LLMRegistry  # noqa: F401

 # Re-export gateway components at the LLM level
-from .gateways.models import (
+from .gateways.models import (  # noqa: F401
     LLMMessage,
     LLMGatewayResponse,
     MessageRole
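These re-exports define the package-level import surface, and the added "# noqa: F401" comments tell linters that the unused-looking imports are intentional re-exports. A minimal sketch of importing through that surface, using only names visible in this hunk (the specific combination is illustrative):

    # Sketch only: names re-exported by mojentic/llm/__init__.py as of 1.0.0.
    from mojentic.llm import LLMBroker, ChatSession, MessageBuilder, LLMMessage, MessageRole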
mojentic/llm/gateways/__init__.py
CHANGED

@@ -3,23 +3,24 @@ Mojentic LLM gateways module for connecting to various LLM providers.
 """

 # Gateway implementations
-from .llm_gateway import LLMGateway
-from .ollama import OllamaGateway
-from .openai import OpenAIGateway
-from .anthropic import AnthropicGateway
-from .
-from .embeddings_gateway import EmbeddingsGateway
-from .tokenizer_gateway import TokenizerGateway
-
-# Message adapters
-from .anthropic_messages_adapter import adapt_messages_to_anthropic
-from .ollama_messages_adapter import adapt_messages_to_ollama
-from .openai_messages_adapter import adapt_messages_to_openai
+from mojentic.llm.gateways.llm_gateway import LLMGateway
+from mojentic.llm.gateways.ollama import OllamaGateway
+from mojentic.llm.gateways.openai import OpenAIGateway
+from mojentic.llm.gateways.anthropic import AnthropicGateway
+from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
+from mojentic.llm.gateways.embeddings_gateway import EmbeddingsGateway

 # Common models
-from .models import
-
-
-
-
-
+from mojentic.llm.gateways.models import LLMMessage, LLMToolCall, LLMGatewayResponse
+
+__all__ = [
+    "LLMGateway",
+    "OllamaGateway",
+    "OpenAIGateway",
+    "AnthropicGateway",
+    "TokenizerGateway",
+    "EmbeddingsGateway",
+    "LLMMessage",
+    "LLMToolCall",
+    "LLMGatewayResponse",
+]
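The new __all__ makes the gateway package's public names explicit and the imports absolute rather than relative. A minimal sketch of that surface, using only names from the __all__ list above:

    # Sketch only: every imported name appears in the __all__ added above.
    from mojentic.llm.gateways import (
        LLMGateway,
        OllamaGateway,
        OpenAIGateway,
        AnthropicGateway,
        LLMMessage,
        LLMGatewayResponse,
    )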
mojentic/llm/gateways/ollama.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import List, Iterator
+from typing import List, Iterator, Optional
 import structlog
 from ollama import Client, Options, ChatResponse
 from pydantic import BaseModel
@@ -9,9 +9,21 @@ from mojentic.llm.gateways.ollama_messages_adapter import adapt_messages_to_ollama

 logger = structlog.get_logger()

+
 class StreamingResponse(BaseModel):
-    """
-
+    """
+    Wrapper for streaming response chunks.
+
+    Attributes
+    ----------
+    content : Optional[str]
+        Text content chunk from the LLM response.
+    tool_calls : Optional[List]
+        Tool calls from the LLM response (raw ollama format).
+    """
+    content: Optional[str] = None
+    tool_calls: Optional[List] = None
+

 class OllamaGateway(LLMGateway):
     """
@@ -144,28 +156,21 @@ class OllamaGateway(LLMGateway):
             'stream': True
         }

-        #
-
-
-        # when using tools. This limits our ability to explore streaming capabilities
-        # in the mojentic API, so I'm pausing this work for now until this is resolved.
-        # https://github.com/ollama/ollama/issues/7886
-        #
-
-        # if 'tools' in args and args['tools'] is not None:
-        #     ollama_args['tools'] = [t.descriptor for t in args['tools']]
+        # Enable tool support if tools are provided
+        if 'tools' in args and args['tools'] is not None:
+            ollama_args['tools'] = [t.descriptor for t in args['tools']]

         stream = self.client.chat(**ollama_args)

         for chunk in stream:
             if chunk.message:
+                # Yield content chunks as they arrive
                 if chunk.message.content:
                     yield StreamingResponse(content=chunk.message.content)
-
-                #
-
-
-                # )
+
+                # Yield tool calls when they arrive
+                if chunk.message.tool_calls:
+                    yield StreamingResponse(tool_calls=chunk.message.tool_calls)

     def get_available_models(self) -> List[str]:
         """
mojentic/llm/gateways/openai.py
CHANGED
@@ -1,19 +1,18 @@
 import json
 import os
 from itertools import islice
-from typing import
+from typing import List, Iterable, Optional, Iterator, Dict

 import numpy as np
 import structlog
 from openai import OpenAI, BadRequestError
-from pydantic import BaseModel

 from mojentic.llm.gateways.llm_gateway import LLMGateway
-from mojentic.llm.gateways.models import LLMToolCall, LLMGatewayResponse
+from mojentic.llm.gateways.models import LLMToolCall, LLMGatewayResponse
 from mojentic.llm.gateways.openai_messages_adapter import adapt_messages_to_openai
 from mojentic.llm.gateways.openai_model_registry import get_model_registry, ModelType
 from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
-from mojentic.llm.
+from mojentic.llm.gateways.ollama import StreamingResponse

 logger = structlog.get_logger()
@@ -76,10 +75,10 @@ class OpenAIGateway(LLMGateway):
         capabilities = self.model_registry.get_model_capabilities(model)

         logger.debug("Adapting parameters for model",
-
-
-
-
+                     model=model,
+                     model_type=capabilities.model_type.value,
+                     supports_tools=capabilities.supports_tools,
+                     supports_streaming=capabilities.supports_streaming)

         # Handle token limit parameter conversion
         if 'max_tokens' in adapted_args:
@@ -88,16 +87,16 @@ class OpenAIGateway(LLMGateway):
             # Convert max_tokens to max_completion_tokens for reasoning models
             adapted_args[token_param] = adapted_args.pop('max_tokens')
             logger.info("Converted token limit parameter for model",
-
-
-
-
+                        model=model,
+                        from_param='max_tokens',
+                        to_param=token_param,
+                        value=adapted_args[token_param])

         # Validate tool usage for models that don't support tools
         if 'tools' in adapted_args and adapted_args['tools'] and not capabilities.supports_tools:
             logger.warning("Model does not support tools, removing tool configuration",
-
-
+                           model=model,
+                           num_tools=len(adapted_args['tools']))
             adapted_args['tools'] = None  # Set to None instead of removing the key

         # Handle temperature restrictions for specific models
@@ -107,18 +106,19 @@ class OpenAIGateway(LLMGateway):
             # Check if model supports temperature parameter at all
             if capabilities.supported_temperatures == []:
                 # Model doesn't support temperature parameter at all - remove it
-                logger.warning("Model does not support temperature parameter
-
-
+                logger.warning("Model does not support temperature parameter at all",
+                               model=model,
+                               requested_temperature=temperature)
                 adapted_args.pop('temperature', None)
             elif not capabilities.supports_temperature(temperature):
                 # Model supports temperature but not this specific value - use default
                 default_temp = 1.0
-                logger.warning(
-
-
-
-
+                logger.warning(
+                    "Model does not support requested temperature, using default",
+                    model=model,
+                    requested_temperature=temperature,
+                    default_temperature=default_temp,
+                    supported_temperatures=capabilities.supported_temperatures)
                 adapted_args['temperature'] = default_temp

         return adapted_args
@@ -138,13 +138,12 @@ class OpenAIGateway(LLMGateway):

         # Warning for tools on reasoning models that don't support them
         if (capabilities.model_type == ModelType.REASONING and
-
-
+                not capabilities.supports_tools and
+                'tools' in args and args['tools']):
             logger.warning(
                 "Reasoning model may not support tools",
                 model=model,
-                num_tools=len(args['tools'])
-            )
+                num_tools=len(args['tools']))

         # Validate token limits (check both possible parameter names)
         token_value = args.get('max_tokens') or args.get('max_completion_tokens')
@@ -154,8 +153,7 @@ class OpenAIGateway(LLMGateway):
                 "Requested token limit exceeds model maximum",
                 model=model,
                 requested=token_value,
-                max_allowed=capabilities.max_output_tokens
-            )
+                max_allowed=capabilities.max_output_tokens)

     def complete(self, **kwargs) -> LLMGatewayResponse:
         """
@@ -218,8 +216,8 @@ class OpenAIGateway(LLMGateway):
             adapted_args = self._adapt_parameters_for_model(model, args)
         except Exception as e:
             logger.error("Failed to adapt parameters for model",
-
-
+                         model=model,
+                         error=str(e))
             raise

         # Validate parameters after adaptation
@@ -250,25 +248,26 @@ class OpenAIGateway(LLMGateway):
             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

         logger.debug("Making OpenAI API call",
-
-
-
-
+                     model=openai_args['model'],
+                     has_tools='tools' in openai_args,
+                     has_object_model='response_format' in openai_args,
+                     token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

         try:
             response = completion(**openai_args)
         except BadRequestError as e:
             # Enhanced error handling for parameter issues
             if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
-                logger.error(
-
-
-
+                logger.error(
+                    "Parameter error detected - model may require different token parameter",
+                    model=model,
+                    error=str(e),
+                    suggestion="This model may be a reasoning model requiring max_completion_tokens")
             raise e
         except Exception as e:
             logger.error("OpenAI API call failed",
-
-
+                         model=model,
+                         error=str(e))
             raise e

         object = None
@@ -280,11 +279,16 @@ class OpenAIGateway(LLMGateway):
                 if response_content is not None:
                     object = adapted_args['object_model'].model_validate_json(response_content)
                 else:
-                    logger.error(
+                    logger.error(
+                        "No response content available for object validation",
+                        object_model=adapted_args['object_model'])
             except Exception as e:
-                response_content = response.choices[0].message.content
-
-
+                response_content = (response.choices[0].message.content
+                                    if response.choices else "No response content")
+                logger.error("Failed to validate model",
+                             error=str(e),
+                             response=response_content,
+                             object_model=adapted_args['object_model'])

         if response.choices[0].message.tool_calls is not None:
             for t in response.choices[0].message.tool_calls:
@@ -301,6 +305,201 @@ class OpenAIGateway(LLMGateway):
             tool_calls=tool_calls,
         )

+    def complete_stream(self, **kwargs) -> Iterator[StreamingResponse]:
+        """
+        Stream the LLM response from OpenAI service.
+
+        OpenAI streams tool call arguments incrementally, so we need to accumulate them
+        and yield complete tool calls only when the stream finishes.
+
+        Keyword Arguments
+        ----------------
+        model : str
+            The name of the model to use.
+        messages : List[LLMMessage]
+            A list of messages to send to the LLM.
+        tools : Optional[List[LLMTool]]
+            A list of tools to use with the LLM. Tool calls will be accumulated and yielded when complete.
+        temperature : float, optional
+            The temperature to use for the response. Defaults to 1.0.
+        num_ctx : int, optional
+            The number of context tokens to use. Defaults to 32768.
+        max_tokens : int, optional
+            The maximum number of tokens to generate. Defaults to 16384.
+        num_predict : int, optional
+            The number of tokens to predict. Defaults to no limit.
+
+        Returns
+        -------
+        Iterator[StreamingResponse]
+            An iterator of StreamingResponse objects containing response chunks.
+        """
+        # Extract parameters from kwargs with defaults
+        model = kwargs.get('model')
+        messages = kwargs.get('messages')
+        object_model = kwargs.get('object_model', None)
+        tools = kwargs.get('tools', None)
+        temperature = kwargs.get('temperature', 1.0)
+        num_ctx = kwargs.get('num_ctx', 32768)
+        max_tokens = kwargs.get('max_tokens', 16384)
+        num_predict = kwargs.get('num_predict', -1)
+
+        if not model:
+            raise ValueError("'model' parameter is required")
+        if not messages:
+            raise ValueError("'messages' parameter is required")
+
+        # Convert parameters to dict for processing
+        args = {
+            'model': model,
+            'messages': messages,
+            'object_model': object_model,
+            'tools': tools,
+            'temperature': temperature,
+            'num_ctx': num_ctx,
+            'max_tokens': max_tokens,
+            'num_predict': num_predict
+        }
+
+        # Adapt parameters based on model type
+        try:
+            adapted_args = self._adapt_parameters_for_model(model, args)
+        except Exception as e:
+            logger.error("Failed to adapt parameters for model",
+                         model=model,
+                         error=str(e))
+            raise
+
+        # Validate parameters after adaptation
+        self._validate_model_parameters(model, adapted_args)
+
+        # Check if model supports streaming
+        capabilities = self.model_registry.get_model_capabilities(model)
+        if not capabilities.supports_streaming:
+            raise NotImplementedError(f"Model {model} does not support streaming")
+
+        # Structured output doesn't work with streaming
+        if adapted_args['object_model'] is not None:
+            raise NotImplementedError("Streaming with structured output (object_model) is not supported")
+
+        openai_args = {
+            'model': adapted_args['model'],
+            'messages': adapt_messages_to_openai(adapted_args['messages']),
+            'stream': True,
+        }
+
+        # Add temperature if specified
+        if 'temperature' in adapted_args:
+            openai_args['temperature'] = adapted_args['temperature']
+
+        if adapted_args.get('tools') is not None:
+            openai_args['tools'] = [t.descriptor for t in adapted_args['tools']]
+
+        # Handle both max_tokens (for chat models) and max_completion_tokens (for reasoning models)
+        if 'max_tokens' in adapted_args:
+            openai_args['max_tokens'] = adapted_args['max_tokens']
+        elif 'max_completion_tokens' in adapted_args:
+            openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']
+
+        logger.debug("Making OpenAI streaming API call",
+                     model=openai_args['model'],
+                     has_tools='tools' in openai_args,
+                     token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')
+
+        try:
+            stream = self.client.chat.completions.create(**openai_args)
+        except BadRequestError as e:
+            if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
+                logger.error(
+                    "Parameter error detected - model may require different token parameter",
+                    model=model,
+                    error=str(e),
+                    suggestion="This model may be a reasoning model requiring max_completion_tokens")
+            raise e
+        except Exception as e:
+            logger.error("OpenAI streaming API call failed",
+                         model=model,
+                         error=str(e))
+            raise e
+
+        # Accumulate tool calls as they stream in
+        # OpenAI streams tool arguments incrementally, indexed by tool call index
+        tool_calls_accumulator: Dict[int, Dict] = {}
+
+        for chunk in stream:
+            if not chunk.choices:
+                continue
+
+            delta = chunk.choices[0].delta
+            finish_reason = chunk.choices[0].finish_reason
+
+            # Yield content chunks as they arrive
+            if delta.content:
+                yield StreamingResponse(content=delta.content)
+
+            # Accumulate tool call chunks
+            if delta.tool_calls:
+                for tool_call_delta in delta.tool_calls:
+                    index = tool_call_delta.index
+
+                    # Initialize accumulator for this tool call if needed
+                    if index not in tool_calls_accumulator:
+                        tool_calls_accumulator[index] = {
+                            'id': None,
+                            'name': None,
+                            'arguments': ''
+                        }
+
+                    # First chunk has id and name
+                    if tool_call_delta.id:
+                        tool_calls_accumulator[index]['id'] = tool_call_delta.id
+
+                    if tool_call_delta.function.name:
+                        tool_calls_accumulator[index]['name'] = tool_call_delta.function.name
+
+                    # All chunks may have argument fragments
+                    if tool_call_delta.function.arguments:
+                        tool_calls_accumulator[index]['arguments'] += tool_call_delta.function.arguments
+
+            # When stream is complete, yield accumulated tool calls
+            if finish_reason == 'tool_calls' and tool_calls_accumulator:
+                # Parse and yield complete tool calls
+                complete_tool_calls = []
+                for index in sorted(tool_calls_accumulator.keys()):
+                    tc = tool_calls_accumulator[index]
+                    try:
+                        # Parse the accumulated JSON arguments
+                        args_dict = json.loads(tc['arguments'])
+                        # Convert to string values as per LLMToolCall format
+                        arguments = {str(k): str(v) for k, v in args_dict.items()}
+
+                        tool_call = LLMToolCall(
+                            id=tc['id'],
+                            name=tc['name'],
+                            arguments=arguments
+                        )
+                        complete_tool_calls.append(tool_call)
+                    except json.JSONDecodeError as e:
+                        logger.error("Failed to parse tool call arguments",
+                                     tool_name=tc['name'],
+                                     arguments=tc['arguments'],
+                                     error=str(e))
+
+                if complete_tool_calls:
+                    # Convert to the format expected by ollama's tool calls for compatibility
+                    # We need to create mock objects that match ollama's structure
+                    from types import SimpleNamespace
+                    ollama_format_calls = []
+                    for tc in complete_tool_calls:
+                        ollama_format_calls.append(SimpleNamespace(
+                            id=tc.id,  # Include ID for proper OpenAI message formatting
+                            function=SimpleNamespace(
+                                name=tc.name,
+                                arguments=tc.arguments
+                            )
+                        ))
+                    yield StreamingResponse(tool_calls=ollama_format_calls)
+
     def get_available_models(self) -> list[str]:
         """
         Get the list of available OpenAI models, sorted alphabetically.
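The new complete_stream mirrors the Ollama gateway but accumulates OpenAI's incremental tool-call fragments and yields them as one final chunk. A minimal usage sketch; the keyword arguments come from the docstring above, while the constructor signature, the LLMMessage field, and the model name are assumptions:

    import os
    from mojentic.llm.gateways import OpenAIGateway, LLMMessage

    gateway = OpenAIGateway(api_key=os.environ["OPENAI_API_KEY"])  # assumption: constructor signature
    for chunk in gateway.complete_stream(
            model="gpt-4o",                                        # assumption: any streaming chat model
            messages=[LLMMessage(content="Summarize this diff in one sentence.")]):
        if chunk.content:
            print(chunk.content, end="", flush=True)   # text deltas as they are generated
        if chunk.tool_calls:
            print("\ntool calls:", chunk.tool_calls)   # yielded once, after finish_reason == 'tool_calls'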
mojentic/llm/gateways/openai_message_adapter_spec.py
CHANGED

@@ -93,11 +93,11 @@ class DescribeOpenAIMessagesAdapter:
         Then it should convert to the correct format with structured content array
         """
         # Patch our own methods that encapsulate external library calls
-        mocker.patch('mojentic.llm.gateways.openai_messages_adapter.read_file_as_binary',
+        mocker.patch('mojentic.llm.gateways.openai_messages_adapter.read_file_as_binary',
                      return_value=b'fake_image_data')
-        mocker.patch('mojentic.llm.gateways.openai_messages_adapter.encode_base64',
+        mocker.patch('mojentic.llm.gateways.openai_messages_adapter.encode_base64',
                      return_value='ZmFrZV9pbWFnZV9kYXRhX2VuY29kZWQ=')
-        mocker.patch('mojentic.llm.gateways.openai_messages_adapter.get_image_type',
+        mocker.patch('mojentic.llm.gateways.openai_messages_adapter.get_image_type',
                      side_effect=lambda path: 'jpg' if path.endswith('.jpg') else 'png')

         image_paths = ["/path/to/image1.jpg", "/path/to/image2.png"]
mojentic/llm/gateways/openai_model_registry.py
CHANGED

@@ -6,13 +6,13 @@ their specific parameter requirements and capabilities.
 """

 from enum import Enum
-from typing import Dict,
+from typing import Dict, Optional, List, TYPE_CHECKING
 from dataclasses import dataclass

 import structlog

 if TYPE_CHECKING:
-
+    pass

 logger = structlog.get_logger()

@@ -20,9 +20,9 @@ logger = structlog.get_logger()
 class ModelType(Enum):
     """Classification of OpenAI model types based on their capabilities and parameters."""
     REASONING = "reasoning"  # Models like o1, o3 that use max_completion_tokens
-    CHAT = "chat"
-    EMBEDDING = "embedding"
-    MODERATION = "moderation"
+    CHAT = "chat"  # Standard chat models that use max_tokens
+    EMBEDDING = "embedding"  # Text embedding models
+    MODERATION = "moderation"  # Content moderation models


 @dataclass
@@ -346,6 +346,7 @@ class OpenAIModelRegistry:
 # Global registry instance
 _registry = OpenAIModelRegistry()

+
 def get_model_registry() -> OpenAIModelRegistry:
     """Get the global OpenAI model registry instance."""
-    return _registry
+    return _registry
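The registry hunks above only touch imports and comments, but the capability checks woven through openai.py all route through this module. A minimal sketch of querying it directly; the attribute names come from the gateway code in this diff, while calling the registry from user code like this is illustrative rather than documented API, and the model name is an assumption:

    from mojentic.llm.gateways.openai_model_registry import get_model_registry

    caps = get_model_registry().get_model_capabilities("o1-mini")  # model name assumed
    print(caps.model_type)              # ModelType.REASONING models take max_completion_tokens
    print(caps.supports_tools)          # gate before sending tool descriptors
    print(caps.supports_streaming)      # checked before complete_stream()
    print(caps.supported_temperatures)  # [] means the temperature parameter is stripped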