mojentic 0.8.4__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. _examples/async_dispatcher_example.py +12 -4
  2. _examples/async_llm_example.py +1 -2
  3. _examples/broker_as_tool.py +42 -17
  4. _examples/broker_examples.py +5 -7
  5. _examples/broker_image_examples.py +1 -1
  6. _examples/characterize_ollama.py +3 -3
  7. _examples/characterize_openai.py +1 -1
  8. _examples/chat_session.py +2 -2
  9. _examples/chat_session_with_tool.py +2 -2
  10. _examples/coding_file_tool.py +16 -18
  11. _examples/current_datetime_tool_example.py +2 -2
  12. _examples/embeddings.py +1 -1
  13. _examples/ephemeral_task_manager_example.py +15 -11
  14. _examples/fetch_openai_models.py +10 -3
  15. _examples/file_deduplication.py +6 -6
  16. _examples/file_tool.py +5 -5
  17. _examples/image_analysis.py +2 -3
  18. _examples/image_broker.py +1 -1
  19. _examples/image_broker_splat.py +1 -1
  20. _examples/iterative_solver.py +3 -3
  21. _examples/model_characterization.py +2 -0
  22. _examples/openai_gateway_enhanced_demo.py +15 -5
  23. _examples/raw.py +1 -1
  24. _examples/react/agents/decisioning_agent.py +173 -15
  25. _examples/react/agents/summarization_agent.py +89 -0
  26. _examples/react/agents/thinking_agent.py +84 -14
  27. _examples/react/agents/tool_call_agent.py +83 -0
  28. _examples/react/formatters.py +38 -4
  29. _examples/react/models/base.py +60 -11
  30. _examples/react/models/events.py +76 -8
  31. _examples/react.py +71 -21
  32. _examples/recursive_agent.py +2 -2
  33. _examples/simple_llm.py +3 -3
  34. _examples/simple_llm_repl.py +1 -1
  35. _examples/simple_structured.py +1 -1
  36. _examples/simple_tool.py +2 -2
  37. _examples/solver_chat_session.py +5 -11
  38. _examples/streaming.py +36 -18
  39. _examples/tell_user_example.py +4 -4
  40. _examples/tracer_demo.py +18 -20
  41. _examples/tracer_qt_viewer.py +49 -46
  42. _examples/working_memory.py +1 -1
  43. mojentic/__init__.py +3 -3
  44. mojentic/agents/__init__.py +26 -8
  45. mojentic/agents/{agent_broker.py → agent_event_adapter.py} +3 -3
  46. mojentic/agents/async_aggregator_agent_spec.py +32 -33
  47. mojentic/agents/async_llm_agent.py +9 -5
  48. mojentic/agents/async_llm_agent_spec.py +21 -22
  49. mojentic/agents/base_async_agent.py +2 -2
  50. mojentic/agents/base_llm_agent.py +6 -2
  51. mojentic/agents/iterative_problem_solver.py +11 -5
  52. mojentic/agents/simple_recursive_agent.py +11 -10
  53. mojentic/agents/simple_recursive_agent_spec.py +423 -0
  54. mojentic/async_dispatcher.py +0 -1
  55. mojentic/async_dispatcher_spec.py +1 -1
  56. mojentic/context/__init__.py +0 -2
  57. mojentic/dispatcher.py +7 -8
  58. mojentic/llm/__init__.py +5 -5
  59. mojentic/llm/gateways/__init__.py +19 -18
  60. mojentic/llm/gateways/anthropic.py +1 -0
  61. mojentic/llm/gateways/anthropic_messages_adapter.py +0 -1
  62. mojentic/llm/gateways/llm_gateway.py +1 -1
  63. mojentic/llm/gateways/ollama.py +23 -18
  64. mojentic/llm/gateways/openai.py +243 -44
  65. mojentic/llm/gateways/openai_message_adapter_spec.py +3 -3
  66. mojentic/llm/gateways/openai_model_registry.py +7 -6
  67. mojentic/llm/gateways/openai_model_registry_spec.py +1 -2
  68. mojentic/llm/gateways/openai_temperature_handling_spec.py +2 -2
  69. mojentic/llm/llm_broker.py +162 -2
  70. mojentic/llm/llm_broker_spec.py +76 -2
  71. mojentic/llm/message_composers.py +6 -3
  72. mojentic/llm/message_composers_spec.py +5 -1
  73. mojentic/llm/registry/__init__.py +0 -3
  74. mojentic/llm/registry/populate_registry_from_ollama.py +2 -2
  75. mojentic/llm/tools/__init__.py +0 -9
  76. mojentic/llm/tools/ask_user_tool.py +11 -5
  77. mojentic/llm/tools/current_datetime.py +9 -6
  78. mojentic/llm/tools/date_resolver.py +10 -4
  79. mojentic/llm/tools/date_resolver_spec.py +0 -1
  80. mojentic/llm/tools/ephemeral_task_manager/append_task_tool.py +4 -1
  81. mojentic/llm/tools/ephemeral_task_manager/ephemeral_task_list.py +1 -1
  82. mojentic/llm/tools/ephemeral_task_manager/insert_task_after_tool.py +4 -1
  83. mojentic/llm/tools/ephemeral_task_manager/prepend_task_tool.py +5 -2
  84. mojentic/llm/tools/file_manager.py +131 -28
  85. mojentic/llm/tools/file_manager_spec.py +0 -3
  86. mojentic/llm/tools/llm_tool.py +1 -1
  87. mojentic/llm/tools/llm_tool_spec.py +0 -2
  88. mojentic/llm/tools/organic_web_search.py +4 -2
  89. mojentic/llm/tools/tell_user_tool.py +6 -2
  90. mojentic/llm/tools/tool_wrapper.py +2 -2
  91. mojentic/tracer/__init__.py +1 -10
  92. mojentic/tracer/event_store.py +7 -8
  93. mojentic/tracer/event_store_spec.py +1 -2
  94. mojentic/tracer/null_tracer.py +37 -43
  95. mojentic/tracer/tracer_events.py +8 -2
  96. mojentic/tracer/tracer_events_spec.py +6 -7
  97. mojentic/tracer/tracer_system.py +37 -36
  98. mojentic/tracer/tracer_system_spec.py +21 -6
  99. mojentic/utils/__init__.py +1 -1
  100. mojentic/utils/formatting.py +1 -0
  101. {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/METADATA +76 -27
  102. mojentic-1.0.0.dist-info/RECORD +149 -0
  103. mojentic-0.8.4.dist-info/RECORD +0 -146
  104. {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/WHEEL +0 -0
  105. {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/licenses/LICENSE.md +0 -0
  106. {mojentic-0.8.4.dist-info → mojentic-1.0.0.dist-info}/top_level.txt +0 -0
mojentic/async_dispatcher.py CHANGED
@@ -1,7 +1,6 @@
  import asyncio
  import logging
  from collections import deque
- from typing import Optional, Type
  from uuid import uuid4

  import structlog
mojentic/async_dispatcher_spec.py CHANGED
@@ -1,7 +1,7 @@
  import asyncio
  import pytest
  import pytest_asyncio
- from unittest.mock import AsyncMock, MagicMock
+ from unittest.mock import MagicMock

  from mojentic.async_dispatcher import AsyncDispatcher
  from mojentic.event import Event, TerminateEvent
mojentic/context/__init__.py CHANGED
@@ -1,5 +1,3 @@
  """
  Mojentic context module for managing shared working memory and context.
  """
-
- from .shared_working_memory import SharedWorkingMemory
mojentic/dispatcher.py CHANGED
@@ -1,7 +1,6 @@
  import logging
  import threading
  from time import sleep
- from typing import Optional, Type
  from uuid import uuid4

  import structlog
@@ -18,7 +17,7 @@ class Dispatcher:
  self.event_queue = []
  self._stop_event = threading.Event()
  self._thread = threading.Thread(target=self._dispatch_events)
-
+
  # Use null_tracer if no tracer is provided
  from mojentic.tracer import null_tracer
  self.tracer = tracer or null_tracer
@@ -49,16 +48,16 @@
  events = []
  for agent in agents:
  logger.debug(f"Sending event to agent {agent}")
-
+
  # Record agent interaction in tracer system
  self.tracer.record_agent_interaction(
  from_agent=str(event.source),
  to_agent=str(type(agent)),
- event_type=str(type(event).__name__),
- event_id=event.correlation_id,
- source=type(self)
- )
-
+ event_type=str(type(event).__name__),
+ event_id=event.correlation_id,
+ source=type(self)
+ )
+
  # Process the event through the agent
  received_events = agent.receive_event(event)
  logger.debug(f"Agent {agent} returned {len(events)} events")
mojentic/llm/__init__.py CHANGED
@@ -3,13 +3,13 @@ Mojentic LLM module for interacting with Large Language Models.
  """

  # Main LLM components
- from .llm_broker import LLMBroker
- from .chat_session import ChatSession
- from .message_composers import MessageBuilder, FileTypeSensor
- from .registry.llm_registry import LLMRegistry
+ from .llm_broker import LLMBroker # noqa: F401
+ from .chat_session import ChatSession # noqa: F401
+ from .message_composers import MessageBuilder, FileTypeSensor # noqa: F401
+ from .registry.llm_registry import LLMRegistry # noqa: F401

  # Re-export gateway components at the LLM level
- from .gateways.models import (
+ from .gateways.models import ( # noqa: F401
  LLMMessage,
  LLMGatewayResponse,
  MessageRole
mojentic/llm/gateways/__init__.py CHANGED
@@ -3,23 +3,24 @@ Mojentic LLM gateways module for connecting to various LLM providers.
  """

  # Gateway implementations
- from .llm_gateway import LLMGateway
- from .ollama import OllamaGateway
- from .openai import OpenAIGateway
- from .anthropic import AnthropicGateway
- from .file_gateway import FileGateway
- from .embeddings_gateway import EmbeddingsGateway
- from .tokenizer_gateway import TokenizerGateway
-
- # Message adapters
- from .anthropic_messages_adapter import adapt_messages_to_anthropic
- from .ollama_messages_adapter import adapt_messages_to_ollama
- from .openai_messages_adapter import adapt_messages_to_openai
+ from mojentic.llm.gateways.llm_gateway import LLMGateway
+ from mojentic.llm.gateways.ollama import OllamaGateway
+ from mojentic.llm.gateways.openai import OpenAIGateway
+ from mojentic.llm.gateways.anthropic import AnthropicGateway
+ from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
+ from mojentic.llm.gateways.embeddings_gateway import EmbeddingsGateway

  # Common models
- from .models import (
- LLMMessage,
- MessageRole,
- LLMGatewayResponse,
- LLMToolCall
- )
+ from mojentic.llm.gateways.models import LLMMessage, LLMToolCall, LLMGatewayResponse
+
+ __all__ = [
+ "LLMGateway",
+ "OllamaGateway",
+ "OpenAIGateway",
+ "AnthropicGateway",
+ "TokenizerGateway",
+ "EmbeddingsGateway",
+ "LLMMessage",
+ "LLMToolCall",
+ "LLMGatewayResponse",
+ ]
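Note: given the explicit __all__ above, downstream code can rely on the package-level re-exports listed in this hunk; a usage sketch based only on the names shown here:

from mojentic.llm.gateways import OpenAIGateway, LLMMessage, LLMGatewayResponse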
mojentic/llm/gateways/anthropic.py CHANGED
@@ -9,6 +9,7 @@ from mojentic.llm.gateways.anthropic_messages_adapter import adapt_messages_to_a

  logger = structlog.get_logger()

+
  class AnthropicGateway(LLMGateway):
  def __init__(self, api_key: str):
  self.client = Anthropic(api_key=api_key)
mojentic/llm/gateways/anthropic_messages_adapter.py CHANGED
@@ -1,5 +1,4 @@
  import base64
- import json
  import os
  from typing import List, Any

mojentic/llm/gateways/llm_gateway.py CHANGED
@@ -1,4 +1,4 @@
- from typing import List, Optional, Type, Any
+ from typing import List, Optional, Type

  from pydantic import BaseModel

mojentic/llm/gateways/ollama.py CHANGED
@@ -1,4 +1,4 @@
- from typing import List, Iterator
+ from typing import List, Iterator, Optional
  import structlog
  from ollama import Client, Options, ChatResponse
  from pydantic import BaseModel
@@ -9,9 +9,21 @@ from mojentic.llm.gateways.ollama_messages_adapter import adapt_messages_to_olla

  logger = structlog.get_logger()

+
  class StreamingResponse(BaseModel):
- """Simple wrapper for streaming response content"""
- content: str
+ """
+ Wrapper for streaming response chunks.
+
+ Attributes
+ ----------
+ content : Optional[str]
+ Text content chunk from the LLM response.
+ tool_calls : Optional[List]
+ Tool calls from the LLM response (raw ollama format).
+ """
+ content: Optional[str] = None
+ tool_calls: Optional[List] = None
+

  class OllamaGateway(LLMGateway):
  """
@@ -144,28 +156,21 @@ class OllamaGateway(LLMGateway):
  'stream': True
  }

- #
- # This is here 2025-02-21 to demonstrate a deficiency in Ollama tool calling
- # using the Stream option. We can't get chunk by chunk responses from the LLM
- # when using tools. This limits our ability to explore streaming capabilities
- # in the mojentic API, so I'm pausing this work for now until this is resolved.
- # https://github.com/ollama/ollama/issues/7886
- #
-
- # if 'tools' in args and args['tools'] is not None:
- # ollama_args['tools'] = [t.descriptor for t in args['tools']]
+ # Enable tool support if tools are provided
+ if 'tools' in args and args['tools'] is not None:
+ ollama_args['tools'] = [t.descriptor for t in args['tools']]

  stream = self.client.chat(**ollama_args)

  for chunk in stream:
  if chunk.message:
+ # Yield content chunks as they arrive
  if chunk.message.content:
  yield StreamingResponse(content=chunk.message.content)
- # if chunk.message.tool_calls:
- # for tool_call in chunk.message.tool_calls:
- # yield StreamingResponse(
- # content=f"\nTOOL CALL: {tool_call.function.name}({tool_call.function.arguments})\n"
- # )
+
+ # Yield tool calls when they arrive
+ if chunk.message.tool_calls:
+ yield StreamingResponse(tool_calls=chunk.message.tool_calls)

  def get_available_models(self) -> List[str]:
  """
mojentic/llm/gateways/openai.py CHANGED
@@ -1,19 +1,18 @@
  import json
  import os
  from itertools import islice
- from typing import Type, List, Iterable, Optional
+ from typing import List, Iterable, Optional, Iterator, Dict

  import numpy as np
  import structlog
  from openai import OpenAI, BadRequestError
- from pydantic import BaseModel

  from mojentic.llm.gateways.llm_gateway import LLMGateway
- from mojentic.llm.gateways.models import LLMToolCall, LLMGatewayResponse, LLMMessage
+ from mojentic.llm.gateways.models import LLMToolCall, LLMGatewayResponse
  from mojentic.llm.gateways.openai_messages_adapter import adapt_messages_to_openai
  from mojentic.llm.gateways.openai_model_registry import get_model_registry, ModelType
  from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
- from mojentic.llm.tools.llm_tool import LLMTool
+ from mojentic.llm.gateways.ollama import StreamingResponse

  logger = structlog.get_logger()

@@ -76,10 +75,10 @@
  capabilities = self.model_registry.get_model_capabilities(model)

  logger.debug("Adapting parameters for model",
- model=model,
- model_type=capabilities.model_type.value,
- supports_tools=capabilities.supports_tools,
- supports_streaming=capabilities.supports_streaming)
+ model=model,
+ model_type=capabilities.model_type.value,
+ supports_tools=capabilities.supports_tools,
+ supports_streaming=capabilities.supports_streaming)

  # Handle token limit parameter conversion
  if 'max_tokens' in adapted_args:
@@ -88,16 +87,16 @@
  # Convert max_tokens to max_completion_tokens for reasoning models
  adapted_args[token_param] = adapted_args.pop('max_tokens')
  logger.info("Converted token limit parameter for model",
- model=model,
- from_param='max_tokens',
- to_param=token_param,
- value=adapted_args[token_param])
+ model=model,
+ from_param='max_tokens',
+ to_param=token_param,
+ value=adapted_args[token_param])

  # Validate tool usage for models that don't support tools
  if 'tools' in adapted_args and adapted_args['tools'] and not capabilities.supports_tools:
  logger.warning("Model does not support tools, removing tool configuration",
- model=model,
- num_tools=len(adapted_args['tools']))
+ model=model,
+ num_tools=len(adapted_args['tools']))
  adapted_args['tools'] = None # Set to None instead of removing the key

  # Handle temperature restrictions for specific models
@@ -107,18 +106,19 @@
  # Check if model supports temperature parameter at all
  if capabilities.supported_temperatures == []:
  # Model doesn't support temperature parameter at all - remove it
- logger.warning("Model does not support temperature parameter, removing it",
- model=model,
- requested_temperature=temperature)
+ logger.warning("Model does not support temperature parameter at all",
+ model=model,
+ requested_temperature=temperature)
  adapted_args.pop('temperature', None)
  elif not capabilities.supports_temperature(temperature):
  # Model supports temperature but not this specific value - use default
  default_temp = 1.0
- logger.warning("Model does not support requested temperature, using default",
- model=model,
- requested_temperature=temperature,
- default_temperature=default_temp,
- supported_temperatures=capabilities.supported_temperatures)
+ logger.warning(
+ "Model does not support requested temperature, using default",
+ model=model,
+ requested_temperature=temperature,
+ default_temperature=default_temp,
+ supported_temperatures=capabilities.supported_temperatures)
  adapted_args['temperature'] = default_temp

  return adapted_args
@@ -138,13 +138,12 @@

  # Warning for tools on reasoning models that don't support them
  if (capabilities.model_type == ModelType.REASONING and
- not capabilities.supports_tools and
- 'tools' in args and args['tools']):
+ not capabilities.supports_tools and
+ 'tools' in args and args['tools']):
  logger.warning(
  "Reasoning model may not support tools",
  model=model,
- num_tools=len(args['tools'])
- )
+ num_tools=len(args['tools']))

  # Validate token limits (check both possible parameter names)
  token_value = args.get('max_tokens') or args.get('max_completion_tokens')
@@ -154,8 +153,7 @@
  "Requested token limit exceeds model maximum",
  model=model,
  requested=token_value,
- max_allowed=capabilities.max_output_tokens
- )
+ max_allowed=capabilities.max_output_tokens)

  def complete(self, **kwargs) -> LLMGatewayResponse:
  """
@@ -218,8 +216,8 @@
  adapted_args = self._adapt_parameters_for_model(model, args)
  except Exception as e:
  logger.error("Failed to adapt parameters for model",
- model=model,
- error=str(e))
+ model=model,
+ error=str(e))
  raise

  # Validate parameters after adaptation
@@ -250,25 +248,26 @@
  openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

  logger.debug("Making OpenAI API call",
- model=openai_args['model'],
- has_tools='tools' in openai_args,
- has_object_model='response_format' in openai_args,
- token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')
+ model=openai_args['model'],
+ has_tools='tools' in openai_args,
+ has_object_model='response_format' in openai_args,
+ token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

  try:
  response = completion(**openai_args)
  except BadRequestError as e:
  # Enhanced error handling for parameter issues
  if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
- logger.error("Parameter error detected - model may require different token parameter",
- model=model,
- error=str(e),
- suggestion="This model may be a reasoning model requiring max_completion_tokens")
+ logger.error(
+ "Parameter error detected - model may require different token parameter",
+ model=model,
+ error=str(e),
+ suggestion="This model may be a reasoning model requiring max_completion_tokens")
  raise e
  except Exception as e:
  logger.error("OpenAI API call failed",
- model=model,
- error=str(e))
+ model=model,
+ error=str(e))
  raise e

  object = None
@@ -280,11 +279,16 @@
  if response_content is not None:
  object = adapted_args['object_model'].model_validate_json(response_content)
  else:
- logger.error("No response content available for object validation", object_model=adapted_args['object_model'])
+ logger.error(
+ "No response content available for object validation",
+ object_model=adapted_args['object_model'])
  except Exception as e:
- response_content = response.choices[0].message.content if response.choices else "No response content"
- logger.error("Failed to validate model", error=str(e), response=response_content,
- object_model=adapted_args['object_model'])
+ response_content = (response.choices[0].message.content
+ if response.choices else "No response content")
+ logger.error("Failed to validate model",
+ error=str(e),
+ response=response_content,
+ object_model=adapted_args['object_model'])

  if response.choices[0].message.tool_calls is not None:
  for t in response.choices[0].message.tool_calls:
@@ -301,6 +305,201 @@
  tool_calls=tool_calls,
  )

+ def complete_stream(self, **kwargs) -> Iterator[StreamingResponse]:
+ """
+ Stream the LLM response from OpenAI service.
+
+ OpenAI streams tool call arguments incrementally, so we need to accumulate them
+ and yield complete tool calls only when the stream finishes.
+
+ Keyword Arguments
+ ----------------
+ model : str
+ The name of the model to use.
+ messages : List[LLMMessage]
+ A list of messages to send to the LLM.
+ tools : Optional[List[LLMTool]]
+ A list of tools to use with the LLM. Tool calls will be accumulated and yielded when complete.
+ temperature : float, optional
+ The temperature to use for the response. Defaults to 1.0.
+ num_ctx : int, optional
+ The number of context tokens to use. Defaults to 32768.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Defaults to 16384.
+ num_predict : int, optional
+ The number of tokens to predict. Defaults to no limit.
+
+ Returns
+ -------
+ Iterator[StreamingResponse]
+ An iterator of StreamingResponse objects containing response chunks.
+ """
+ # Extract parameters from kwargs with defaults
+ model = kwargs.get('model')
+ messages = kwargs.get('messages')
+ object_model = kwargs.get('object_model', None)
+ tools = kwargs.get('tools', None)
+ temperature = kwargs.get('temperature', 1.0)
+ num_ctx = kwargs.get('num_ctx', 32768)
+ max_tokens = kwargs.get('max_tokens', 16384)
+ num_predict = kwargs.get('num_predict', -1)
+
+ if not model:
+ raise ValueError("'model' parameter is required")
+ if not messages:
+ raise ValueError("'messages' parameter is required")
+
+ # Convert parameters to dict for processing
+ args = {
+ 'model': model,
+ 'messages': messages,
+ 'object_model': object_model,
+ 'tools': tools,
+ 'temperature': temperature,
+ 'num_ctx': num_ctx,
+ 'max_tokens': max_tokens,
+ 'num_predict': num_predict
+ }
+
+ # Adapt parameters based on model type
+ try:
+ adapted_args = self._adapt_parameters_for_model(model, args)
+ except Exception as e:
+ logger.error("Failed to adapt parameters for model",
+ model=model,
+ error=str(e))
+ raise
+
+ # Validate parameters after adaptation
+ self._validate_model_parameters(model, adapted_args)
+
+ # Check if model supports streaming
+ capabilities = self.model_registry.get_model_capabilities(model)
+ if not capabilities.supports_streaming:
+ raise NotImplementedError(f"Model {model} does not support streaming")
+
+ # Structured output doesn't work with streaming
+ if adapted_args['object_model'] is not None:
+ raise NotImplementedError("Streaming with structured output (object_model) is not supported")
+
+ openai_args = {
+ 'model': adapted_args['model'],
+ 'messages': adapt_messages_to_openai(adapted_args['messages']),
+ 'stream': True,
+ }
+
+ # Add temperature if specified
+ if 'temperature' in adapted_args:
+ openai_args['temperature'] = adapted_args['temperature']
+
+ if adapted_args.get('tools') is not None:
+ openai_args['tools'] = [t.descriptor for t in adapted_args['tools']]
+
+ # Handle both max_tokens (for chat models) and max_completion_tokens (for reasoning models)
+ if 'max_tokens' in adapted_args:
+ openai_args['max_tokens'] = adapted_args['max_tokens']
+ elif 'max_completion_tokens' in adapted_args:
+ openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']
+
+ logger.debug("Making OpenAI streaming API call",
+ model=openai_args['model'],
+ has_tools='tools' in openai_args,
+ token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')
+
+ try:
+ stream = self.client.chat.completions.create(**openai_args)
+ except BadRequestError as e:
+ if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
+ logger.error(
+ "Parameter error detected - model may require different token parameter",
+ model=model,
+ error=str(e),
+ suggestion="This model may be a reasoning model requiring max_completion_tokens")
+ raise e
+ except Exception as e:
+ logger.error("OpenAI streaming API call failed",
+ model=model,
+ error=str(e))
+ raise e
+
+ # Accumulate tool calls as they stream in
+ # OpenAI streams tool arguments incrementally, indexed by tool call index
+ tool_calls_accumulator: Dict[int, Dict] = {}
+
+ for chunk in stream:
+ if not chunk.choices:
+ continue
+
+ delta = chunk.choices[0].delta
+ finish_reason = chunk.choices[0].finish_reason
+
+ # Yield content chunks as they arrive
+ if delta.content:
+ yield StreamingResponse(content=delta.content)
+
+ # Accumulate tool call chunks
+ if delta.tool_calls:
+ for tool_call_delta in delta.tool_calls:
+ index = tool_call_delta.index
+
+ # Initialize accumulator for this tool call if needed
+ if index not in tool_calls_accumulator:
+ tool_calls_accumulator[index] = {
+ 'id': None,
+ 'name': None,
+ 'arguments': ''
+ }
+
+ # First chunk has id and name
+ if tool_call_delta.id:
+ tool_calls_accumulator[index]['id'] = tool_call_delta.id
+
+ if tool_call_delta.function.name:
+ tool_calls_accumulator[index]['name'] = tool_call_delta.function.name
+
+ # All chunks may have argument fragments
+ if tool_call_delta.function.arguments:
+ tool_calls_accumulator[index]['arguments'] += tool_call_delta.function.arguments
+
+ # When stream is complete, yield accumulated tool calls
+ if finish_reason == 'tool_calls' and tool_calls_accumulator:
+ # Parse and yield complete tool calls
+ complete_tool_calls = []
+ for index in sorted(tool_calls_accumulator.keys()):
+ tc = tool_calls_accumulator[index]
+ try:
+ # Parse the accumulated JSON arguments
+ args_dict = json.loads(tc['arguments'])
+ # Convert to string values as per LLMToolCall format
+ arguments = {str(k): str(v) for k, v in args_dict.items()}
+
+ tool_call = LLMToolCall(
+ id=tc['id'],
+ name=tc['name'],
+ arguments=arguments
+ )
+ complete_tool_calls.append(tool_call)
+ except json.JSONDecodeError as e:
+ logger.error("Failed to parse tool call arguments",
+ tool_name=tc['name'],
+ arguments=tc['arguments'],
+ error=str(e))
+
+ if complete_tool_calls:
+ # Convert to the format expected by ollama's tool calls for compatibility
+ # We need to create mock objects that match ollama's structure
+ from types import SimpleNamespace
+ ollama_format_calls = []
+ for tc in complete_tool_calls:
+ ollama_format_calls.append(SimpleNamespace(
+ id=tc.id, # Include ID for proper OpenAI message formatting
+ function=SimpleNamespace(
+ name=tc.name,
+ arguments=tc.arguments
+ )
+ ))
+ yield StreamingResponse(tool_calls=ollama_format_calls)
+
  def get_available_models(self) -> list[str]:
  """
  Get the list of available OpenAI models, sorted alphabetically.
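Note: in the complete_stream method added above, text streams incrementally while accumulated tool calls are emitted in a single StreamingResponse once finish_reason == 'tool_calls'. A hedged consumer sketch follows; the gateway constructor arguments, the "gpt-4o" model name, and the bare LLMMessage construction are illustrative assumptions, not taken from this diff:

import os
from mojentic.llm.gateways import OpenAIGateway
from mojentic.llm.gateways.models import LLMMessage

gateway = OpenAIGateway(api_key=os.environ["OPENAI_API_KEY"])
messages = [LLMMessage(content="What's on my calendar tomorrow?")]

collected_text, collected_tool_calls = [], []
for chunk in gateway.complete_stream(model="gpt-4o", messages=messages):  # pass tools=[...] to enable tool calls
    if chunk.content:
        collected_text.append(chunk.content)            # streamed chunk by chunk
    if chunk.tool_calls:
        collected_tool_calls.extend(chunk.tool_calls)   # arrives once, after the stream finishes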
mojentic/llm/gateways/openai_message_adapter_spec.py CHANGED
@@ -93,11 +93,11 @@ class DescribeOpenAIMessagesAdapter:
  Then it should convert to the correct format with structured content array
  """
  # Patch our own methods that encapsulate external library calls
- mocker.patch('mojentic.llm.gateways.openai_messages_adapter.read_file_as_binary',
+ mocker.patch('mojentic.llm.gateways.openai_messages_adapter.read_file_as_binary',
  return_value=b'fake_image_data')
- mocker.patch('mojentic.llm.gateways.openai_messages_adapter.encode_base64',
+ mocker.patch('mojentic.llm.gateways.openai_messages_adapter.encode_base64',
  return_value='ZmFrZV9pbWFnZV9kYXRhX2VuY29kZWQ=')
- mocker.patch('mojentic.llm.gateways.openai_messages_adapter.get_image_type',
+ mocker.patch('mojentic.llm.gateways.openai_messages_adapter.get_image_type',
  side_effect=lambda path: 'jpg' if path.endswith('.jpg') else 'png')

  image_paths = ["/path/to/image1.jpg", "/path/to/image2.png"]
mojentic/llm/gateways/openai_model_registry.py CHANGED
@@ -6,13 +6,13 @@ their specific parameter requirements and capabilities.
  """

  from enum import Enum
- from typing import Dict, Set, Optional, List, TYPE_CHECKING
+ from typing import Dict, Optional, List, TYPE_CHECKING
  from dataclasses import dataclass

  import structlog

  if TYPE_CHECKING:
- from mojentic.llm.gateways.openai import OpenAIGateway
+ pass

  logger = structlog.get_logger()

@@ -20,9 +20,9 @@ logger = structlog.get_logger()
  class ModelType(Enum):
  """Classification of OpenAI model types based on their capabilities and parameters."""
  REASONING = "reasoning" # Models like o1, o3 that use max_completion_tokens
- CHAT = "chat" # Standard chat models that use max_tokens
- EMBEDDING = "embedding" # Text embedding models
- MODERATION = "moderation" # Content moderation models
+ CHAT = "chat" # Standard chat models that use max_tokens
+ EMBEDDING = "embedding" # Text embedding models
+ MODERATION = "moderation" # Content moderation models


  @dataclass
@@ -346,6 +346,7 @@
  # Global registry instance
  _registry = OpenAIModelRegistry()

+
  def get_model_registry() -> OpenAIModelRegistry:
  """Get the global OpenAI model registry instance."""
- return _registry
+ return _registry