mojentic 0.8.3__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
  import json
+ import os
  from itertools import islice
- from typing import Type, List, Iterable, Optional
+ from typing import Type, List, Iterable, Optional, Iterator, Dict
 
  import numpy as np
  import structlog
@@ -13,6 +14,7 @@ from mojentic.llm.gateways.openai_messages_adapter import adapt_messages_to_open
  from mojentic.llm.gateways.openai_model_registry import get_model_registry, ModelType
  from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
  from mojentic.llm.tools.llm_tool import LLMTool
+ from mojentic.llm.gateways.ollama import StreamingResponse
 
  logger = structlog.get_logger()
 
@@ -23,11 +25,19 @@ class OpenAIGateway(LLMGateway):
 
      Parameters
      ----------
-     api_key : str
-         The OpenAI API key to use.
+     api_key : str, optional
+         The OpenAI API key to use. If not provided, defaults to the value of the
+         OPENAI_API_KEY environment variable.
+     base_url : str, optional
+         The base URL for the OpenAI API. If not provided, defaults to the value of the
+         OPENAI_API_ENDPOINT environment variable, or None if not set.
      """
 
-     def __init__(self, api_key: str, base_url: Optional[str] = None):
+     def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
+         if api_key is None:
+             api_key = os.getenv("OPENAI_API_KEY")
+         if base_url is None:
+             base_url = os.getenv("OPENAI_API_ENDPOINT")
          self.client = OpenAI(api_key=api_key, base_url=base_url)
          self.model_registry = get_model_registry()
 
@@ -292,6 +302,200 @@ class OpenAIGateway(LLMGateway):
              tool_calls=tool_calls,
          )
 
+     def complete_stream(self, **kwargs) -> Iterator[StreamingResponse]:
+         """
+         Stream the LLM response from OpenAI service.
+
+         OpenAI streams tool call arguments incrementally, so we need to accumulate them
+         and yield complete tool calls only when the stream finishes.
+
+         Keyword Arguments
+         ----------------
+         model : str
+             The name of the model to use.
+         messages : List[LLMMessage]
+             A list of messages to send to the LLM.
+         tools : Optional[List[LLMTool]]
+             A list of tools to use with the LLM. Tool calls will be accumulated and yielded when complete.
+         temperature : float, optional
+             The temperature to use for the response. Defaults to 1.0.
+         num_ctx : int, optional
+             The number of context tokens to use. Defaults to 32768.
+         max_tokens : int, optional
+             The maximum number of tokens to generate. Defaults to 16384.
+         num_predict : int, optional
+             The number of tokens to predict. Defaults to no limit.
+
+         Returns
+         -------
+         Iterator[StreamingResponse]
+             An iterator of StreamingResponse objects containing response chunks.
+         """
+         # Extract parameters from kwargs with defaults
+         model = kwargs.get('model')
+         messages = kwargs.get('messages')
+         object_model = kwargs.get('object_model', None)
+         tools = kwargs.get('tools', None)
+         temperature = kwargs.get('temperature', 1.0)
+         num_ctx = kwargs.get('num_ctx', 32768)
+         max_tokens = kwargs.get('max_tokens', 16384)
+         num_predict = kwargs.get('num_predict', -1)
+
+         if not model:
+             raise ValueError("'model' parameter is required")
+         if not messages:
+             raise ValueError("'messages' parameter is required")
+
+         # Convert parameters to dict for processing
+         args = {
+             'model': model,
+             'messages': messages,
+             'object_model': object_model,
+             'tools': tools,
+             'temperature': temperature,
+             'num_ctx': num_ctx,
+             'max_tokens': max_tokens,
+             'num_predict': num_predict
+         }
+
+         # Adapt parameters based on model type
+         try:
+             adapted_args = self._adapt_parameters_for_model(model, args)
+         except Exception as e:
+             logger.error("Failed to adapt parameters for model",
+                          model=model,
+                          error=str(e))
+             raise
+
+         # Validate parameters after adaptation
+         self._validate_model_parameters(model, adapted_args)
+
+         # Check if model supports streaming
+         capabilities = self.model_registry.get_model_capabilities(model)
+         if not capabilities.supports_streaming:
+             raise NotImplementedError(f"Model {model} does not support streaming")
+
+         # Structured output doesn't work with streaming
+         if adapted_args['object_model'] is not None:
+             raise NotImplementedError("Streaming with structured output (object_model) is not supported")
+
+         openai_args = {
+             'model': adapted_args['model'],
+             'messages': adapt_messages_to_openai(adapted_args['messages']),
+             'stream': True,
+         }
+
+         # Add temperature if specified
+         if 'temperature' in adapted_args:
+             openai_args['temperature'] = adapted_args['temperature']
+
+         if adapted_args.get('tools') is not None:
+             openai_args['tools'] = [t.descriptor for t in adapted_args['tools']]
+
+         # Handle both max_tokens (for chat models) and max_completion_tokens (for reasoning models)
+         if 'max_tokens' in adapted_args:
+             openai_args['max_tokens'] = adapted_args['max_tokens']
+         elif 'max_completion_tokens' in adapted_args:
+             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']
+
+         logger.debug("Making OpenAI streaming API call",
+                      model=openai_args['model'],
+                      has_tools='tools' in openai_args,
+                      token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')
+
+         try:
+             stream = self.client.chat.completions.create(**openai_args)
+         except BadRequestError as e:
+             if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
+                 logger.error("Parameter error detected - model may require different token parameter",
+                              model=model,
+                              error=str(e),
+                              suggestion="This model may be a reasoning model requiring max_completion_tokens")
+             raise e
+         except Exception as e:
+             logger.error("OpenAI streaming API call failed",
+                          model=model,
+                          error=str(e))
+             raise e
+
+         # Accumulate tool calls as they stream in
+         # OpenAI streams tool arguments incrementally, indexed by tool call index
+         tool_calls_accumulator: Dict[int, Dict] = {}
+
+         for chunk in stream:
+             if not chunk.choices:
+                 continue
+
+             delta = chunk.choices[0].delta
+             finish_reason = chunk.choices[0].finish_reason
+
+             # Yield content chunks as they arrive
+             if delta.content:
+                 yield StreamingResponse(content=delta.content)
+
+             # Accumulate tool call chunks
+             if delta.tool_calls:
+                 for tool_call_delta in delta.tool_calls:
+                     index = tool_call_delta.index
+
+                     # Initialize accumulator for this tool call if needed
+                     if index not in tool_calls_accumulator:
+                         tool_calls_accumulator[index] = {
+                             'id': None,
+                             'name': None,
+                             'arguments': ''
+                         }
+
+                     # First chunk has id and name
+                     if tool_call_delta.id:
+                         tool_calls_accumulator[index]['id'] = tool_call_delta.id
+
+                     if tool_call_delta.function.name:
+                         tool_calls_accumulator[index]['name'] = tool_call_delta.function.name
+
+                     # All chunks may have argument fragments
+                     if tool_call_delta.function.arguments:
+                         tool_calls_accumulator[index]['arguments'] += tool_call_delta.function.arguments
+
+             # When stream is complete, yield accumulated tool calls
+             if finish_reason == 'tool_calls' and tool_calls_accumulator:
+                 # Parse and yield complete tool calls
+                 complete_tool_calls = []
+                 for index in sorted(tool_calls_accumulator.keys()):
+                     tc = tool_calls_accumulator[index]
+                     try:
+                         # Parse the accumulated JSON arguments
+                         args_dict = json.loads(tc['arguments'])
+                         # Convert to string values as per LLMToolCall format
+                         arguments = {str(k): str(v) for k, v in args_dict.items()}
+
+                         tool_call = LLMToolCall(
+                             id=tc['id'],
+                             name=tc['name'],
+                             arguments=arguments
+                         )
+                         complete_tool_calls.append(tool_call)
+                     except json.JSONDecodeError as e:
+                         logger.error("Failed to parse tool call arguments",
+                                      tool_name=tc['name'],
+                                      arguments=tc['arguments'],
+                                      error=str(e))
+
+                 if complete_tool_calls:
+                     # Convert to the format expected by ollama's tool calls for compatibility
+                     # We need to create mock objects that match ollama's structure
+                     from types import SimpleNamespace
+                     ollama_format_calls = []
+                     for tc in complete_tool_calls:
+                         ollama_format_calls.append(SimpleNamespace(
+                             id=tc.id,  # Include ID for proper OpenAI message formatting
+                             function=SimpleNamespace(
+                                 name=tc.name,
+                                 arguments=tc.arguments
+                             )
+                         ))
+                     yield StreamingResponse(tool_calls=ollama_format_calls)
+
      def get_available_models(self) -> list[str]:
          """
          Get the list of available OpenAI models, sorted alphabetically.
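
Editorial note, not part of the diff: a minimal sketch of how the new `complete_stream()` gateway API above might be consumed. It assumes `OPENAI_API_KEY` is set in the environment, uses an example model name, and relies on `StreamingResponse` exposing optional `content` and `tool_calls` fields, as the broker code later in this diff does.

```python
# Sketch only: consuming OpenAIGateway.complete_stream() directly.
# Assumes OPENAI_API_KEY is set; "gpt-4o-mini" is just an example model.
from mojentic.llm.gateways.models import LLMMessage
from mojentic.llm.gateways.openai import OpenAIGateway

gateway = OpenAIGateway()  # falls back to OPENAI_API_KEY / OPENAI_API_ENDPOINT

stream = gateway.complete_stream(
    model="gpt-4o-mini",
    messages=[LLMMessage(content="Hello, how are you?")],
)
for chunk in stream:
    if getattr(chunk, "content", None):      # text fragments arrive incrementally
        print(chunk.content, end="", flush=True)
    if getattr(chunk, "tool_calls", None):   # complete tool calls are yielded at the end
        print(chunk.tool_calls)
```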
@@ -0,0 +1,99 @@
+ import os
+ from unittest.mock import patch
+
+ from mojentic.llm.gateways.openai import OpenAIGateway
+
+
+ class DescribeOpenAIGateway:
+     """
+     Unit tests for the OpenAI gateway
+     """
+
+     class DescribeInitialization:
+         """
+         Tests for OpenAI gateway initialization
+         """
+
+         def should_initialize_with_api_key(self, mocker):
+             api_key = "test-api-key"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             gateway = OpenAIGateway(api_key=api_key)
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=None)
+             assert gateway.client is not None
+
+         def should_initialize_with_api_key_and_base_url(self, mocker):
+             api_key = "test-api-key"
+             base_url = "https://custom.openai.com"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             gateway = OpenAIGateway(api_key=api_key, base_url=base_url)
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=base_url)
+             assert gateway.client is not None
+
+         def should_read_api_key_from_environment_variable(self, mocker):
+             api_key = "test-api-key-from-env"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             with patch.dict(os.environ, {'OPENAI_API_KEY': api_key}):
+                 gateway = OpenAIGateway()
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=None)
+             assert gateway.client is not None
+
+         def should_read_base_url_from_environment_variable(self, mocker):
+             api_key = "test-api-key"
+             endpoint = "https://corporate.openai.com"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             with patch.dict(os.environ, {'OPENAI_API_ENDPOINT': endpoint}):
+                 gateway = OpenAIGateway(api_key=api_key)
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=endpoint)
+             assert gateway.client is not None
+
+         def should_read_both_from_environment_variables(self, mocker):
+             api_key = "test-api-key-from-env"
+             endpoint = "https://corporate.openai.com"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             with patch.dict(os.environ, {'OPENAI_API_KEY': api_key, 'OPENAI_API_ENDPOINT': endpoint}):
+                 gateway = OpenAIGateway()
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=endpoint)
+             assert gateway.client is not None
+
+         def should_prefer_explicit_api_key_over_environment_variable(self, mocker):
+             api_key_env = "test-api-key-from-env"
+             api_key_explicit = "test-api-key-explicit"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             with patch.dict(os.environ, {'OPENAI_API_KEY': api_key_env}):
+                 gateway = OpenAIGateway(api_key=api_key_explicit)
+
+             mock_openai.assert_called_once_with(api_key=api_key_explicit, base_url=None)
+             assert gateway.client is not None
+
+         def should_prefer_explicit_base_url_over_environment_variable(self, mocker):
+             api_key = "test-api-key"
+             endpoint_env = "https://corporate.openai.com"
+             endpoint_explicit = "https://explicit.openai.com"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             with patch.dict(os.environ, {'OPENAI_API_ENDPOINT': endpoint_env}):
+                 gateway = OpenAIGateway(api_key=api_key, base_url=endpoint_explicit)
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=endpoint_explicit)
+             assert gateway.client is not None
+
+         def should_use_none_when_no_endpoint_specified(self, mocker):
+             api_key = "test-api-key"
+             mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+             with patch.dict(os.environ, {}, clear=True):
+                 gateway = OpenAIGateway(api_key=api_key)
+
+             mock_openai.assert_called_once_with(api_key=api_key, base_url=None)
+             assert gateway.client is not None
@@ -1,13 +1,13 @@
  import json
  import time
- from typing import List, Optional, Type
+ from typing import List, Optional, Type, Iterator
 
  import structlog
  from pydantic import BaseModel
 
  from mojentic.llm.gateways.llm_gateway import LLMGateway
- from mojentic.llm.gateways.models import MessageRole, LLMMessage, LLMGatewayResponse
- from mojentic.llm.gateways.ollama import OllamaGateway
+ from mojentic.llm.gateways.models import MessageRole, LLMMessage, LLMGatewayResponse, LLMToolCall
+ from mojentic.llm.gateways.ollama import OllamaGateway, StreamingResponse
  from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
  from mojentic.tracer.tracer_system import TracerSystem
 
@@ -182,6 +182,164 @@ class LLMBroker():
 
          return result.content
 
+     def generate_stream(self, messages: List[LLMMessage], tools=None, temperature=1.0, num_ctx=32768,
+                         num_predict=-1, max_tokens=16384,
+                         correlation_id: str = None) -> Iterator[str]:
+         """
+         Generate a streaming text response from the LLM.
+
+         This method mirrors generate() but yields content chunks as they arrive from the LLM,
+         providing a better user experience for long-running requests. When tool calls are
+         detected, tools are executed and the LLM is called recursively, with the new response
+         also being streamed.
+
+         Parameters
+         ----------
+         messages : List[LLMMessage]
+             A list of messages to send to the LLM.
+         tools : List[Tool]
+             A list of tools to use with the LLM. If a tool call is requested, the tool will be
+             called and the output will be included in the response.
+         temperature : float
+             The temperature to use for the response. Defaults to 1.0
+         num_ctx : int
+             The number of context tokens to use. Defaults to 32768.
+         num_predict : int
+             The number of tokens to predict. Defaults to no limit.
+         max_tokens : int
+             The maximum number of tokens to generate. Defaults to 16384.
+         correlation_id : str
+             UUID string that is copied from cause-to-affect for tracing events.
+
+         Yields
+         ------
+         str
+             Content chunks as they arrive from the LLM.
+         """
+         # Check if gateway supports streaming
+         if not hasattr(self.adapter, 'complete_stream'):
+             raise NotImplementedError(f"Gateway {type(self.adapter).__name__} does not support streaming")
+
+         approximate_tokens = len(self.tokenizer.encode(self._content_to_count(messages)))
+         logger.info(f"Requesting streaming llm response with approx {approximate_tokens} tokens")
+
+         # Convert messages to serializable dict for audit
+         messages_for_tracer = [m.model_dump() for m in messages]
+
+         # Record LLM call in tracer
+         tools_for_tracer = [{"name": t.name, "description": t.description} for t in
+                             tools] if tools else None
+         self.tracer.record_llm_call(
+             self.model,
+             messages_for_tracer,
+             temperature,
+             tools=tools_for_tracer,
+             source=type(self),
+             correlation_id=correlation_id
+         )
+
+         # Measure call duration for audit
+         start_time = time.time()
+
+         # Accumulate content and tool calls from stream
+         accumulated_content = ""
+         accumulated_tool_calls = []
+
+         stream = self.adapter.complete_stream(
+             model=self.model,
+             messages=messages,
+             tools=tools,
+             temperature=temperature,
+             num_ctx=num_ctx,
+             num_predict=num_predict,
+             max_tokens=max_tokens
+         )
+
+         for chunk in stream:
+             # Handle content chunks
+             if hasattr(chunk, 'content') and chunk.content:
+                 accumulated_content += chunk.content
+                 yield chunk.content
+
+             # Handle tool calls if present
+             if hasattr(chunk, 'tool_calls') and chunk.tool_calls:
+                 accumulated_tool_calls.extend(chunk.tool_calls)
+
+         call_duration_ms = (time.time() - start_time) * 1000
+
+         # Record LLM response in tracer
+         tool_calls_for_tracer = [tc.model_dump() if hasattr(tc, 'model_dump') else tc for tc in
+                                  accumulated_tool_calls] if accumulated_tool_calls else None
+         self.tracer.record_llm_response(
+             self.model,
+             accumulated_content,
+             tool_calls=tool_calls_for_tracer,
+             call_duration_ms=call_duration_ms,
+             source=type(self),
+             correlation_id=correlation_id
+         )
+
+         # Process tool calls if any were accumulated
+         if accumulated_tool_calls and tools is not None:
+             logger.info("Tool call requested in streaming response")
+             for tool_call in accumulated_tool_calls:
+                 # Handle both LLMToolCall objects and raw tool call data
+                 if hasattr(tool_call, 'name'):
+                     tool_name = tool_call.name
+                     tool_arguments = tool_call.arguments
+                 else:
+                     # Handle ollama's tool call format
+                     tool_name = tool_call.function.name
+                     tool_arguments = tool_call.function.arguments
+
+                 if tool := next((t for t in tools if t.matches(tool_name)), None):
+                     logger.info('Calling function', function=tool_name)
+                     logger.info('Arguments:', arguments=tool_arguments)
+
+                     # Measure tool execution time
+                     tool_start_time = time.time()
+
+                     # Call the tool
+                     output = tool.run(**tool_arguments)
+
+                     tool_duration_ms = (time.time() - tool_start_time) * 1000
+
+                     # Record tool call in tracer
+                     self.tracer.record_tool_call(
+                         tool_name,
+                         tool_arguments,
+                         output,
+                         caller="LLMBroker.generate_stream",
+                         call_duration_ms=tool_duration_ms,
+                         source=type(self),
+                         correlation_id=correlation_id
+                     )
+
+                     logger.info('Function output', output=output)
+
+                     # Convert to LLMToolCall if needed, preserving the ID if it exists
+                     if not isinstance(tool_call, LLMToolCall):
+                         # Extract ID if available from the tool_call object
+                         tool_call_id = None
+                         if hasattr(tool_call, 'id'):
+                             tool_call_id = tool_call.id
+                         elif hasattr(tool_call, 'function') and hasattr(tool_call.function, 'id'):
+                             tool_call_id = tool_call.function.id
+
+                         tool_call = LLMToolCall(id=tool_call_id, name=tool_name, arguments=tool_arguments)
+
+                     messages.append(LLMMessage(role=MessageRole.Assistant, tool_calls=[tool_call]))
+                     messages.append(
+                         LLMMessage(role=MessageRole.Tool, content=json.dumps(output),
+                                    tool_calls=[tool_call]))
+
+                     # Recursively stream the response after tool execution
+                     yield from self.generate_stream(messages, tools, temperature, num_ctx, num_predict,
+                                                     max_tokens, correlation_id=correlation_id)
+                     return # Exit after recursive call
+                 else:
+                     logger.warn('Function not found', function=tool_name)
+
      def _content_to_count(self, messages: List[LLMMessage]):
          content = ""
          for message in messages:
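
Editorial note, not part of the diff: a usage sketch of the broker-level `generate_stream()` added above. It yields plain string chunks and handles tool calls internally, streaming the follow-up response through the same generator. The gateway and model names below are assumptions borrowed from the README examples in this release.

```python
from mojentic.llm import LLMBroker
from mojentic.llm.gateways import OpenAIGateway
from mojentic.llm.gateways.models import LLMMessage

# API key is picked up from OPENAI_API_KEY; "gpt-4o-mini" is just an example model.
llm = LLMBroker(model="gpt-4o-mini", gateway=OpenAIGateway())

# Chunks are plain strings; any requested tools are executed inside generate_stream()
# and the post-tool response is streamed as well.
for chunk in llm.generate_stream(messages=[LLMMessage(content="Tell me a story")]):
    print(chunk, end="", flush=True)
```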
@@ -135,3 +135,72 @@ class DescribeLLMBroker:
              assert result.items[1].number == 2
              assert result.metadata == {"key1": "value1", "key2": "value2"}
              mock_gateway.complete.assert_called_once()
+
+     class DescribeStreamingGeneration:
+
+         def should_stream_simple_response(self, llm_broker, mock_gateway, mocker):
+             from mojentic.llm.gateways.ollama import StreamingResponse
+
+             messages = [LLMMessage(role=MessageRole.User, content="Tell me a story")]
+
+             # Mock the complete_stream method to yield chunks
+             mock_gateway.complete_stream = mocker.MagicMock()
+             mock_gateway.complete_stream.return_value = iter([
+                 StreamingResponse(content="Once "),
+                 StreamingResponse(content="upon "),
+                 StreamingResponse(content="a "),
+                 StreamingResponse(content="time...")
+             ])
+
+             result_chunks = list(llm_broker.generate_stream(messages))
+
+             assert result_chunks == ["Once ", "upon ", "a ", "time..."]
+             mock_gateway.complete_stream.assert_called_once()
+
+         def should_handle_tool_calls_during_streaming(self, llm_broker, mock_gateway, mocker):
+             from mojentic.llm.gateways.ollama import StreamingResponse
+
+             messages = [LLMMessage(role=MessageRole.User, content="What is the date on Friday?")]
+             tool_call = mocker.create_autospec(LLMToolCall, instance=True)
+             tool_call.name = "resolve_date"
+             tool_call.arguments = {"date": "Friday"}
+
+             # First stream has tool call, second stream has the response after tool execution
+             mock_gateway.complete_stream = mocker.MagicMock()
+             mock_gateway.complete_stream.side_effect = [
+                 iter([
+                     StreamingResponse(content="Let "),
+                     StreamingResponse(content="me "),
+                     StreamingResponse(content="check..."),
+                     StreamingResponse(tool_calls=[tool_call])
+                 ]),
+                 iter([
+                     StreamingResponse(content="The "),
+                     StreamingResponse(content="date "),
+                     StreamingResponse(content="is "),
+                     StreamingResponse(content="2024-11-15")
+                 ])
+             ]
+
+             mock_tool = mocker.MagicMock()
+             mock_tool.matches.return_value = True
+             mock_tool.run.return_value = {"resolved_date": "2024-11-15"}
+
+             result_chunks = list(llm_broker.generate_stream(messages, tools=[mock_tool]))
+
+             # Should get chunks from first response, then chunks from second response after tool execution
+             assert result_chunks == ["Let ", "me ", "check...", "The ", "date ", "is ", "2024-11-15"]
+             assert mock_gateway.complete_stream.call_count == 2
+             mock_tool.run.assert_called_once_with(date="Friday")
+
+         def should_raise_error_if_gateway_does_not_support_streaming(self, llm_broker, mock_gateway):
+             messages = [LLMMessage(role=MessageRole.User, content="Hello")]
+
+             # Remove complete_stream method to simulate unsupported gateway
+             if hasattr(mock_gateway, 'complete_stream'):
+                 delattr(mock_gateway, 'complete_stream')
+
+             with pytest.raises(NotImplementedError) as exc_info:
+                 list(llm_broker.generate_stream(messages))
+
+             assert "does not support streaming" in str(exc_info.value)
@@ -24,9 +24,9 @@ def register_llms_from_ollama(url: str, registry: LLMRegistry):
      # 'quantization_level': 'Q4_K_M'
      # },
      # 'digest': '4bd6cbf2d094264457a17aab6bd6acd1ed7a72fb8f8be3cfb193f63c78dd56df',
-     # 'model': 'qwen2.5-coder:32b',
+     # 'model': 'qwen3-coder:32b',
      # 'modified_at': '2025-01-29T22:37:29.191797577-05:00',
-     # 'name': 'qwen2.5-coder:32b',
+     # 'name': 'qwen3-coder:32b',
      # 'size': 19851349856
      # }
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mojentic
- Version: 0.8.3
+ Version: 0.9.0
  Summary: Mojentic is an agentic framework that aims to provide a simple and flexible way to assemble teams of agents to solve complex problems.
  Author-email: Stacey Vetzal <stacey@vetzal.com>
  Project-URL: Homepage, https://github.com/svetzal/mojentic
@@ -29,6 +29,8 @@ Requires-Dist: pytest-spec; extra == "dev"
  Requires-Dist: pytest-cov; extra == "dev"
  Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
  Requires-Dist: flake8>=6.0.0; extra == "dev"
+ Requires-Dist: bandit>=1.7.0; extra == "dev"
+ Requires-Dist: pip-audit>=2.0.0; extra == "dev"
  Requires-Dist: mkdocs; extra == "dev"
  Requires-Dist: mkdocs-material; extra == "dev"
  Requires-Dist: mkdocs-llmstxt; extra == "dev"
@@ -91,7 +93,7 @@ openai_llm = LLMBroker(model="gpt-5", gateway=OpenAIGateway(api_key="your_api_ke
  # Or use other models: "gpt-4o", "gpt-4.1", "o1-mini", "o3-mini", etc.
 
  # Or use Ollama for local LLMs
- ollama_llm = LLMBroker(model="llama3")
+ ollama_llm = LLMBroker(model="qwen3:32b")
 
  # Simple text generation
  result = openai_llm.generate(messages=[LLMMessage(content='Hello, how are you?')])
@@ -121,6 +123,35 @@ result = openai_llm.generate(messages=[
  print(result)
  ```
 
+ ## 🔑 OpenAI configuration
+
+ OpenAIGateway now supports environment-variable defaults so you can get started without hardcoding secrets:
+
+ - If you omit `api_key`, it will use the `OPENAI_API_KEY` environment variable.
+ - If you omit `base_url`, it will use the `OPENAI_API_ENDPOINT` environment variable (useful for custom endpoints like Azure/OpenAI-compatible proxies).
+ - Precedence: values you pass explicitly to `OpenAIGateway(api_key=..., base_url=...)` always override environment variables.
+
+ Examples:
+
+ ```python
+ from mojentic.llm import LLMBroker
+ from mojentic.llm.gateways import OpenAIGateway
+
+ # 1) Easiest: rely on environment variables
+ # export OPENAI_API_KEY=sk-...
+ # export OPENAI_API_ENDPOINT=https://api.openai.com/v1 # optional
+ llm = LLMBroker(
+     model="gpt-4o-mini",
+     gateway=OpenAIGateway()  # picks up OPENAI_API_KEY/OPENAI_API_ENDPOINT automatically
+ )
+
+ # 2) Explicitly override one or both values
+ llm = LLMBroker(
+     model="gpt-4o-mini",
+     gateway=OpenAIGateway(api_key="your_key", base_url="https://api.openai.com/v1")
+ )
+ ```
+
 
  ## 🤖 OpenAI Model Support
 
  The framework automatically handles parameter differences between model types, so you can switch between any models without code changes.
@@ -170,9 +201,9 @@ pip install -e ".[dev]"
  pytest
  ```
 
- ## ⚠️ Project Status
+ ## Project Status
 
- While the Layer 1 API (LLMBroker, LLMGateway, tool use) has stabilized, the Layer 2 agentic capabilities are under heavy development and will likely change significantly.
+ The agentic aspects of this framework are in the highest state of flux. The first layer has stabilized, as have the simpler parts of the second layer, and we're working on the stability of the asynchronous pubsub architecture. We expect Python 3.14 will be the real enabler for the async aspects of the second layer.
 
  ## 📄 License