mojentic 0.8.3__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _examples/broker_as_tool.py +3 -3
- _examples/broker_examples.py +1 -1
- _examples/broker_image_examples.py +1 -1
- _examples/characterize_ollama.py +2 -2
- _examples/chat_session.py +1 -1
- _examples/chat_session_with_tool.py +1 -1
- _examples/coding_file_tool.py +16 -16
- _examples/current_datetime_tool_example.py +1 -1
- _examples/ephemeral_task_manager_example.py +2 -2
- _examples/file_tool.py +5 -5
- _examples/iterative_solver.py +1 -1
- _examples/react.py +1 -1
- _examples/recursive_agent.py +1 -1
- _examples/simple_llm.py +3 -3
- _examples/simple_llm_repl.py +1 -1
- _examples/simple_structured.py +1 -1
- _examples/simple_tool.py +2 -2
- _examples/solver_chat_session.py +4 -4
- _examples/streaming.py +32 -17
- _examples/tell_user_example.py +1 -1
- _examples/tracer_demo.py +3 -3
- _examples/working_memory.py +1 -1
- mojentic/llm/gateways/ollama.py +21 -18
- mojentic/llm/gateways/openai.py +208 -4
- mojentic/llm/gateways/openai_spec.py +99 -0
- mojentic/llm/llm_broker.py +161 -3
- mojentic/llm/llm_broker_spec.py +69 -0
- mojentic/llm/registry/populate_registry_from_ollama.py +2 -2
- {mojentic-0.8.3.dist-info → mojentic-0.9.0.dist-info}/METADATA +35 -4
- {mojentic-0.8.3.dist-info → mojentic-0.9.0.dist-info}/RECORD +33 -32
- {mojentic-0.8.3.dist-info → mojentic-0.9.0.dist-info}/WHEEL +0 -0
- {mojentic-0.8.3.dist-info → mojentic-0.9.0.dist-info}/licenses/LICENSE.md +0 -0
- {mojentic-0.8.3.dist-info → mojentic-0.9.0.dist-info}/top_level.txt +0 -0

mojentic/llm/gateways/openai.py
CHANGED

@@ -1,6 +1,7 @@
 import json
+import os
 from itertools import islice
-from typing import Type, List, Iterable, Optional
+from typing import Type, List, Iterable, Optional, Iterator, Dict

 import numpy as np
 import structlog
@@ -13,6 +14,7 @@ from mojentic.llm.gateways.openai_messages_adapter import adapt_messages_to_open
 from mojentic.llm.gateways.openai_model_registry import get_model_registry, ModelType
 from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
 from mojentic.llm.tools.llm_tool import LLMTool
+from mojentic.llm.gateways.ollama import StreamingResponse

 logger = structlog.get_logger()

@@ -23,11 +25,19 @@ class OpenAIGateway(LLMGateway):

     Parameters
     ----------
-    api_key : str
-        The OpenAI API key to use.
+    api_key : str, optional
+        The OpenAI API key to use. If not provided, defaults to the value of the
+        OPENAI_API_KEY environment variable.
+    base_url : str, optional
+        The base URL for the OpenAI API. If not provided, defaults to the value of the
+        OPENAI_API_ENDPOINT environment variable, or None if not set.
     """

-    def __init__(self, api_key: str, base_url: Optional[str] = None):
+    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
+        if api_key is None:
+            api_key = os.getenv("OPENAI_API_KEY")
+        if base_url is None:
+            base_url = os.getenv("OPENAI_API_ENDPOINT")
         self.client = OpenAI(api_key=api_key, base_url=base_url)
         self.model_registry = get_model_registry()

@@ -292,6 +302,200 @@ class OpenAIGateway(LLMGateway):
             tool_calls=tool_calls,
         )

+    def complete_stream(self, **kwargs) -> Iterator[StreamingResponse]:
+        """
+        Stream the LLM response from OpenAI service.
+
+        OpenAI streams tool call arguments incrementally, so we need to accumulate them
+        and yield complete tool calls only when the stream finishes.
+
+        Keyword Arguments
+        ----------------
+        model : str
+            The name of the model to use.
+        messages : List[LLMMessage]
+            A list of messages to send to the LLM.
+        tools : Optional[List[LLMTool]]
+            A list of tools to use with the LLM. Tool calls will be accumulated and yielded when complete.
+        temperature : float, optional
+            The temperature to use for the response. Defaults to 1.0.
+        num_ctx : int, optional
+            The number of context tokens to use. Defaults to 32768.
+        max_tokens : int, optional
+            The maximum number of tokens to generate. Defaults to 16384.
+        num_predict : int, optional
+            The number of tokens to predict. Defaults to no limit.
+
+        Returns
+        -------
+        Iterator[StreamingResponse]
+            An iterator of StreamingResponse objects containing response chunks.
+        """
+        # Extract parameters from kwargs with defaults
+        model = kwargs.get('model')
+        messages = kwargs.get('messages')
+        object_model = kwargs.get('object_model', None)
+        tools = kwargs.get('tools', None)
+        temperature = kwargs.get('temperature', 1.0)
+        num_ctx = kwargs.get('num_ctx', 32768)
+        max_tokens = kwargs.get('max_tokens', 16384)
+        num_predict = kwargs.get('num_predict', -1)
+
+        if not model:
+            raise ValueError("'model' parameter is required")
+        if not messages:
+            raise ValueError("'messages' parameter is required")
+
+        # Convert parameters to dict for processing
+        args = {
+            'model': model,
+            'messages': messages,
+            'object_model': object_model,
+            'tools': tools,
+            'temperature': temperature,
+            'num_ctx': num_ctx,
+            'max_tokens': max_tokens,
+            'num_predict': num_predict
+        }
+
+        # Adapt parameters based on model type
+        try:
+            adapted_args = self._adapt_parameters_for_model(model, args)
+        except Exception as e:
+            logger.error("Failed to adapt parameters for model",
+                         model=model,
+                         error=str(e))
+            raise
+
+        # Validate parameters after adaptation
+        self._validate_model_parameters(model, adapted_args)
+
+        # Check if model supports streaming
+        capabilities = self.model_registry.get_model_capabilities(model)
+        if not capabilities.supports_streaming:
+            raise NotImplementedError(f"Model {model} does not support streaming")
+
+        # Structured output doesn't work with streaming
+        if adapted_args['object_model'] is not None:
+            raise NotImplementedError("Streaming with structured output (object_model) is not supported")
+
+        openai_args = {
+            'model': adapted_args['model'],
+            'messages': adapt_messages_to_openai(adapted_args['messages']),
+            'stream': True,
+        }
+
+        # Add temperature if specified
+        if 'temperature' in adapted_args:
+            openai_args['temperature'] = adapted_args['temperature']
+
+        if adapted_args.get('tools') is not None:
+            openai_args['tools'] = [t.descriptor for t in adapted_args['tools']]
+
+        # Handle both max_tokens (for chat models) and max_completion_tokens (for reasoning models)
+        if 'max_tokens' in adapted_args:
+            openai_args['max_tokens'] = adapted_args['max_tokens']
+        elif 'max_completion_tokens' in adapted_args:
+            openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']
+
+        logger.debug("Making OpenAI streaming API call",
+                     model=openai_args['model'],
+                     has_tools='tools' in openai_args,
+                     token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')
+
+        try:
+            stream = self.client.chat.completions.create(**openai_args)
+        except BadRequestError as e:
+            if "max_tokens" in str(e) and "max_completion_tokens" in str(e):
+                logger.error("Parameter error detected - model may require different token parameter",
+                             model=model,
+                             error=str(e),
+                             suggestion="This model may be a reasoning model requiring max_completion_tokens")
+            raise e
+        except Exception as e:
+            logger.error("OpenAI streaming API call failed",
+                         model=model,
+                         error=str(e))
+            raise e
+
+        # Accumulate tool calls as they stream in
+        # OpenAI streams tool arguments incrementally, indexed by tool call index
+        tool_calls_accumulator: Dict[int, Dict] = {}
+
+        for chunk in stream:
+            if not chunk.choices:
+                continue
+
+            delta = chunk.choices[0].delta
+            finish_reason = chunk.choices[0].finish_reason
+
+            # Yield content chunks as they arrive
+            if delta.content:
+                yield StreamingResponse(content=delta.content)
+
+            # Accumulate tool call chunks
+            if delta.tool_calls:
+                for tool_call_delta in delta.tool_calls:
+                    index = tool_call_delta.index
+
+                    # Initialize accumulator for this tool call if needed
+                    if index not in tool_calls_accumulator:
+                        tool_calls_accumulator[index] = {
+                            'id': None,
+                            'name': None,
+                            'arguments': ''
+                        }
+
+                    # First chunk has id and name
+                    if tool_call_delta.id:
+                        tool_calls_accumulator[index]['id'] = tool_call_delta.id
+
+                    if tool_call_delta.function.name:
+                        tool_calls_accumulator[index]['name'] = tool_call_delta.function.name
+
+                    # All chunks may have argument fragments
+                    if tool_call_delta.function.arguments:
+                        tool_calls_accumulator[index]['arguments'] += tool_call_delta.function.arguments
+
+            # When stream is complete, yield accumulated tool calls
+            if finish_reason == 'tool_calls' and tool_calls_accumulator:
+                # Parse and yield complete tool calls
+                complete_tool_calls = []
+                for index in sorted(tool_calls_accumulator.keys()):
+                    tc = tool_calls_accumulator[index]
+                    try:
+                        # Parse the accumulated JSON arguments
+                        args_dict = json.loads(tc['arguments'])
+                        # Convert to string values as per LLMToolCall format
+                        arguments = {str(k): str(v) for k, v in args_dict.items()}
+
+                        tool_call = LLMToolCall(
+                            id=tc['id'],
+                            name=tc['name'],
+                            arguments=arguments
+                        )
+                        complete_tool_calls.append(tool_call)
+                    except json.JSONDecodeError as e:
+                        logger.error("Failed to parse tool call arguments",
+                                     tool_name=tc['name'],
+                                     arguments=tc['arguments'],
+                                     error=str(e))
+
+                if complete_tool_calls:
+                    # Convert to the format expected by ollama's tool calls for compatibility
+                    # We need to create mock objects that match ollama's structure
+                    from types import SimpleNamespace
+                    ollama_format_calls = []
+                    for tc in complete_tool_calls:
+                        ollama_format_calls.append(SimpleNamespace(
+                            id=tc.id,  # Include ID for proper OpenAI message formatting
+                            function=SimpleNamespace(
+                                name=tc.name,
+                                arguments=tc.arguments
+                            )
+                        ))
+                    yield StreamingResponse(tool_calls=ollama_format_calls)
+
     def get_available_models(self) -> list[str]:
         """
         Get the list of available OpenAI models, sorted alphabetically.
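
For orientation, here is a minimal sketch (not part of the diff) of consuming the new `complete_stream` generator directly. It assumes `OPENAI_API_KEY` is set in the environment so `OpenAIGateway()` can pick it up, and that unset `StreamingResponse` fields default to `None`.

```python
from mojentic.llm.gateways.openai import OpenAIGateway
from mojentic.llm.gateways.models import LLMMessage, MessageRole

gateway = OpenAIGateway()  # falls back to OPENAI_API_KEY / OPENAI_API_ENDPOINT
stream = gateway.complete_stream(
    model="gpt-4o-mini",
    messages=[LLMMessage(role=MessageRole.User, content="Write a haiku about rivers.")],
)

for chunk in stream:
    if chunk.content:
        # Text arrives incrementally as small content fragments
        print(chunk.content, end="", flush=True)
    if chunk.tool_calls:
        # Tool calls are accumulated by the gateway and yielded once, when the stream finishes
        print(f"\n[received {len(chunk.tool_calls)} complete tool call(s)]")
```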
mojentic/llm/gateways/openai_spec.py
ADDED

@@ -0,0 +1,99 @@
+import os
+from unittest.mock import patch
+
+from mojentic.llm.gateways.openai import OpenAIGateway
+
+
+class DescribeOpenAIGateway:
+    """
+    Unit tests for the OpenAI gateway
+    """
+
+    class DescribeInitialization:
+        """
+        Tests for OpenAI gateway initialization
+        """
+
+        def should_initialize_with_api_key(self, mocker):
+            api_key = "test-api-key"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            gateway = OpenAIGateway(api_key=api_key)
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=None)
+            assert gateway.client is not None
+
+        def should_initialize_with_api_key_and_base_url(self, mocker):
+            api_key = "test-api-key"
+            base_url = "https://custom.openai.com"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            gateway = OpenAIGateway(api_key=api_key, base_url=base_url)
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=base_url)
+            assert gateway.client is not None
+
+        def should_read_api_key_from_environment_variable(self, mocker):
+            api_key = "test-api-key-from-env"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            with patch.dict(os.environ, {'OPENAI_API_KEY': api_key}):
+                gateway = OpenAIGateway()
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=None)
+            assert gateway.client is not None
+
+        def should_read_base_url_from_environment_variable(self, mocker):
+            api_key = "test-api-key"
+            endpoint = "https://corporate.openai.com"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            with patch.dict(os.environ, {'OPENAI_API_ENDPOINT': endpoint}):
+                gateway = OpenAIGateway(api_key=api_key)
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=endpoint)
+            assert gateway.client is not None
+
+        def should_read_both_from_environment_variables(self, mocker):
+            api_key = "test-api-key-from-env"
+            endpoint = "https://corporate.openai.com"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            with patch.dict(os.environ, {'OPENAI_API_KEY': api_key, 'OPENAI_API_ENDPOINT': endpoint}):
+                gateway = OpenAIGateway()
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=endpoint)
+            assert gateway.client is not None
+
+        def should_prefer_explicit_api_key_over_environment_variable(self, mocker):
+            api_key_env = "test-api-key-from-env"
+            api_key_explicit = "test-api-key-explicit"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            with patch.dict(os.environ, {'OPENAI_API_KEY': api_key_env}):
+                gateway = OpenAIGateway(api_key=api_key_explicit)
+
+            mock_openai.assert_called_once_with(api_key=api_key_explicit, base_url=None)
+            assert gateway.client is not None
+
+        def should_prefer_explicit_base_url_over_environment_variable(self, mocker):
+            api_key = "test-api-key"
+            endpoint_env = "https://corporate.openai.com"
+            endpoint_explicit = "https://explicit.openai.com"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            with patch.dict(os.environ, {'OPENAI_API_ENDPOINT': endpoint_env}):
+                gateway = OpenAIGateway(api_key=api_key, base_url=endpoint_explicit)
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=endpoint_explicit)
+            assert gateway.client is not None
+
+        def should_use_none_when_no_endpoint_specified(self, mocker):
+            api_key = "test-api-key"
+            mock_openai = mocker.patch('mojentic.llm.gateways.openai.OpenAI')
+
+            with patch.dict(os.environ, {}, clear=True):
+                gateway = OpenAIGateway(api_key=api_key)
+
+            mock_openai.assert_called_once_with(api_key=api_key, base_url=None)
+            assert gateway.client is not None
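
The new specs pin down the lookup order the gateway now follows. As a rough illustration (assumption-labelled, not part of the diff), the behaviour they assert looks like this:

```python
import os
from mojentic.llm.gateways.openai import OpenAIGateway

os.environ["OPENAI_API_KEY"] = "sk-from-env"    # hypothetical value, for illustration only

gateway = OpenAIGateway()                        # no argument -> key comes from OPENAI_API_KEY
gateway = OpenAIGateway(api_key="sk-explicit")   # explicit argument -> overrides the environment variable
```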
mojentic/llm/llm_broker.py
CHANGED

@@ -1,13 +1,13 @@
 import json
 import time
-from typing import List, Optional, Type
+from typing import List, Optional, Type, Iterator

 import structlog
 from pydantic import BaseModel

 from mojentic.llm.gateways.llm_gateway import LLMGateway
-from mojentic.llm.gateways.models import MessageRole, LLMMessage, LLMGatewayResponse
-from mojentic.llm.gateways.ollama import OllamaGateway
+from mojentic.llm.gateways.models import MessageRole, LLMMessage, LLMGatewayResponse, LLMToolCall
+from mojentic.llm.gateways.ollama import OllamaGateway, StreamingResponse
 from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
 from mojentic.tracer.tracer_system import TracerSystem

@@ -182,6 +182,164 @@ class LLMBroker():

         return result.content

+    def generate_stream(self, messages: List[LLMMessage], tools=None, temperature=1.0, num_ctx=32768,
+                        num_predict=-1, max_tokens=16384,
+                        correlation_id: str = None) -> Iterator[str]:
+        """
+        Generate a streaming text response from the LLM.
+
+        This method mirrors generate() but yields content chunks as they arrive from the LLM,
+        providing a better user experience for long-running requests. When tool calls are
+        detected, tools are executed and the LLM is called recursively, with the new response
+        also being streamed.
+
+        Parameters
+        ----------
+        messages : List[LLMMessage]
+            A list of messages to send to the LLM.
+        tools : List[Tool]
+            A list of tools to use with the LLM. If a tool call is requested, the tool will be
+            called and the output will be included in the response.
+        temperature : float
+            The temperature to use for the response. Defaults to 1.0
+        num_ctx : int
+            The number of context tokens to use. Defaults to 32768.
+        num_predict : int
+            The number of tokens to predict. Defaults to no limit.
+        max_tokens : int
+            The maximum number of tokens to generate. Defaults to 16384.
+        correlation_id : str
+            UUID string that is copied from cause-to-affect for tracing events.
+
+        Yields
+        ------
+        str
+            Content chunks as they arrive from the LLM.
+        """
+        # Check if gateway supports streaming
+        if not hasattr(self.adapter, 'complete_stream'):
+            raise NotImplementedError(f"Gateway {type(self.adapter).__name__} does not support streaming")
+
+        approximate_tokens = len(self.tokenizer.encode(self._content_to_count(messages)))
+        logger.info(f"Requesting streaming llm response with approx {approximate_tokens} tokens")
+
+        # Convert messages to serializable dict for audit
+        messages_for_tracer = [m.model_dump() for m in messages]
+
+        # Record LLM call in tracer
+        tools_for_tracer = [{"name": t.name, "description": t.description} for t in
+                            tools] if tools else None
+        self.tracer.record_llm_call(
+            self.model,
+            messages_for_tracer,
+            temperature,
+            tools=tools_for_tracer,
+            source=type(self),
+            correlation_id=correlation_id
+        )
+
+        # Measure call duration for audit
+        start_time = time.time()
+
+        # Accumulate content and tool calls from stream
+        accumulated_content = ""
+        accumulated_tool_calls = []
+
+        stream = self.adapter.complete_stream(
+            model=self.model,
+            messages=messages,
+            tools=tools,
+            temperature=temperature,
+            num_ctx=num_ctx,
+            num_predict=num_predict,
+            max_tokens=max_tokens
+        )
+
+        for chunk in stream:
+            # Handle content chunks
+            if hasattr(chunk, 'content') and chunk.content:
+                accumulated_content += chunk.content
+                yield chunk.content
+
+            # Handle tool calls if present
+            if hasattr(chunk, 'tool_calls') and chunk.tool_calls:
+                accumulated_tool_calls.extend(chunk.tool_calls)
+
+        call_duration_ms = (time.time() - start_time) * 1000
+
+        # Record LLM response in tracer
+        tool_calls_for_tracer = [tc.model_dump() if hasattr(tc, 'model_dump') else tc for tc in
+                                 accumulated_tool_calls] if accumulated_tool_calls else None
+        self.tracer.record_llm_response(
+            self.model,
+            accumulated_content,
+            tool_calls=tool_calls_for_tracer,
+            call_duration_ms=call_duration_ms,
+            source=type(self),
+            correlation_id=correlation_id
+        )
+
+        # Process tool calls if any were accumulated
+        if accumulated_tool_calls and tools is not None:
+            logger.info("Tool call requested in streaming response")
+            for tool_call in accumulated_tool_calls:
+                # Handle both LLMToolCall objects and raw tool call data
+                if hasattr(tool_call, 'name'):
+                    tool_name = tool_call.name
+                    tool_arguments = tool_call.arguments
+                else:
+                    # Handle ollama's tool call format
+                    tool_name = tool_call.function.name
+                    tool_arguments = tool_call.function.arguments
+
+                if tool := next((t for t in tools if t.matches(tool_name)), None):
+                    logger.info('Calling function', function=tool_name)
+                    logger.info('Arguments:', arguments=tool_arguments)
+
+                    # Measure tool execution time
+                    tool_start_time = time.time()
+
+                    # Call the tool
+                    output = tool.run(**tool_arguments)
+
+                    tool_duration_ms = (time.time() - tool_start_time) * 1000
+
+                    # Record tool call in tracer
+                    self.tracer.record_tool_call(
+                        tool_name,
+                        tool_arguments,
+                        output,
+                        caller="LLMBroker.generate_stream",
+                        call_duration_ms=tool_duration_ms,
+                        source=type(self),
+                        correlation_id=correlation_id
+                    )
+
+                    logger.info('Function output', output=output)
+
+                    # Convert to LLMToolCall if needed, preserving the ID if it exists
+                    if not isinstance(tool_call, LLMToolCall):
+                        # Extract ID if available from the tool_call object
+                        tool_call_id = None
+                        if hasattr(tool_call, 'id'):
+                            tool_call_id = tool_call.id
+                        elif hasattr(tool_call, 'function') and hasattr(tool_call.function, 'id'):
+                            tool_call_id = tool_call.function.id
+
+                        tool_call = LLMToolCall(id=tool_call_id, name=tool_name, arguments=tool_arguments)
+
+                    messages.append(LLMMessage(role=MessageRole.Assistant, tool_calls=[tool_call]))
+                    messages.append(
+                        LLMMessage(role=MessageRole.Tool, content=json.dumps(output),
+                                   tool_calls=[tool_call]))
+
+                    # Recursively stream the response after tool execution
+                    yield from self.generate_stream(messages, tools, temperature, num_ctx, num_predict,
+                                                    max_tokens, correlation_id=correlation_id)
+                    return  # Exit after recursive call
+                else:
+                    logger.warn('Function not found', function=tool_name)
+
     def _content_to_count(self, messages: List[LLMMessage]):
         content = ""
         for message in messages:
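
For orientation, a minimal sketch (not part of the diff) of driving the new broker-level streaming. It assumes a locally available Ollama model, mirroring the README example; any gateway that exposes `complete_stream` works the same way.

```python
from mojentic.llm import LLMBroker
from mojentic.llm.gateways.models import LLMMessage, MessageRole

llm = LLMBroker(model="qwen3:32b")  # defaults to the Ollama gateway, as in the README

# generate_stream yields plain str chunks; tool calls, if any, are handled internally
for chunk in llm.generate_stream([LLMMessage(role=MessageRole.User, content="Tell me a story")]):
    print(chunk, end="", flush=True)
```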
mojentic/llm/llm_broker_spec.py
CHANGED

@@ -135,3 +135,72 @@ class DescribeLLMBroker:
         assert result.items[1].number == 2
         assert result.metadata == {"key1": "value1", "key2": "value2"}
         mock_gateway.complete.assert_called_once()
+
+    class DescribeStreamingGeneration:
+
+        def should_stream_simple_response(self, llm_broker, mock_gateway, mocker):
+            from mojentic.llm.gateways.ollama import StreamingResponse
+
+            messages = [LLMMessage(role=MessageRole.User, content="Tell me a story")]
+
+            # Mock the complete_stream method to yield chunks
+            mock_gateway.complete_stream = mocker.MagicMock()
+            mock_gateway.complete_stream.return_value = iter([
+                StreamingResponse(content="Once "),
+                StreamingResponse(content="upon "),
+                StreamingResponse(content="a "),
+                StreamingResponse(content="time...")
+            ])
+
+            result_chunks = list(llm_broker.generate_stream(messages))
+
+            assert result_chunks == ["Once ", "upon ", "a ", "time..."]
+            mock_gateway.complete_stream.assert_called_once()
+
+        def should_handle_tool_calls_during_streaming(self, llm_broker, mock_gateway, mocker):
+            from mojentic.llm.gateways.ollama import StreamingResponse
+
+            messages = [LLMMessage(role=MessageRole.User, content="What is the date on Friday?")]
+            tool_call = mocker.create_autospec(LLMToolCall, instance=True)
+            tool_call.name = "resolve_date"
+            tool_call.arguments = {"date": "Friday"}
+
+            # First stream has tool call, second stream has the response after tool execution
+            mock_gateway.complete_stream = mocker.MagicMock()
+            mock_gateway.complete_stream.side_effect = [
+                iter([
+                    StreamingResponse(content="Let "),
+                    StreamingResponse(content="me "),
+                    StreamingResponse(content="check..."),
+                    StreamingResponse(tool_calls=[tool_call])
+                ]),
+                iter([
+                    StreamingResponse(content="The "),
+                    StreamingResponse(content="date "),
+                    StreamingResponse(content="is "),
+                    StreamingResponse(content="2024-11-15")
+                ])
+            ]
+
+            mock_tool = mocker.MagicMock()
+            mock_tool.matches.return_value = True
+            mock_tool.run.return_value = {"resolved_date": "2024-11-15"}
+
+            result_chunks = list(llm_broker.generate_stream(messages, tools=[mock_tool]))
+
+            # Should get chunks from first response, then chunks from second response after tool execution
+            assert result_chunks == ["Let ", "me ", "check...", "The ", "date ", "is ", "2024-11-15"]
+            assert mock_gateway.complete_stream.call_count == 2
+            mock_tool.run.assert_called_once_with(date="Friday")
+
+        def should_raise_error_if_gateway_does_not_support_streaming(self, llm_broker, mock_gateway):
+            messages = [LLMMessage(role=MessageRole.User, content="Hello")]
+
+            # Remove complete_stream method to simulate unsupported gateway
+            if hasattr(mock_gateway, 'complete_stream'):
+                delattr(mock_gateway, 'complete_stream')
+
+            with pytest.raises(NotImplementedError) as exc_info:
+                list(llm_broker.generate_stream(messages))
+
+            assert "does not support streaming" in str(exc_info.value)
mojentic/llm/registry/populate_registry_from_ollama.py
CHANGED

@@ -24,9 +24,9 @@ def register_llms_from_ollama(url: str, registry: LLMRegistry):
 # 'quantization_level': 'Q4_K_M'
 # },
 # 'digest': '4bd6cbf2d094264457a17aab6bd6acd1ed7a72fb8f8be3cfb193f63c78dd56df',
-# 'model': '
+# 'model': 'qwen3-coder:32b',
 # 'modified_at': '2025-01-29T22:37:29.191797577-05:00',
-# 'name': '
+# 'name': 'qwen3-coder:32b',
 # 'size': 19851349856
 # }

{mojentic-0.8.3.dist-info → mojentic-0.9.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mojentic
-Version: 0.
+Version: 0.9.0
 Summary: Mojentic is an agentic framework that aims to provide a simple and flexible way to assemble teams of agents to solve complex problems.
 Author-email: Stacey Vetzal <stacey@vetzal.com>
 Project-URL: Homepage, https://github.com/svetzal/mojentic
@@ -29,6 +29,8 @@ Requires-Dist: pytest-spec; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
 Requires-Dist: flake8>=6.0.0; extra == "dev"
+Requires-Dist: bandit>=1.7.0; extra == "dev"
+Requires-Dist: pip-audit>=2.0.0; extra == "dev"
 Requires-Dist: mkdocs; extra == "dev"
 Requires-Dist: mkdocs-material; extra == "dev"
 Requires-Dist: mkdocs-llmstxt; extra == "dev"
@@ -91,7 +93,7 @@ openai_llm = LLMBroker(model="gpt-5", gateway=OpenAIGateway(api_key="your_api_ke
 # Or use other models: "gpt-4o", "gpt-4.1", "o1-mini", "o3-mini", etc.

 # Or use Ollama for local LLMs
-ollama_llm = LLMBroker(model="
+ollama_llm = LLMBroker(model="qwen3:32b")

 # Simple text generation
 result = openai_llm.generate(messages=[LLMMessage(content='Hello, how are you?')])
@@ -121,6 +123,35 @@ result = openai_llm.generate(messages=[
 print(result)
 ```

+## 🔑 OpenAI configuration
+
+OpenAIGateway now supports environment-variable defaults so you can get started without hardcoding secrets:
+
+- If you omit `api_key`, it will use the `OPENAI_API_KEY` environment variable.
+- If you omit `base_url`, it will use the `OPENAI_API_ENDPOINT` environment variable (useful for custom endpoints like Azure/OpenAI-compatible proxies).
+- Precedence: values you pass explicitly to `OpenAIGateway(api_key=..., base_url=...)` always override environment variables.
+
+Examples:
+
+```python
+from mojentic.llm import LLMBroker
+from mojentic.llm.gateways import OpenAIGateway
+
+# 1) Easiest: rely on environment variables
+# export OPENAI_API_KEY=sk-...
+# export OPENAI_API_ENDPOINT=https://api.openai.com/v1  # optional
+llm = LLMBroker(
+    model="gpt-4o-mini",
+    gateway=OpenAIGateway()  # picks up OPENAI_API_KEY/OPENAI_API_ENDPOINT automatically
+)
+
+# 2) Explicitly override one or both values
+llm = LLMBroker(
+    model="gpt-4o-mini",
+    gateway=OpenAIGateway(api_key="your_key", base_url="https://api.openai.com/v1")
+)
+```
+
 ## 🤖 OpenAI Model Support

 The framework automatically handles parameter differences between model types, so you can switch between any models without code changes.
@@ -170,9 +201,9 @@ pip install -e ".[dev]"
 pytest
 ```

-## 
+## ✅ Project Status

-
+The agentic aspects of this framework are in the highest state of flux. The first layer has stabilized, as have the simpler parts of the second layer, and we're working on the stability of the asynchronous pubsub architecture. We expect Python 3.14 will be the real enabler for the async aspects of the second layer.

 ## 📄 License
