mojentic-1.1.1-py3-none-any.whl → mojentic-1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mojentic/llm/__init__.py CHANGED
@@ -7,6 +7,7 @@ from .llm_broker import LLMBroker # noqa: F401
  from .chat_session import ChatSession # noqa: F401
  from .message_composers import MessageBuilder, FileTypeSensor # noqa: F401
  from .registry.llm_registry import LLMRegistry # noqa: F401
+ from .completion_config import CompletionConfig # noqa: F401

  # Re-export gateway components at the LLM level
  from .gateways.models import ( # noqa: F401
mojentic/llm/chat_session.py CHANGED
@@ -1,6 +1,7 @@
  from typing import Iterator, List, Optional

  from mojentic.llm import LLMBroker
+ from mojentic.llm.completion_config import CompletionConfig
  from mojentic.llm.gateways.models import LLMMessage, MessageRole
  from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
  from mojentic.llm.tools.llm_tool import LLMTool
@@ -23,6 +24,7 @@ class ChatSession:
  tools: Optional[List[LLMTool]] = None,
  max_context: int = 32768,
  tokenizer_gateway: TokenizerGateway = None,
+ config: Optional[CompletionConfig] = None,
  temperature: float = 1.0):
  """
  Create an instance of the ChatSession.
@@ -39,15 +41,25 @@ class ChatSession:
  The maximum number of tokens to keep in the context. Defaults to 32768.
  tokenizer_gateway : TokenizerGateway, optional
  The gateway to use for tokenization. If None, `mxbai-embed-large` is used on a local Ollama server.
+ config : Optional[CompletionConfig], optional
+ Configuration object for LLM completion. If None, one is created from temperature and max_context.
  temperature : float, optional
- The temperature to use for the response. Defaults to 1.0.
+ The temperature to use for the response. Defaults to 1.0. Deprecated: use config.
  """

  self.llm = llm
  self.system_prompt = system_prompt
  self.tools = tools
  self.max_context = max_context
- self.temperature = temperature
+
+ # Use config if provided, otherwise build from individual kwargs
+ if config is not None:
+ self.config = config
+ else:
+ self.config = CompletionConfig(
+ temperature=temperature,
+ num_ctx=max_context
+ )

  if tokenizer_gateway is None:
  self.tokenizer_gateway = TokenizerGateway()
@@ -73,7 +85,7 @@ class ChatSession:
  The response from the LLM.
  """
  self.insert_message(LLMMessage(role=MessageRole.User, content=query))
- response = self.llm.generate(self.messages, tools=self.tools, temperature=self.temperature)
+ response = self.llm.generate(self.messages, tools=self.tools, config=self.config)
  self._ensure_all_messages_are_sized()
  self.insert_message(LLMMessage(role=MessageRole.Assistant, content=response))
  return response
@@ -95,7 +107,7 @@ class ChatSession:
  """
  self.insert_message(LLMMessage(role=MessageRole.User, content=query))
  accumulated = []
- for chunk in self.llm.generate_stream(self.messages, tools=self.tools, temperature=self.temperature):
+ for chunk in self.llm.generate_stream(self.messages, tools=self.tools, config=self.config):
  accumulated.append(chunk)
  yield chunk
  self._ensure_all_messages_are_sized()
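For orientation, a minimal usage sketch of the new ChatSession wiring. The model name, and the assumption that the broker is passed as the first positional argument, are illustrative and not taken from this diff:

from mojentic.llm import ChatSession, CompletionConfig, LLMBroker

# Model name and positional llm argument are assumptions for illustration only.
llm = LLMBroker(model="llama3.1")
session = ChatSession(
    llm,
    system_prompt="You are a terse assistant.",
    config=CompletionConfig(temperature=0.2, num_ctx=16384),
)
# Interactions now route session.config into llm.generate(...) as shown above,
# instead of the deprecated temperature kwarg.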
mojentic/llm/completion_config.py ADDED
@@ -0,0 +1,58 @@
+ from typing import Optional, Literal
+ from pydantic import BaseModel, Field
+
+
+ class CompletionConfig(BaseModel):
+ """
+ Configuration object for LLM completion requests.
+
+ This model provides a unified way to configure LLM behavior across different
+ providers and models. It replaces loose kwargs with a structured configuration
+ object.
+
+ Attributes
+ ----------
+ temperature : float
+ Controls randomness in the output. Higher values (e.g., 1.5) make output
+ more random, while lower values (e.g., 0.1) make it more deterministic.
+ Defaults to 1.0.
+ num_ctx : int
+ The number of context tokens to use. This sets the context window size.
+ Defaults to 32768.
+ max_tokens : int
+ The maximum number of tokens to generate in the response.
+ Defaults to 16384.
+ num_predict : int
+ The number of tokens to predict. A value of -1 means no limit.
+ Defaults to -1.
+ reasoning_effort : Optional[Literal["low", "medium", "high"]]
+ Controls the reasoning effort level for models that support extended thinking.
+ - "low": Quick, minimal reasoning
+ - "medium": Balanced reasoning effort
+ - "high": Deep, thorough reasoning
+ Provider-specific behavior:
+ - Ollama: Maps to `think: true` parameter for all levels
+ - OpenAI: Maps to `reasoning_effort` API parameter for reasoning models
+ Defaults to None (no extended reasoning).
+ """
+
+ temperature: float = Field(
+ default=1.0,
+ description="Temperature for sampling (higher = more random)"
+ )
+ num_ctx: int = Field(
+ default=32768,
+ description="Number of context tokens"
+ )
+ max_tokens: int = Field(
+ default=16384,
+ description="Maximum tokens to generate"
+ )
+ num_predict: int = Field(
+ default=-1,
+ description="Number of tokens to predict (-1 = no limit)"
+ )
+ reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+ default=None,
+ description="Reasoning effort level for extended thinking"
+ )
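Because CompletionConfig is a plain Pydantic model, it can be built from a dict (for example, settings loaded from a file) and round-tripped. A small sketch, assuming only the Pydantic v2 API the package already uses elsewhere in this diff (model_json_schema):

from mojentic.llm.completion_config import CompletionConfig

settings = {"temperature": 0.2, "num_ctx": 16384, "reasoning_effort": "high"}
config = CompletionConfig(**settings)   # fields are validated on construction
print(config.model_dump())              # Pydantic v2 round-trip back to a dict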
mojentic/llm/completion_config_spec.py ADDED
@@ -0,0 +1,44 @@
+ import pytest
+ from pydantic import ValidationError
+
+ from mojentic.llm.completion_config import CompletionConfig
+
+
+ class DescribeCompletionConfig:
+
+ def should_use_default_values(self):
+ config = CompletionConfig()
+ assert config.temperature == 1.0
+ assert config.num_ctx == 32768
+ assert config.max_tokens == 16384
+ assert config.num_predict == -1
+ assert config.reasoning_effort is None
+
+ def should_accept_custom_values(self):
+ config = CompletionConfig(
+ temperature=0.5,
+ num_ctx=16384,
+ max_tokens=8192,
+ num_predict=100,
+ reasoning_effort="high"
+ )
+ assert config.temperature == 0.5
+ assert config.num_ctx == 16384
+ assert config.max_tokens == 8192
+ assert config.num_predict == 100
+ assert config.reasoning_effort == "high"
+
+ def should_accept_valid_reasoning_effort_levels(self):
+ for level in ["low", "medium", "high"]:
+ config = CompletionConfig(reasoning_effort=level)
+ assert config.reasoning_effort == level
+
+ def should_reject_invalid_reasoning_effort_levels(self):
+ with pytest.raises(ValidationError) as exc_info:
+ CompletionConfig(reasoning_effort="invalid")
+
+ assert "reasoning_effort" in str(exc_info.value)
+
+ def should_accept_none_reasoning_effort(self):
+ config = CompletionConfig(reasoning_effort=None)
+ assert config.reasoning_effort is None
mojentic/llm/gateways/anthropic.py CHANGED
@@ -17,6 +17,19 @@ class AnthropicGateway(LLMGateway):
  def complete(self, **args) -> LLMGatewayResponse:

  messages = args.get('messages')
+ config = args.get('config', None)
+
+ # Extract temperature and max_tokens from config if provided
+ if config:
+ temperature = config.temperature
+ max_tokens = config.max_tokens
+ # Note: reasoning_effort not supported by Anthropic yet
+ if config.reasoning_effort is not None:
+ logger.warning("Anthropic gateway does not yet support reasoning_effort parameter",
+ reasoning_effort=config.reasoning_effort)
+ else:
+ temperature = args.get('temperature', 1.0)
+ max_tokens = args.get('max_tokens', args.get('num_predict', 2000))

  system_messages = [m for m in messages if m.role == MessageRole.System]
  user_messages = [m for m in messages if m.role == MessageRole.User]
@@ -29,8 +42,8 @@ class AnthropicGateway(LLMGateway):

  response = self.client.messages.create(
  **anthropic_args,
- temperature=args.get('temperature', 1.0),
- max_tokens=args.get('max_tokens', args.get('num_predict', 2000)),
+ temperature=temperature,
+ max_tokens=max_tokens,
  # thinking={
  # "type": "enabled",
  # "budget_tokens": 32768,
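A sketch of how a config-driven call reaches this gateway. The api_key constructor argument and the model name are assumptions, since the AnthropicGateway constructor is not shown in this diff:

import os
from mojentic.llm import CompletionConfig
from mojentic.llm.gateways.anthropic import AnthropicGateway
from mojentic.llm.gateways.models import LLMMessage, MessageRole

gateway = AnthropicGateway(api_key=os.environ["ANTHROPIC_API_KEY"])  # constructor args assumed
config = CompletionConfig(temperature=0.2, max_tokens=1024)

# temperature and max_tokens are read from config; a reasoning_effort value would only log a warning here.
response = gateway.complete(
    model="claude-sonnet-4-20250514",  # illustrative model name
    messages=[LLMMessage(role=MessageRole.User, content="Say hello.")],
    config=config,
)
print(response.content)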
mojentic/llm/gateways/llm_gateway.py CHANGED
@@ -1,10 +1,13 @@
- from typing import List, Optional, Type
+ from typing import List, Optional, Type, TYPE_CHECKING

  from pydantic import BaseModel

  from mojentic.llm.gateways.models import LLMGatewayResponse, LLMMessage
  from mojentic.llm.tools.llm_tool import LLMTool

+ if TYPE_CHECKING:
+ from mojentic.llm.completion_config import CompletionConfig
+

  class LLMGateway:
  """
@@ -18,6 +21,7 @@ class LLMGateway:
  messages: List[LLMMessage],
  object_model: Optional[Type[BaseModel]] = None,
  tools: Optional[List[LLMTool]] = None,
+ config: Optional['CompletionConfig'] = None,
  temperature: float = 1.0,
  num_ctx: int = 32768, max_tokens: int = 16384,
  num_predict: int = -1) -> LLMGatewayResponse:
@@ -35,14 +39,16 @@ class LLMGateway:
  tools : Optional[List[LLMTool]]
  A list of tools to use with the LLM. If a tool call is requested, the tool will be called and the output
  will be included in the response.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended over individual kwargs).
  temperature : float
- The temperature to use for the response. Defaults to 1.0.
+ The temperature to use for the response. Defaults to 1.0. (Deprecated: use config)
  num_ctx : int
- The number of context tokens to use. Defaults to 32768.
+ The number of context tokens to use. Defaults to 32768. (Deprecated: use config)
  max_tokens : int
- The maximum number of tokens to generate. Defaults to 16384.
+ The maximum number of tokens to generate. Defaults to 16384. (Deprecated: use config)
  num_predict : int
- The number of tokens to predict. Defaults to no limit.
+ The number of tokens to predict. Defaults to no limit. (Deprecated: use config)

  Returns
  -------
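Since config arrives as an optional argument alongside the older kwargs, a third-party gateway can adopt the same fallback pattern the built-in gateways use below. This is a sketch, not part of the package:

from typing import Optional
from mojentic.llm.completion_config import CompletionConfig

def resolve_completion_settings(**args) -> tuple[float, int]:
    """Prefer the CompletionConfig when present, fall back to the legacy kwargs."""
    config: Optional[CompletionConfig] = args.get('config', None)
    if config:
        return config.temperature, config.max_tokens
    return args.get('temperature', 1.0), args.get('max_tokens', 16384)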
mojentic/llm/gateways/models.py CHANGED
@@ -97,8 +97,11 @@ class LLMGatewayResponse(BaseModel):
  Parsed response object.
  tool_calls : List[LLMToolCall]
  List of requested tool calls from the LLM.
+ thinking : Optional[str]
+ Model thinking/reasoning trace (populated by some providers).
  """
  content: Optional[Union[str, dict[str, str]]] = Field(None, description="The content of the response.")
  object: Optional[BaseModel] = Field(None, description="Parsed response object")
  tool_calls: List[LLMToolCall] = Field(default_factory=list,
  description="List of requested tool calls from the LLM.")
+ thinking: Optional[str] = Field(None, description="Model thinking/reasoning trace (populated by some providers)")
mojentic/llm/gateways/ollama.py CHANGED
@@ -20,9 +20,12 @@ class StreamingResponse(BaseModel):
  Text content chunk from the LLM response.
  tool_calls : Optional[List]
  Tool calls from the LLM response (raw ollama format).
+ thinking : Optional[str]
+ Thinking/reasoning trace from the LLM response.
  """
  content: Optional[str] = None
  tool_calls: Optional[List] = None
+ thinking: Optional[str] = None


  class OllamaGateway(LLMGateway):
@@ -41,14 +44,26 @@ class OllamaGateway(LLMGateway):
  self.client = Client(host=host, headers=headers, timeout=timeout)

  def _extract_options_from_args(self, args):
- options = Options(
- temperature=args.get('temperature', 1.0),
- num_ctx=args.get('num_ctx', 32768),
- )
- if args.get('num_predict', 0) > 0:
- options.num_predict = args['num_predict']
- if 'max_tokens' in args:
- options.num_predict = args['max_tokens']
+ # Extract config if present, otherwise use individual kwargs
+ config = args.get('config', None)
+ if config:
+ options = Options(
+ temperature=config.temperature,
+ num_ctx=config.num_ctx,
+ )
+ if config.num_predict > 0:
+ options.num_predict = config.num_predict
+ if config.max_tokens:
+ options.num_predict = config.max_tokens
+ else:
+ options = Options(
+ temperature=args.get('temperature', 1.0),
+ num_ctx=args.get('num_ctx', 32768),
+ )
+ if args.get('num_predict', 0) > 0:
+ options.num_predict = args['num_predict']
+ if 'max_tokens' in args:
+ options.num_predict = args['max_tokens']
  return options

  def complete(self, **args) -> LLMGatewayResponse:
@@ -90,6 +105,12 @@ class OllamaGateway(LLMGateway):
  'options': options
  }

+ # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+ config = args.get('config', None)
+ if config and config.reasoning_effort is not None:
+ ollama_args['think'] = True
+ logger.info("Enabling extended thinking for Ollama", reasoning_effort=config.reasoning_effort)
+
  if 'object_model' in args and args['object_model'] is not None:
  ollama_args['format'] = args['object_model'].model_json_schema()

@@ -113,10 +134,14 @@ class OllamaGateway(LLMGateway):
  arguments={str(k): str(t.function.arguments[k]) for k in t.function.arguments})
  for t in response.message.tool_calls]

+ # Extract thinking content if present
+ thinking = getattr(response.message, 'thinking', None)
+
  return LLMGatewayResponse(
  content=response.message.content,
  object=object,
  tool_calls=tool_calls,
+ thinking=thinking
  )

  def complete_stream(self, **args) -> Iterator[StreamingResponse]:
@@ -156,6 +181,12 @@ class OllamaGateway(LLMGateway):
  'stream': True
  }

+ # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+ config = args.get('config', None)
+ if config and config.reasoning_effort is not None:
+ ollama_args['think'] = True
+ logger.info("Enabling extended thinking for Ollama streaming", reasoning_effort=config.reasoning_effort)
+
  # Enable tool support if tools are provided
  if 'tools' in args and args['tools'] is not None:
  ollama_args['tools'] = [t.descriptor for t in args['tools']]
@@ -168,6 +199,10 @@ class OllamaGateway(LLMGateway):
  if chunk.message.content:
  yield StreamingResponse(content=chunk.message.content)

+ # Yield thinking chunks when they arrive
+ if hasattr(chunk.message, 'thinking') and chunk.message.thinking:
+ yield StreamingResponse(thinking=chunk.message.thinking)
+
  # Yield tool calls when they arrive
  if chunk.message.tool_calls:
  yield StreamingResponse(tool_calls=chunk.message.tool_calls)
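A sketch of consuming the new thinking chunks from the streaming path. It assumes a local Ollama server reachable with the gateway's default constructor arguments, and the model name is only an example:

from mojentic.llm import CompletionConfig
from mojentic.llm.gateways.models import LLMMessage, MessageRole
from mojentic.llm.gateways.ollama import OllamaGateway

gateway = OllamaGateway()  # assumes defaults pointing at a local Ollama server
config = CompletionConfig(reasoning_effort="medium")  # any level turns on think=True for Ollama

for chunk in gateway.complete_stream(
        model="qwen3:8b",  # illustrative model name
        messages=[LLMMessage(role=MessageRole.User, content="Why is the sky blue?")],
        config=config):
    if chunk.thinking:
        print(f"[thinking] {chunk.thinking}", end="", flush=True)
    if chunk.content:
        print(chunk.content, end="", flush=True)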
mojentic/llm/gateways/openai.py CHANGED
@@ -121,6 +121,20 @@ class OpenAIGateway(LLMGateway):
  supported_temperatures=capabilities.supported_temperatures)
  adapted_args['temperature'] = default_temp

+ # Handle reasoning_effort for reasoning models
+ if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+ if capabilities.model_type == ModelType.REASONING:
+ # Keep reasoning_effort for reasoning models
+ logger.info("Adding reasoning_effort parameter for reasoning model",
+ model=model,
+ reasoning_effort=adapted_args['reasoning_effort'])
+ else:
+ # Warn and remove for non-reasoning models
+ logger.warning("Model does not support reasoning_effort, ignoring parameter",
+ model=model,
+ requested_reasoning_effort=adapted_args['reasoning_effort'])
+ adapted_args.pop('reasoning_effort', None)
+
  return adapted_args

  def _validate_model_parameters(self, model: str, args: dict) -> None:
@@ -189,10 +203,21 @@ class OpenAIGateway(LLMGateway):
  messages = kwargs.get('messages')
  object_model = kwargs.get('object_model', None)
  tools = kwargs.get('tools', None)
- temperature = kwargs.get('temperature', 1.0)
- num_ctx = kwargs.get('num_ctx', 32768)
- max_tokens = kwargs.get('max_tokens', 16384)
- num_predict = kwargs.get('num_predict', -1)
+ config = kwargs.get('config', None)
+
+ # Use config if provided, otherwise use individual kwargs
+ if config:
+ temperature = config.temperature
+ num_ctx = config.num_ctx
+ max_tokens = config.max_tokens
+ num_predict = config.num_predict
+ reasoning_effort = config.reasoning_effort
+ else:
+ temperature = kwargs.get('temperature', 1.0)
+ num_ctx = kwargs.get('num_ctx', 32768)
+ max_tokens = kwargs.get('max_tokens', 16384)
+ num_predict = kwargs.get('num_predict', -1)
+ reasoning_effort = None

  if not model:
  raise ValueError("'model' parameter is required")
@@ -208,7 +233,8 @@ class OpenAIGateway(LLMGateway):
  'temperature': temperature,
  'num_ctx': num_ctx,
  'max_tokens': max_tokens,
- 'num_predict': num_predict
+ 'num_predict': num_predict,
+ 'reasoning_effort': reasoning_effort
  }

  # Adapt parameters based on model type
@@ -247,10 +273,15 @@ class OpenAIGateway(LLMGateway):
  elif 'max_completion_tokens' in adapted_args:
  openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+ # Add reasoning_effort if present in adapted args
+ if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+ openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
  logger.debug("Making OpenAI API call",
  model=openai_args['model'],
  has_tools='tools' in openai_args,
  has_object_model='response_format' in openai_args,
+ has_reasoning_effort='reasoning_effort' in openai_args,
  token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

  try:
@@ -339,10 +370,21 @@ class OpenAIGateway(LLMGateway):
  messages = kwargs.get('messages')
  object_model = kwargs.get('object_model', None)
  tools = kwargs.get('tools', None)
- temperature = kwargs.get('temperature', 1.0)
- num_ctx = kwargs.get('num_ctx', 32768)
- max_tokens = kwargs.get('max_tokens', 16384)
- num_predict = kwargs.get('num_predict', -1)
+ config = kwargs.get('config', None)
+
+ # Use config if provided, otherwise use individual kwargs
+ if config:
+ temperature = config.temperature
+ num_ctx = config.num_ctx
+ max_tokens = config.max_tokens
+ num_predict = config.num_predict
+ reasoning_effort = config.reasoning_effort
+ else:
+ temperature = kwargs.get('temperature', 1.0)
+ num_ctx = kwargs.get('num_ctx', 32768)
+ max_tokens = kwargs.get('max_tokens', 16384)
+ num_predict = kwargs.get('num_predict', -1)
+ reasoning_effort = None

  if not model:
  raise ValueError("'model' parameter is required")
@@ -358,7 +400,8 @@ class OpenAIGateway(LLMGateway):
  'temperature': temperature,
  'num_ctx': num_ctx,
  'max_tokens': max_tokens,
- 'num_predict': num_predict
+ 'num_predict': num_predict,
+ 'reasoning_effort': reasoning_effort
  }

  # Adapt parameters based on model type
@@ -401,9 +444,14 @@ class OpenAIGateway(LLMGateway):
  elif 'max_completion_tokens' in adapted_args:
  openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+ # Add reasoning_effort if present in adapted args
+ if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+ openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
  logger.debug("Making OpenAI streaming API call",
  model=openai_args['model'],
  has_tools='tools' in openai_args,
+ has_reasoning_effort='reasoning_effort' in openai_args,
  token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

  try:
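A sketch of requesting extended reasoning through the OpenAI gateway via the broker. The api_key wiring, the gateway keyword on LLMBroker, and the model name are assumptions for illustration, not taken from this diff:

import os
from mojentic.llm import CompletionConfig, LLMBroker
from mojentic.llm.gateways.models import LLMMessage, MessageRole
from mojentic.llm.gateways.openai import OpenAIGateway

# Constructor and keyword names here are assumed, not shown in this diff.
broker = LLMBroker(model="o3-mini", gateway=OpenAIGateway(api_key=os.environ["OPENAI_API_KEY"]))
# Forwarded as reasoning_effort for reasoning models; logged and dropped for non-reasoning models.
config = CompletionConfig(reasoning_effort="high")

answer = broker.generate(
    [LLMMessage(role=MessageRole.User, content="Plan a 3-step refactoring of a God object.")],
    config=config,
)
print(answer)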
mojentic/llm/llm_broker.py CHANGED
@@ -1,10 +1,12 @@
  import json
  import time
+ import warnings
  from typing import List, Optional, Type, Iterator

  import structlog
  from pydantic import BaseModel

+ from mojentic.llm.completion_config import CompletionConfig
  from mojentic.llm.gateways.llm_gateway import LLMGateway
  from mojentic.llm.gateways.models import MessageRole, LLMMessage, LLMGatewayResponse, LLMToolCall
  from mojentic.llm.gateways.ollama import OllamaGateway
@@ -62,8 +64,10 @@ class LLMBroker():
  else:
  self.adapter = gateway

- def generate(self, messages: List[LLMMessage], tools=None, temperature=1.0, num_ctx=32768,
- num_predict=-1, max_tokens=16384,
+ def generate(self, messages: List[LLMMessage], tools=None,
+ config: Optional[CompletionConfig] = None,
+ temperature: Optional[float] = None, num_ctx: Optional[int] = None,
+ num_predict: Optional[int] = None, max_tokens: Optional[int] = None,
  correlation_id: str = None) -> str:
  """
  Generate a text response from the LLM.
@@ -76,12 +80,17 @@ class LLMBroker():
  A list of tools to use with the LLM. If a tool call is requested, the tool will be
  called and the output
  will be included in the response.
- temperature : float
- The temperature to use for the response. Defaults to 1.0
- num_ctx : int
- The number of context tokens to use. Defaults to 32768.
- num_predict : int
- The number of tokens to predict. Defaults to no limit.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended). If provided with individual
+ kwargs, a DeprecationWarning is emitted.
+ temperature : Optional[float]
+ The temperature to use for the response. Deprecated: use config.
+ num_ctx : Optional[int]
+ The number of context tokens to use. Deprecated: use config.
+ num_predict : Optional[int]
+ The number of tokens to predict. Deprecated: use config.
+ max_tokens : Optional[int]
+ The maximum number of tokens to generate. Deprecated: use config.
  correlation_id : str
  UUID string that is copied from cause-to-affect for tracing events.

@@ -90,6 +99,23 @@ class LLMBroker():
  str
  The response from the LLM.
  """
+ # Handle config vs individual kwargs
+ if config is not None and any(
+ param is not None for param in [temperature, num_ctx, num_predict, max_tokens]):
+ warnings.warn(
+ "Both config and individual kwargs provided. Using config and ignoring kwargs. "
+ "Individual kwargs are deprecated, use config=CompletionConfig(...) instead.",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ elif config is None:
+ # Build config from individual kwargs
+ config = CompletionConfig(
+ temperature=temperature if temperature is not None else 1.0,
+ num_ctx=num_ctx if num_ctx is not None else 32768,
+ num_predict=num_predict if num_predict is not None else -1,
+ max_tokens=max_tokens if max_tokens is not None else 16384
+ )
  approximate_tokens = len(self.tokenizer.encode(self._content_to_count(messages)))
  logger.info(f"Requesting llm response with approx {approximate_tokens} tokens")

@@ -102,7 +128,7 @@ class LLMBroker():
  self.tracer.record_llm_call(
  self.model,
  messages_for_tracer,
- temperature,
+ config.temperature,
  tools=tools_for_tracer,
  source=type(self),
  correlation_id=correlation_id
@@ -115,10 +141,11 @@ class LLMBroker():
  model=self.model,
  messages=messages,
  tools=tools,
- temperature=temperature,
- num_ctx=num_ctx,
- num_predict=num_predict,
- max_tokens=max_tokens)
+ config=config,
+ temperature=config.temperature,
+ num_ctx=config.num_ctx,
+ num_predict=config.num_predict,
+ max_tokens=config.max_tokens)

  call_duration_ms = (time.time() - start_time) * 1000

@@ -172,7 +199,7 @@ class LLMBroker():
  tool_calls=[tool_call]))
  # {'role': 'tool', 'content': str(output), 'name': tool_call.name,
  # 'tool_call_id': tool_call.id})
- return self.generate(messages, tools, temperature, num_ctx, num_predict,
+ return self.generate(messages, tools, config=config,
  correlation_id=correlation_id)
  else:
  logger.warn('Function not found', function=tool_call.name)
@@ -182,8 +209,10 @@ class LLMBroker():

  return result.content

- def generate_stream(self, messages: List[LLMMessage], tools=None, temperature=1.0, num_ctx=32768,
- num_predict=-1, max_tokens=16384,
+ def generate_stream(self, messages: List[LLMMessage], tools=None,
+ config: Optional[CompletionConfig] = None,
+ temperature: Optional[float] = None, num_ctx: Optional[int] = None,
+ num_predict: Optional[int] = None, max_tokens: Optional[int] = None,
  correlation_id: str = None) -> Iterator[str]:
  """
  Generate a streaming text response from the LLM.
@@ -200,14 +229,17 @@ class LLMBroker():
  tools : List[Tool]
  A list of tools to use with the LLM. If a tool call is requested, the tool will be
  called and the output will be included in the response.
- temperature : float
- The temperature to use for the response. Defaults to 1.0
- num_ctx : int
- The number of context tokens to use. Defaults to 32768.
- num_predict : int
- The number of tokens to predict. Defaults to no limit.
- max_tokens : int
- The maximum number of tokens to generate. Defaults to 16384.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended). If provided with individual
+ kwargs, a DeprecationWarning is emitted.
+ temperature : Optional[float]
+ The temperature to use for the response. Deprecated: use config.
+ num_ctx : Optional[int]
+ The number of context tokens to use. Deprecated: use config.
+ num_predict : Optional[int]
+ The number of tokens to predict. Deprecated: use config.
+ max_tokens : Optional[int]
+ The maximum number of tokens to generate. Deprecated: use config.
  correlation_id : str
  UUID string that is copied from cause-to-affect for tracing events.

@@ -216,6 +248,23 @@ class LLMBroker():
  str
  Content chunks as they arrive from the LLM.
  """
+ # Handle config vs individual kwargs
+ if config is not None and any(
+ param is not None for param in [temperature, num_ctx, num_predict, max_tokens]):
+ warnings.warn(
+ "Both config and individual kwargs provided. Using config and ignoring kwargs. "
+ "Individual kwargs are deprecated, use config=CompletionConfig(...) instead.",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ elif config is None:
+ # Build config from individual kwargs
+ config = CompletionConfig(
+ temperature=temperature if temperature is not None else 1.0,
+ num_ctx=num_ctx if num_ctx is not None else 32768,
+ num_predict=num_predict if num_predict is not None else -1,
+ max_tokens=max_tokens if max_tokens is not None else 16384
+ )
  # Check if gateway supports streaming
  if not hasattr(self.adapter, 'complete_stream'):
  raise NotImplementedError(f"Gateway {type(self.adapter).__name__} does not support streaming")
@@ -232,7 +281,7 @@ class LLMBroker():
  self.tracer.record_llm_call(
  self.model,
  messages_for_tracer,
- temperature,
+ config.temperature,
  tools=tools_for_tracer,
  source=type(self),
  correlation_id=correlation_id
@@ -249,10 +298,11 @@ class LLMBroker():
  model=self.model,
  messages=messages,
  tools=tools,
- temperature=temperature,
- num_ctx=num_ctx,
- num_predict=num_predict,
- max_tokens=max_tokens
+ config=config,
+ temperature=config.temperature,
+ num_ctx=config.num_ctx,
+ num_predict=config.num_predict,
+ max_tokens=config.max_tokens
  )

  for chunk in stream:
@@ -335,8 +385,7 @@ class LLMBroker():

  # Recursively stream the response after tool execution
  yield from self.generate_stream(
- messages, tools, temperature, num_ctx, num_predict,
- max_tokens, correlation_id=correlation_id
+ messages, tools, config=config, correlation_id=correlation_id
  )
  return # Exit after recursive call
  else:
@@ -350,7 +399,9 @@ class LLMBroker():
  return content

  def generate_object(self, messages: List[LLMMessage], object_model: Type[BaseModel],
- temperature=1.0, num_ctx=32768, num_predict=-1, max_tokens=16384,
+ config: Optional[CompletionConfig] = None,
+ temperature: Optional[float] = None, num_ctx: Optional[int] = None,
+ num_predict: Optional[int] = None, max_tokens: Optional[int] = None,
  correlation_id: str = None) -> BaseModel:
  """
  Generate a structured response from the LLM and return it as an object.
@@ -361,12 +412,17 @@ class LLMBroker():
  A list of messages to send to the LLM.
  object_model : BaseModel
  The class of the model to use for the structured response data.
- temperature : float
- The temperature to use for the response. Defaults to 1.0.
- num_ctx : int
- The number of context tokens to use. Defaults to 32768.
- num_predict : int
- The number of tokens to predict. Defaults to no limit.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended). If provided with individual
+ kwargs, a DeprecationWarning is emitted.
+ temperature : Optional[float]
+ The temperature to use for the response. Deprecated: use config.
+ num_ctx : Optional[int]
+ The number of context tokens to use. Deprecated: use config.
+ num_predict : Optional[int]
+ The number of tokens to predict. Deprecated: use config.
+ max_tokens : Optional[int]
+ The maximum number of tokens to generate. Deprecated: use config.
  correlation_id : str
  UUID string that is copied from cause-to-affect for tracing events.

@@ -375,6 +431,23 @@ class LLMBroker():
  BaseModel
  An instance of the model class provided containing the structured response data.
  """
+ # Handle config vs individual kwargs
+ if config is not None and any(
+ param is not None for param in [temperature, num_ctx, num_predict, max_tokens]):
+ warnings.warn(
+ "Both config and individual kwargs provided. Using config and ignoring kwargs. "
+ "Individual kwargs are deprecated, use config=CompletionConfig(...) instead.",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ elif config is None:
+ # Build config from individual kwargs
+ config = CompletionConfig(
+ temperature=temperature if temperature is not None else 1.0,
+ num_ctx=num_ctx if num_ctx is not None else 32768,
+ num_predict=num_predict if num_predict is not None else -1,
+ max_tokens=max_tokens if max_tokens is not None else 16384
+ )
  approximate_tokens = len(self.tokenizer.encode(self._content_to_count(messages)))
  logger.info(f"Requesting llm response with approx {approximate_tokens} tokens")

@@ -385,7 +458,7 @@ class LLMBroker():
  self.tracer.record_llm_call(
  self.model,
  messages_for_tracer,
- temperature,
+ config.temperature,
  tools=None,
  source=type(self),
  correlation_id=correlation_id
@@ -396,8 +469,9 @@ class LLMBroker():

  result = self.adapter.complete(model=self.model, messages=messages,
  object_model=object_model,
- temperature=temperature, num_ctx=num_ctx,
- num_predict=num_predict, max_tokens=max_tokens)
+ config=config,
+ temperature=config.temperature, num_ctx=config.num_ctx,
+ num_predict=config.num_predict, max_tokens=config.max_tokens)

  call_duration_ms = (time.time() - start_time) * 1000

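Taken together, callers can migrate the broker entry points like this. The model name and the default local Ollama gateway are illustrative assumptions; the config keyword and deprecation behavior come straight from the hunks above:

from pydantic import BaseModel
from mojentic.llm import CompletionConfig, LLMBroker
from mojentic.llm.gateways.models import LLMMessage, MessageRole

class ReleaseNote(BaseModel):
    headline: str

broker = LLMBroker(model="llama3.1")  # illustrative model; assumes the default Ollama gateway
messages = [LLMMessage(role=MessageRole.User, content="Summarize version 1.2.0.")]

# Before (still works, now deprecated): loose kwargs
broker.generate(messages, temperature=0.5, num_ctx=16384)

# After: one config object shared by generate, generate_stream, and generate_object
config = CompletionConfig(temperature=0.5, num_ctx=16384, max_tokens=8192)
broker.generate(messages, config=config)
note = broker.generate_object(messages, object_model=ReleaseNote, config=config)

# Mixing both emits a DeprecationWarning and the config wins
broker.generate(messages, config=config, temperature=0.9)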
mojentic/llm/llm_broker_spec.py CHANGED
@@ -1,7 +1,9 @@

+ import warnings
  import pytest
  from pydantic import BaseModel

+ from mojentic.llm.completion_config import CompletionConfig
  from mojentic.llm.gateways.models import LLMMessage, MessageRole, LLMGatewayResponse, LLMToolCall
  from mojentic.llm.llm_broker import LLMBroker

@@ -209,3 +211,88 @@ class DescribeLLMBroker:
  list(llm_broker.generate_stream(messages))

  assert "does not support streaming" in str(exc_info.value)
+
+ class DescribeCompletionConfigSupport:
+
+ def should_pass_config_to_gateway_in_generate(self, llm_broker, mock_gateway):
+ config = CompletionConfig(temperature=0.7, reasoning_effort="high")
+ messages = [LLMMessage(role=MessageRole.User, content="Test")]
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content="Response",
+ object=None,
+ tool_calls=[]
+ )
+
+ llm_broker.generate(messages, config=config)
+
+ mock_gateway.complete.assert_called_once()
+ call_kwargs = mock_gateway.complete.call_args[1]
+ assert call_kwargs['config'] == config
+ assert call_kwargs['config'].reasoning_effort == "high"
+
+ def should_build_config_from_kwargs_when_not_provided(self, llm_broker, mock_gateway):
+ messages = [LLMMessage(role=MessageRole.User, content="Test")]
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content="Response",
+ object=None,
+ tool_calls=[]
+ )
+
+ llm_broker.generate(messages, temperature=0.5, num_ctx=16384)
+
+ mock_gateway.complete.assert_called_once()
+ call_kwargs = mock_gateway.complete.call_args[1]
+ assert call_kwargs['config'].temperature == 0.5
+ assert call_kwargs['config'].num_ctx == 16384
+
+ def should_emit_deprecation_warning_when_both_config_and_kwargs_provided(self, llm_broker, mock_gateway):
+ config = CompletionConfig(temperature=0.7)
+ messages = [LLMMessage(role=MessageRole.User, content="Test")]
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content="Response",
+ object=None,
+ tool_calls=[]
+ )
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+ llm_broker.generate(messages, config=config, temperature=0.5)
+
+ assert len(w) == 1
+ assert issubclass(w[0].category, DeprecationWarning)
+ assert "deprecated" in str(w[0].message).lower()
+
+ def should_pass_config_to_gateway_in_generate_object(self, llm_broker, mock_gateway):
+ config = CompletionConfig(temperature=0.3, max_tokens=8192)
+ messages = [LLMMessage(role=MessageRole.User, content="Generate object")]
+ mock_object = SimpleModel(text="test", number=42)
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content='{"text": "test", "number": 42}',
+ object=mock_object,
+ tool_calls=[]
+ )
+
+ llm_broker.generate_object(messages, object_model=SimpleModel, config=config)
+
+ mock_gateway.complete.assert_called_once()
+ call_kwargs = mock_gateway.complete.call_args[1]
+ assert call_kwargs['config'] == config
+ assert call_kwargs['config'].max_tokens == 8192
+
+ def should_pass_config_to_gateway_in_generate_stream(self, llm_broker, mock_gateway, mocker):
+ from mojentic.llm.gateways.ollama import StreamingResponse
+
+ config = CompletionConfig(temperature=0.9, reasoning_effort="medium")
+ messages = [LLMMessage(role=MessageRole.User, content="Stream test")]
+
+ mock_gateway.complete_stream = mocker.MagicMock()
+ mock_gateway.complete_stream.return_value = iter([
+ StreamingResponse(content="Response")
+ ])
+
+ list(llm_broker.generate_stream(messages, config=config))
+
+ mock_gateway.complete_stream.assert_called_once()
+ call_kwargs = mock_gateway.complete_stream.call_args[1]
+ assert call_kwargs['config'] == config
+ assert call_kwargs['config'].reasoning_effort == "medium"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mojentic
- Version: 1.1.1
+ Version: 1.2.0
  Summary: Mojentic is an agentic framework that aims to provide a simple and flexible way to assemble teams of agents to solve complex problems.
  Author-email: Stacey Vetzal <stacey@vetzal.com>
  Project-URL: Homepage, https://github.com/svetzal/mojentic
@@ -74,24 +74,26 @@ mojentic/agents/simple_recursive_agent.py,sha256=nNUzamDzBL7AU79mCb_NZsVQazAxSTn
  mojentic/agents/simple_recursive_agent_spec.py,sha256=rcIT2BWjT-sc2WevZ0ts9qi9Feh-ypNjeKYfULZmqo8,14945
  mojentic/context/__init__.py,sha256=RKDcfejikUZMDuFYIfJpmLnoXoRCOCfLjOTiicjq1Yo,80
  mojentic/context/shared_working_memory.py,sha256=Zt9MNGErEkDIUAaHvyhEOiTaEobI9l0MV4Z59lQFBr0,396
- mojentic/llm/__init__.py,sha256=pHWdS6XRdPKhEWv1YpXaD5B5mUPojWM9ncYB-bXI2Qo,484
- mojentic/llm/chat_session.py,sha256=SacT4WLjUuoRpG4puNDdTpinlfEIQI8sC3bs2loFOS8,4909
+ mojentic/llm/__init__.py,sha256=1IHy5lWRv2lnO0-p6ucQwUssaAn_jU9195OIVN2RA3U,546
+ mojentic/llm/chat_session.py,sha256=MBngV0CO78g5fL6rNTdAzX-UY_WqgHSXfbVD_vOAsAk,5446
  mojentic/llm/chat_session_spec.py,sha256=Qek3kFmRYFnuS8vSnrhQ1vnanuatrShCpqUV0ffRi-g,5492
- mojentic/llm/llm_broker.py,sha256=d59MvUBNgVAZbL4T6GUp-tMroTwwmcTJfyOzJSvejAw,16924
- mojentic/llm/llm_broker_spec.py,sha256=N0wSAIakWXn-4cxwG3dPR0MycZNTW-lQl9jWHlchC2w,8662
+ mojentic/llm/completion_config.py,sha256=OMutlf44NjJ8jhFfJhZTtcJmrJQeVJyC2P0C_rP4GrM,2101
+ mojentic/llm/completion_config_spec.py,sha256=fQFRw_w7c4BcR2Z8GV-o0gt5L9ayxHht94hfhjgL4Jo,1495
+ mojentic/llm/llm_broker.py,sha256=Xr723X1ve8PSCwevq4CHsqgpwaFfNZGNDsw9v0mwCVE,21278
+ mojentic/llm/llm_broker_spec.py,sha256=y29H615AItuxioMoPqhFLdYJELyBOyfOCq16siZXE8A,12634
  mojentic/llm/message_composers.py,sha256=8_5fA-J1I3BZ_0YlgZkQhsn_u7H8yMGEVNYHUPYW1X8,12142
  mojentic/llm/message_composers_spec.py,sha256=pR-npU5KL7lzYpAl0gWTJIP6obcnMxMpkEudoZs5-0M,12133
  mojentic/llm/gateways/__init__.py,sha256=y8zI9dGVhFkwDPSVU5NFFyaTTDWOkCfQYMzlFN72Ihg,786
- mojentic/llm/gateways/anthropic.py,sha256=DfaNgCrx33O4KfPrP5za_yKXlncIMexBnVKxOYk2Bew,1813
+ mojentic/llm/gateways/anthropic.py,sha256=8TPoIasPC2ovEwAEmb_hkaIXbh4vNcZXTpJzB6ZOGlY,2413
  mojentic/llm/gateways/anthropic_messages_adapter.py,sha256=FtDJMyeDOIei0Ign83C_jpWG06c73VAo6pXxk0zI3nI,2991
  mojentic/llm/gateways/embeddings_gateway.py,sha256=kcOhiyHzOyQgKgwPDQJD5oVvfwk71GsBgMYJOIDv5NU,1347
  mojentic/llm/gateways/file_gateway.py,sha256=3bZpalSyl_R4016WzCmmjUBDtAgPsmx19eVGv6p1Ufk,1418
- mojentic/llm/gateways/llm_gateway.py,sha256=1J-FWKlFNxqd9_YP8Ul6J0cORYQoF_czgEfpAUaWFtQ,2677
- mojentic/llm/gateways/models.py,sha256=OyIaMHKrrx6dHo5FbC8qOFct7PRql9wqbe_BJlgDSDE,3015
- mojentic/llm/gateways/ollama.py,sha256=9DhNRC2sjBwnzyZplS3kd8-s3famxHUf4WfBycFH0GE,7737
+ mojentic/llm/gateways/llm_gateway.py,sha256=PiwcrKWCqYXiHydSYqDiNJESU-cUTceB03fb26b1zY8,3071
+ mojentic/llm/gateways/models.py,sha256=dxAoR6OIRPWvNsiy-31n90UV2k9fqz4upBJnhzmc4fc,3232
+ mojentic/llm/gateways/ollama.py,sha256=w18SRSoRuZPKZlUujRq4fitl5tulYHHjKWDsky4ywgU,9393
  mojentic/llm/gateways/ollama_messages_adapter.py,sha256=kUN_p2FyN88_trXMcL-Xsn9xPBU7pGKlJwTUEUCf6G4,1404
  mojentic/llm/gateways/ollama_messages_adapter_spec.py,sha256=gVRbWDrHOa1EiZ0CkEWe0pGn-GKRqdGb-x56HBQeYSE,4981
- mojentic/llm/gateways/openai.py,sha256=S19AIooYoBZYELVPrSeOwKslpYc7jrhu-sLjDXUFF3w,23161
+ mojentic/llm/gateways/openai.py,sha256=inm4ElawqnfUsHugCMboEQufvwyOxBFsdykaqAF_Ow8,25572
  mojentic/llm/gateways/openai_message_adapter_spec.py,sha256=3nObWsf6cPuWuCK_IhrQoRdQdz7gndqeSSvJIxtQkp8,6609
  mojentic/llm/gateways/openai_messages_adapter.py,sha256=Scal68JKKdBHB35ok1c5DeWYdD6Wra5oXSsPxJyyXSQ,3947
  mojentic/llm/gateways/openai_model_registry.py,sha256=2tIT_L8g4opEgLRvhpOy_w47W83Xp_slki2rl3xnteo,18585
@@ -143,8 +145,8 @@ mojentic/tracer/tracer_system.py,sha256=KPSVIfGVOjSx6Vj_SvrisqJXKT6ddwBc_UCMQC6D
  mojentic/tracer/tracer_system_spec.py,sha256=8hpQlmAWyjUvk7ihy339L0buQ-eH5rluaFvyMl-mSH4,8830
  mojentic/utils/__init__.py,sha256=WvNYbtVeliMZn2sMX53CrOQlQLJBXi4mJNoocG7s_kI,116
  mojentic/utils/formatting.py,sha256=YtXh0aYzLB9GKP8ZD6u1By1OBqPOXUtHirtq0GmHNag,948
- mojentic-1.1.1.dist-info/licenses/LICENSE.md,sha256=txSgV8n5zY1W3NiF5HHsCwlaW0e8We1cSC6TuJUqxXA,1060
- mojentic-1.1.1.dist-info/METADATA,sha256=eb6l0htqFLfD-plqA90rn9UrdN_bHSUVSGsoB7-ILuY,8775
- mojentic-1.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- mojentic-1.1.1.dist-info/top_level.txt,sha256=Q-BvPQ8Eu1jnEqK8Xkr6A9C8Xa1z38oPZRHuA5MCTqg,19
- mojentic-1.1.1.dist-info/RECORD,,
+ mojentic-1.2.0.dist-info/licenses/LICENSE.md,sha256=txSgV8n5zY1W3NiF5HHsCwlaW0e8We1cSC6TuJUqxXA,1060
+ mojentic-1.2.0.dist-info/METADATA,sha256=nJ-Gnzva7OptikQBHlko5KYKQ449H6UP_DiNQK0f488,8775
+ mojentic-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ mojentic-1.2.0.dist-info/top_level.txt,sha256=Q-BvPQ8Eu1jnEqK8Xkr6A9C8Xa1z38oPZRHuA5MCTqg,19
+ mojentic-1.2.0.dist-info/RECORD,,