mojentic-1.1.1-py3-none-any.whl → mojentic-1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mojentic/llm/__init__.py CHANGED
@@ -7,6 +7,7 @@ from .llm_broker import LLMBroker # noqa: F401
  from .chat_session import ChatSession # noqa: F401
  from .message_composers import MessageBuilder, FileTypeSensor # noqa: F401
  from .registry.llm_registry import LLMRegistry # noqa: F401
+ from .completion_config import CompletionConfig # noqa: F401

  # Re-export gateway components at the LLM level
  from .gateways.models import ( # noqa: F401
mojentic/llm/chat_session.py CHANGED
@@ -1,6 +1,7 @@
  from typing import Iterator, List, Optional

  from mojentic.llm import LLMBroker
+ from mojentic.llm.completion_config import CompletionConfig
  from mojentic.llm.gateways.models import LLMMessage, MessageRole
  from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
  from mojentic.llm.tools.llm_tool import LLMTool
@@ -23,6 +24,7 @@ class ChatSession:
  tools: Optional[List[LLMTool]] = None,
  max_context: int = 32768,
  tokenizer_gateway: TokenizerGateway = None,
+ config: Optional[CompletionConfig] = None,
  temperature: float = 1.0):
  """
  Create an instance of the ChatSession.
@@ -39,15 +41,25 @@ class ChatSession:
  The maximum number of tokens to keep in the context. Defaults to 32768.
  tokenizer_gateway : TokenizerGateway, optional
  The gateway to use for tokenization. If None, `mxbai-embed-large` is used on a local Ollama server.
+ config : Optional[CompletionConfig], optional
+ Configuration object for LLM completion. If None, one is created from temperature and max_context.
  temperature : float, optional
- The temperature to use for the response. Defaults to 1.0.
+ The temperature to use for the response. Defaults to 1.0. Deprecated: use config.
  """

  self.llm = llm
  self.system_prompt = system_prompt
  self.tools = tools
  self.max_context = max_context
- self.temperature = temperature
+
+ # Use config if provided, otherwise build from individual kwargs
+ if config is not None:
+ self.config = config
+ else:
+ self.config = CompletionConfig(
+ temperature=temperature,
+ num_ctx=max_context
+ )

  if tokenizer_gateway is None:
  self.tokenizer_gateway = TokenizerGateway()
@@ -73,7 +85,7 @@ class ChatSession:
  The response from the LLM.
  """
  self.insert_message(LLMMessage(role=MessageRole.User, content=query))
- response = self.llm.generate(self.messages, tools=self.tools, temperature=self.temperature)
+ response = self.llm.generate(self.messages, tools=self.tools, config=self.config)
  self._ensure_all_messages_are_sized()
  self.insert_message(LLMMessage(role=MessageRole.Assistant, content=response))
  return response
@@ -95,7 +107,7 @@ class ChatSession:
  """
  self.insert_message(LLMMessage(role=MessageRole.User, content=query))
  accumulated = []
- for chunk in self.llm.generate_stream(self.messages, tools=self.tools, temperature=self.temperature):
+ for chunk in self.llm.generate_stream(self.messages, tools=self.tools, config=self.config):
  accumulated.append(chunk)
  yield chunk
  self._ensure_all_messages_are_sized()
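For orientation, a minimal usage sketch of the new ChatSession wiring. The model name, and the assumption that the broker is passed as the first positional argument, are illustrative and not taken from this diff:

from mojentic.llm import ChatSession, CompletionConfig, LLMBroker

# Model name and positional llm argument are assumptions for illustration only.
llm = LLMBroker(model="llama3.1")
session = ChatSession(
    llm,
    system_prompt="You are a terse assistant.",
    config=CompletionConfig(temperature=0.2, num_ctx=16384),
)
# Interactions now route session.config into llm.generate(...) as shown above,
# instead of the deprecated temperature kwarg.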
mojentic/llm/completion_config.py ADDED
@@ -0,0 +1,58 @@
+ from typing import Optional, Literal
+ from pydantic import BaseModel, Field
+
+
+ class CompletionConfig(BaseModel):
+ """
+ Configuration object for LLM completion requests.
+
+ This model provides a unified way to configure LLM behavior across different
+ providers and models. It replaces loose kwargs with a structured configuration
+ object.
+
+ Attributes
+ ----------
+ temperature : float
+ Controls randomness in the output. Higher values (e.g., 1.5) make output
+ more random, while lower values (e.g., 0.1) make it more deterministic.
+ Defaults to 1.0.
+ num_ctx : int
+ The number of context tokens to use. This sets the context window size.
+ Defaults to 32768.
+ max_tokens : int
+ The maximum number of tokens to generate in the response.
+ Defaults to 16384.
+ num_predict : int
+ The number of tokens to predict. A value of -1 means no limit.
+ Defaults to -1.
+ reasoning_effort : Optional[Literal["low", "medium", "high"]]
+ Controls the reasoning effort level for models that support extended thinking.
+ - "low": Quick, minimal reasoning
+ - "medium": Balanced reasoning effort
+ - "high": Deep, thorough reasoning
+ Provider-specific behavior:
+ - Ollama: Maps to `think: true` parameter for all levels
+ - OpenAI: Maps to `reasoning_effort` API parameter for reasoning models
+ Defaults to None (no extended reasoning).
+ """
+
+ temperature: float = Field(
+ default=1.0,
+ description="Temperature for sampling (higher = more random)"
+ )
+ num_ctx: int = Field(
+ default=32768,
+ description="Number of context tokens"
+ )
+ max_tokens: int = Field(
+ default=16384,
+ description="Maximum tokens to generate"
+ )
+ num_predict: int = Field(
+ default=-1,
+ description="Number of tokens to predict (-1 = no limit)"
+ )
+ reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+ default=None,
+ description="Reasoning effort level for extended thinking"
+ )
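Because CompletionConfig is a plain Pydantic model, it can be built from a dict (for example, settings loaded from a file) and round-tripped. A small sketch, assuming only the Pydantic v2 API the package already uses elsewhere in this diff (model_json_schema):

from mojentic.llm.completion_config import CompletionConfig

settings = {"temperature": 0.2, "num_ctx": 16384, "reasoning_effort": "high"}
config = CompletionConfig(**settings)   # fields are validated on construction
print(config.model_dump())              # Pydantic v2 round-trip back to a dict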
mojentic/llm/completion_config_spec.py ADDED
@@ -0,0 +1,44 @@
+ import pytest
+ from pydantic import ValidationError
+
+ from mojentic.llm.completion_config import CompletionConfig
+
+
+ class DescribeCompletionConfig:
+
+ def should_use_default_values(self):
+ config = CompletionConfig()
+ assert config.temperature == 1.0
+ assert config.num_ctx == 32768
+ assert config.max_tokens == 16384
+ assert config.num_predict == -1
+ assert config.reasoning_effort is None
+
+ def should_accept_custom_values(self):
+ config = CompletionConfig(
+ temperature=0.5,
+ num_ctx=16384,
+ max_tokens=8192,
+ num_predict=100,
+ reasoning_effort="high"
+ )
+ assert config.temperature == 0.5
+ assert config.num_ctx == 16384
+ assert config.max_tokens == 8192
+ assert config.num_predict == 100
+ assert config.reasoning_effort == "high"
+
+ def should_accept_valid_reasoning_effort_levels(self):
+ for level in ["low", "medium", "high"]:
+ config = CompletionConfig(reasoning_effort=level)
+ assert config.reasoning_effort == level
+
+ def should_reject_invalid_reasoning_effort_levels(self):
+ with pytest.raises(ValidationError) as exc_info:
+ CompletionConfig(reasoning_effort="invalid")
+
+ assert "reasoning_effort" in str(exc_info.value)
+
+ def should_accept_none_reasoning_effort(self):
+ config = CompletionConfig(reasoning_effort=None)
+ assert config.reasoning_effort is None
mojentic/llm/gateways/anthropic.py CHANGED
@@ -17,6 +17,19 @@ class AnthropicGateway(LLMGateway):
  def complete(self, **args) -> LLMGatewayResponse:

  messages = args.get('messages')
+ config = args.get('config', None)
+
+ # Extract temperature and max_tokens from config if provided
+ if config:
+ temperature = config.temperature
+ max_tokens = config.max_tokens
+ # Note: reasoning_effort not supported by Anthropic yet
+ if config.reasoning_effort is not None:
+ logger.warning("Anthropic gateway does not yet support reasoning_effort parameter",
+ reasoning_effort=config.reasoning_effort)
+ else:
+ temperature = args.get('temperature', 1.0)
+ max_tokens = args.get('max_tokens', args.get('num_predict', 2000))

  system_messages = [m for m in messages if m.role == MessageRole.System]
  user_messages = [m for m in messages if m.role == MessageRole.User]
@@ -29,8 +42,8 @@ class AnthropicGateway(LLMGateway):

  response = self.client.messages.create(
  **anthropic_args,
- temperature=args.get('temperature', 1.0),
- max_tokens=args.get('max_tokens', args.get('num_predict', 2000)),
+ temperature=temperature,
+ max_tokens=max_tokens,
  # thinking={
  # "type": "enabled",
  # "budget_tokens": 32768,
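A sketch of how a config-driven call reaches this gateway. The api_key constructor argument and the model name are assumptions, since the AnthropicGateway constructor is not shown in this diff:

import os
from mojentic.llm import CompletionConfig
from mojentic.llm.gateways.anthropic import AnthropicGateway
from mojentic.llm.gateways.models import LLMMessage, MessageRole

gateway = AnthropicGateway(api_key=os.environ["ANTHROPIC_API_KEY"])  # constructor args assumed
config = CompletionConfig(temperature=0.2, max_tokens=1024)

# temperature and max_tokens are read from config; a reasoning_effort value would only log a warning here.
response = gateway.complete(
    model="claude-sonnet-4-20250514",  # illustrative model name
    messages=[LLMMessage(role=MessageRole.User, content="Say hello.")],
    config=config,
)
print(response.content)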
mojentic/llm/gateways/llm_gateway.py CHANGED
@@ -1,10 +1,13 @@
- from typing import List, Optional, Type
+ from typing import List, Optional, Type, TYPE_CHECKING

  from pydantic import BaseModel

  from mojentic.llm.gateways.models import LLMGatewayResponse, LLMMessage
  from mojentic.llm.tools.llm_tool import LLMTool

+ if TYPE_CHECKING:
+ from mojentic.llm.completion_config import CompletionConfig
+

  class LLMGateway:
  """
@@ -18,6 +21,7 @@ class LLMGateway:
  messages: List[LLMMessage],
  object_model: Optional[Type[BaseModel]] = None,
  tools: Optional[List[LLMTool]] = None,
+ config: Optional['CompletionConfig'] = None,
  temperature: float = 1.0,
  num_ctx: int = 32768, max_tokens: int = 16384,
  num_predict: int = -1) -> LLMGatewayResponse:
@@ -35,14 +39,16 @@ class LLMGateway:
  tools : Optional[List[LLMTool]]
  A list of tools to use with the LLM. If a tool call is requested, the tool will be called and the output
  will be included in the response.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended over individual kwargs).
  temperature : float
- The temperature to use for the response. Defaults to 1.0.
+ The temperature to use for the response. Defaults to 1.0. (Deprecated: use config)
  num_ctx : int
- The number of context tokens to use. Defaults to 32768.
+ The number of context tokens to use. Defaults to 32768. (Deprecated: use config)
  max_tokens : int
- The maximum number of tokens to generate. Defaults to 16384.
+ The maximum number of tokens to generate. Defaults to 16384. (Deprecated: use config)
  num_predict : int
- The number of tokens to predict. Defaults to no limit.
+ The number of tokens to predict. Defaults to no limit. (Deprecated: use config)

  Returns
  -------
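Since config arrives as an optional argument alongside the older kwargs, a third-party gateway can adopt the same fallback pattern the built-in gateways use below. This is a sketch, not part of the package:

from typing import Optional
from mojentic.llm.completion_config import CompletionConfig

def resolve_completion_settings(**args) -> tuple[float, int]:
    """Prefer the CompletionConfig when present, fall back to the legacy kwargs."""
    config: Optional[CompletionConfig] = args.get('config', None)
    if config:
        return config.temperature, config.max_tokens
    return args.get('temperature', 1.0), args.get('max_tokens', 16384)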
mojentic/llm/gateways/models.py CHANGED
@@ -97,8 +97,11 @@ class LLMGatewayResponse(BaseModel):
  Parsed response object.
  tool_calls : List[LLMToolCall]
  List of requested tool calls from the LLM.
+ thinking : Optional[str]
+ Model thinking/reasoning trace (populated by some providers).
  """
  content: Optional[Union[str, dict[str, str]]] = Field(None, description="The content of the response.")
  object: Optional[BaseModel] = Field(None, description="Parsed response object")
  tool_calls: List[LLMToolCall] = Field(default_factory=list,
  description="List of requested tool calls from the LLM.")
+ thinking: Optional[str] = Field(None, description="Model thinking/reasoning trace (populated by some providers)")
mojentic/llm/gateways/ollama.py CHANGED
@@ -20,9 +20,12 @@ class StreamingResponse(BaseModel):
  Text content chunk from the LLM response.
  tool_calls : Optional[List]
  Tool calls from the LLM response (raw ollama format).
+ thinking : Optional[str]
+ Thinking/reasoning trace from the LLM response.
  """
  content: Optional[str] = None
  tool_calls: Optional[List] = None
+ thinking: Optional[str] = None


  class OllamaGateway(LLMGateway):
@@ -41,14 +44,26 @@ class OllamaGateway(LLMGateway):
  self.client = Client(host=host, headers=headers, timeout=timeout)

  def _extract_options_from_args(self, args):
- options = Options(
- temperature=args.get('temperature', 1.0),
- num_ctx=args.get('num_ctx', 32768),
- )
- if args.get('num_predict', 0) > 0:
- options.num_predict = args['num_predict']
- if 'max_tokens' in args:
- options.num_predict = args['max_tokens']
+ # Extract config if present, otherwise use individual kwargs
+ config = args.get('config', None)
+ if config:
+ options = Options(
+ temperature=config.temperature,
+ num_ctx=config.num_ctx,
+ )
+ if config.num_predict > 0:
+ options.num_predict = config.num_predict
+ if config.max_tokens:
+ options.num_predict = config.max_tokens
+ else:
+ options = Options(
+ temperature=args.get('temperature', 1.0),
+ num_ctx=args.get('num_ctx', 32768),
+ )
+ if args.get('num_predict', 0) > 0:
+ options.num_predict = args['num_predict']
+ if 'max_tokens' in args:
+ options.num_predict = args['max_tokens']
  return options

  def complete(self, **args) -> LLMGatewayResponse:
@@ -90,6 +105,12 @@ class OllamaGateway(LLMGateway):
  'options': options
  }

+ # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+ config = args.get('config', None)
+ if config and config.reasoning_effort is not None:
+ ollama_args['think'] = True
+ logger.info("Enabling extended thinking for Ollama", reasoning_effort=config.reasoning_effort)
+
  if 'object_model' in args and args['object_model'] is not None:
  ollama_args['format'] = args['object_model'].model_json_schema()

@@ -113,10 +134,14 @@ class OllamaGateway(LLMGateway):
  arguments={str(k): str(t.function.arguments[k]) for k in t.function.arguments})
  for t in response.message.tool_calls]

+ # Extract thinking content if present
+ thinking = getattr(response.message, 'thinking', None)
+
  return LLMGatewayResponse(
  content=response.message.content,
  object=object,
  tool_calls=tool_calls,
+ thinking=thinking
  )

  def complete_stream(self, **args) -> Iterator[StreamingResponse]:
@@ -156,6 +181,12 @@ class OllamaGateway(LLMGateway):
  'stream': True
  }

+ # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+ config = args.get('config', None)
+ if config and config.reasoning_effort is not None:
+ ollama_args['think'] = True
+ logger.info("Enabling extended thinking for Ollama streaming", reasoning_effort=config.reasoning_effort)
+
  # Enable tool support if tools are provided
  if 'tools' in args and args['tools'] is not None:
  ollama_args['tools'] = [t.descriptor for t in args['tools']]
@@ -168,6 +199,10 @@ class OllamaGateway(LLMGateway):
  if chunk.message.content:
  yield StreamingResponse(content=chunk.message.content)

+ # Yield thinking chunks when they arrive
+ if hasattr(chunk.message, 'thinking') and chunk.message.thinking:
+ yield StreamingResponse(thinking=chunk.message.thinking)
+
  # Yield tool calls when they arrive
  if chunk.message.tool_calls:
  yield StreamingResponse(tool_calls=chunk.message.tool_calls)
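A sketch of consuming the new thinking chunks from the streaming path. It assumes a local Ollama server reachable with the gateway's default constructor arguments, and the model name is only an example:

from mojentic.llm import CompletionConfig
from mojentic.llm.gateways.models import LLMMessage, MessageRole
from mojentic.llm.gateways.ollama import OllamaGateway

gateway = OllamaGateway()  # assumes defaults pointing at a local Ollama server
config = CompletionConfig(reasoning_effort="medium")  # any level turns on think=True for Ollama

for chunk in gateway.complete_stream(
        model="qwen3:8b",  # illustrative model name
        messages=[LLMMessage(role=MessageRole.User, content="Why is the sky blue?")],
        config=config):
    if chunk.thinking:
        print(f"[thinking] {chunk.thinking}", end="", flush=True)
    if chunk.content:
        print(chunk.content, end="", flush=True)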
mojentic/llm/gateways/openai.py CHANGED
@@ -121,6 +121,20 @@ class OpenAIGateway(LLMGateway):
  supported_temperatures=capabilities.supported_temperatures)
  adapted_args['temperature'] = default_temp

+ # Handle reasoning_effort for reasoning models
+ if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+ if capabilities.model_type == ModelType.REASONING:
+ # Keep reasoning_effort for reasoning models
+ logger.info("Adding reasoning_effort parameter for reasoning model",
+ model=model,
+ reasoning_effort=adapted_args['reasoning_effort'])
+ else:
+ # Warn and remove for non-reasoning models
+ logger.warning("Model does not support reasoning_effort, ignoring parameter",
+ model=model,
+ requested_reasoning_effort=adapted_args['reasoning_effort'])
+ adapted_args.pop('reasoning_effort', None)
+
  return adapted_args

  def _validate_model_parameters(self, model: str, args: dict) -> None:
@@ -189,10 +203,21 @@ class OpenAIGateway(LLMGateway):
  messages = kwargs.get('messages')
  object_model = kwargs.get('object_model', None)
  tools = kwargs.get('tools', None)
- temperature = kwargs.get('temperature', 1.0)
- num_ctx = kwargs.get('num_ctx', 32768)
- max_tokens = kwargs.get('max_tokens', 16384)
- num_predict = kwargs.get('num_predict', -1)
+ config = kwargs.get('config', None)
+
+ # Use config if provided, otherwise use individual kwargs
+ if config:
+ temperature = config.temperature
+ num_ctx = config.num_ctx
+ max_tokens = config.max_tokens
+ num_predict = config.num_predict
+ reasoning_effort = config.reasoning_effort
+ else:
+ temperature = kwargs.get('temperature', 1.0)
+ num_ctx = kwargs.get('num_ctx', 32768)
+ max_tokens = kwargs.get('max_tokens', 16384)
+ num_predict = kwargs.get('num_predict', -1)
+ reasoning_effort = None

  if not model:
  raise ValueError("'model' parameter is required")
@@ -208,7 +233,8 @@ class OpenAIGateway(LLMGateway):
  'temperature': temperature,
  'num_ctx': num_ctx,
  'max_tokens': max_tokens,
- 'num_predict': num_predict
+ 'num_predict': num_predict,
+ 'reasoning_effort': reasoning_effort
  }

  # Adapt parameters based on model type
@@ -247,10 +273,15 @@ class OpenAIGateway(LLMGateway):
  elif 'max_completion_tokens' in adapted_args:
  openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+ # Add reasoning_effort if present in adapted args
+ if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+ openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
  logger.debug("Making OpenAI API call",
  model=openai_args['model'],
  has_tools='tools' in openai_args,
  has_object_model='response_format' in openai_args,
+ has_reasoning_effort='reasoning_effort' in openai_args,
  token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

  try:
@@ -339,10 +370,21 @@ class OpenAIGateway(LLMGateway):
  messages = kwargs.get('messages')
  object_model = kwargs.get('object_model', None)
  tools = kwargs.get('tools', None)
- temperature = kwargs.get('temperature', 1.0)
- num_ctx = kwargs.get('num_ctx', 32768)
- max_tokens = kwargs.get('max_tokens', 16384)
- num_predict = kwargs.get('num_predict', -1)
+ config = kwargs.get('config', None)
+
+ # Use config if provided, otherwise use individual kwargs
+ if config:
+ temperature = config.temperature
+ num_ctx = config.num_ctx
+ max_tokens = config.max_tokens
+ num_predict = config.num_predict
+ reasoning_effort = config.reasoning_effort
+ else:
+ temperature = kwargs.get('temperature', 1.0)
+ num_ctx = kwargs.get('num_ctx', 32768)
+ max_tokens = kwargs.get('max_tokens', 16384)
+ num_predict = kwargs.get('num_predict', -1)
+ reasoning_effort = None

  if not model:
  raise ValueError("'model' parameter is required")
@@ -358,7 +400,8 @@ class OpenAIGateway(LLMGateway):
  'temperature': temperature,
  'num_ctx': num_ctx,
  'max_tokens': max_tokens,
- 'num_predict': num_predict
+ 'num_predict': num_predict,
+ 'reasoning_effort': reasoning_effort
  }

  # Adapt parameters based on model type
@@ -401,9 +444,14 @@ class OpenAIGateway(LLMGateway):
  elif 'max_completion_tokens' in adapted_args:
  openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+ # Add reasoning_effort if present in adapted args
+ if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+ openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
  logger.debug("Making OpenAI streaming API call",
  model=openai_args['model'],
  has_tools='tools' in openai_args,
+ has_reasoning_effort='reasoning_effort' in openai_args,
  token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

  try:
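A sketch of requesting extended reasoning through the OpenAI gateway via the broker. The api_key wiring, the gateway keyword on LLMBroker, and the model name are assumptions for illustration, not taken from this diff:

import os
from mojentic.llm import CompletionConfig, LLMBroker
from mojentic.llm.gateways.models import LLMMessage, MessageRole
from mojentic.llm.gateways.openai import OpenAIGateway

# Constructor and keyword names here are assumed, not shown in this diff.
broker = LLMBroker(model="o3-mini", gateway=OpenAIGateway(api_key=os.environ["OPENAI_API_KEY"]))
# Forwarded as reasoning_effort for reasoning models; logged and dropped for non-reasoning models.
config = CompletionConfig(reasoning_effort="high")

answer = broker.generate(
    [LLMMessage(role=MessageRole.User, content="Plan a 3-step refactoring of a God object.")],
    config=config,
)
print(answer)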
mojentic/llm/llm_broker.py CHANGED
@@ -1,10 +1,12 @@
  import json
  import time
+ import warnings
  from typing import List, Optional, Type, Iterator

  import structlog
  from pydantic import BaseModel

+ from mojentic.llm.completion_config import CompletionConfig
  from mojentic.llm.gateways.llm_gateway import LLMGateway
  from mojentic.llm.gateways.models import MessageRole, LLMMessage, LLMGatewayResponse, LLMToolCall
  from mojentic.llm.gateways.ollama import OllamaGateway
@@ -62,8 +64,10 @@ class LLMBroker():
  else:
  self.adapter = gateway

- def generate(self, messages: List[LLMMessage], tools=None, temperature=1.0, num_ctx=32768,
- num_predict=-1, max_tokens=16384,
+ def generate(self, messages: List[LLMMessage], tools=None,
+ config: Optional[CompletionConfig] = None,
+ temperature: Optional[float] = None, num_ctx: Optional[int] = None,
+ num_predict: Optional[int] = None, max_tokens: Optional[int] = None,
  correlation_id: str = None) -> str:
  """
  Generate a text response from the LLM.
@@ -76,12 +80,17 @@ class LLMBroker():
  A list of tools to use with the LLM. If a tool call is requested, the tool will be
  called and the output
  will be included in the response.
- temperature : float
- The temperature to use for the response. Defaults to 1.0
- num_ctx : int
- The number of context tokens to use. Defaults to 32768.
- num_predict : int
- The number of tokens to predict. Defaults to no limit.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended). If provided with individual
+ kwargs, a DeprecationWarning is emitted.
+ temperature : Optional[float]
+ The temperature to use for the response. Deprecated: use config.
+ num_ctx : Optional[int]
+ The number of context tokens to use. Deprecated: use config.
+ num_predict : Optional[int]
+ The number of tokens to predict. Deprecated: use config.
+ max_tokens : Optional[int]
+ The maximum number of tokens to generate. Deprecated: use config.
  correlation_id : str
  UUID string that is copied from cause-to-affect for tracing events.

@@ -90,6 +99,23 @@ class LLMBroker():
  str
  The response from the LLM.
  """
+ # Handle config vs individual kwargs
+ if config is not None and any(
+ param is not None for param in [temperature, num_ctx, num_predict, max_tokens]):
+ warnings.warn(
+ "Both config and individual kwargs provided. Using config and ignoring kwargs. "
+ "Individual kwargs are deprecated, use config=CompletionConfig(...) instead.",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ elif config is None:
+ # Build config from individual kwargs
+ config = CompletionConfig(
+ temperature=temperature if temperature is not None else 1.0,
+ num_ctx=num_ctx if num_ctx is not None else 32768,
+ num_predict=num_predict if num_predict is not None else -1,
+ max_tokens=max_tokens if max_tokens is not None else 16384
+ )
  approximate_tokens = len(self.tokenizer.encode(self._content_to_count(messages)))
  logger.info(f"Requesting llm response with approx {approximate_tokens} tokens")

@@ -102,7 +128,7 @@ class LLMBroker():
  self.tracer.record_llm_call(
  self.model,
  messages_for_tracer,
- temperature,
+ config.temperature,
  tools=tools_for_tracer,
  source=type(self),
  correlation_id=correlation_id
@@ -115,10 +141,11 @@ class LLMBroker():
  model=self.model,
  messages=messages,
  tools=tools,
- temperature=temperature,
- num_ctx=num_ctx,
- num_predict=num_predict,
- max_tokens=max_tokens)
+ config=config,
+ temperature=config.temperature,
+ num_ctx=config.num_ctx,
+ num_predict=config.num_predict,
+ max_tokens=config.max_tokens)

  call_duration_ms = (time.time() - start_time) * 1000

@@ -172,7 +199,7 @@ class LLMBroker():
  tool_calls=[tool_call]))
  # {'role': 'tool', 'content': str(output), 'name': tool_call.name,
  # 'tool_call_id': tool_call.id})
- return self.generate(messages, tools, temperature, num_ctx, num_predict,
+ return self.generate(messages, tools, config=config,
  correlation_id=correlation_id)
  else:
  logger.warn('Function not found', function=tool_call.name)
@@ -182,8 +209,10 @@ class LLMBroker():

  return result.content

- def generate_stream(self, messages: List[LLMMessage], tools=None, temperature=1.0, num_ctx=32768,
- num_predict=-1, max_tokens=16384,
+ def generate_stream(self, messages: List[LLMMessage], tools=None,
+ config: Optional[CompletionConfig] = None,
+ temperature: Optional[float] = None, num_ctx: Optional[int] = None,
+ num_predict: Optional[int] = None, max_tokens: Optional[int] = None,
  correlation_id: str = None) -> Iterator[str]:
  """
  Generate a streaming text response from the LLM.
@@ -200,14 +229,17 @@ class LLMBroker():
  tools : List[Tool]
  A list of tools to use with the LLM. If a tool call is requested, the tool will be
  called and the output will be included in the response.
- temperature : float
- The temperature to use for the response. Defaults to 1.0
- num_ctx : int
- The number of context tokens to use. Defaults to 32768.
- num_predict : int
- The number of tokens to predict. Defaults to no limit.
- max_tokens : int
- The maximum number of tokens to generate. Defaults to 16384.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended). If provided with individual
+ kwargs, a DeprecationWarning is emitted.
+ temperature : Optional[float]
+ The temperature to use for the response. Deprecated: use config.
+ num_ctx : Optional[int]
+ The number of context tokens to use. Deprecated: use config.
+ num_predict : Optional[int]
+ The number of tokens to predict. Deprecated: use config.
+ max_tokens : Optional[int]
+ The maximum number of tokens to generate. Deprecated: use config.
  correlation_id : str
  UUID string that is copied from cause-to-affect for tracing events.

@@ -216,6 +248,23 @@ class LLMBroker():
  str
  Content chunks as they arrive from the LLM.
  """
+ # Handle config vs individual kwargs
+ if config is not None and any(
+ param is not None for param in [temperature, num_ctx, num_predict, max_tokens]):
+ warnings.warn(
+ "Both config and individual kwargs provided. Using config and ignoring kwargs. "
+ "Individual kwargs are deprecated, use config=CompletionConfig(...) instead.",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ elif config is None:
+ # Build config from individual kwargs
+ config = CompletionConfig(
+ temperature=temperature if temperature is not None else 1.0,
+ num_ctx=num_ctx if num_ctx is not None else 32768,
+ num_predict=num_predict if num_predict is not None else -1,
+ max_tokens=max_tokens if max_tokens is not None else 16384
+ )
  # Check if gateway supports streaming
  if not hasattr(self.adapter, 'complete_stream'):
  raise NotImplementedError(f"Gateway {type(self.adapter).__name__} does not support streaming")
@@ -232,7 +281,7 @@ class LLMBroker():
  self.tracer.record_llm_call(
  self.model,
  messages_for_tracer,
- temperature,
+ config.temperature,
  tools=tools_for_tracer,
  source=type(self),
  correlation_id=correlation_id
@@ -249,10 +298,11 @@ class LLMBroker():
  model=self.model,
  messages=messages,
  tools=tools,
- temperature=temperature,
- num_ctx=num_ctx,
- num_predict=num_predict,
- max_tokens=max_tokens
+ config=config,
+ temperature=config.temperature,
+ num_ctx=config.num_ctx,
+ num_predict=config.num_predict,
+ max_tokens=config.max_tokens
  )

  for chunk in stream:
@@ -335,8 +385,7 @@ class LLMBroker():

  # Recursively stream the response after tool execution
  yield from self.generate_stream(
- messages, tools, temperature, num_ctx, num_predict,
- max_tokens, correlation_id=correlation_id
+ messages, tools, config=config, correlation_id=correlation_id
  )
  return # Exit after recursive call
  else:
@@ -350,7 +399,9 @@ class LLMBroker():
  return content

  def generate_object(self, messages: List[LLMMessage], object_model: Type[BaseModel],
- temperature=1.0, num_ctx=32768, num_predict=-1, max_tokens=16384,
+ config: Optional[CompletionConfig] = None,
+ temperature: Optional[float] = None, num_ctx: Optional[int] = None,
+ num_predict: Optional[int] = None, max_tokens: Optional[int] = None,
  correlation_id: str = None) -> BaseModel:
  """
  Generate a structured response from the LLM and return it as an object.
@@ -361,12 +412,17 @@ class LLMBroker():
  A list of messages to send to the LLM.
  object_model : BaseModel
  The class of the model to use for the structured response data.
- temperature : float
- The temperature to use for the response. Defaults to 1.0.
- num_ctx : int
- The number of context tokens to use. Defaults to 32768.
- num_predict : int
- The number of tokens to predict. Defaults to no limit.
+ config : Optional[CompletionConfig]
+ Configuration object for LLM completion (recommended). If provided with individual
+ kwargs, a DeprecationWarning is emitted.
+ temperature : Optional[float]
+ The temperature to use for the response. Deprecated: use config.
+ num_ctx : Optional[int]
+ The number of context tokens to use. Deprecated: use config.
+ num_predict : Optional[int]
+ The number of tokens to predict. Deprecated: use config.
+ max_tokens : Optional[int]
+ The maximum number of tokens to generate. Deprecated: use config.
  correlation_id : str
  UUID string that is copied from cause-to-affect for tracing events.

@@ -375,6 +431,23 @@ class LLMBroker():
  BaseModel
  An instance of the model class provided containing the structured response data.
  """
+ # Handle config vs individual kwargs
+ if config is not None and any(
+ param is not None for param in [temperature, num_ctx, num_predict, max_tokens]):
+ warnings.warn(
+ "Both config and individual kwargs provided. Using config and ignoring kwargs. "
+ "Individual kwargs are deprecated, use config=CompletionConfig(...) instead.",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ elif config is None:
+ # Build config from individual kwargs
+ config = CompletionConfig(
+ temperature=temperature if temperature is not None else 1.0,
+ num_ctx=num_ctx if num_ctx is not None else 32768,
+ num_predict=num_predict if num_predict is not None else -1,
+ max_tokens=max_tokens if max_tokens is not None else 16384
+ )
  approximate_tokens = len(self.tokenizer.encode(self._content_to_count(messages)))
  logger.info(f"Requesting llm response with approx {approximate_tokens} tokens")

@@ -385,7 +458,7 @@ class LLMBroker():
  self.tracer.record_llm_call(
  self.model,
  messages_for_tracer,
- temperature,
+ config.temperature,
  tools=None,
  source=type(self),
  correlation_id=correlation_id
@@ -396,8 +469,9 @@ class LLMBroker():

  result = self.adapter.complete(model=self.model, messages=messages,
  object_model=object_model,
- temperature=temperature, num_ctx=num_ctx,
- num_predict=num_predict, max_tokens=max_tokens)
+ config=config,
+ temperature=config.temperature, num_ctx=config.num_ctx,
+ num_predict=config.num_predict, max_tokens=config.max_tokens)

  call_duration_ms = (time.time() - start_time) * 1000

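Taken together, callers can migrate the broker entry points like this. The model name and the default local Ollama gateway are illustrative assumptions; the config keyword and deprecation behavior come straight from the hunks above:

from pydantic import BaseModel
from mojentic.llm import CompletionConfig, LLMBroker
from mojentic.llm.gateways.models import LLMMessage, MessageRole

class ReleaseNote(BaseModel):
    headline: str

broker = LLMBroker(model="llama3.1")  # illustrative model; assumes the default Ollama gateway
messages = [LLMMessage(role=MessageRole.User, content="Summarize version 1.2.0.")]

# Before (still works, now deprecated): loose kwargs
broker.generate(messages, temperature=0.5, num_ctx=16384)

# After: one config object shared by generate, generate_stream, and generate_object
config = CompletionConfig(temperature=0.5, num_ctx=16384, max_tokens=8192)
broker.generate(messages, config=config)
note = broker.generate_object(messages, object_model=ReleaseNote, config=config)

# Mixing both emits a DeprecationWarning and the config wins
broker.generate(messages, config=config, temperature=0.9)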
mojentic/llm/llm_broker_spec.py CHANGED
@@ -1,7 +1,9 @@

+ import warnings
  import pytest
  from pydantic import BaseModel

+ from mojentic.llm.completion_config import CompletionConfig
  from mojentic.llm.gateways.models import LLMMessage, MessageRole, LLMGatewayResponse, LLMToolCall
  from mojentic.llm.llm_broker import LLMBroker

@@ -209,3 +211,88 @@ class DescribeLLMBroker:
  list(llm_broker.generate_stream(messages))

  assert "does not support streaming" in str(exc_info.value)
+
+ class DescribeCompletionConfigSupport:
+
+ def should_pass_config_to_gateway_in_generate(self, llm_broker, mock_gateway):
+ config = CompletionConfig(temperature=0.7, reasoning_effort="high")
+ messages = [LLMMessage(role=MessageRole.User, content="Test")]
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content="Response",
+ object=None,
+ tool_calls=[]
+ )
+
+ llm_broker.generate(messages, config=config)
+
+ mock_gateway.complete.assert_called_once()
+ call_kwargs = mock_gateway.complete.call_args[1]
+ assert call_kwargs['config'] == config
+ assert call_kwargs['config'].reasoning_effort == "high"
+
+ def should_build_config_from_kwargs_when_not_provided(self, llm_broker, mock_gateway):
+ messages = [LLMMessage(role=MessageRole.User, content="Test")]
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content="Response",
+ object=None,
+ tool_calls=[]
+ )
+
+ llm_broker.generate(messages, temperature=0.5, num_ctx=16384)
+
+ mock_gateway.complete.assert_called_once()
+ call_kwargs = mock_gateway.complete.call_args[1]
+ assert call_kwargs['config'].temperature == 0.5
+ assert call_kwargs['config'].num_ctx == 16384
+
+ def should_emit_deprecation_warning_when_both_config_and_kwargs_provided(self, llm_broker, mock_gateway):
+ config = CompletionConfig(temperature=0.7)
+ messages = [LLMMessage(role=MessageRole.User, content="Test")]
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content="Response",
+ object=None,
+ tool_calls=[]
+ )
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+ llm_broker.generate(messages, config=config, temperature=0.5)
+
+ assert len(w) == 1
+ assert issubclass(w[0].category, DeprecationWarning)
+ assert "deprecated" in str(w[0].message).lower()
+
+ def should_pass_config_to_gateway_in_generate_object(self, llm_broker, mock_gateway):
+ config = CompletionConfig(temperature=0.3, max_tokens=8192)
+ messages = [LLMMessage(role=MessageRole.User, content="Generate object")]
+ mock_object = SimpleModel(text="test", number=42)
+ mock_gateway.complete.return_value = LLMGatewayResponse(
+ content='{"text": "test", "number": 42}',
+ object=mock_object,
+ tool_calls=[]
+ )
+
+ llm_broker.generate_object(messages, object_model=SimpleModel, config=config)
+
+ mock_gateway.complete.assert_called_once()
+ call_kwargs = mock_gateway.complete.call_args[1]
+ assert call_kwargs['config'] == config
+ assert call_kwargs['config'].max_tokens == 8192
+
+ def should_pass_config_to_gateway_in_generate_stream(self, llm_broker, mock_gateway, mocker):
+ from mojentic.llm.gateways.ollama import StreamingResponse
+
+ config = CompletionConfig(temperature=0.9, reasoning_effort="medium")
+ messages = [LLMMessage(role=MessageRole.User, content="Stream test")]
+
+ mock_gateway.complete_stream = mocker.MagicMock()
+ mock_gateway.complete_stream.return_value = iter([
+ StreamingResponse(content="Response")
+ ])
+
+ list(llm_broker.generate_stream(messages, config=config))
+
+ mock_gateway.complete_stream.assert_called_once()
+ call_kwargs = mock_gateway.complete_stream.call_args[1]
+ assert call_kwargs['config'] == config
+ assert call_kwargs['config'].reasoning_effort == "medium"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mojentic
- Version: 1.1.1
+ Version: 1.2.0
  Summary: Mojentic is an agentic framework that aims to provide a simple and flexible way to assemble teams of agents to solve complex problems.
  Author-email: Stacey Vetzal <stacey@vetzal.com>
  Project-URL: Homepage, https://github.com/svetzal/mojentic
@@ -74,24 +74,26 @@ mojentic/agents/simple_recursive_agent.py,sha256=nNUzamDzBL7AU79mCb_NZsVQazAxSTn
  mojentic/agents/simple_recursive_agent_spec.py,sha256=rcIT2BWjT-sc2WevZ0ts9qi9Feh-ypNjeKYfULZmqo8,14945
  mojentic/context/__init__.py,sha256=RKDcfejikUZMDuFYIfJpmLnoXoRCOCfLjOTiicjq1Yo,80
  mojentic/context/shared_working_memory.py,sha256=Zt9MNGErEkDIUAaHvyhEOiTaEobI9l0MV4Z59lQFBr0,396
- mojentic/llm/__init__.py,sha256=pHWdS6XRdPKhEWv1YpXaD5B5mUPojWM9ncYB-bXI2Qo,484
- mojentic/llm/chat_session.py,sha256=SacT4WLjUuoRpG4puNDdTpinlfEIQI8sC3bs2loFOS8,4909
+ mojentic/llm/__init__.py,sha256=1IHy5lWRv2lnO0-p6ucQwUssaAn_jU9195OIVN2RA3U,546
+ mojentic/llm/chat_session.py,sha256=MBngV0CO78g5fL6rNTdAzX-UY_WqgHSXfbVD_vOAsAk,5446
  mojentic/llm/chat_session_spec.py,sha256=Qek3kFmRYFnuS8vSnrhQ1vnanuatrShCpqUV0ffRi-g,5492
- mojentic/llm/llm_broker.py,sha256=d59MvUBNgVAZbL4T6GUp-tMroTwwmcTJfyOzJSvejAw,16924
- mojentic/llm/llm_broker_spec.py,sha256=N0wSAIakWXn-4cxwG3dPR0MycZNTW-lQl9jWHlchC2w,8662
+ mojentic/llm/completion_config.py,sha256=OMutlf44NjJ8jhFfJhZTtcJmrJQeVJyC2P0C_rP4GrM,2101
+ mojentic/llm/completion_config_spec.py,sha256=fQFRw_w7c4BcR2Z8GV-o0gt5L9ayxHht94hfhjgL4Jo,1495
+ mojentic/llm/llm_broker.py,sha256=Xr723X1ve8PSCwevq4CHsqgpwaFfNZGNDsw9v0mwCVE,21278
+ mojentic/llm/llm_broker_spec.py,sha256=y29H615AItuxioMoPqhFLdYJELyBOyfOCq16siZXE8A,12634
  mojentic/llm/message_composers.py,sha256=8_5fA-J1I3BZ_0YlgZkQhsn_u7H8yMGEVNYHUPYW1X8,12142
  mojentic/llm/message_composers_spec.py,sha256=pR-npU5KL7lzYpAl0gWTJIP6obcnMxMpkEudoZs5-0M,12133
  mojentic/llm/gateways/__init__.py,sha256=y8zI9dGVhFkwDPSVU5NFFyaTTDWOkCfQYMzlFN72Ihg,786
- mojentic/llm/gateways/anthropic.py,sha256=DfaNgCrx33O4KfPrP5za_yKXlncIMexBnVKxOYk2Bew,1813
+ mojentic/llm/gateways/anthropic.py,sha256=8TPoIasPC2ovEwAEmb_hkaIXbh4vNcZXTpJzB6ZOGlY,2413
  mojentic/llm/gateways/anthropic_messages_adapter.py,sha256=FtDJMyeDOIei0Ign83C_jpWG06c73VAo6pXxk0zI3nI,2991
  mojentic/llm/gateways/embeddings_gateway.py,sha256=kcOhiyHzOyQgKgwPDQJD5oVvfwk71GsBgMYJOIDv5NU,1347
  mojentic/llm/gateways/file_gateway.py,sha256=3bZpalSyl_R4016WzCmmjUBDtAgPsmx19eVGv6p1Ufk,1418
- mojentic/llm/gateways/llm_gateway.py,sha256=1J-FWKlFNxqd9_YP8Ul6J0cORYQoF_czgEfpAUaWFtQ,2677
- mojentic/llm/gateways/models.py,sha256=OyIaMHKrrx6dHo5FbC8qOFct7PRql9wqbe_BJlgDSDE,3015
- mojentic/llm/gateways/ollama.py,sha256=9DhNRC2sjBwnzyZplS3kd8-s3famxHUf4WfBycFH0GE,7737
+ mojentic/llm/gateways/llm_gateway.py,sha256=PiwcrKWCqYXiHydSYqDiNJESU-cUTceB03fb26b1zY8,3071
+ mojentic/llm/gateways/models.py,sha256=dxAoR6OIRPWvNsiy-31n90UV2k9fqz4upBJnhzmc4fc,3232
+ mojentic/llm/gateways/ollama.py,sha256=w18SRSoRuZPKZlUujRq4fitl5tulYHHjKWDsky4ywgU,9393
  mojentic/llm/gateways/ollama_messages_adapter.py,sha256=kUN_p2FyN88_trXMcL-Xsn9xPBU7pGKlJwTUEUCf6G4,1404
  mojentic/llm/gateways/ollama_messages_adapter_spec.py,sha256=gVRbWDrHOa1EiZ0CkEWe0pGn-GKRqdGb-x56HBQeYSE,4981
- mojentic/llm/gateways/openai.py,sha256=S19AIooYoBZYELVPrSeOwKslpYc7jrhu-sLjDXUFF3w,23161
+ mojentic/llm/gateways/openai.py,sha256=inm4ElawqnfUsHugCMboEQufvwyOxBFsdykaqAF_Ow8,25572
  mojentic/llm/gateways/openai_message_adapter_spec.py,sha256=3nObWsf6cPuWuCK_IhrQoRdQdz7gndqeSSvJIxtQkp8,6609
  mojentic/llm/gateways/openai_messages_adapter.py,sha256=Scal68JKKdBHB35ok1c5DeWYdD6Wra5oXSsPxJyyXSQ,3947
  mojentic/llm/gateways/openai_model_registry.py,sha256=2tIT_L8g4opEgLRvhpOy_w47W83Xp_slki2rl3xnteo,18585
@@ -143,8 +145,8 @@ mojentic/tracer/tracer_system.py,sha256=KPSVIfGVOjSx6Vj_SvrisqJXKT6ddwBc_UCMQC6D
  mojentic/tracer/tracer_system_spec.py,sha256=8hpQlmAWyjUvk7ihy339L0buQ-eH5rluaFvyMl-mSH4,8830
  mojentic/utils/__init__.py,sha256=WvNYbtVeliMZn2sMX53CrOQlQLJBXi4mJNoocG7s_kI,116
  mojentic/utils/formatting.py,sha256=YtXh0aYzLB9GKP8ZD6u1By1OBqPOXUtHirtq0GmHNag,948
- mojentic-1.1.1.dist-info/licenses/LICENSE.md,sha256=txSgV8n5zY1W3NiF5HHsCwlaW0e8We1cSC6TuJUqxXA,1060
- mojentic-1.1.1.dist-info/METADATA,sha256=eb6l0htqFLfD-plqA90rn9UrdN_bHSUVSGsoB7-ILuY,8775
- mojentic-1.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- mojentic-1.1.1.dist-info/top_level.txt,sha256=Q-BvPQ8Eu1jnEqK8Xkr6A9C8Xa1z38oPZRHuA5MCTqg,19
- mojentic-1.1.1.dist-info/RECORD,,
+ mojentic-1.2.0.dist-info/licenses/LICENSE.md,sha256=txSgV8n5zY1W3NiF5HHsCwlaW0e8We1cSC6TuJUqxXA,1060
+ mojentic-1.2.0.dist-info/METADATA,sha256=nJ-Gnzva7OptikQBHlko5KYKQ449H6UP_DiNQK0f488,8775
+ mojentic-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ mojentic-1.2.0.dist-info/top_level.txt,sha256=Q-BvPQ8Eu1jnEqK8Xkr6A9C8Xa1z38oPZRHuA5MCTqg,19
+ mojentic-1.2.0.dist-info/RECORD,,