mojentic 1.1.0__tar.gz → 1.2.0__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {mojentic-1.1.0 → mojentic-1.2.0}/PKG-INFO +3 -1
- {mojentic-1.1.0 → mojentic-1.2.0}/pyproject.toml +3 -1
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/__init__.py +1 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/chat_session.py +16 -4
- mojentic-1.2.0/src/mojentic/llm/completion_config.py +58 -0
- mojentic-1.2.0/src/mojentic/llm/completion_config_spec.py +44 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/anthropic.py +15 -2
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/llm_gateway.py +11 -5
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/models.py +3 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama.py +43 -8
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai.py +58 -10
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/llm_broker.py +115 -41
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/llm_broker_spec.py +87 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/PKG-INFO +3 -1
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/SOURCES.txt +2 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/requires.txt +2 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/LICENSE.md +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/README.md +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/setup.cfg +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/async_dispatcher_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/async_llm_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/audit_openai_capabilities.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/broker_as_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/broker_examples.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/broker_image_examples.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/characterize_ollama.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/characterize_openai.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/chat_session.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/chat_session_with_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/coding_file_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/current_datetime_tool_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/design_analysis.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/embeddings.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/ensures_files_exist.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/ephemeral_task_manager_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/fetch_openai_models.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/file_deduplication.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/file_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/image_analysis.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/image_broker.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/image_broker_splat.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/iterative_solver.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/list_models.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/model_characterization.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/openai_gateway_enhanced_demo.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/oversized_embeddings.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/raw.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/decisioning_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/summarization_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/thinking_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/tool_call_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/formatters.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/models/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/models/base.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/models/events.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/recursive_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/routed_send_response.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_llm.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_llm_repl.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_structured.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/solver_chat_session.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/streaming.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/tell_user_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/tracer_demo.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/tracer_qt_viewer.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/working_memory.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/agent_event_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_aggregator_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_aggregator_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_llm_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_llm_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_async_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_llm_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_llm_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/correlation_aggregator_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/iterative_problem_solver.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/output_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/simple_recursive_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/simple_recursive_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/async_dispatcher.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/async_dispatcher_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/context/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/context/shared_working_memory.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/dispatcher.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/event.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/chat_session_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/anthropic_messages_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/embeddings_gateway.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/file_gateway.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama_messages_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama_messages_adapter_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_message_adapter_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_messages_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_model_registry.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_model_registry_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_temperature_handling_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/tokenizer_gateway.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/message_composers.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/message_composers_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/llm_registry.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/models.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/populate_registry_from_ollama.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ask_user_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/current_datetime.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/date_resolver.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/date_resolver_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/append_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/append_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/clear_tasks_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/clear_tasks_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/complete_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/complete_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/ephemeral_task_list.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/ephemeral_task_list_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/insert_task_after_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/insert_task_after_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/list_tasks_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/list_tasks_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/prepend_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/prepend_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/start_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/start_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/file_manager.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/file_manager_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/llm_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/llm_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/organic_web_search.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/tell_user_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/tool_wrapper.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/tool_wrapper_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/router.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/router_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/event_store.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/event_store_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/null_tracer.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_events.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_events_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_system.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_system_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/utils/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/utils/formatting.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/dependency_links.txt +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/top_level.txt +0 -0
{mojentic-1.1.0 → mojentic-1.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mojentic
-Version: 1.1.0
+Version: 1.2.0
 Summary: Mojentic is an agentic framework that aims to provide a simple and flexible way to assemble teams of agents to solve complex problems.
 Author-email: Stacey Vetzal <stacey@vetzal.com>
 Project-URL: Homepage, https://github.com/svetzal/mojentic
@@ -22,6 +22,8 @@ Requires-Dist: parsedatetime>=2.6
 Requires-Dist: pytz>=2025.2
 Requires-Dist: serpapi>=0.1.5
 Requires-Dist: colorama>=0.4.6
+Requires-Dist: filelock>=3.20.1
+Requires-Dist: urllib3>=2.6.0
 Provides-Extra: dev
 Requires-Dist: pytest>=9.0.2; extra == "dev"
 Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
{mojentic-1.1.0 → mojentic-1.2.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "mojentic"
-version = "1.1.0"
+version = "1.2.0"
 authors = [
     { name = "Stacey Vetzal", email = "stacey@vetzal.com" },
 ]
@@ -25,6 +25,8 @@ dependencies = [
     "pytz>=2025.2",
     "serpapi>=0.1.5",
     "colorama>=0.4.6",
+    "filelock>=3.20.1",
+    "urllib3>=2.6.0",
 ]

 [project.optional-dependencies]
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/__init__.py

@@ -7,6 +7,7 @@ from .llm_broker import LLMBroker  # noqa: F401
 from .chat_session import ChatSession  # noqa: F401
 from .message_composers import MessageBuilder, FileTypeSensor  # noqa: F401
 from .registry.llm_registry import LLMRegistry  # noqa: F401
+from .completion_config import CompletionConfig  # noqa: F401

 # Re-export gateway components at the LLM level
 from .gateways.models import (  # noqa: F401
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/chat_session.py

@@ -1,6 +1,7 @@
 from typing import Iterator, List, Optional

 from mojentic.llm import LLMBroker
+from mojentic.llm.completion_config import CompletionConfig
 from mojentic.llm.gateways.models import LLMMessage, MessageRole
 from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
 from mojentic.llm.tools.llm_tool import LLMTool
@@ -23,6 +24,7 @@ class ChatSession:
                  tools: Optional[List[LLMTool]] = None,
                  max_context: int = 32768,
                  tokenizer_gateway: TokenizerGateway = None,
+                 config: Optional[CompletionConfig] = None,
                  temperature: float = 1.0):
         """
         Create an instance of the ChatSession.
@@ -39,15 +41,25 @@ class ChatSession:
             The maximum number of tokens to keep in the context. Defaults to 32768.
         tokenizer_gateway : TokenizerGateway, optional
             The gateway to use for tokenization. If None, `mxbai-embed-large` is used on a local Ollama server.
+        config : Optional[CompletionConfig], optional
+            Configuration object for LLM completion. If None, one is created from temperature and max_context.
         temperature : float, optional
-            The temperature to use for the response. Defaults to 1.0.
+            The temperature to use for the response. Defaults to 1.0. Deprecated: use config.
         """

         self.llm = llm
         self.system_prompt = system_prompt
         self.tools = tools
         self.max_context = max_context
-
+
+        # Use config if provided, otherwise build from individual kwargs
+        if config is not None:
+            self.config = config
+        else:
+            self.config = CompletionConfig(
+                temperature=temperature,
+                num_ctx=max_context
+            )

         if tokenizer_gateway is None:
             self.tokenizer_gateway = TokenizerGateway()
@@ -73,7 +85,7 @@ class ChatSession:
             The response from the LLM.
         """
         self.insert_message(LLMMessage(role=MessageRole.User, content=query))
-        response = self.llm.generate(self.messages, tools=self.tools,
+        response = self.llm.generate(self.messages, tools=self.tools, config=self.config)
         self._ensure_all_messages_are_sized()
         self.insert_message(LLMMessage(role=MessageRole.Assistant, content=response))
         return response
@@ -95,7 +107,7 @@ class ChatSession:
         """
         self.insert_message(LLMMessage(role=MessageRole.User, content=query))
         accumulated = []
-        for chunk in self.llm.generate_stream(self.messages, tools=self.tools,
+        for chunk in self.llm.generate_stream(self.messages, tools=self.tools, config=self.config):
             accumulated.append(chunk)
             yield chunk
         self._ensure_all_messages_are_sized()
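To show how the new config parameter changes call sites, here is a minimal usage sketch (not part of the diff): the LLMBroker construction, the model name, and the send() call are assumptions based on existing mojentic examples, while CompletionConfig and the config kwarg come from the hunks above.

    from mojentic.llm import ChatSession, CompletionConfig, LLMBroker

    llm = LLMBroker(model="qwen3:8b")  # assumption: broker construction as in the bundled _examples; model name is illustrative

    # One config object instead of passing temperature / max_context style kwargs separately
    config = CompletionConfig(temperature=0.7, num_ctx=16384)

    session = ChatSession(llm=llm, system_prompt="You are a helpful assistant.", config=config)
    print(session.send("Hello!"))  # assumption: send() is the query method whose body is shown above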
mojentic-1.2.0/src/mojentic/llm/completion_config.py (new file)

@@ -0,0 +1,58 @@
+from typing import Optional, Literal
+from pydantic import BaseModel, Field
+
+
+class CompletionConfig(BaseModel):
+    """
+    Configuration object for LLM completion requests.
+
+    This model provides a unified way to configure LLM behavior across different
+    providers and models. It replaces loose kwargs with a structured configuration
+    object.
+
+    Attributes
+    ----------
+    temperature : float
+        Controls randomness in the output. Higher values (e.g., 1.5) make output
+        more random, while lower values (e.g., 0.1) make it more deterministic.
+        Defaults to 1.0.
+    num_ctx : int
+        The number of context tokens to use. This sets the context window size.
+        Defaults to 32768.
+    max_tokens : int
+        The maximum number of tokens to generate in the response.
+        Defaults to 16384.
+    num_predict : int
+        The number of tokens to predict. A value of -1 means no limit.
+        Defaults to -1.
+    reasoning_effort : Optional[Literal["low", "medium", "high"]]
+        Controls the reasoning effort level for models that support extended thinking.
+        - "low": Quick, minimal reasoning
+        - "medium": Balanced reasoning effort
+        - "high": Deep, thorough reasoning
+        Provider-specific behavior:
+        - Ollama: Maps to `think: true` parameter for all levels
+        - OpenAI: Maps to `reasoning_effort` API parameter for reasoning models
+        Defaults to None (no extended reasoning).
+    """
+
+    temperature: float = Field(
+        default=1.0,
+        description="Temperature for sampling (higher = more random)"
+    )
+    num_ctx: int = Field(
+        default=32768,
+        description="Number of context tokens"
+    )
+    max_tokens: int = Field(
+        default=16384,
+        description="Maximum tokens to generate"
+    )
+    num_predict: int = Field(
+        default=-1,
+        description="Number of tokens to predict (-1 = no limit)"
+    )
+    reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+        default=None,
+        description="Reasoning effort level for extended thinking"
+    )
mojentic-1.2.0/src/mojentic/llm/completion_config_spec.py (new file)

@@ -0,0 +1,44 @@
+import pytest
+from pydantic import ValidationError
+
+from mojentic.llm.completion_config import CompletionConfig
+
+
+class DescribeCompletionConfig:
+
+    def should_use_default_values(self):
+        config = CompletionConfig()
+        assert config.temperature == 1.0
+        assert config.num_ctx == 32768
+        assert config.max_tokens == 16384
+        assert config.num_predict == -1
+        assert config.reasoning_effort is None
+
+    def should_accept_custom_values(self):
+        config = CompletionConfig(
+            temperature=0.5,
+            num_ctx=16384,
+            max_tokens=8192,
+            num_predict=100,
+            reasoning_effort="high"
+        )
+        assert config.temperature == 0.5
+        assert config.num_ctx == 16384
+        assert config.max_tokens == 8192
+        assert config.num_predict == 100
+        assert config.reasoning_effort == "high"
+
+    def should_accept_valid_reasoning_effort_levels(self):
+        for level in ["low", "medium", "high"]:
+            config = CompletionConfig(reasoning_effort=level)
+            assert config.reasoning_effort == level
+
+    def should_reject_invalid_reasoning_effort_levels(self):
+        with pytest.raises(ValidationError) as exc_info:
+            CompletionConfig(reasoning_effort="invalid")
+
+        assert "reasoning_effort" in str(exc_info.value)
+
+    def should_accept_none_reasoning_effort(self):
+        config = CompletionConfig(reasoning_effort=None)
+        assert config.reasoning_effort is None
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/anthropic.py

@@ -17,6 +17,19 @@ class AnthropicGateway(LLMGateway):
     def complete(self, **args) -> LLMGatewayResponse:

         messages = args.get('messages')
+        config = args.get('config', None)
+
+        # Extract temperature and max_tokens from config if provided
+        if config:
+            temperature = config.temperature
+            max_tokens = config.max_tokens
+            # Note: reasoning_effort not supported by Anthropic yet
+            if config.reasoning_effort is not None:
+                logger.warning("Anthropic gateway does not yet support reasoning_effort parameter",
+                               reasoning_effort=config.reasoning_effort)
+        else:
+            temperature = args.get('temperature', 1.0)
+            max_tokens = args.get('max_tokens', args.get('num_predict', 2000))

         system_messages = [m for m in messages if m.role == MessageRole.System]
         user_messages = [m for m in messages if m.role == MessageRole.User]
@@ -29,8 +42,8 @@ class AnthropicGateway(LLMGateway):

         response = self.client.messages.create(
             **anthropic_args,
-            temperature=
-            max_tokens=
+            temperature=temperature,
+            max_tokens=max_tokens,
             # thinking={
             #     "type": "enabled",
             #     "budget_tokens": 32768,
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/llm_gateway.py

@@ -1,10 +1,13 @@
-from typing import List, Optional, Type
+from typing import List, Optional, Type, TYPE_CHECKING

 from pydantic import BaseModel

 from mojentic.llm.gateways.models import LLMGatewayResponse, LLMMessage
 from mojentic.llm.tools.llm_tool import LLMTool

+if TYPE_CHECKING:
+    from mojentic.llm.completion_config import CompletionConfig
+

 class LLMGateway:
     """
@@ -18,6 +21,7 @@ class LLMGateway:
                  messages: List[LLMMessage],
                  object_model: Optional[Type[BaseModel]] = None,
                  tools: Optional[List[LLMTool]] = None,
+                 config: Optional['CompletionConfig'] = None,
                  temperature: float = 1.0,
                  num_ctx: int = 32768, max_tokens: int = 16384,
                  num_predict: int = -1) -> LLMGatewayResponse:
@@ -35,14 +39,16 @@ class LLMGateway:
         tools : Optional[List[LLMTool]]
             A list of tools to use with the LLM. If a tool call is requested, the tool will be called and the output
             will be included in the response.
+        config : Optional[CompletionConfig]
+            Configuration object for LLM completion (recommended over individual kwargs).
         temperature : float
-            The temperature to use for the response. Defaults to 1.0.
+            The temperature to use for the response. Defaults to 1.0. (Deprecated: use config)
         num_ctx : int
-            The number of context tokens to use. Defaults to 32768.
+            The number of context tokens to use. Defaults to 32768. (Deprecated: use config)
         max_tokens : int
-            The maximum number of tokens to generate. Defaults to 16384.
+            The maximum number of tokens to generate. Defaults to 16384. (Deprecated: use config)
         num_predict : int
-            The number of tokens to predict. Defaults to no limit.
+            The number of tokens to predict. Defaults to no limit. (Deprecated: use config)

         Returns
         -------
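The signature above suggests call sites like the following sketch. It is illustrative only: the default OllamaGateway construction and the model name are assumptions; CompletionConfig, LLMMessage, and the config keyword come from this diff.

    from mojentic.llm import CompletionConfig
    from mojentic.llm.gateways.models import LLMMessage, MessageRole
    from mojentic.llm.gateways.ollama import OllamaGateway

    gateway = OllamaGateway()  # assumption: default host/headers/timeout
    config = CompletionConfig(temperature=0.2, num_ctx=8192, max_tokens=2048)

    # config is the recommended path; the temperature/num_ctx/max_tokens/num_predict kwargs are deprecated
    response = gateway.complete(
        model="qwen3:8b",  # illustrative model name
        messages=[LLMMessage(role=MessageRole.User, content="Give me one sentence about Mojentic.")],
        config=config,
    )
    print(response.content)
    print(response.thinking)  # new field; populated by providers that return a reasoning trace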
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/models.py

@@ -97,8 +97,11 @@ class LLMGatewayResponse(BaseModel):
         Parsed response object.
     tool_calls : List[LLMToolCall]
         List of requested tool calls from the LLM.
+    thinking : Optional[str]
+        Model thinking/reasoning trace (populated by some providers).
     """
     content: Optional[Union[str, dict[str, str]]] = Field(None, description="The content of the response.")
     object: Optional[BaseModel] = Field(None, description="Parsed response object")
     tool_calls: List[LLMToolCall] = Field(default_factory=list,
                                           description="List of requested tool calls from the LLM.")
+    thinking: Optional[str] = Field(None, description="Model thinking/reasoning trace (populated by some providers)")
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama.py

@@ -20,9 +20,12 @@ class StreamingResponse(BaseModel):
         Text content chunk from the LLM response.
     tool_calls : Optional[List]
         Tool calls from the LLM response (raw ollama format).
+    thinking : Optional[str]
+        Thinking/reasoning trace from the LLM response.
     """
     content: Optional[str] = None
     tool_calls: Optional[List] = None
+    thinking: Optional[str] = None


 class OllamaGateway(LLMGateway):
@@ -41,14 +44,26 @@ class OllamaGateway(LLMGateway):
         self.client = Client(host=host, headers=headers, timeout=timeout)

     def _extract_options_from_args(self, args):
-
-
-
-
-
-
-
-
+        # Extract config if present, otherwise use individual kwargs
+        config = args.get('config', None)
+        if config:
+            options = Options(
+                temperature=config.temperature,
+                num_ctx=config.num_ctx,
+            )
+            if config.num_predict > 0:
+                options.num_predict = config.num_predict
+            if config.max_tokens:
+                options.num_predict = config.max_tokens
+        else:
+            options = Options(
+                temperature=args.get('temperature', 1.0),
+                num_ctx=args.get('num_ctx', 32768),
+            )
+            if args.get('num_predict', 0) > 0:
+                options.num_predict = args['num_predict']
+            if 'max_tokens' in args:
+                options.num_predict = args['max_tokens']
         return options

     def complete(self, **args) -> LLMGatewayResponse:
@@ -90,6 +105,12 @@ class OllamaGateway(LLMGateway):
             'options': options
         }

+        # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+        config = args.get('config', None)
+        if config and config.reasoning_effort is not None:
+            ollama_args['think'] = True
+            logger.info("Enabling extended thinking for Ollama", reasoning_effort=config.reasoning_effort)
+
         if 'object_model' in args and args['object_model'] is not None:
             ollama_args['format'] = args['object_model'].model_json_schema()

@@ -113,10 +134,14 @@ class OllamaGateway(LLMGateway):
                 arguments={str(k): str(t.function.arguments[k]) for k in t.function.arguments})
                 for t in response.message.tool_calls]

+        # Extract thinking content if present
+        thinking = getattr(response.message, 'thinking', None)
+
         return LLMGatewayResponse(
             content=response.message.content,
             object=object,
             tool_calls=tool_calls,
+            thinking=thinking
         )

     def complete_stream(self, **args) -> Iterator[StreamingResponse]:
@@ -156,6 +181,12 @@ class OllamaGateway(LLMGateway):
             'stream': True
         }

+        # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+        config = args.get('config', None)
+        if config and config.reasoning_effort is not None:
+            ollama_args['think'] = True
+            logger.info("Enabling extended thinking for Ollama streaming", reasoning_effort=config.reasoning_effort)
+
         # Enable tool support if tools are provided
         if 'tools' in args and args['tools'] is not None:
             ollama_args['tools'] = [t.descriptor for t in args['tools']]
@@ -168,6 +199,10 @@ class OllamaGateway(LLMGateway):
             if chunk.message.content:
                 yield StreamingResponse(content=chunk.message.content)

+            # Yield thinking chunks when they arrive
+            if hasattr(chunk.message, 'thinking') and chunk.message.thinking:
+                yield StreamingResponse(thinking=chunk.message.thinking)
+
             # Yield tool calls when they arrive
             if chunk.message.tool_calls:
                 yield StreamingResponse(tool_calls=chunk.message.tool_calls)
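A sketch of consuming the new thinking chunks on the streaming path; the model name is illustrative and a local Ollama server at the default host is assumed, while the thinking/content fields are the ones added in the hunks above.

    from mojentic.llm import CompletionConfig
    from mojentic.llm.gateways.models import LLMMessage, MessageRole
    from mojentic.llm.gateways.ollama import OllamaGateway

    gateway = OllamaGateway()
    config = CompletionConfig(reasoning_effort="high")  # any level turns on think=True for Ollama

    for chunk in gateway.complete_stream(
            model="deepseek-r1:8b",  # illustrative reasoning-capable model
            messages=[LLMMessage(role=MessageRole.User, content="Why use a config object?")],
            config=config):
        if chunk.thinking:
            print(chunk.thinking, end="")   # reasoning-trace chunks
        if chunk.content:
            print(chunk.content, end="")    # answer-text chunks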
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai.py

@@ -121,6 +121,20 @@ class OpenAIGateway(LLMGateway):
                            supported_temperatures=capabilities.supported_temperatures)
             adapted_args['temperature'] = default_temp

+        # Handle reasoning_effort for reasoning models
+        if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+            if capabilities.model_type == ModelType.REASONING:
+                # Keep reasoning_effort for reasoning models
+                logger.info("Adding reasoning_effort parameter for reasoning model",
+                            model=model,
+                            reasoning_effort=adapted_args['reasoning_effort'])
+            else:
+                # Warn and remove for non-reasoning models
+                logger.warning("Model does not support reasoning_effort, ignoring parameter",
+                               model=model,
+                               requested_reasoning_effort=adapted_args['reasoning_effort'])
+                adapted_args.pop('reasoning_effort', None)
+
         return adapted_args

     def _validate_model_parameters(self, model: str, args: dict) -> None:
@@ -189,10 +203,21 @@ class OpenAIGateway(LLMGateway):
         messages = kwargs.get('messages')
         object_model = kwargs.get('object_model', None)
         tools = kwargs.get('tools', None)
-
-
-
-
+        config = kwargs.get('config', None)
+
+        # Use config if provided, otherwise use individual kwargs
+        if config:
+            temperature = config.temperature
+            num_ctx = config.num_ctx
+            max_tokens = config.max_tokens
+            num_predict = config.num_predict
+            reasoning_effort = config.reasoning_effort
+        else:
+            temperature = kwargs.get('temperature', 1.0)
+            num_ctx = kwargs.get('num_ctx', 32768)
+            max_tokens = kwargs.get('max_tokens', 16384)
+            num_predict = kwargs.get('num_predict', -1)
+            reasoning_effort = None

         if not model:
             raise ValueError("'model' parameter is required")
@@ -208,7 +233,8 @@ class OpenAIGateway(LLMGateway):
             'temperature': temperature,
             'num_ctx': num_ctx,
             'max_tokens': max_tokens,
-            'num_predict': num_predict
+            'num_predict': num_predict,
+            'reasoning_effort': reasoning_effort
         }

         # Adapt parameters based on model type
@@ -247,10 +273,15 @@ class OpenAIGateway(LLMGateway):
         elif 'max_completion_tokens' in adapted_args:
             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+        # Add reasoning_effort if present in adapted args
+        if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+            openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
         logger.debug("Making OpenAI API call",
                      model=openai_args['model'],
                      has_tools='tools' in openai_args,
                      has_object_model='response_format' in openai_args,
+                     has_reasoning_effort='reasoning_effort' in openai_args,
                      token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

         try:
@@ -339,10 +370,21 @@ class OpenAIGateway(LLMGateway):
         messages = kwargs.get('messages')
         object_model = kwargs.get('object_model', None)
         tools = kwargs.get('tools', None)
-
-
-
-
+        config = kwargs.get('config', None)
+
+        # Use config if provided, otherwise use individual kwargs
+        if config:
+            temperature = config.temperature
+            num_ctx = config.num_ctx
+            max_tokens = config.max_tokens
+            num_predict = config.num_predict
+            reasoning_effort = config.reasoning_effort
+        else:
+            temperature = kwargs.get('temperature', 1.0)
+            num_ctx = kwargs.get('num_ctx', 32768)
+            max_tokens = kwargs.get('max_tokens', 16384)
+            num_predict = kwargs.get('num_predict', -1)
+            reasoning_effort = None

         if not model:
             raise ValueError("'model' parameter is required")
@@ -358,7 +400,8 @@ class OpenAIGateway(LLMGateway):
             'temperature': temperature,
             'num_ctx': num_ctx,
             'max_tokens': max_tokens,
-            'num_predict': num_predict
+            'num_predict': num_predict,
+            'reasoning_effort': reasoning_effort
         }

         # Adapt parameters based on model type
@@ -401,9 +444,14 @@ class OpenAIGateway(LLMGateway):
         elif 'max_completion_tokens' in adapted_args:
             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+        # Add reasoning_effort if present in adapted args
+        if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+            openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
         logger.debug("Making OpenAI streaming API call",
                      model=openai_args['model'],
                      has_tools='tools' in openai_args,
+                     has_reasoning_effort='reasoning_effort' in openai_args,
                      token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

         try:
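To round out the OpenAI path, a hedged sketch of the behavior these hunks implement: reasoning_effort is forwarded only when the model registry marks the model as a reasoning model; otherwise the gateway logs a warning and drops the parameter. The constructor arguments and model names below are assumptions, not part of this diff.

    import os

    from mojentic.llm import CompletionConfig
    from mojentic.llm.gateways.models import LLMMessage, MessageRole
    from mojentic.llm.gateways.openai import OpenAIGateway

    gateway = OpenAIGateway(api_key=os.environ["OPENAI_API_KEY"])  # assumption: api_key constructor kwarg
    config = CompletionConfig(reasoning_effort="high", max_tokens=4096)

    # With a reasoning model, reasoning_effort is passed through to the API call.
    response = gateway.complete(
        model="o3-mini",  # illustrative reasoning model
        messages=[LLMMessage(role=MessageRole.User, content="Outline a release checklist.")],
        config=config,
    )
    print(response.content)

    # With a non-reasoning model the same call logs a warning and omits reasoning_effort.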