mojentic 1.1.0__tar.gz → 1.2.0__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {mojentic-1.1.0 → mojentic-1.2.0}/PKG-INFO +3 -1
- {mojentic-1.1.0 → mojentic-1.2.0}/pyproject.toml +3 -1
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/__init__.py +1 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/chat_session.py +16 -4
- mojentic-1.2.0/src/mojentic/llm/completion_config.py +58 -0
- mojentic-1.2.0/src/mojentic/llm/completion_config_spec.py +44 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/anthropic.py +15 -2
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/llm_gateway.py +11 -5
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/models.py +3 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama.py +43 -8
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai.py +58 -10
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/llm_broker.py +115 -41
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/llm_broker_spec.py +87 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/PKG-INFO +3 -1
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/SOURCES.txt +2 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/requires.txt +2 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/LICENSE.md +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/README.md +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/setup.cfg +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/async_dispatcher_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/async_llm_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/audit_openai_capabilities.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/broker_as_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/broker_examples.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/broker_image_examples.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/characterize_ollama.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/characterize_openai.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/chat_session.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/chat_session_with_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/coding_file_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/current_datetime_tool_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/design_analysis.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/embeddings.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/ensures_files_exist.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/ephemeral_task_manager_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/fetch_openai_models.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/file_deduplication.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/file_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/image_analysis.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/image_broker.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/image_broker_splat.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/iterative_solver.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/list_models.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/model_characterization.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/openai_gateway_enhanced_demo.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/oversized_embeddings.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/raw.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/decisioning_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/summarization_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/thinking_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/agents/tool_call_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/formatters.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/models/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/models/base.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react/models/events.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/react.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/recursive_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/routed_send_response.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_llm.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_llm_repl.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_structured.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/simple_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/solver_chat_session.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/streaming.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/tell_user_example.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/tracer_demo.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/tracer_qt_viewer.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/_examples/working_memory.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/agent_event_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_aggregator_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_aggregator_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_llm_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/async_llm_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_async_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_llm_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/base_llm_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/correlation_aggregator_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/iterative_problem_solver.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/output_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/simple_recursive_agent.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/agents/simple_recursive_agent_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/async_dispatcher.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/async_dispatcher_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/context/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/context/shared_working_memory.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/dispatcher.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/event.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/chat_session_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/anthropic_messages_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/embeddings_gateway.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/file_gateway.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama_messages_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama_messages_adapter_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_message_adapter_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_messages_adapter.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_model_registry.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_model_registry_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai_temperature_handling_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/tokenizer_gateway.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/message_composers.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/message_composers_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/llm_registry.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/models.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/registry/populate_registry_from_ollama.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ask_user_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/current_datetime.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/date_resolver.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/date_resolver_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/append_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/append_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/clear_tasks_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/clear_tasks_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/complete_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/complete_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/ephemeral_task_list.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/ephemeral_task_list_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/insert_task_after_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/insert_task_after_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/list_tasks_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/list_tasks_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/prepend_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/prepend_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/start_task_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/ephemeral_task_manager/start_task_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/file_manager.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/file_manager_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/llm_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/llm_tool_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/organic_web_search.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/tell_user_tool.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/tool_wrapper.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/tools/tool_wrapper_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/router.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/router_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/event_store.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/event_store_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/null_tracer.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_events.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_events_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_system.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/tracer/tracer_system_spec.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/utils/__init__.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/utils/formatting.py +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/dependency_links.txt +0 -0
- {mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic.egg-info/top_level.txt +0 -0
{mojentic-1.1.0 → mojentic-1.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mojentic
-Version: 1.1.0
+Version: 1.2.0
 Summary: Mojentic is an agentic framework that aims to provide a simple and flexible way to assemble teams of agents to solve complex problems.
 Author-email: Stacey Vetzal <stacey@vetzal.com>
 Project-URL: Homepage, https://github.com/svetzal/mojentic
@@ -22,6 +22,8 @@ Requires-Dist: parsedatetime>=2.6
 Requires-Dist: pytz>=2025.2
 Requires-Dist: serpapi>=0.1.5
 Requires-Dist: colorama>=0.4.6
+Requires-Dist: filelock>=3.20.1
+Requires-Dist: urllib3>=2.6.0
 Provides-Extra: dev
 Requires-Dist: pytest>=9.0.2; extra == "dev"
 Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
{mojentic-1.1.0 → mojentic-1.2.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "mojentic"
-version = "1.1.0"
+version = "1.2.0"
 authors = [
     { name = "Stacey Vetzal", email = "stacey@vetzal.com" },
 ]
@@ -25,6 +25,8 @@ dependencies = [
     "pytz>=2025.2",
     "serpapi>=0.1.5",
     "colorama>=0.4.6",
+    "filelock>=3.20.1",
+    "urllib3>=2.6.0",
 ]

 [project.optional-dependencies]
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/__init__.py

@@ -7,6 +7,7 @@ from .llm_broker import LLMBroker  # noqa: F401
 from .chat_session import ChatSession  # noqa: F401
 from .message_composers import MessageBuilder, FileTypeSensor  # noqa: F401
 from .registry.llm_registry import LLMRegistry  # noqa: F401
+from .completion_config import CompletionConfig  # noqa: F401

 # Re-export gateway components at the LLM level
 from .gateways.models import (  # noqa: F401
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/chat_session.py

@@ -1,6 +1,7 @@
 from typing import Iterator, List, Optional

 from mojentic.llm import LLMBroker
+from mojentic.llm.completion_config import CompletionConfig
 from mojentic.llm.gateways.models import LLMMessage, MessageRole
 from mojentic.llm.gateways.tokenizer_gateway import TokenizerGateway
 from mojentic.llm.tools.llm_tool import LLMTool
@@ -23,6 +24,7 @@ class ChatSession:
                  tools: Optional[List[LLMTool]] = None,
                  max_context: int = 32768,
                  tokenizer_gateway: TokenizerGateway = None,
+                 config: Optional[CompletionConfig] = None,
                  temperature: float = 1.0):
         """
         Create an instance of the ChatSession.
@@ -39,15 +41,25 @@ class ChatSession:
             The maximum number of tokens to keep in the context. Defaults to 32768.
         tokenizer_gateway : TokenizerGateway, optional
             The gateway to use for tokenization. If None, `mxbai-embed-large` is used on a local Ollama server.
+        config : Optional[CompletionConfig], optional
+            Configuration object for LLM completion. If None, one is created from temperature and max_context.
         temperature : float, optional
-            The temperature to use for the response. Defaults to 1.0.
+            The temperature to use for the response. Defaults to 1.0. Deprecated: use config.
         """

         self.llm = llm
         self.system_prompt = system_prompt
         self.tools = tools
         self.max_context = max_context
-
+
+        # Use config if provided, otherwise build from individual kwargs
+        if config is not None:
+            self.config = config
+        else:
+            self.config = CompletionConfig(
+                temperature=temperature,
+                num_ctx=max_context
+            )

         if tokenizer_gateway is None:
             self.tokenizer_gateway = TokenizerGateway()
@@ -73,7 +85,7 @@ class ChatSession:
             The response from the LLM.
         """
         self.insert_message(LLMMessage(role=MessageRole.User, content=query))
-        response = self.llm.generate(self.messages, tools=self.tools,
+        response = self.llm.generate(self.messages, tools=self.tools, config=self.config)
         self._ensure_all_messages_are_sized()
         self.insert_message(LLMMessage(role=MessageRole.Assistant, content=response))
         return response
@@ -95,7 +107,7 @@ class ChatSession:
         """
         self.insert_message(LLMMessage(role=MessageRole.User, content=query))
         accumulated = []
-        for chunk in self.llm.generate_stream(self.messages, tools=self.tools,
+        for chunk in self.llm.generate_stream(self.messages, tools=self.tools, config=self.config):
             accumulated.append(chunk)
             yield chunk
         self._ensure_all_messages_are_sized()
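To show how the new config parameter changes call sites, here is a minimal usage sketch (not part of the diff): the LLMBroker construction, the model name, and the send() call are assumptions based on existing mojentic examples, while CompletionConfig and the config kwarg come from the hunks above.

    from mojentic.llm import ChatSession, CompletionConfig, LLMBroker

    llm = LLMBroker(model="qwen3:8b")  # assumption: broker construction as in the bundled _examples; model name is illustrative

    # One config object instead of passing temperature / max_context style kwargs separately
    config = CompletionConfig(temperature=0.7, num_ctx=16384)

    session = ChatSession(llm=llm, system_prompt="You are a helpful assistant.", config=config)
    print(session.send("Hello!"))  # assumption: send() is the query method whose body is shown above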
mojentic-1.2.0/src/mojentic/llm/completion_config.py (new file)

@@ -0,0 +1,58 @@
+from typing import Optional, Literal
+from pydantic import BaseModel, Field
+
+
+class CompletionConfig(BaseModel):
+    """
+    Configuration object for LLM completion requests.
+
+    This model provides a unified way to configure LLM behavior across different
+    providers and models. It replaces loose kwargs with a structured configuration
+    object.
+
+    Attributes
+    ----------
+    temperature : float
+        Controls randomness in the output. Higher values (e.g., 1.5) make output
+        more random, while lower values (e.g., 0.1) make it more deterministic.
+        Defaults to 1.0.
+    num_ctx : int
+        The number of context tokens to use. This sets the context window size.
+        Defaults to 32768.
+    max_tokens : int
+        The maximum number of tokens to generate in the response.
+        Defaults to 16384.
+    num_predict : int
+        The number of tokens to predict. A value of -1 means no limit.
+        Defaults to -1.
+    reasoning_effort : Optional[Literal["low", "medium", "high"]]
+        Controls the reasoning effort level for models that support extended thinking.
+        - "low": Quick, minimal reasoning
+        - "medium": Balanced reasoning effort
+        - "high": Deep, thorough reasoning
+        Provider-specific behavior:
+        - Ollama: Maps to `think: true` parameter for all levels
+        - OpenAI: Maps to `reasoning_effort` API parameter for reasoning models
+        Defaults to None (no extended reasoning).
+    """
+
+    temperature: float = Field(
+        default=1.0,
+        description="Temperature for sampling (higher = more random)"
+    )
+    num_ctx: int = Field(
+        default=32768,
+        description="Number of context tokens"
+    )
+    max_tokens: int = Field(
+        default=16384,
+        description="Maximum tokens to generate"
+    )
+    num_predict: int = Field(
+        default=-1,
+        description="Number of tokens to predict (-1 = no limit)"
+    )
+    reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+        default=None,
+        description="Reasoning effort level for extended thinking"
+    )
mojentic-1.2.0/src/mojentic/llm/completion_config_spec.py (new file)

@@ -0,0 +1,44 @@
+import pytest
+from pydantic import ValidationError
+
+from mojentic.llm.completion_config import CompletionConfig
+
+
+class DescribeCompletionConfig:
+
+    def should_use_default_values(self):
+        config = CompletionConfig()
+        assert config.temperature == 1.0
+        assert config.num_ctx == 32768
+        assert config.max_tokens == 16384
+        assert config.num_predict == -1
+        assert config.reasoning_effort is None
+
+    def should_accept_custom_values(self):
+        config = CompletionConfig(
+            temperature=0.5,
+            num_ctx=16384,
+            max_tokens=8192,
+            num_predict=100,
+            reasoning_effort="high"
+        )
+        assert config.temperature == 0.5
+        assert config.num_ctx == 16384
+        assert config.max_tokens == 8192
+        assert config.num_predict == 100
+        assert config.reasoning_effort == "high"
+
+    def should_accept_valid_reasoning_effort_levels(self):
+        for level in ["low", "medium", "high"]:
+            config = CompletionConfig(reasoning_effort=level)
+            assert config.reasoning_effort == level
+
+    def should_reject_invalid_reasoning_effort_levels(self):
+        with pytest.raises(ValidationError) as exc_info:
+            CompletionConfig(reasoning_effort="invalid")
+
+        assert "reasoning_effort" in str(exc_info.value)
+
+    def should_accept_none_reasoning_effort(self):
+        config = CompletionConfig(reasoning_effort=None)
+        assert config.reasoning_effort is None
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/anthropic.py

@@ -17,6 +17,19 @@ class AnthropicGateway(LLMGateway):
     def complete(self, **args) -> LLMGatewayResponse:

         messages = args.get('messages')
+        config = args.get('config', None)
+
+        # Extract temperature and max_tokens from config if provided
+        if config:
+            temperature = config.temperature
+            max_tokens = config.max_tokens
+            # Note: reasoning_effort not supported by Anthropic yet
+            if config.reasoning_effort is not None:
+                logger.warning("Anthropic gateway does not yet support reasoning_effort parameter",
+                               reasoning_effort=config.reasoning_effort)
+        else:
+            temperature = args.get('temperature', 1.0)
+            max_tokens = args.get('max_tokens', args.get('num_predict', 2000))

         system_messages = [m for m in messages if m.role == MessageRole.System]
         user_messages = [m for m in messages if m.role == MessageRole.User]
@@ -29,8 +42,8 @@ class AnthropicGateway(LLMGateway):

         response = self.client.messages.create(
             **anthropic_args,
-            temperature=
-            max_tokens=
+            temperature=temperature,
+            max_tokens=max_tokens,
             # thinking={
             #     "type": "enabled",
             #     "budget_tokens": 32768,
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/llm_gateway.py

@@ -1,10 +1,13 @@
-from typing import List, Optional, Type
+from typing import List, Optional, Type, TYPE_CHECKING

 from pydantic import BaseModel

 from mojentic.llm.gateways.models import LLMGatewayResponse, LLMMessage
 from mojentic.llm.tools.llm_tool import LLMTool

+if TYPE_CHECKING:
+    from mojentic.llm.completion_config import CompletionConfig
+

 class LLMGateway:
     """
@@ -18,6 +21,7 @@ class LLMGateway:
                  messages: List[LLMMessage],
                  object_model: Optional[Type[BaseModel]] = None,
                  tools: Optional[List[LLMTool]] = None,
+                 config: Optional['CompletionConfig'] = None,
                  temperature: float = 1.0,
                  num_ctx: int = 32768, max_tokens: int = 16384,
                  num_predict: int = -1) -> LLMGatewayResponse:
@@ -35,14 +39,16 @@ class LLMGateway:
         tools : Optional[List[LLMTool]]
             A list of tools to use with the LLM. If a tool call is requested, the tool will be called and the output
             will be included in the response.
+        config : Optional[CompletionConfig]
+            Configuration object for LLM completion (recommended over individual kwargs).
         temperature : float
-            The temperature to use for the response. Defaults to 1.0.
+            The temperature to use for the response. Defaults to 1.0. (Deprecated: use config)
         num_ctx : int
-            The number of context tokens to use. Defaults to 32768.
+            The number of context tokens to use. Defaults to 32768. (Deprecated: use config)
         max_tokens : int
-            The maximum number of tokens to generate. Defaults to 16384.
+            The maximum number of tokens to generate. Defaults to 16384. (Deprecated: use config)
         num_predict : int
-            The number of tokens to predict. Defaults to no limit.
+            The number of tokens to predict. Defaults to no limit. (Deprecated: use config)

         Returns
         -------
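The signature above suggests call sites like the following sketch. It is illustrative only: the default OllamaGateway construction and the model name are assumptions; CompletionConfig, LLMMessage, and the config keyword come from this diff.

    from mojentic.llm import CompletionConfig
    from mojentic.llm.gateways.models import LLMMessage, MessageRole
    from mojentic.llm.gateways.ollama import OllamaGateway

    gateway = OllamaGateway()  # assumption: default host/headers/timeout
    config = CompletionConfig(temperature=0.2, num_ctx=8192, max_tokens=2048)

    # config is the recommended path; the temperature/num_ctx/max_tokens/num_predict kwargs are deprecated
    response = gateway.complete(
        model="qwen3:8b",  # illustrative model name
        messages=[LLMMessage(role=MessageRole.User, content="Give me one sentence about Mojentic.")],
        config=config,
    )
    print(response.content)
    print(response.thinking)  # new field; populated by providers that return a reasoning trace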
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/models.py

@@ -97,8 +97,11 @@ class LLMGatewayResponse(BaseModel):
         Parsed response object.
     tool_calls : List[LLMToolCall]
         List of requested tool calls from the LLM.
+    thinking : Optional[str]
+        Model thinking/reasoning trace (populated by some providers).
     """
     content: Optional[Union[str, dict[str, str]]] = Field(None, description="The content of the response.")
     object: Optional[BaseModel] = Field(None, description="Parsed response object")
     tool_calls: List[LLMToolCall] = Field(default_factory=list,
                                           description="List of requested tool calls from the LLM.")
+    thinking: Optional[str] = Field(None, description="Model thinking/reasoning trace (populated by some providers)")
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/ollama.py

@@ -20,9 +20,12 @@ class StreamingResponse(BaseModel):
         Text content chunk from the LLM response.
     tool_calls : Optional[List]
         Tool calls from the LLM response (raw ollama format).
+    thinking : Optional[str]
+        Thinking/reasoning trace from the LLM response.
     """
     content: Optional[str] = None
     tool_calls: Optional[List] = None
+    thinking: Optional[str] = None


 class OllamaGateway(LLMGateway):
@@ -41,14 +44,26 @@ class OllamaGateway(LLMGateway):
         self.client = Client(host=host, headers=headers, timeout=timeout)

     def _extract_options_from_args(self, args):
-
-
-
-
-
-
-
-
+        # Extract config if present, otherwise use individual kwargs
+        config = args.get('config', None)
+        if config:
+            options = Options(
+                temperature=config.temperature,
+                num_ctx=config.num_ctx,
+            )
+            if config.num_predict > 0:
+                options.num_predict = config.num_predict
+            if config.max_tokens:
+                options.num_predict = config.max_tokens
+        else:
+            options = Options(
+                temperature=args.get('temperature', 1.0),
+                num_ctx=args.get('num_ctx', 32768),
+            )
+            if args.get('num_predict', 0) > 0:
+                options.num_predict = args['num_predict']
+            if 'max_tokens' in args:
+                options.num_predict = args['max_tokens']
         return options

     def complete(self, **args) -> LLMGatewayResponse:
@@ -90,6 +105,12 @@ class OllamaGateway(LLMGateway):
             'options': options
         }

+        # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+        config = args.get('config', None)
+        if config and config.reasoning_effort is not None:
+            ollama_args['think'] = True
+            logger.info("Enabling extended thinking for Ollama", reasoning_effort=config.reasoning_effort)
+
         if 'object_model' in args and args['object_model'] is not None:
             ollama_args['format'] = args['object_model'].model_json_schema()

@@ -113,10 +134,14 @@ class OllamaGateway(LLMGateway):
                 arguments={str(k): str(t.function.arguments[k]) for k in t.function.arguments})
                 for t in response.message.tool_calls]

+        # Extract thinking content if present
+        thinking = getattr(response.message, 'thinking', None)
+
         return LLMGatewayResponse(
             content=response.message.content,
             object=object,
             tool_calls=tool_calls,
+            thinking=thinking
         )

     def complete_stream(self, **args) -> Iterator[StreamingResponse]:
@@ -156,6 +181,12 @@ class OllamaGateway(LLMGateway):
             'stream': True
         }

+        # Handle reasoning effort - if config has reasoning_effort set, enable thinking
+        config = args.get('config', None)
+        if config and config.reasoning_effort is not None:
+            ollama_args['think'] = True
+            logger.info("Enabling extended thinking for Ollama streaming", reasoning_effort=config.reasoning_effort)
+
         # Enable tool support if tools are provided
         if 'tools' in args and args['tools'] is not None:
             ollama_args['tools'] = [t.descriptor for t in args['tools']]
@@ -168,6 +199,10 @@ class OllamaGateway(LLMGateway):
             if chunk.message.content:
                 yield StreamingResponse(content=chunk.message.content)

+            # Yield thinking chunks when they arrive
+            if hasattr(chunk.message, 'thinking') and chunk.message.thinking:
+                yield StreamingResponse(thinking=chunk.message.thinking)
+
             # Yield tool calls when they arrive
             if chunk.message.tool_calls:
                 yield StreamingResponse(tool_calls=chunk.message.tool_calls)
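A sketch of consuming the new thinking chunks on the streaming path; the model name is illustrative and a local Ollama server at the default host is assumed, while the thinking/content fields are the ones added in the hunks above.

    from mojentic.llm import CompletionConfig
    from mojentic.llm.gateways.models import LLMMessage, MessageRole
    from mojentic.llm.gateways.ollama import OllamaGateway

    gateway = OllamaGateway()
    config = CompletionConfig(reasoning_effort="high")  # any level turns on think=True for Ollama

    for chunk in gateway.complete_stream(
            model="deepseek-r1:8b",  # illustrative reasoning-capable model
            messages=[LLMMessage(role=MessageRole.User, content="Why use a config object?")],
            config=config):
        if chunk.thinking:
            print(chunk.thinking, end="")   # reasoning-trace chunks
        if chunk.content:
            print(chunk.content, end="")    # answer-text chunks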
{mojentic-1.1.0 → mojentic-1.2.0}/src/mojentic/llm/gateways/openai.py

@@ -121,6 +121,20 @@ class OpenAIGateway(LLMGateway):
                            supported_temperatures=capabilities.supported_temperatures)
             adapted_args['temperature'] = default_temp

+        # Handle reasoning_effort for reasoning models
+        if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+            if capabilities.model_type == ModelType.REASONING:
+                # Keep reasoning_effort for reasoning models
+                logger.info("Adding reasoning_effort parameter for reasoning model",
+                            model=model,
+                            reasoning_effort=adapted_args['reasoning_effort'])
+            else:
+                # Warn and remove for non-reasoning models
+                logger.warning("Model does not support reasoning_effort, ignoring parameter",
+                               model=model,
+                               requested_reasoning_effort=adapted_args['reasoning_effort'])
+                adapted_args.pop('reasoning_effort', None)
+
         return adapted_args

     def _validate_model_parameters(self, model: str, args: dict) -> None:
@@ -189,10 +203,21 @@ class OpenAIGateway(LLMGateway):
         messages = kwargs.get('messages')
         object_model = kwargs.get('object_model', None)
         tools = kwargs.get('tools', None)
-
-
-
-
+        config = kwargs.get('config', None)
+
+        # Use config if provided, otherwise use individual kwargs
+        if config:
+            temperature = config.temperature
+            num_ctx = config.num_ctx
+            max_tokens = config.max_tokens
+            num_predict = config.num_predict
+            reasoning_effort = config.reasoning_effort
+        else:
+            temperature = kwargs.get('temperature', 1.0)
+            num_ctx = kwargs.get('num_ctx', 32768)
+            max_tokens = kwargs.get('max_tokens', 16384)
+            num_predict = kwargs.get('num_predict', -1)
+            reasoning_effort = None

         if not model:
             raise ValueError("'model' parameter is required")
@@ -208,7 +233,8 @@ class OpenAIGateway(LLMGateway):
             'temperature': temperature,
             'num_ctx': num_ctx,
             'max_tokens': max_tokens,
-            'num_predict': num_predict
+            'num_predict': num_predict,
+            'reasoning_effort': reasoning_effort
         }

         # Adapt parameters based on model type
@@ -247,10 +273,15 @@ class OpenAIGateway(LLMGateway):
         elif 'max_completion_tokens' in adapted_args:
             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+        # Add reasoning_effort if present in adapted args
+        if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+            openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
         logger.debug("Making OpenAI API call",
                      model=openai_args['model'],
                      has_tools='tools' in openai_args,
                      has_object_model='response_format' in openai_args,
+                     has_reasoning_effort='reasoning_effort' in openai_args,
                      token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

         try:
@@ -339,10 +370,21 @@ class OpenAIGateway(LLMGateway):
         messages = kwargs.get('messages')
         object_model = kwargs.get('object_model', None)
         tools = kwargs.get('tools', None)
-
-
-
-
+        config = kwargs.get('config', None)
+
+        # Use config if provided, otherwise use individual kwargs
+        if config:
+            temperature = config.temperature
+            num_ctx = config.num_ctx
+            max_tokens = config.max_tokens
+            num_predict = config.num_predict
+            reasoning_effort = config.reasoning_effort
+        else:
+            temperature = kwargs.get('temperature', 1.0)
+            num_ctx = kwargs.get('num_ctx', 32768)
+            max_tokens = kwargs.get('max_tokens', 16384)
+            num_predict = kwargs.get('num_predict', -1)
+            reasoning_effort = None

         if not model:
             raise ValueError("'model' parameter is required")
@@ -358,7 +400,8 @@ class OpenAIGateway(LLMGateway):
             'temperature': temperature,
             'num_ctx': num_ctx,
             'max_tokens': max_tokens,
-            'num_predict': num_predict
+            'num_predict': num_predict,
+            'reasoning_effort': reasoning_effort
         }

         # Adapt parameters based on model type
@@ -401,9 +444,14 @@ class OpenAIGateway(LLMGateway):
         elif 'max_completion_tokens' in adapted_args:
             openai_args['max_completion_tokens'] = adapted_args['max_completion_tokens']

+        # Add reasoning_effort if present in adapted args
+        if 'reasoning_effort' in adapted_args and adapted_args['reasoning_effort'] is not None:
+            openai_args['reasoning_effort'] = adapted_args['reasoning_effort']
+
         logger.debug("Making OpenAI streaming API call",
                      model=openai_args['model'],
                      has_tools='tools' in openai_args,
+                     has_reasoning_effort='reasoning_effort' in openai_args,
                      token_param='max_completion_tokens' if 'max_completion_tokens' in openai_args else 'max_tokens')

         try:
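To round out the OpenAI path, a hedged sketch of the behavior these hunks implement: reasoning_effort is forwarded only when the model registry marks the model as a reasoning model; otherwise the gateway logs a warning and drops the parameter. The constructor arguments and model names below are assumptions, not part of this diff.

    import os

    from mojentic.llm import CompletionConfig
    from mojentic.llm.gateways.models import LLMMessage, MessageRole
    from mojentic.llm.gateways.openai import OpenAIGateway

    gateway = OpenAIGateway(api_key=os.environ["OPENAI_API_KEY"])  # assumption: api_key constructor kwarg
    config = CompletionConfig(reasoning_effort="high", max_tokens=4096)

    # With a reasoning model, reasoning_effort is passed through to the API call.
    response = gateway.complete(
        model="o3-mini",  # illustrative reasoning model
        messages=[LLMMessage(role=MessageRole.User, content="Outline a release checklist.")],
        config=config,
    )
    print(response.content)

    # With a non-reasoning model the same call logs a warning and omits reasoning_effort.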