llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
--- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py (0.3.5)
+++ llama_stack/providers/inline/agents/meta_reference/responses/streaming.py (0.4.1)
@@ -8,15 +8,42 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any

-from
+from openai.types.chat import ChatCompletionToolParam
+from opentelemetry import trace
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
+    Inference,
     MCPListToolsTool,
+    ModelNotFoundError,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionToolCall,
+    OpenAIChatCompletionToolChoice,
+    OpenAIChatCompletionToolChoiceAllowedTools,
+    OpenAIChatCompletionToolChoiceCustomTool,
+    OpenAIChatCompletionToolChoiceFunctionTool,
+    OpenAIChoice,
+    OpenAIChoiceLogprobs,
+    OpenAIMessageParam,
     OpenAIResponseContentPartOutputText,
     OpenAIResponseContentPartReasoningText,
     OpenAIResponseContentPartRefusal,
     OpenAIResponseError,
     OpenAIResponseInputTool,
+    OpenAIResponseInputToolChoice,
+    OpenAIResponseInputToolChoiceAllowedTools,
+    OpenAIResponseInputToolChoiceCustomTool,
+    OpenAIResponseInputToolChoiceFileSearch,
+    OpenAIResponseInputToolChoiceFunctionTool,
+    OpenAIResponseInputToolChoiceMCPTool,
+    OpenAIResponseInputToolChoiceMode,
+    OpenAIResponseInputToolChoiceWebSearch,
     OpenAIResponseInputToolMCP,
     OpenAIResponseMCPApprovalRequest,
     OpenAIResponseMessage,
@@ -49,34 +76,27 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseUsage,
     OpenAIResponseUsageInputTokensDetails,
     OpenAIResponseUsageOutputTokensDetails,
+    OpenAIToolMessageParam,
+    ResponseItemInclude,
+    Safety,
     WebSearchToolTypes,
 )
-from llama_stack.apis.inference import (
-    Inference,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionToolCall,
-    OpenAIChoice,
-    OpenAIMessageParam,
-)
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (
     convert_chat_choice_to_response_message,
+    convert_mcp_tool_choice,
     is_function_tool_call,
     run_guardrails,
 )

 logger = get_logger(name=__name__, category="agents::meta_reference")
+tracer = trace.get_tracer(__name__)


 def convert_tooldef_to_chat_tool(tool_def):
@@ -110,9 +130,14 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
-        instructions: str,
-        safety_api,
+        instructions: str | None,
+        safety_api: Safety | None,
         guardrail_ids: list[str] | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
+        include: list[ResponseItemInclude] | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -123,9 +148,27 @@ class StreamingResponseOrchestrator:
         self.tool_executor = tool_executor
         self.safety_api = safety_api
         self.guardrail_ids = guardrail_ids or []
+        self.prompt = prompt
+        # System message that is inserted into the model's context
+        self.instructions = instructions
+        # Whether to allow more than one function tool call generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
+        # Max number of total calls to built-in tools that can be processed in a response
+        self.max_tool_calls = max_tool_calls
+        self.metadata = metadata
+        self.include = include
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
-        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] =
+        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
+            ctx.tool_context.previous_tools if ctx.tool_context else {}
+        )
+        # Reverse mapping: server_label -> list of tool names for efficient lookup
+        self.server_label_to_tools: dict[str, list[str]] = {}
+        # Build initial reverse mapping from previous_tools
+        for tool_name, mcp_server in self.mcp_tool_to_server.items():
+            if mcp_server.server_label not in self.server_label_to_tools:
+                self.server_label_to_tools[mcp_server.server_label] = []
+            self.server_label_to_tools[mcp_server.server_label].append(tool_name)
         # Track final messages after all tool executions
         self.final_messages: list[OpenAIMessageParam] = []
         # mapping for annotations
@@ -134,8 +177,8 @@ class StreamingResponseOrchestrator:
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
-        #
-        self.
+        # Track total calls made to built-in tools
+        self.accumulated_builtin_tool_calls = 0

     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -148,6 +191,7 @@ class StreamingResponseOrchestrator:
             model=self.ctx.model,
             status="completed",
             output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
+            metadata=self.metadata,
         )

         return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
@@ -177,9 +221,14 @@ class StreamingResponseOrchestrator:
             output=self._clone_outputs(outputs),
             text=self.text,
             tools=self.ctx.available_tools(),
+            tool_choice=self.ctx.tool_choice,
             error=error,
             usage=self.accumulated_usage,
             instructions=self.instructions,
+            prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
+            max_tool_calls=self.max_tool_calls,
+            metadata=self.metadata,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -208,6 +257,34 @@ class StreamingResponseOrchestrator:
         async for stream_event in self._process_tools(output_messages):
             yield stream_event

+        chat_tool_choice = None
+        # Track allowed tools for filtering (persists across iterations)
+        allowed_tool_names: set[str] | None = None
+        if self.ctx.tool_choice and len(self.ctx.chat_tools) > 0:
+            processed_tool_choice = await _process_tool_choice(
+                self.ctx.chat_tools,
+                self.ctx.tool_choice,
+                self.server_label_to_tools,
+            )
+            # chat_tool_choice can be str, dict-like object, or None
+            if isinstance(processed_tool_choice, str | type(None)):
+                chat_tool_choice = processed_tool_choice
+            elif isinstance(processed_tool_choice, OpenAIChatCompletionToolChoiceAllowedTools):
+                # For allowed_tools: filter the tools list instead of using tool_choice
+                # This maintains the constraint across all iterations while letting model
+                # decide freely whether to call a tool or respond
+                allowed_tool_names = {
+                    tool["function"]["name"]
+                    for tool in processed_tool_choice.allowed_tools.tools
+                    if tool.get("type") == "function" and "function" in tool
+                }
+                # Use the mode (e.g., "required") for first iteration, then "auto"
+                chat_tool_choice = (
+                    processed_tool_choice.allowed_tools.mode if processed_tool_choice.allowed_tools.mode else "auto"
+                )
+            else:
+                chat_tool_choice = processed_tool_choice.model_dump()
+
         n_iter = 0
         messages = self.ctx.messages.copy()
         final_status = "completed"
@@ -217,19 +294,36 @@ class StreamingResponseOrchestrator:
             while True:
                 # Text is the default response format for chat completion so don't need to pass it
                 # (some providers don't support non-empty response_format when tools are present)
-                response_format =
-
+                response_format = (
+                    None if getattr(self.ctx.response_format, "type", None) == "text" else self.ctx.response_format
+                )
+                # Filter tools to only allowed ones if tool_choice specified an allowed list
+                effective_tools = self.ctx.chat_tools
+                if allowed_tool_names is not None:
+                    effective_tools = [
+                        tool
+                        for tool in self.ctx.chat_tools
+                        if tool.get("function", {}).get("name") in allowed_tool_names
+                    ]
+                logger.debug(f"calling openai_chat_completion with tools: {effective_tools}")
+
+                logprobs = (
+                    True if self.include and ResponseItemInclude.message_output_text_logprobs in self.include else None
+                )

                 params = OpenAIChatCompletionRequestWithExtraBody(
                     model=self.ctx.model,
                     messages=messages,
-
+                    # Pydantic models are dict-compatible but mypy treats them as distinct types
+                    tools=effective_tools,  # type: ignore[arg-type]
+                    tool_choice=chat_tool_choice,
                     stream=True,
                     temperature=self.ctx.temperature,
                     response_format=response_format,
                     stream_options={
                         "include_usage": True,
                     },
+                    logprobs=logprobs,
                 )
                 completion_result = await self.inference_api.openai_chat_completion(params)

@@ -266,7 +360,12 @@ class StreamingResponseOrchestrator:

                # Handle choices with no tool calls
                for choice in current_response.choices:
-
+                    has_tool_calls = (
+                        isinstance(choice.message, OpenAIAssistantMessageParam)
+                        and choice.message.tool_calls
+                        and self.ctx.response_tools
+                    )
+                    if not has_tool_calls:
                        output_messages.append(
                            await convert_chat_choice_to_response_message(
                                choice,
@@ -295,6 +394,14 @@ class StreamingResponseOrchestrator:
                    break

                n_iter += 1
+                # After first iteration, reset tool_choice to "auto" to let model decide freely
+                # based on tool results (prevents infinite loops when forcing specific tools)
+                # Note: When allowed_tool_names is set, tools are already filtered so model
+                # can only call allowed tools - we just need to let it decide whether to call
+                # a tool or respond (hence "auto" mode)
+                if n_iter == 1 and chat_tool_choice and chat_tool_choice != "auto":
+                    chat_tool_choice = "auto"
+
                if n_iter >= self.max_infer_iters:
                    logger.info(
                        f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}"
@@ -305,6 +412,8 @@ class StreamingResponseOrchestrator:
            if last_completion_result and last_completion_result.finish_reason == "length":
                final_status = "incomplete"

+        except ModelNotFoundError:
+            raise
        except Exception as exc:  # noqa: BLE001
            self.final_messages = messages.copy()
            self.sequence_number += 1
@@ -544,6 +653,7 @@ class StreamingResponseOrchestrator:
         chunk_created = 0
         chunk_model = ""
         chunk_finish_reason = ""
+        chat_response_logprobs = []

         # Create a placeholder message item for delta events
         message_item_id = f"msg_{uuid.uuid4()}"
@@ -573,6 +683,12 @@ class StreamingResponseOrchestrator:
            chunk_events: list[OpenAIResponseObjectStream] = []

            for chunk_choice in chunk.choices:
+                # Collect logprobs if present
+                chunk_logprobs = None
+                if chunk_choice.logprobs and chunk_choice.logprobs.content:
+                    chunk_logprobs = chunk_choice.logprobs.content
+                    chat_response_logprobs.extend(chunk_logprobs)
+
                # Emit incremental text content as delta events
                if chunk_choice.delta.content:
                    # Emit output_item.added for the message on first content
@@ -612,6 +728,7 @@ class StreamingResponseOrchestrator:
                            content_index=content_index,
                            delta=chunk_choice.delta.content,
                            item_id=message_item_id,
+                            logprobs=chunk_logprobs,
                            output_index=message_output_index,
                            sequence_number=self.sequence_number,
                        )
@@ -716,7 +833,10 @@ class StreamingResponseOrchestrator:
                        )

                        # Accumulate arguments for final response (only for subsequent chunks)
-                        if not is_new_tool_call:
+                        if not is_new_tool_call and response_tool_call is not None:
+                            # Both should have functions since we're inside the tool_call.function check above
+                            assert response_tool_call.function is not None
+                            assert tool_call.function is not None
                            response_tool_call.function.arguments = (
                                response_tool_call.function.arguments or ""
                            ) + tool_call.function.arguments
@@ -741,10 +861,13 @@ class StreamingResponseOrchestrator:
        for tool_call_index in sorted(chat_response_tool_calls.keys()):
            tool_call = chat_response_tool_calls[tool_call_index]
            # Ensure that arguments, if sent back to the inference provider, are not None
-
+            if tool_call.function:
+                tool_call.function.arguments = tool_call.function.arguments or "{}"
            tool_call_item_id = tool_call_item_ids[tool_call_index]
-            final_arguments = tool_call.function.arguments
-
+            final_arguments: str = tool_call.function.arguments or "{}" if tool_call.function else "{}"
+            func = chat_response_tool_calls[tool_call_index].function
+
+            tool_call_name = func.name if func else ""

            # Check if this is an MCP tool call
            is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server
@@ -809,6 +932,7 @@ class StreamingResponseOrchestrator:
                    OpenAIResponseOutputMessageContentOutputText(
                        text=final_text,
                        annotations=[],
+                        logprobs=chat_response_logprobs if chat_response_logprobs else None,
                    )
                )

@@ -836,6 +960,7 @@ class StreamingResponseOrchestrator:
            message_item_id=message_item_id,
            tool_call_item_ids=tool_call_item_ids,
            content_part_emitted=content_part_emitted,
+            logprobs=OpenAIChoiceLogprobs(content=chat_response_logprobs) if chat_response_logprobs else None,
        )

    def _build_chat_completion(self, result: ChatCompletionResult) -> OpenAIChatCompletion:
@@ -857,6 +982,7 @@ class StreamingResponseOrchestrator:
                    message=assistant_message,
                    finish_reason=result.finish_reason,
                    index=0,
+                    logprobs=result.logprobs,
                )
            ],
            created=result.created,
@@ -874,6 +1000,17 @@ class StreamingResponseOrchestrator:
        """Coordinate execution of both function and non-function tool calls."""
        # Execute non-function tool calls
        for tool_call in non_function_tool_calls:
+            # if total calls made to built-in and mcp tools exceed max_tool_calls
+            # then create a tool response message indicating the call was skipped
+            if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
+                logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
+                skipped_call_message = OpenAIToolMessageParam(
+                    content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
+                    tool_call_id=tool_call.id,
+                )
+                next_turn_messages.append(skipped_call_message)
+                continue
+
            # Find the item_id for this tool call
            matching_item_id = None
            for index, item_id in completion_result_data.tool_call_item_ids.items():
@@ -888,12 +1025,11 @@ class StreamingResponseOrchestrator:

            self.sequence_number += 1
            if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
-                item = OpenAIResponseOutputMessageMCPCall(
+                item: OpenAIResponseOutput = OpenAIResponseOutputMessageMCPCall(
                    arguments="",
                    name=tool_call.function.name,
                    id=matching_item_id,
                    server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
-                    status="in_progress",
                )
            elif tool_call.function.name == "web_search":
                item = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -955,6 +1091,9 @@ class StreamingResponseOrchestrator:
            if tool_response_message:
                next_turn_messages.append(tool_response_message)

+            # Track number of calls made to built-in and mcp tools
+            self.accumulated_builtin_tool_calls += 1
+
        # Execute function tool calls (client-side)
        for tool_call in function_tool_calls:
            # Find the item_id for this tool call from our tracking dictionary
@@ -992,9 +1131,9 @@ class StreamingResponseOrchestrator:
        """Process all tools and emit appropriate streaming events."""
        from openai.types.chat import ChatCompletionToolParam

-        from llama_stack.apis.tools import ToolDef
        from llama_stack.models.llama.datatypes import ToolDefinition
        from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+        from llama_stack_api import ToolDef

        def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
            tool_def = ToolDefinition(
@@ -1002,7 +1141,7 @@ class StreamingResponseOrchestrator:
                description=tool.description,
                input_schema=tool.input_schema,
            )
-            return convert_tooldef_to_openai_tool(tool_def)
+            return convert_tooldef_to_openai_tool(tool_def)  # type: ignore[return-value] # Returns dict but ChatCompletionToolParam expects TypedDict

        # Initialize chat_tools if not already set
        if self.ctx.chat_tools is None:
@@ -1010,7 +1149,7 @@ class StreamingResponseOrchestrator:

        for input_tool in tools:
            if input_tool.type == "function":
-                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
+                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))  # type: ignore[typeddict-item,arg-type] # Dict compatible with FunctionDefinition
            elif input_tool.type in WebSearchToolTypes:
                tool_name = "web_search"
                # Need to access tool_groups_api from tool_executor
@@ -1049,8 +1188,8 @@ class StreamingResponseOrchestrator:
        if isinstance(mcp_tool.allowed_tools, list):
            always_allowed = mcp_tool.allowed_tools
        elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter):
-
-
+            # AllowedToolsFilter only has tool_names field (not allowed/disallowed)
+            always_allowed = mcp_tool.allowed_tools.tool_names

        # Call list_mcp_tools
        tool_defs = None
@@ -1060,10 +1199,14 @@ class StreamingResponseOrchestrator:
            "server_url": mcp_tool.server_url,
            "mcp_list_tools_id": list_id,
        }
-
+
+        # TODO: follow semantic conventions for Open Telemetry tool spans
+        # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+        with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
            tool_defs = await list_mcp_tools(
                endpoint=mcp_tool.server_url,
-                headers=mcp_tool.headers
+                headers=mcp_tool.headers,
+                authorization=mcp_tool.authorization,
            )

        # Create the MCP list tools message
@@ -1082,13 +1225,18 @@ class StreamingResponseOrchestrator:
            openai_tool = convert_tooldef_to_chat_tool(t)
            if self.ctx.chat_tools is None:
                self.ctx.chat_tools = []
-            self.ctx.chat_tools.append(openai_tool)
+            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type] # Returns dict but ChatCompletionToolParam expects TypedDict

            # Add to MCP tool mapping
            if t.name in self.mcp_tool_to_server:
                raise ValueError(f"Duplicate tool name {t.name} found for server {mcp_tool.server_label}")
            self.mcp_tool_to_server[t.name] = mcp_tool

+            # Add to reverse mapping for efficient server_label lookup
+            if mcp_tool.server_label not in self.server_label_to_tools:
+                self.server_label_to_tools[mcp_tool.server_label] = []
+            self.server_label_to_tools[mcp_tool.server_label].append(t.name)
+
            # Add to MCP list message
            mcp_list_message.tools.append(
                MCPListToolsTool(
@@ -1114,13 +1262,17 @@ class StreamingResponseOrchestrator:
        self, output_messages: list[OpenAIResponseOutput]
    ) -> AsyncIterator[OpenAIResponseObjectStream]:
        # Handle all mcp tool lists from previous response that are still valid:
-
-
-
-
-
-
-
+        # tool_context can be None when no tools are provided in the response request
+        if self.ctx.tool_context:
+            for tool in self.ctx.tool_context.previous_tool_listings:
+                async for evt in self._reuse_mcp_list_tools(tool, output_messages):
+                    yield evt
+            # Process all remaining tools (including MCP tools) and emit streaming events
+            if self.ctx.tool_context.tools_to_process:
+                async for stream_event in self._process_new_tools(
+                    self.ctx.tool_context.tools_to_process, output_messages
+                ):
+                    yield stream_event

    def _approval_required(self, tool_name: str) -> bool:
        if tool_name not in self.mcp_tool_to_server:
@@ -1131,9 +1283,9 @@ class StreamingResponseOrchestrator:
        if mcp_server.require_approval == "never":
            return False
        if isinstance(mcp_server, ApprovalFilter):
-            if tool_name in mcp_server.always:
+            if mcp_server.always and tool_name in mcp_server.always:
                return True
-            if tool_name in mcp_server.never:
+            if mcp_server.never and tool_name in mcp_server.never:
                return False
        return True

@@ -1214,7 +1366,7 @@ class StreamingResponseOrchestrator:
            openai_tool = convert_tooldef_to_openai_tool(tool_def)
            if self.ctx.chat_tools is None:
                self.ctx.chat_tools = []
-            self.ctx.chat_tools.append(openai_tool)
+            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type] # Returns dict but ChatCompletionToolParam expects TypedDict

        mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
            id=f"mcp_list_{uuid.uuid4()}",
@@ -1224,3 +1376,112 @@ class StreamingResponseOrchestrator:

        async for stream_event in self._add_mcp_list_tools(mcp_list_message, output_messages):
            yield stream_event
+
+
+async def _process_tool_choice(
+    chat_tools: list[ChatCompletionToolParam],
+    tool_choice: OpenAIResponseInputToolChoice,
+    server_label_to_tools: dict[str, list[str]],
+) -> str | OpenAIChatCompletionToolChoice | None:
+    """Process and validate the OpenAI Responses tool choice and return the appropriate chat completion tool choice object.
+
+    :param chat_tools: The list of chat tools to enforce tool choice against.
+    :param tool_choice: The OpenAI Responses tool choice to process.
+    :param server_label_to_tools: A dictionary mapping server labels to the list of tools available on that server.
+    :return: The appropriate chat completion tool choice object.
+    """
+
+    # retrieve all function tool names from the chat tools
+    # Note: chat_tools contains dicts, not objects
+    chat_tool_names = [tool["function"]["name"] for tool in chat_tools if tool["type"] == "function"]
+
+    if isinstance(tool_choice, OpenAIResponseInputToolChoiceMode):
+        if tool_choice.value == "required":
+            if len(chat_tool_names) == 0:
+                return None
+
+            # add all function tools to the allowed tools list and set mode to required
+            return OpenAIChatCompletionToolChoiceAllowedTools(
+                tools=[{"type": "function", "function": {"name": tool}} for tool in chat_tool_names],
+                mode="required",
+            )
+        # return other modes as is
+        return tool_choice.value
+
+    elif isinstance(tool_choice, OpenAIResponseInputToolChoiceAllowedTools):
+        # ensure that specified tool choices are available in the chat tools, if not, remove them from the list
+        final_tools = []
+        for tool in tool_choice.tools:
+            match tool.get("type"):
+                case "function":
+                    final_tools.append({"type": "function", "function": {"name": tool.get("name")}})
+                case "custom":
+                    final_tools.append({"type": "custom", "custom": {"name": tool.get("name")}})
+                case "mcp":
+                    mcp_tools = convert_mcp_tool_choice(
+                        chat_tool_names, tool.get("server_label"), server_label_to_tools, None
+                    )
+                    # convert_mcp_tool_choice can return a dict, list, or None
+                    if isinstance(mcp_tools, list):
+                        final_tools.extend(mcp_tools)
+                    elif isinstance(mcp_tools, dict):
+                        final_tools.append(mcp_tools)
+                    # Skip if None or empty
+                case "file_search":
+                    final_tools.append({"type": "function", "function": {"name": "file_search"}})
+                case _ if tool["type"] in WebSearchToolTypes:
+                    final_tools.append({"type": "function", "function": {"name": "web_search"}})
+                case _:
+                    logger.warning(f"Unsupported tool type: {tool['type']}, skipping tool choice enforcement for it")
+                    continue
+
+        return OpenAIChatCompletionToolChoiceAllowedTools(
+            tools=final_tools,
+            mode=tool_choice.mode,
+        )
+
+    else:
+        # Handle specific tool choice by type
+        # Each case validates the tool exists in chat_tools before returning
+        tool_name = getattr(tool_choice, "name", None)
+        match tool_choice:
+            case OpenAIResponseInputToolChoiceCustomTool():
+                if tool_name and tool_name not in chat_tool_names:
+                    logger.warning(f"Tool {tool_name} not found in chat tools")
+                    return None
+                return OpenAIChatCompletionToolChoiceCustomTool(name=tool_name)
+
+            case OpenAIResponseInputToolChoiceFunctionTool():
+                if tool_name and tool_name not in chat_tool_names:
+                    logger.warning(f"Tool {tool_name} not found in chat tools")
+                    return None
+                return OpenAIChatCompletionToolChoiceFunctionTool(name=tool_name)
+
+            case OpenAIResponseInputToolChoiceFileSearch():
+                if "file_search" not in chat_tool_names:
+                    logger.warning("Tool file_search not found in chat tools")
+                    return None
+                return OpenAIChatCompletionToolChoiceFunctionTool(name="file_search")
+
+            case OpenAIResponseInputToolChoiceWebSearch():
+                if "web_search" not in chat_tool_names:
+                    logger.warning("Tool web_search not found in chat tools")
+                    return None
+                return OpenAIChatCompletionToolChoiceFunctionTool(name="web_search")
+
+            case OpenAIResponseInputToolChoiceMCPTool():
+                tool_choice = convert_mcp_tool_choice(
+                    chat_tool_names,
+                    tool_choice.server_label,
+                    server_label_to_tools,
+                    tool_name,
+                )
+                if isinstance(tool_choice, dict):
+                    # for single tool choice, return as function tool choice
+                    return OpenAIChatCompletionToolChoiceFunctionTool(name=tool_choice["function"]["name"])
+                elif isinstance(tool_choice, list):
+                    # for multiple tool choices, return as allowed tools
+                    return OpenAIChatCompletionToolChoiceAllowedTools(
+                        tools=tool_choice,
+                        mode="required",
+                    )