kiln-ai 0.18.0__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kiln-ai might be problematic.

Files changed (89)
  1. kiln_ai/adapters/__init__.py +2 -2
  2. kiln_ai/adapters/adapter_registry.py +46 -0
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/data_gen/data_gen_task.py +2 -2
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +7 -3
  7. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  8. kiln_ai/adapters/eval/base_eval.py +2 -2
  9. kiln_ai/adapters/eval/eval_runner.py +3 -1
  10. kiln_ai/adapters/eval/g_eval.py +2 -2
  11. kiln_ai/adapters/eval/test_base_eval.py +1 -1
  12. kiln_ai/adapters/eval/test_eval_runner.py +6 -12
  13. kiln_ai/adapters/eval/test_g_eval.py +3 -4
  14. kiln_ai/adapters/eval/test_g_eval_data.py +1 -1
  15. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  16. kiln_ai/adapters/fine_tune/base_finetune.py +1 -0
  17. kiln_ai/adapters/fine_tune/fireworks_finetune.py +32 -20
  18. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  19. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +30 -21
  20. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  21. kiln_ai/adapters/ml_model_list.py +1009 -111
  22. kiln_ai/adapters/model_adapters/base_adapter.py +62 -28
  23. kiln_ai/adapters/model_adapters/litellm_adapter.py +397 -80
  24. kiln_ai/adapters/model_adapters/test_base_adapter.py +194 -18
  25. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +428 -4
  26. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  27. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  28. kiln_ai/adapters/model_adapters/test_structured_output.py +120 -14
  29. kiln_ai/adapters/parsers/__init__.py +1 -1
  30. kiln_ai/adapters/parsers/test_r1_parser.py +1 -1
  31. kiln_ai/adapters/provider_tools.py +35 -20
  32. kiln_ai/adapters/remote_config.py +57 -10
  33. kiln_ai/adapters/repair/repair_task.py +1 -1
  34. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  35. kiln_ai/adapters/run_output.py +3 -0
  36. kiln_ai/adapters/test_adapter_registry.py +109 -2
  37. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  38. kiln_ai/adapters/test_ml_model_list.py +51 -1
  39. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  40. kiln_ai/adapters/test_provider_tools.py +73 -12
  41. kiln_ai/adapters/test_remote_config.py +470 -16
  42. kiln_ai/datamodel/__init__.py +23 -21
  43. kiln_ai/datamodel/basemodel.py +54 -28
  44. kiln_ai/datamodel/datamodel_enums.py +3 -0
  45. kiln_ai/datamodel/dataset_split.py +5 -3
  46. kiln_ai/datamodel/eval.py +4 -4
  47. kiln_ai/datamodel/external_tool_server.py +298 -0
  48. kiln_ai/datamodel/finetune.py +2 -2
  49. kiln_ai/datamodel/json_schema.py +25 -10
  50. kiln_ai/datamodel/project.py +11 -4
  51. kiln_ai/datamodel/prompt.py +2 -2
  52. kiln_ai/datamodel/prompt_id.py +4 -4
  53. kiln_ai/datamodel/registry.py +0 -15
  54. kiln_ai/datamodel/run_config.py +62 -0
  55. kiln_ai/datamodel/task.py +8 -83
  56. kiln_ai/datamodel/task_output.py +7 -2
  57. kiln_ai/datamodel/task_run.py +41 -0
  58. kiln_ai/datamodel/test_basemodel.py +213 -21
  59. kiln_ai/datamodel/test_eval_model.py +6 -6
  60. kiln_ai/datamodel/test_example_models.py +175 -0
  61. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  62. kiln_ai/datamodel/test_model_perf.py +1 -1
  63. kiln_ai/datamodel/test_prompt_id.py +5 -1
  64. kiln_ai/datamodel/test_registry.py +8 -3
  65. kiln_ai/datamodel/test_task.py +20 -47
  66. kiln_ai/datamodel/test_tool_id.py +239 -0
  67. kiln_ai/datamodel/tool_id.py +83 -0
  68. kiln_ai/tools/__init__.py +8 -0
  69. kiln_ai/tools/base_tool.py +82 -0
  70. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  71. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  72. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  73. kiln_ai/tools/mcp_server_tool.py +95 -0
  74. kiln_ai/tools/mcp_session_manager.py +243 -0
  75. kiln_ai/tools/test_base_tools.py +199 -0
  76. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  77. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  78. kiln_ai/tools/test_tool_registry.py +473 -0
  79. kiln_ai/tools/tool_registry.py +64 -0
  80. kiln_ai/utils/config.py +32 -0
  81. kiln_ai/utils/open_ai_types.py +94 -0
  82. kiln_ai/utils/project_utils.py +17 -0
  83. kiln_ai/utils/test_config.py +138 -1
  84. kiln_ai/utils/test_open_ai_types.py +131 -0
  85. {kiln_ai-0.18.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +37 -6
  86. kiln_ai-0.20.1.dist-info/RECORD +138 -0
  87. kiln_ai-0.18.0.dist-info/RECORD +0 -115
  88. {kiln_ai-0.18.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
  89. {kiln_ai-0.18.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/model_adapters/litellm_adapter.py
@@ -1,9 +1,26 @@
+import copy
+import json
 import logging
-from typing import Any, Dict
+from dataclasses import dataclass
+from typing import Any, Dict, List, Tuple, TypeAlias, Union
 
 import litellm
-from litellm.types.utils import ChoiceLogprobs, Choices, ModelResponse
+from litellm.types.utils import (
+    ChatCompletionMessageToolCall,
+    ChoiceLogprobs,
+    Choices,
+    ModelResponse,
+)
+from litellm.types.utils import (
+    Message as LiteLLMMessage,
+)
 from litellm.types.utils import Usage as LiteLlmUsage
+from openai.types.chat import (
+    ChatCompletionToolMessageParam,
+)
+from openai.types.chat.chat_completion_message_tool_call_param import (
+    ChatCompletionMessageToolCallParam,
+)
 
 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.ml_model_list import (
@@ -18,11 +35,32 @@ from kiln_ai.adapters.model_adapters.base_adapter import (
     Usage,
 )
 from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
-from kiln_ai.datamodel.task import run_config_from_run_config_properties
+from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
+from kiln_ai.tools.base_tool import KilnToolInterface
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+from kiln_ai.utils.open_ai_types import (
+    ChatCompletionAssistantMessageParamWrapper,
+    ChatCompletionMessageParam,
+)
+
+MAX_CALLS_PER_TURN = 10
+MAX_TOOL_CALLS_PER_TURN = 30
 
 logger = logging.getLogger(__name__)
 
+ChatCompletionMessageIncludingLiteLLM: TypeAlias = Union[
+    ChatCompletionMessageParam, LiteLLMMessage
+]
+
+
+@dataclass
+class ModelTurnResult:
+    assistant_message: str
+    all_messages: list[ChatCompletionMessageIncludingLiteLLM]
+    model_response: ModelResponse | None
+    model_choice: Choices | None
+    usage: Usage
+
 
 class LiteLlmAdapter(BaseAdapter):
     def __init__(
@@ -36,117 +74,226 @@ class LiteLlmAdapter(BaseAdapter):
         self._api_base = config.base_url
         self._headers = config.default_headers
         self._litellm_model_id: str | None = None
+        self._cached_available_tools: list[KilnToolInterface] | None = None
 
-        # Create a RunConfig, adding the task to the RunConfigProperties
-        run_config = run_config_from_run_config_properties(
+        super().__init__(
             task=kiln_task,
-            run_config_properties=config.run_config_properties,
+            run_config=config.run_config_properties,
+            config=base_adapter_config,
         )
 
-        super().__init__(
-            run_config=run_config,
-            config=base_adapter_config,
+    async def _run_model_turn(
+        self,
+        provider: KilnModelProvider,
+        prior_messages: list[ChatCompletionMessageIncludingLiteLLM],
+        top_logprobs: int | None,
+        skip_response_format: bool,
+    ) -> ModelTurnResult:
+        """
+        Call the model for a single top level turn: from user message to agent message.
+
+        It may make handle iterations of tool calls between the user/agent message if needed.
+        """
+
+        usage = Usage()
+        messages = list(prior_messages)
+        tool_calls_count = 0
+
+        while tool_calls_count < MAX_TOOL_CALLS_PER_TURN:
+            # Build completion kwargs for tool calls
+            completion_kwargs = await self.build_completion_kwargs(
+                provider,
+                # Pass a copy, as acompletion mutates objects and breaks types.
+                copy.deepcopy(messages),
+                top_logprobs,
+                skip_response_format,
+            )
+
+            # Make the completion call
+            model_response, response_choice = await self.acompletion_checking_response(
+                **completion_kwargs
+            )
+
+            # count the usage
+            usage += self.usage_from_response(model_response)
+
+            # Extract content and tool calls
+            if not hasattr(response_choice, "message"):
+                raise ValueError("Response choice has no message")
+            content = response_choice.message.content
+            tool_calls = response_choice.message.tool_calls
+            if not content and not tool_calls:
+                raise ValueError(
+                    "Model returned an assistant message, but no content or tool calls. This is not supported."
+                )
+
+            # Add message to messages, so it can be used in the next turn
+            messages.append(response_choice.message)
+
+            # Process tool calls if any
+            if tool_calls and len(tool_calls) > 0:
+                (
+                    assistant_message_from_toolcall,
+                    tool_call_messages,
+                ) = await self.process_tool_calls(tool_calls)
+
+                # Add tool call results to messages
+                messages.extend(tool_call_messages)
+
+                # If task_response tool was called, we're done
+                if assistant_message_from_toolcall is not None:
+                    return ModelTurnResult(
+                        assistant_message=assistant_message_from_toolcall,
+                        all_messages=messages,
+                        model_response=model_response,
+                        model_choice=response_choice,
+                        usage=usage,
+                    )
+
+                # If there were tool calls, increment counter and continue
+                if tool_call_messages:
+                    tool_calls_count += 1
+                    continue
+
+            # If no tool calls, return the content as final output
+            if content:
+                return ModelTurnResult(
+                    assistant_message=content,
+                    all_messages=messages,
+                    model_response=model_response,
+                    model_choice=response_choice,
+                    usage=usage,
+                )
+
+            # If we get here with no content and no tool calls, break
+            raise RuntimeError(
+                "Model returned neither content nor tool calls. It must return at least one of these."
+            )
+
+        raise RuntimeError(
+            f"Too many tool calls ({tool_calls_count}). Stopping iteration to avoid using too many tokens."
         )
 
     async def _run(self, input: Dict | str) -> tuple[RunOutput, Usage | None]:
+        usage = Usage()
+
         provider = self.model_provider()
         if not provider.model_id:
             raise ValueError("Model ID is required for OpenAI compatible models")
 
         chat_formatter = self.build_chat_formatter(input)
+        messages: list[ChatCompletionMessageIncludingLiteLLM] = []
 
-        prior_output = None
-        prior_message = None
-        response = None
+        prior_output: str | None = None
+        final_choice: Choices | None = None
         turns = 0
+
         while True:
             turns += 1
-            if turns > 10:
+            if turns > MAX_CALLS_PER_TURN:
                 raise RuntimeError(
-                    "Too many turns. Stopping iteration to avoid using too many tokens."
+                    f"Too many turns ({turns}). Stopping iteration to avoid using too many tokens."
                 )
 
             turn = chat_formatter.next_turn(prior_output)
             if turn is None:
+                # No next turn, we're done
                 break
 
+            # Add messages from the turn to chat history
+            for message in turn.messages:
+                if message.content is None:
+                    raise ValueError("Empty message content isn't allowed")
+                # pyright incorrectly warns about this, but it's valid so we can ignore. It can't handle the multi-value role.
+                messages.append({"role": message.role, "content": message.content})  # type: ignore
+
             skip_response_format = not turn.final_call
-            all_messages = chat_formatter.message_dicts()
-            completion_kwargs = await self.build_completion_kwargs(
+            turn_result = await self._run_model_turn(
                 provider,
-                all_messages,
+                messages,
                 self.base_adapter_config.top_logprobs if turn.final_call else None,
                 skip_response_format,
             )
-            response = await litellm.acompletion(**completion_kwargs)
-            if (
-                not isinstance(response, ModelResponse)
-                or not response.choices
-                or len(response.choices) == 0
-                or not isinstance(response.choices[0], Choices)
-            ):
-                raise RuntimeError(
-                    f"Expected ModelResponse with Choices, got {type(response)}."
-                )
-            prior_message = response.choices[0].message
-            prior_output = prior_message.content
-
-            # Fallback: Use args of first tool call to task_response if it exists
-            if (
-                not prior_output
-                and hasattr(prior_message, "tool_calls")
-                and prior_message.tool_calls
-            ):
-                tool_call = next(
-                    (
-                        tool_call
-                        for tool_call in prior_message.tool_calls
-                        if tool_call.function.name == "task_response"
-                    ),
-                    None,
-                )
-                if tool_call:
-                    prior_output = tool_call.function.arguments
+
+            usage += turn_result.usage
+
+            prior_output = turn_result.assistant_message
+            messages = turn_result.all_messages
+            final_choice = turn_result.model_choice
 
         if not prior_output:
-            raise RuntimeError("No output returned from model")
+            raise RuntimeError("No assistant message/output returned from model")
 
-        if response is None or prior_message is None:
-            raise RuntimeError("No response returned from model")
+        logprobs = self._extract_and_validate_logprobs(final_choice)
 
+        # Save COT/reasoning if it exists. May be a message, or may be parsed by LiteLLM (or openrouter, or anyone upstream)
         intermediate_outputs = chat_formatter.intermediate_outputs()
+        self._extract_reasoning_to_intermediate_outputs(
+            final_choice, intermediate_outputs
+        )
+
+        if not isinstance(prior_output, str):
+            raise RuntimeError(f"assistant message is not a string: {prior_output}")
 
-        logprobs = (
-            response.choices[0].logprobs
-            if hasattr(response.choices[0], "logprobs")
-            and isinstance(response.choices[0].logprobs, ChoiceLogprobs)
-            else None
+        trace = self.all_messages_to_trace(messages)
+        output = RunOutput(
+            output=prior_output,
+            intermediate_outputs=intermediate_outputs,
+            output_logprobs=logprobs,
+            trace=trace,
         )
 
-        # Check logprobs worked, if requested
-        if self.base_adapter_config.top_logprobs is not None and logprobs is None:
-            raise RuntimeError("Logprobs were required, but no logprobs were returned.")
+        return output, usage
 
-        # Save reasoning if it exists and was parsed by LiteLLM (or openrouter, or anyone upstream)
+    def _extract_and_validate_logprobs(
+        self, final_choice: Choices | None
+    ) -> ChoiceLogprobs | None:
+        """
+        Extract logprobs from the final choice and validate they exist if required.
+        """
+        logprobs = None
         if (
-            prior_message is not None
-            and hasattr(prior_message, "reasoning_content")
-            and prior_message.reasoning_content
-            and len(prior_message.reasoning_content.strip()) > 0
+            final_choice is not None
+            and hasattr(final_choice, "logprobs")
+            and isinstance(final_choice.logprobs, ChoiceLogprobs)
         ):
-            intermediate_outputs["reasoning"] = prior_message.reasoning_content.strip()
+            logprobs = final_choice.logprobs
 
-        # the string content of the response
-        response_content = prior_output
+        # Check logprobs worked, if required
+        if self.base_adapter_config.top_logprobs is not None and logprobs is None:
+            raise RuntimeError("Logprobs were required, but no logprobs were returned.")
 
-        if not isinstance(response_content, str):
-            raise RuntimeError(f"response is not a string: {response_content}")
+        return logprobs
 
-        return RunOutput(
-            output=response_content,
-            intermediate_outputs=intermediate_outputs,
-            output_logprobs=logprobs,
-        ), self.usage_from_response(response)
+    def _extract_reasoning_to_intermediate_outputs(
+        self, final_choice: Choices | None, intermediate_outputs: Dict[str, Any]
+    ) -> None:
+        """Extract reasoning content from model choice and add to intermediate outputs if present."""
+        if (
+            final_choice is not None
+            and hasattr(final_choice, "message")
+            and hasattr(final_choice.message, "reasoning_content")
+        ):
+            reasoning_content = final_choice.message.reasoning_content
+            if reasoning_content is not None:
+                stripped_reasoning_content = reasoning_content.strip()
+                if len(stripped_reasoning_content) > 0:
+                    intermediate_outputs["reasoning"] = stripped_reasoning_content
+
+    async def acompletion_checking_response(
+        self, **kwargs
+    ) -> Tuple[ModelResponse, Choices]:
+        response = await litellm.acompletion(**kwargs)
+        if (
+            not isinstance(response, ModelResponse)
+            or not response.choices
+            or len(response.choices) == 0
+            or not isinstance(response.choices[0], Choices)
+        ):
+            raise RuntimeError(
+                f"Expected ModelResponse with Choices, got {type(response)}."
+            )
+        return response, response.choices[0]
 
     def adapter_name(self) -> str:
         return "kiln_openai_compatible_adapter"
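The rewritten _run/_run_model_turn boils down to a per-turn loop: call the model, execute any requested tools, append the tool results, and repeat until the model returns a final assistant message (or the task_response tool) or a cap is hit. The sketch below is a minimal, self-contained illustration of that control flow only; FakeReply, call_model and run_tool are hypothetical stand-ins, not Kiln or LiteLLM APIs.

    # Minimal sketch of the turn loop added in this release (illustrative only).
    from dataclasses import dataclass, field
    from typing import Callable

    MAX_TOOL_CALLS_PER_TURN = 30  # same cap the adapter now enforces


    @dataclass
    class FakeReply:
        content: str | None = None
        tool_calls: list[dict] = field(default_factory=list)


    def run_turn(
        call_model: Callable[[list[dict]], FakeReply],
        run_tool: Callable[[dict], str],
        messages: list[dict],
    ) -> str:
        tool_calls_count = 0
        while tool_calls_count < MAX_TOOL_CALLS_PER_TURN:
            reply = call_model(messages)
            if reply.tool_calls:
                for call in reply.tool_calls:
                    # "task_response" is the sentinel tool used for structured output
                    if call["name"] == "task_response":
                        return call["arguments"]
                    messages.append(
                        {"role": "tool", "tool_call_id": call["id"], "content": run_tool(call)}
                    )
                tool_calls_count += 1
                continue
            if reply.content:
                return reply.content
            raise RuntimeError("Model returned neither content nor tool calls.")
        raise RuntimeError("Too many tool calls in one turn.")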
@@ -181,6 +328,9 @@ class LiteLlmAdapter(BaseAdapter):
                 if provider_name == ModelProviderName.ollama:
                     # Ollama added json_schema to all models: https://ollama.com/blog/structured-outputs
                     return self.json_schema_response_format()
+                elif provider_name == ModelProviderName.docker_model_runner:
+                    # Docker Model Runner uses OpenAI-compatible API with JSON schema support
+                    return self.json_schema_response_format()
                 else:
                     # Default to function calling -- it's older than the other modes. Higher compatibility.
                     # Strict isn't widely supported yet, so we don't use it by default unless it's OpenAI.
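Both Ollama and the new docker_model_runner provider now take the JSON-schema structured-output path. For reference, the response_format payload that path produces follows the standard OpenAI json_schema shape, roughly as sketched below; the schema and name values are illustrative assumptions, and the adapter builds the real schema from the task's output schema.

    # Rough, illustrative shape of a json_schema response_format payload.
    output_schema = {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }

    response_format_options = {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "task_response",  # assumption: any identifier works here
                "schema": output_schema,
            },
        }
    }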
@@ -193,7 +343,7 @@
                 raise_exhaustive_enum_error(structured_output_mode)
 
     def json_schema_response_format(self) -> dict[str, Any]:
-        output_schema = self.task().output_schema()
+        output_schema = self.task.output_schema()
         return {
             "response_format": {
                 "type": "json_schema",
@@ -206,7 +356,7 @@
 
     def tool_call_params(self, strict: bool) -> dict[str, Any]:
         # Add additional_properties: false to the schema (OpenAI requires this for some models)
-        output_schema = self.task().output_schema()
+        output_schema = self.task.output_schema()
         if not isinstance(output_schema, dict):
             raise ValueError(
                 "Invalid output schema for this task. Can not use tool calls."
@@ -235,7 +385,7 @@
         }
 
     def build_extra_body(self, provider: KilnModelProvider) -> dict[str, Any]:
-        # TODO P1: Don't love having this logic here. But it's a usability improvement
+        # Don't love having this logic here. But it's worth the usability improvement
         # so better to keep it than exclude it. Should figure out how I want to isolate
         # this sort of logic so it's config driven and can be overridden
 
@@ -251,6 +401,11 @@
                 "exclude": False,
             }
 
+        if provider.gemini_reasoning_enabled:
+            extra_body["reasoning"] = {
+                "enabled": True,
+            }
+
         if provider.name == ModelProviderName.openrouter:
             # Ask OpenRouter to include usage in the response (cost)
             extra_body["usage"] = {"include": True}
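build_extra_body now also turns on provider-side reasoning when the model entry sets gemini_reasoning_enabled, alongside the existing OpenRouter usage accounting. A sketch of the assembled extra_body for such a provider (the flag values are assumptions for illustration):

    # Sketch of an assembled extra_body dict (values are illustrative assumptions).
    extra_body: dict = {}

    gemini_reasoning_enabled = True  # assumed provider flag
    is_openrouter = True             # assumed provider

    if gemini_reasoning_enabled:
        extra_body["reasoning"] = {"enabled": True}

    if is_openrouter:
        # Ask OpenRouter to include usage (cost) in the response
        extra_body["usage"] = {"include": True}

    print(extra_body)
    # {'reasoning': {'enabled': True}, 'usage': {'include': True}}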
@@ -280,6 +435,10 @@
             # Oddball case, R1 14/8/1.5B fail with this param, even though they support thinking params.
             provider_options["require_parameters"] = False
 
+        # Siliconflow uses a bool flag for thinking, for some models
+        if provider.siliconflow_enable_thinking is not None:
+            extra_body["enable_thinking"] = provider.siliconflow_enable_thinking
+
         if len(provider_options) > 0:
             extra_body["provider"] = provider_options
 
@@ -311,6 +470,10 @@
                 # We don't let litellm use the Ollama API and muck with our requests. We use Ollama's OpenAI compatible API.
                 # This is because we're setting detailed features like response_format=json_schema and want lower level control.
                 is_custom = True
+            case ModelProviderName.docker_model_runner:
+                # Docker Model Runner uses OpenAI-compatible API, similar to Ollama
+                # We want direct control over the requests for features like response_format=json_schema
+                is_custom = True
             case ModelProviderName.gemini_api:
                 litellm_provider_name = "gemini"
             case ModelProviderName.fireworks_ai:
@@ -325,6 +488,10 @@
                 litellm_provider_name = "vertex_ai"
             case ModelProviderName.together_ai:
                 litellm_provider_name = "together_ai"
+            case ModelProviderName.cerebras:
+                litellm_provider_name = "cerebras"
+            case ModelProviderName.siliconflow_cn:
+                is_custom = True
             case ModelProviderName.openai_compatible:
                 is_custom = True
             case ModelProviderName.kiln_custom_registry:
@@ -354,7 +521,7 @@
     async def build_completion_kwargs(
         self,
         provider: KilnModelProvider,
-        messages: list[dict[str, Any]],
+        messages: list[ChatCompletionMessageIncludingLiteLLM],
         top_logprobs: int | None,
         skip_response_format: bool = False,
     ) -> dict[str, Any]:
@@ -377,9 +544,23 @@
             **self._additional_body_options,
         }
 
+        tool_calls = await self.litellm_tools()
+        has_tools = len(tool_calls) > 0
+        if has_tools:
+            completion_kwargs["tools"] = tool_calls
+            completion_kwargs["tool_choice"] = "auto"
+
         if not skip_response_format:
             # Response format: json_schema, json_instructions, json_mode, function_calling, etc
             response_format_options = await self.response_format_options()
+
+            # Check for a conflict between tools and response format using tools
+            # We could reconsider this. Model could be able to choose between a final answer or a tool call on any turn. However, good models for tools tend to also support json_schea, so do we need to support both? If we do, merge them, and consider auto vs forced when merging (only forced for final, auto for merged).
+            if has_tools and "tools" in response_format_options:
+                raise ValueError(
+                    "Function calling/tools can't be used as the JSON response format if you're also using tools. Please select a different structured output mode."
+                )
+
             completion_kwargs.update(response_format_options)
 
         if top_logprobs is not None:
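With tools attached, the kwargs handed to litellm.acompletion now include the standard OpenAI-style tools array and tool_choice="auto" next to model, messages, and any response_format options. An illustrative shape is sketched below; the model id, message, and tool definition are made up, not values from the package.

    # Illustrative shape of completion kwargs once tools are attached.
    completion_kwargs = {
        "model": "openai/gpt-4o-mini",  # assumed litellm model id
        "messages": [{"role": "user", "content": "Add 2 and 3"}],
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "add",
                    "description": "Add two numbers",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "a": {"type": "number"},
                            "b": {"type": "number"},
                        },
                        "required": ["a", "b"],
                    },
                },
            }
        ],
        "tool_choice": "auto",
    }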
@@ -388,7 +569,7 @@
         return completion_kwargs
 
 
-    def usage_from_response(self, response: ModelResponse) -> Usage | None:
+    def usage_from_response(self, response: ModelResponse) -> Usage:
         litellm_usage = response.get("usage", None)
 
         # LiteLLM isn't consistent in how it returns the cost.
@@ -396,11 +577,11 @@
         if cost is None and litellm_usage:
             cost = litellm_usage.get("cost", None)
 
-        if not litellm_usage and not cost:
-            return None
-
         usage = Usage()
 
+        if not litellm_usage and not cost:
+            return usage
+
         if litellm_usage and isinstance(litellm_usage, LiteLlmUsage):
             usage.input_tokens = litellm_usage.get("prompt_tokens", None)
             usage.output_tokens = litellm_usage.get("completion_tokens", None)
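usage_from_response now always returns a Usage, possibly empty, instead of Usage | None; that is what lets the new turn loop accumulate with usage += self.usage_from_response(...) with no None checks. A minimal sketch of that accumulation pattern with a stand-in Usage type (the real kiln_ai Usage may differ):

    # Stand-in Usage to show why "return empty, never None" composes with "+=".
    from dataclasses import dataclass


    @dataclass
    class UsageSketch:
        input_tokens: int = 0
        output_tokens: int = 0
        cost: float = 0.0

        def __add__(self, other: "UsageSketch") -> "UsageSketch":
            return UsageSketch(
                self.input_tokens + other.input_tokens,
                self.output_tokens + other.output_tokens,
                self.cost + other.cost,
            )


    total = UsageSketch()
    # An empty UsageSketch() stands for "provider reported no usage".
    for per_call in [UsageSketch(120, 30, 0.0004), UsageSketch()]:
        total += per_call  # never None, so no special-casing per call
    print(total)  # UsageSketch(input_tokens=120, output_tokens=30, cost=0.0004)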
@@ -419,3 +600,139 @@
             )
 
         return usage
+
+    async def cached_available_tools(self) -> list[KilnToolInterface]:
+        if self._cached_available_tools is None:
+            self._cached_available_tools = await self.available_tools()
+        return self._cached_available_tools
+
+    async def litellm_tools(self) -> list[Dict]:
+        available_tools = await self.cached_available_tools()
+
+        # LiteLLM takes the standard OpenAI-compatible tool call format
+        return [await tool.toolcall_definition() for tool in available_tools]
+
+    async def process_tool_calls(
+        self, tool_calls: list[ChatCompletionMessageToolCall] | None
+    ) -> tuple[str | None, list[ChatCompletionToolMessageParam]]:
+        if tool_calls is None:
+            return None, []
+
+        assistant_output_from_toolcall: str | None = None
+        tool_call_response_messages: list[ChatCompletionToolMessageParam] = []
+
+        for tool_call in tool_calls:
+            # Kiln "task_response" tool is used for returning structured output via tool calls.
+            # Load the output from the tool call. Also
+            if tool_call.function.name == "task_response":
+                assistant_output_from_toolcall = tool_call.function.arguments
+                continue
+
+            # Process normal tool calls (not the "task_response" tool)
+            tool_name = tool_call.function.name
+            tool = None
+            for tool_option in await self.cached_available_tools():
+                if await tool_option.name() == tool_name:
+                    tool = tool_option
+                    break
+            if not tool:
+                raise RuntimeError(
+                    f"A tool named '{tool_name}' was invoked by a model, but was not available."
+                )
+
+            # Parse the arguments and validate them against the tool's schema
+            try:
+                parsed_args = json.loads(tool_call.function.arguments)
+            except json.JSONDecodeError:
+                raise RuntimeError(
+                    f"Failed to parse arguments for tool '{tool_name}' (should be JSON): {tool_call.function.arguments}"
+                )
+            try:
+                tool_call_definition = await tool.toolcall_definition()
+                json_schema = json.dumps(tool_call_definition["function"]["parameters"])
+                validate_schema_with_value_error(parsed_args, json_schema)
+            except Exception as e:
+                raise RuntimeError(
+                    f"Failed to validate arguments for tool '{tool_name}'. The arguments didn't match the tool's schema. The arguments were: {parsed_args}\n The error was: {e}"
+                ) from e
+
+            result = await tool.run(**parsed_args)
+
+            tool_call_response_messages.append(
+                ChatCompletionToolMessageParam(
+                    role="tool",
+                    tool_call_id=tool_call.id,
+                    content=result,
+                )
+            )
+
+        if (
+            assistant_output_from_toolcall is not None
+            and len(tool_call_response_messages) > 0
+        ):
+            raise RuntimeError(
+                "Model asked for impossible combination: task_response tool call and other tool calls were both provided in the same turn. This is not supported as it means the model asked us to both return task_response results (ending the turn) and run new tools calls to send back to the model. If the model makes this mistake often, try a difference structured data model like JSON schema, where this is impossible."
+            )
+
+        return assistant_output_from_toolcall, tool_call_response_messages
+
+    def litellm_message_to_trace_message(
+        self, raw_message: LiteLLMMessage
+    ) -> ChatCompletionAssistantMessageParamWrapper:
+        """
+        Convert a LiteLLM Message object to an OpenAI compatible message, our ChatCompletionAssistantMessageParamWrapper
+        """
+        message: ChatCompletionAssistantMessageParamWrapper = {
+            "role": "assistant",
+        }
+        if raw_message.role != "assistant":
+            raise ValueError(
+                "Model returned a message with a role other than assistant. This is not supported."
+            )
+
+        if hasattr(raw_message, "content"):
+            message["content"] = raw_message.content
+        if hasattr(raw_message, "reasoning_content"):
+            message["reasoning_content"] = raw_message.reasoning_content
+        if hasattr(raw_message, "tool_calls"):
+            # Convert ChatCompletionMessageToolCall to ChatCompletionMessageToolCallParam
+            open_ai_tool_calls: List[ChatCompletionMessageToolCallParam] = []
+            for litellm_tool_call in raw_message.tool_calls or []:
+                # Optional in the SDK for streaming responses, but should never be None at this point.
+                if litellm_tool_call.function.name is None:
+                    raise ValueError(
+                        "The model requested a tool call, without providing a function name (required)."
+                    )
+                open_ai_tool_calls.append(
+                    ChatCompletionMessageToolCallParam(
+                        id=litellm_tool_call.id,
+                        type="function",
+                        function={
+                            "name": litellm_tool_call.function.name,
+                            "arguments": litellm_tool_call.function.arguments,
+                        },
+                    )
+                )
+            if len(open_ai_tool_calls) > 0:
+                message["tool_calls"] = open_ai_tool_calls
+
+        if not message.get("content") and not message.get("tool_calls"):
+            raise ValueError(
+                "Model returned an assistant message, but no content or tool calls. This is not supported."
+            )
+
+        return message
+
+    def all_messages_to_trace(
+        self, messages: list[ChatCompletionMessageIncludingLiteLLM]
+    ) -> list[ChatCompletionMessageParam]:
+        """
+        Internally we allow LiteLLM Message objects, but for trace we need OpenAI compatible types. Replace LiteLLM Message objects with OpenAI compatible types.
+        """
+        trace: list[ChatCompletionMessageParam] = []
+        for message in messages:
+            if isinstance(message, LiteLLMMessage):
+                trace.append(self.litellm_message_to_trace_message(message))
+            else:
+                trace.append(message)
+        return trace
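process_tool_calls only relies on three awaitable members of each available tool: name(), toolcall_definition() in the OpenAI function-call format, and run(**parsed_args), whose return value becomes the string content of the role="tool" message. A hypothetical minimal tool satisfying that contract is sketched below; the real KilnToolInterface in kiln_ai/tools/base_tool.py may require more than this.

    # Hypothetical minimal tool matching the calls made in process_tool_calls above.
    class AddTool:
        async def name(self) -> str:
            return "add"

        async def toolcall_definition(self) -> dict:
            return {
                "type": "function",
                "function": {
                    "name": "add",
                    "description": "Add two numbers",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "a": {"type": "number"},
                            "b": {"type": "number"},
                        },
                        "required": ["a", "b"],
                    },
                },
            }

        async def run(self, a: float, b: float) -> str:
            # Tool results are sent back to the model as message content, so return a string.
            return str(a + b)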