kiln-ai 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of kiln-ai has been flagged as potentially problematic.

Files changed (133)
  1. kiln_ai/adapters/__init__.py +6 -0
  2. kiln_ai/adapters/adapter_registry.py +43 -226
  3. kiln_ai/adapters/chunkers/__init__.py +13 -0
  4. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  5. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  6. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  7. kiln_ai/adapters/chunkers/helpers.py +23 -0
  8. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  9. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  10. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  11. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  12. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  13. kiln_ai/adapters/embedding/__init__.py +0 -0
  14. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  15. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  16. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  17. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  18. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  19. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  20. kiln_ai/adapters/eval/eval_runner.py +6 -2
  21. kiln_ai/adapters/eval/test_base_eval.py +1 -3
  22. kiln_ai/adapters/eval/test_g_eval.py +1 -1
  23. kiln_ai/adapters/extractors/__init__.py +18 -0
  24. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  25. kiln_ai/adapters/extractors/encoding.py +20 -0
  26. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  27. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  28. kiln_ai/adapters/extractors/litellm_extractor.py +406 -0
  29. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  30. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  31. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  32. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  33. kiln_ai/adapters/extractors/test_litellm_extractor.py +1290 -0
  34. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  35. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +2 -6
  36. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  37. kiln_ai/adapters/ml_embedding_model_list.py +494 -0
  38. kiln_ai/adapters/ml_model_list.py +876 -18
  39. kiln_ai/adapters/model_adapters/litellm_adapter.py +40 -75
  40. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +79 -1
  41. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
  42. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
  43. kiln_ai/adapters/model_adapters/test_structured_output.py +9 -10
  44. kiln_ai/adapters/ollama_tools.py +69 -12
  45. kiln_ai/adapters/provider_tools.py +190 -46
  46. kiln_ai/adapters/rag/deduplication.py +49 -0
  47. kiln_ai/adapters/rag/progress.py +252 -0
  48. kiln_ai/adapters/rag/rag_runners.py +844 -0
  49. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  50. kiln_ai/adapters/rag/test_progress.py +785 -0
  51. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  52. kiln_ai/adapters/remote_config.py +80 -8
  53. kiln_ai/adapters/test_adapter_registry.py +579 -86
  54. kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
  55. kiln_ai/adapters/test_ml_model_list.py +202 -0
  56. kiln_ai/adapters/test_ollama_tools.py +340 -1
  57. kiln_ai/adapters/test_prompt_builders.py +1 -1
  58. kiln_ai/adapters/test_provider_tools.py +199 -8
  59. kiln_ai/adapters/test_remote_config.py +551 -56
  60. kiln_ai/adapters/vector_store/__init__.py +1 -0
  61. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  62. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  63. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  64. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  65. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  66. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  67. kiln_ai/datamodel/__init__.py +16 -13
  68. kiln_ai/datamodel/basemodel.py +201 -4
  69. kiln_ai/datamodel/chunk.py +158 -0
  70. kiln_ai/datamodel/datamodel_enums.py +27 -0
  71. kiln_ai/datamodel/embedding.py +64 -0
  72. kiln_ai/datamodel/external_tool_server.py +206 -54
  73. kiln_ai/datamodel/extraction.py +317 -0
  74. kiln_ai/datamodel/project.py +33 -1
  75. kiln_ai/datamodel/rag.py +79 -0
  76. kiln_ai/datamodel/task.py +5 -0
  77. kiln_ai/datamodel/task_output.py +41 -11
  78. kiln_ai/datamodel/test_attachment.py +649 -0
  79. kiln_ai/datamodel/test_basemodel.py +270 -14
  80. kiln_ai/datamodel/test_chunk_models.py +317 -0
  81. kiln_ai/datamodel/test_dataset_split.py +1 -1
  82. kiln_ai/datamodel/test_datasource.py +50 -0
  83. kiln_ai/datamodel/test_embedding_models.py +448 -0
  84. kiln_ai/datamodel/test_eval_model.py +6 -6
  85. kiln_ai/datamodel/test_external_tool_server.py +534 -152
  86. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  87. kiln_ai/datamodel/test_extraction_model.py +501 -0
  88. kiln_ai/datamodel/test_rag.py +641 -0
  89. kiln_ai/datamodel/test_task.py +35 -1
  90. kiln_ai/datamodel/test_tool_id.py +187 -1
  91. kiln_ai/datamodel/test_vector_store.py +320 -0
  92. kiln_ai/datamodel/tool_id.py +58 -0
  93. kiln_ai/datamodel/vector_store.py +141 -0
  94. kiln_ai/tools/base_tool.py +12 -3
  95. kiln_ai/tools/built_in_tools/math_tools.py +12 -4
  96. kiln_ai/tools/kiln_task_tool.py +158 -0
  97. kiln_ai/tools/mcp_server_tool.py +2 -2
  98. kiln_ai/tools/mcp_session_manager.py +51 -22
  99. kiln_ai/tools/rag_tools.py +164 -0
  100. kiln_ai/tools/test_kiln_task_tool.py +527 -0
  101. kiln_ai/tools/test_mcp_server_tool.py +4 -15
  102. kiln_ai/tools/test_mcp_session_manager.py +187 -227
  103. kiln_ai/tools/test_rag_tools.py +929 -0
  104. kiln_ai/tools/test_tool_registry.py +290 -7
  105. kiln_ai/tools/tool_registry.py +69 -16
  106. kiln_ai/utils/__init__.py +3 -0
  107. kiln_ai/utils/async_job_runner.py +62 -17
  108. kiln_ai/utils/config.py +2 -2
  109. kiln_ai/utils/env.py +15 -0
  110. kiln_ai/utils/filesystem.py +14 -0
  111. kiln_ai/utils/filesystem_cache.py +60 -0
  112. kiln_ai/utils/litellm.py +94 -0
  113. kiln_ai/utils/lock.py +100 -0
  114. kiln_ai/utils/mime_type.py +38 -0
  115. kiln_ai/utils/open_ai_types.py +19 -2
  116. kiln_ai/utils/pdf_utils.py +59 -0
  117. kiln_ai/utils/test_async_job_runner.py +151 -35
  118. kiln_ai/utils/test_env.py +142 -0
  119. kiln_ai/utils/test_filesystem_cache.py +316 -0
  120. kiln_ai/utils/test_litellm.py +206 -0
  121. kiln_ai/utils/test_lock.py +185 -0
  122. kiln_ai/utils/test_mime_type.py +66 -0
  123. kiln_ai/utils/test_open_ai_types.py +88 -12
  124. kiln_ai/utils/test_pdf_utils.py +86 -0
  125. kiln_ai/utils/test_uuid.py +111 -0
  126. kiln_ai/utils/test_validation.py +524 -0
  127. kiln_ai/utils/uuid.py +9 -0
  128. kiln_ai/utils/validation.py +90 -0
  129. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +9 -1
  130. kiln_ai-0.22.0.dist-info/RECORD +213 -0
  131. kiln_ai-0.20.1.dist-info/RECORD +0 -138
  132. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
  133. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/adapters/model_adapters/litellm_adapter.py

@@ -11,13 +11,8 @@ from litellm.types.utils import (
     Choices,
     ModelResponse,
 )
-from litellm.types.utils import (
-    Message as LiteLLMMessage,
-)
+from litellm.types.utils import Message as LiteLLMMessage
 from litellm.types.utils import Usage as LiteLlmUsage
-from openai.types.chat import (
-    ChatCompletionToolMessageParam,
-)
 from openai.types.chat.chat_completion_message_tool_call_param import (
     ChatCompletionMessageToolCallParam,
 )
@@ -36,11 +31,14 @@ from kiln_ai.adapters.model_adapters.base_adapter import (
 )
 from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
 from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
-from kiln_ai.tools.base_tool import KilnToolInterface
+from kiln_ai.tools.base_tool import KilnToolInterface, ToolCallContext
+from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+from kiln_ai.utils.litellm import get_litellm_provider_info
 from kiln_ai.utils.open_ai_types import (
     ChatCompletionAssistantMessageParamWrapper,
     ChatCompletionMessageParam,
+    ChatCompletionToolMessageParamWrapper,
 )
 
 MAX_CALLS_PER_TURN = 10
@@ -447,75 +445,16 @@ class LiteLlmAdapter(BaseAdapter):
     def litellm_model_id(self) -> str:
         # The model ID is an interesting combination of format and url endpoint.
         # It specifics the provider URL/host, but this is overridden if you manually set an api url
-
         if self._litellm_model_id:
             return self._litellm_model_id
 
-        provider = self.model_provider()
-        if not provider.model_id:
-            raise ValueError("Model ID is required for OpenAI compatible models")
-
-        litellm_provider_name: str | None = None
-        is_custom = False
-        match provider.name:
-            case ModelProviderName.openrouter:
-                litellm_provider_name = "openrouter"
-            case ModelProviderName.openai:
-                litellm_provider_name = "openai"
-            case ModelProviderName.groq:
-                litellm_provider_name = "groq"
-            case ModelProviderName.anthropic:
-                litellm_provider_name = "anthropic"
-            case ModelProviderName.ollama:
-                # We don't let litellm use the Ollama API and muck with our requests. We use Ollama's OpenAI compatible API.
-                # This is because we're setting detailed features like response_format=json_schema and want lower level control.
-                is_custom = True
-            case ModelProviderName.docker_model_runner:
-                # Docker Model Runner uses OpenAI-compatible API, similar to Ollama
-                # We want direct control over the requests for features like response_format=json_schema
-                is_custom = True
-            case ModelProviderName.gemini_api:
-                litellm_provider_name = "gemini"
-            case ModelProviderName.fireworks_ai:
-                litellm_provider_name = "fireworks_ai"
-            case ModelProviderName.amazon_bedrock:
-                litellm_provider_name = "bedrock"
-            case ModelProviderName.azure_openai:
-                litellm_provider_name = "azure"
-            case ModelProviderName.huggingface:
-                litellm_provider_name = "huggingface"
-            case ModelProviderName.vertex:
-                litellm_provider_name = "vertex_ai"
-            case ModelProviderName.together_ai:
-                litellm_provider_name = "together_ai"
-            case ModelProviderName.cerebras:
-                litellm_provider_name = "cerebras"
-            case ModelProviderName.siliconflow_cn:
-                is_custom = True
-            case ModelProviderName.openai_compatible:
-                is_custom = True
-            case ModelProviderName.kiln_custom_registry:
-                is_custom = True
-            case ModelProviderName.kiln_fine_tune:
-                is_custom = True
-            case _:
-                raise_exhaustive_enum_error(provider.name)
-
-        if is_custom:
-            if self._api_base is None:
-                raise ValueError(
-                    "Explicit Base URL is required for OpenAI compatible APIs (custom models, ollama, fine tunes, and custom registry models)"
-                )
-            # Use openai as it's only used for format, not url
-            litellm_provider_name = "openai"
-
-        # Sholdn't be possible but keep type checker happy
-        if litellm_provider_name is None:
+        litellm_provider_info = get_litellm_provider_info(self.model_provider())
+        if litellm_provider_info.is_custom and self._api_base is None:
             raise ValueError(
-                f"Provider name could not lookup valid litellm provider ID {provider.model_id}"
+                "Explicit Base URL is required for OpenAI compatible APIs (custom models, ollama, fine tunes, and custom registry models)"
             )
 
-        self._litellm_model_id = litellm_provider_name + "/" + provider.model_id
+        self._litellm_model_id = litellm_provider_info.litellm_model_id
         return self._litellm_model_id
 
     async def build_completion_kwargs(
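
Note: the hunk above replaces the adapter's inline provider-to-litellm mapping with a call to get_litellm_provider_info from the new kiln_ai/utils/litellm.py (added in this release, +94 lines, not shown here). The adapter only relies on two attributes of the returned object: is_custom and litellm_model_id. The sketch below reconstructs that contract from what the adapter reads; apart from get_litellm_provider_info, is_custom, and litellm_model_id, the names and structure are assumptions rather than the actual implementation.

# Hypothetical sketch of the contract behind get_litellm_provider_info; the real
# implementation lives in kiln_ai/utils/litellm.py and is not part of this hunk.
from dataclasses import dataclass


@dataclass
class LitellmProviderInfo:
    is_custom: bool        # OpenAI-compatible endpoint that requires an explicit base URL
    litellm_model_id: str  # "<litellm prefix>/<model_id>", the string litellm expects


def get_litellm_provider_info(provider) -> LitellmProviderInfo:
    """Map a Kiln model provider to the prefix litellm uses in its model string."""
    prefixes = {
        "openrouter": "openrouter",
        "openai": "openai",
        "groq": "groq",
        "anthropic": "anthropic",
        "gemini_api": "gemini",
        "fireworks_ai": "fireworks_ai",
        "amazon_bedrock": "bedrock",
        "azure_openai": "azure",
        "huggingface": "huggingface",
        "vertex": "vertex_ai",
        "together_ai": "together_ai",
        "cerebras": "cerebras",
    }
    prefix = prefixes.get(provider.name)
    if prefix is None:
        # ollama, docker_model_runner, siliconflow_cn, openai_compatible,
        # kiln_custom_registry and kiln_fine_tune use the OpenAI request format only;
        # their URL comes from the adapter's explicit api_base (the real helper handles
        # truly unknown providers via raise_exhaustive_enum_error).
        return LitellmProviderInfo(is_custom=True, litellm_model_id=f"openai/{provider.model_id}")
    return LitellmProviderInfo(is_custom=False, litellm_model_id=f"{prefix}/{provider.model_id}")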
@@ -550,6 +489,21 @@ class LiteLlmAdapter(BaseAdapter):
             completion_kwargs["tools"] = tool_calls
             completion_kwargs["tool_choice"] = "auto"
 
+        # Special condition for Claude Opus 4.1 and Sonnet 4.5, where we can only specify top_p or temp, not both.
+        # Remove default values (1.0) prioritizing anything the user customized, then error with helpful message if they are both custom.
+        if provider.temp_top_p_exclusive:
+            if "top_p" in completion_kwargs and completion_kwargs["top_p"] == 1.0:
+                del completion_kwargs["top_p"]
+            if (
+                "temperature" in completion_kwargs
+                and completion_kwargs["temperature"] == 1.0
+            ):
+                del completion_kwargs["temperature"]
+            if "top_p" in completion_kwargs and "temperature" in completion_kwargs:
+                raise ValueError(
+                    "top_p and temperature can not both have custom values for this model. This is a restriction from the model provider. Please set only one of them to a custom value (not 1.0)."
+                )
+
         if not skip_response_format:
             # Response format: json_schema, json_instructions, json_mode, function_calling, etc
             response_format_options = await self.response_format_options()
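
Note: for providers flagged temp_top_p_exclusive (per the comment above, Claude Opus 4.1 and Sonnet 4.5), the added block treats 1.0 as the uncustomized default, drops whichever of the two values is still at that default, and raises if both were customized. A standalone sketch of the same rule follows; the function name and dict shape are illustrative only, while the defaults and the error match the code above.

# Minimal sketch of the temp/top_p exclusivity rule, assuming 1.0 means "not customized"
# as in the adapter code above. resolve_sampling_params is an illustrative name.
def resolve_sampling_params(temperature: float, top_p: float, temp_top_p_exclusive: bool) -> dict:
    params = {"temperature": temperature, "top_p": top_p}
    if temp_top_p_exclusive:
        if params["top_p"] == 1.0:
            del params["top_p"]
        if params.get("temperature") == 1.0:
            del params["temperature"]
        if "temperature" in params and "top_p" in params:
            raise ValueError(
                "top_p and temperature can not both have custom values for this model."
            )
    return params


# A provider with the flag set accepts one customized knob but not both:
assert resolve_sampling_params(0.7, 1.0, temp_top_p_exclusive=True) == {"temperature": 0.7}
assert resolve_sampling_params(1.0, 0.9, temp_top_p_exclusive=True) == {"top_p": 0.9}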
@@ -614,12 +568,12 @@
 
     async def process_tool_calls(
         self, tool_calls: list[ChatCompletionMessageToolCall] | None
-    ) -> tuple[str | None, list[ChatCompletionToolMessageParam]]:
+    ) -> tuple[str | None, list[ChatCompletionToolMessageParamWrapper]]:
         if tool_calls is None:
             return None, []
 
         assistant_output_from_toolcall: str | None = None
-        tool_call_response_messages: list[ChatCompletionToolMessageParam] = []
+        tool_call_response_messages: list[ChatCompletionToolMessageParamWrapper] = []
 
         for tool_call in tool_calls:
             # Kiln "task_response" tool is used for returning structured output via tool calls.
@@ -656,13 +610,24 @@
                     f"Failed to validate arguments for tool '{tool_name}'. The arguments didn't match the tool's schema. The arguments were: {parsed_args}\n The error was: {e}"
                 ) from e
 
-            result = await tool.run(**parsed_args)
+            # Create context with the calling task's allow_saving setting
+            context = ToolCallContext(
+                allow_saving=self.base_adapter_config.allow_saving
+            )
+            result = await tool.run(context, **parsed_args)
+            if isinstance(result, KilnTaskToolResult):
+                content = result.output
+                kiln_task_tool_data = result.kiln_task_tool_data
+            else:
+                content = result
+                kiln_task_tool_data = None
 
             tool_call_response_messages.append(
-                ChatCompletionToolMessageParam(
+                ChatCompletionToolMessageParamWrapper(
                     role="tool",
                     tool_call_id=tool_call.id,
-                    content=result,
+                    content=content,
+                    kiln_task_tool_data=kiln_task_tool_data,
                 )
             )
 
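
Note: with the change above, every tool now receives a ToolCallContext as its first positional argument, and a tool's run() may return either a plain string or a KilnTaskToolResult whose kiln_task_tool_data is carried on the tool message via ChatCompletionToolMessageParamWrapper. A condensed sketch of that flow follows; call_tool and the returned dict are illustrative, while the context and result types and their fields come from this diff.

# Condensed sketch of the new tool-call flow. ToolCallContext, KilnTaskToolResult and
# their fields are taken from this diff; call_tool itself is illustrative, not package API.
from kiln_ai.tools.base_tool import ToolCallContext
from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult


async def call_tool(tool, tool_call_id: str, allow_saving: bool, **parsed_args) -> dict:
    # The context is the first positional argument, followed by the validated tool arguments.
    context = ToolCallContext(allow_saving=allow_saving)
    result = await tool.run(context, **parsed_args)
    if isinstance(result, KilnTaskToolResult):
        # Kiln task tools return their text output plus data linking back to the task run.
        content, task_data = result.output, result.kiln_task_tool_data
    else:
        content, task_data = result, None
    return {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "content": content,
        "kiln_task_tool_data": task_data,
    }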

kiln_ai/adapters/model_adapters/test_litellm_adapter.py

@@ -351,7 +351,7 @@ def test_litellm_model_id_unknown_provider(config, mock_task):
 
     with patch.object(adapter, "model_provider", return_value=mock_provider):
         with patch(
-            "kiln_ai.adapters.model_adapters.litellm_adapter.raise_exhaustive_enum_error"
+            "kiln_ai.utils.litellm.raise_exhaustive_enum_error"
         ) as mock_raise_error:
             mock_raise_error.side_effect = Exception("Test error")
 
@@ -405,6 +405,7 @@ async def test_build_completion_kwargs_custom_temperature_top_p(config, mock_tas
 
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -446,6 +447,7 @@ async def test_build_completion_kwargs(
     """Test build_completion_kwargs with various configurations"""
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -613,6 +615,7 @@ async def test_build_completion_kwargs_includes_tools(
     """Test build_completion_kwargs includes tools when available_tools has tools"""
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -666,6 +669,7 @@ async def test_build_completion_kwargs_raises_error_with_tools_conflict(
     config.run_config_properties.structured_output_mode = structured_output_mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -976,3 +980,77 @@ def test_build_extra_body_enable_thinking(config, mock_task, enable_thinking):
     extra_body = adapter.build_extra_body(provider)
 
     assert extra_body["enable_thinking"] == enable_thinking
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "temperature,top_p,should_raise,expected_temp,expected_top_p",
+    [
+        (1.0, 1.0, False, None, None),
+        (0.7, 1.0, False, 0.7, None),
+        (1.0, 0.9, False, None, 0.9),
+        (0.7, 0.9, True, None, None),
+        (0.5, 0.5, True, None, None),
+    ],
+)
+async def test_build_completion_kwargs_temp_top_p_exclusive(
+    config, mock_task, temperature, top_p, should_raise, expected_temp, expected_top_p
+):
+    """Test build_completion_kwargs with temp_top_p_exclusive provider flag"""
+    config.run_config_properties.temperature = temperature
+    config.run_config_properties.top_p = top_p
+
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+    mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = True
+    messages = [{"role": "user", "content": "Hello"}]
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch.object(adapter, "litellm_model_id", return_value="anthropic/test-model"),
+        patch.object(adapter, "build_extra_body", return_value={}),
+        patch.object(adapter, "response_format_options", return_value={}),
+    ):
+        if should_raise:
+            with pytest.raises(
+                ValueError,
+                match="top_p and temperature can not both have custom values",
+            ):
+                await adapter.build_completion_kwargs(mock_provider, messages, None)
+        else:
+            kwargs = await adapter.build_completion_kwargs(
+                mock_provider, messages, None
+            )
+
+            if expected_temp is None:
+                assert "temperature" not in kwargs
+            else:
+                assert kwargs["temperature"] == expected_temp
+
+            if expected_top_p is None:
+                assert "top_p" not in kwargs
+            else:
+                assert kwargs["top_p"] == expected_top_p
+
+
+@pytest.mark.asyncio
+async def test_build_completion_kwargs_temp_top_p_not_exclusive(config, mock_task):
+    """Test build_completion_kwargs with temp_top_p_exclusive=False allows both params"""
+    config.run_config_properties.temperature = 0.7
+    config.run_config_properties.top_p = 0.9
+
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+    mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
+    messages = [{"role": "user", "content": "Hello"}]
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch.object(adapter, "litellm_model_id", return_value="openai/test-model"),
+        patch.object(adapter, "build_extra_body", return_value={}),
+        patch.object(adapter, "response_format_options", return_value={}),
+    ):
+        kwargs = await adapter.build_completion_kwargs(mock_provider, messages, None)
+
+        assert kwargs["temperature"] == 0.7
+        assert kwargs["top_p"] == 0.9

kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py

@@ -18,12 +18,15 @@ from kiln_ai.adapters.test_prompt_adaptors import get_all_models_and_providers
 from kiln_ai.datamodel import PromptId
 from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
 from kiln_ai.datamodel.task import RunConfigProperties
+from kiln_ai.datamodel.tool_id import ToolId
+from kiln_ai.tools.base_tool import ToolCallContext
 from kiln_ai.tools.built_in_tools.math_tools import (
     AddTool,
     DivideTool,
     MultiplyTool,
     SubtractTool,
 )
+from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult
 from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
 
 
@@ -91,6 +94,7 @@ async def run_simple_task_with_tools(
     # Verify that AddTool.run was called with correct parameters
     add_spy.run.assert_called()
     add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
     add_kwargs = add_call_args.kwargs
     assert add_kwargs.get("a") == 2
     assert add_kwargs.get("b") == 2
@@ -126,6 +130,9 @@ async def run_simple_task_with_tools(
     # Verify that MultiplyTool.run was called with correct parameters
     multiply_spy.run.assert_called()
     multiply_call_args = multiply_spy.run.call_args
+    assert multiply_call_args.args[
+        0
+    ].allow_saving  # First arg is ToolCallContext
     multiply_kwargs = multiply_call_args.kwargs
     # Check that multiply was called with a=6, b=10 (or vice versa)
     assert (
@@ -137,6 +144,7 @@ async def run_simple_task_with_tools(
     # Verify that AddTool.run was called with correct parameters
     add_spy.run.assert_called()
     add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
     add_kwargs = add_call_args.kwargs
     # Check that add was called with a=60, b=4 (or vice versa)
     assert (add_kwargs.get("a") == 60 and add_kwargs.get("b") == 4) or (
@@ -482,8 +490,16 @@ async def test_run_model_turn_parallel_tools(tmp_path):
     )
 
     # Verify both tools were called in parallel
-    multiply_spy.run.assert_called_once_with(a=6, b=10)
-    add_spy.run.assert_called_once_with(a=2, b=3)
+    # The context is passed as the first positional argument, not as a keyword argument
+    multiply_spy.run.assert_called_once()
+    multiply_call_args = multiply_spy.run.call_args
+    assert multiply_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert multiply_call_args.kwargs == {"a": 6, "b": 10}
+
+    add_spy.run.assert_called_once()
+    add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert add_call_args.kwargs == {"a": 2, "b": 3}
 
     # Verify the result structure
     assert isinstance(result, ModelTurnResult)
@@ -596,8 +612,16 @@ async def test_run_model_turn_sequential_tools(tmp_path):
     )
 
     # Verify tools were called sequentially
-    multiply_spy.run.assert_called_once_with(a=6, b=10)
-    add_spy.run.assert_called_once_with(a=60, b=4)
+    # The context is passed as the first positional argument, not as a keyword argument
+    multiply_spy.run.assert_called_once()
+    multiply_call_args = multiply_spy.run.call_args
+    assert multiply_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert multiply_call_args.kwargs == {"a": 6, "b": 10}
+
+    add_spy.run.assert_called_once()
+    add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert add_call_args.kwargs == {"a": 60, "b": 4}
 
     # Verify the result structure
     assert isinstance(result, ModelTurnResult)
@@ -756,11 +780,59 @@ class MockTool:
             }
         }
 
-    async def run(self, **kwargs) -> str:
+    async def run(self, context: ToolCallContext | None = None, **kwargs) -> str:
         if self._raise_on_run:
             raise self._raise_on_run
         return self._return_value
 
+    async def id(self) -> ToolId:
+        """Mock implementation of id for testing."""
+        return f"mock_tool_{self._name}"
+
+
+class MockKilnTaskTool:
+    """Mock tool class that returns KilnTaskToolResult for testing"""
+
+    def __init__(
+        self,
+        name: str,
+        raise_on_run: Exception | None = None,
+        output: str = "kiln_task_output",
+        kiln_task_tool_data: str = "project_id:::tool_id:::task_id:::run_id",
+    ):
+        self._name = name
+        self._raise_on_run = raise_on_run
+        self._output = output
+        self._kiln_task_tool_data = kiln_task_tool_data
+
+    async def name(self) -> str:
+        return self._name
+
+    async def toolcall_definition(self) -> dict:
+        return {
+            "function": {
+                "parameters": {
+                    "type": "object",
+                    "properties": {"input": {"type": "string"}},
+                    "required": ["input"],
+                }
+            }
+        }
+
+    async def run(
+        self, context: ToolCallContext | None = None, **kwargs
+    ) -> KilnTaskToolResult:
+        if self._raise_on_run:
+            raise self._raise_on_run
+        return KilnTaskToolResult(
+            output=self._output,
+            kiln_task_tool_data=self._kiln_task_tool_data,
+        )
+
+    async def id(self) -> ToolId:
+        """Mock implementation of id for testing."""
+        return f"mock_kiln_task_tool_{self._name}"
+
 
 async def test_process_tool_calls_none_input(tmp_path):
     """Test process_tool_calls with None input"""
@@ -879,6 +951,7 @@ async def test_process_tool_calls_normal_tool_success(tmp_path):
         "role": "tool",
         "tool_call_id": "call_1",
         "content": "5",
+        "kiln_task_tool_data": None,
     }
 
 
@@ -915,8 +988,10 @@ async def test_process_tool_calls_multiple_normal_tools(tmp_path):
     assert len(tool_messages) == 2
     assert tool_messages[0]["tool_call_id"] == "call_1"
     assert tool_messages[0]["content"] == "5"
+    assert tool_messages[0].get("kiln_task_tool_data") is None
     assert tool_messages[1]["tool_call_id"] == "call_2"
    assert tool_messages[1]["content"] == "6"
+    assert tool_messages[1].get("kiln_task_tool_data") is None
 
 
 async def test_process_tool_calls_tool_not_found(tmp_path):
@@ -1072,6 +1147,7 @@ async def test_process_tool_calls_complex_result(tmp_path):
     assert assistant_output is None
     assert len(tool_messages) == 1
     assert tool_messages[0]["content"] == complex_result
+    assert tool_messages[0].get("kiln_task_tool_data") is None
 
 
 async def test_process_tool_calls_task_response_with_normal_tools_error(tmp_path):
@@ -1101,3 +1177,41 @@ async def test_process_tool_calls_task_response_with_normal_tools_error(tmp_path
         match="task_response tool call and other tool calls were both provided",
     ):
         await litellm_adapter.process_tool_calls(tool_calls)  # type: ignore
+
+
+async def test_process_tool_calls_kiln_task_tool_result(tmp_path):
+    """Test process_tool_calls with KilnTaskToolResult - tests the new if statement branch"""
+    task = build_test_task(tmp_path)
+    config = LiteLlmConfig(
+        run_config_properties=RunConfigProperties(
+            structured_output_mode=StructuredOutputMode.json_schema,
+            model_name="gpt_4_1_mini",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+        )
+    )
+    litellm_adapter = LiteLlmAdapter(config=config, kiln_task=task)
+
+    mock_kiln_task_tool = MockKilnTaskTool(
+        "kiln_task_tool",
+        output="Task completed successfully",
+        kiln_task_tool_data="proj123:::tool456:::task789:::run101",
+    )
+    tool_calls = [MockToolCall("call_1", "kiln_task_tool", '{"input": "test input"}')]
+
+    with patch.object(
+        litellm_adapter, "cached_available_tools", return_value=[mock_kiln_task_tool]
+    ):
+        assistant_output, tool_messages = await litellm_adapter.process_tool_calls(
+            tool_calls  # type: ignore
+        )
+
+    assert assistant_output is None
+    assert len(tool_messages) == 1
+    assert tool_messages[0]["role"] == "tool"
+    assert tool_messages[0]["tool_call_id"] == "call_1"
+    assert tool_messages[0]["content"] == "Task completed successfully"
+    assert (
+        tool_messages[0].get("kiln_task_tool_data")
+        == "proj123:::tool456:::task789:::run101"
+    )

kiln_ai/adapters/model_adapters/test_saving_adapter_results.py

@@ -60,7 +60,9 @@ def test_save_run_isolation(test_task, adapter):
     )
 
     task_run = adapter.generate_run(
-        input=input_data, input_source=None, run_output=run_output
+        input=input_data,
+        input_source=None,
+        run_output=run_output,
     )
     task_run.save_to_file()
 
@@ -146,7 +148,9 @@ def test_generate_run_non_ascii(test_task, adapter):
     )
 
     task_run = adapter.generate_run(
-        input=input_data, input_source=None, run_output=run_output
+        input=input_data,
+        input_source=None,
+        run_output=run_output,
     )
     task_run.save_to_file()
 
@@ -256,7 +260,9 @@ def test_properties_for_task_output_custom_values(test_task):
     run_output = RunOutput(output=output_data, intermediate_outputs=None)
 
     task_run = adapter.generate_run(
-        input=input_data, input_source=None, run_output=run_output
+        input=input_data,
+        input_source=None,
+        run_output=run_output,
     )
     task_run.save_to_file()
 

kiln_ai/adapters/model_adapters/test_structured_output.py

@@ -175,15 +175,12 @@ async def run_structured_output_test(tmp_path: Path, model_name: str, provider:
 
     # Check reasoning models
     assert a._model_provider is not None
-    if a._model_provider.reasoning_capable:
-        # some providers have reasoning_capable models that do not return the reasoning
-        # for structured output responses (they provide it only for non-structured output)
-        if a._model_provider.reasoning_optional_for_structured_output:
-            # models may be updated to include the reasoning in the future
-            assert "reasoning" not in run.intermediate_outputs
-        else:
-            assert "reasoning" in run.intermediate_outputs
-            assert isinstance(run.intermediate_outputs["reasoning"], str)
+    if (
+        a._model_provider.reasoning_capable
+        and not a._model_provider.reasoning_optional_for_structured_output
+    ):
+        assert "reasoning" in run.intermediate_outputs
+        assert isinstance(run.intermediate_outputs["reasoning"], str)
 
 
 def build_structured_input_test_task(tmp_path: Path):
@@ -344,6 +341,7 @@ async def test_all_built_in_models_structured_input_mocked(tmp_path):
     mock_config = Mock()
     mock_config.open_ai_api_key = "mock_api_key"
     mock_config.user_id = "test_user"
+    mock_config.groq_api_key = "mock_api_key"
 
     with (
         patch(
@@ -398,6 +396,7 @@ async def test_structured_input_cot_prompt_builder_mocked(tmp_path):
     mock_config = Mock()
     mock_config.open_ai_api_key = "mock_api_key"
     mock_config.user_id = "test_user"
+    mock_config.groq_api_key = "mock_api_key"
 
     with (
         patch(
@@ -456,7 +455,7 @@ When asked for a final result, this is the format (for an equilateral example):
     """
     task.output_json_schema = json.dumps(triangle_schema)
     task.save_to_file()
-    response, adapter, _ = await run_structured_input_task_no_validation(
+    response, _, _ = await run_structured_input_task_no_validation(
         task, model_name, provider_name, "simple_chain_of_thought_prompt_builder"
     )