kiln-ai 0.21.0__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/extractors/litellm_extractor.py +52 -32
- kiln_ai/adapters/extractors/test_litellm_extractor.py +169 -71
- kiln_ai/adapters/ml_embedding_model_list.py +330 -28
- kiln_ai/adapters/ml_model_list.py +503 -23
- kiln_ai/adapters/model_adapters/litellm_adapter.py +39 -8
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +78 -0
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
- kiln_ai/adapters/model_adapters/test_structured_output.py +6 -9
- kiln_ai/adapters/test_ml_embedding_model_list.py +89 -279
- kiln_ai/adapters/test_ml_model_list.py +0 -10
- kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
- kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
- kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
- kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
- kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
- kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
- kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
- kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
- kiln_ai/datamodel/basemodel.py +31 -3
- kiln_ai/datamodel/external_tool_server.py +206 -54
- kiln_ai/datamodel/extraction.py +14 -0
- kiln_ai/datamodel/task.py +5 -0
- kiln_ai/datamodel/task_output.py +41 -11
- kiln_ai/datamodel/test_attachment.py +3 -3
- kiln_ai/datamodel/test_basemodel.py +269 -13
- kiln_ai/datamodel/test_datasource.py +50 -0
- kiln_ai/datamodel/test_external_tool_server.py +534 -152
- kiln_ai/datamodel/test_extraction_model.py +31 -0
- kiln_ai/datamodel/test_task.py +35 -1
- kiln_ai/datamodel/test_tool_id.py +106 -1
- kiln_ai/datamodel/tool_id.py +49 -0
- kiln_ai/tools/base_tool.py +30 -6
- kiln_ai/tools/built_in_tools/math_tools.py +12 -4
- kiln_ai/tools/kiln_task_tool.py +162 -0
- kiln_ai/tools/mcp_server_tool.py +7 -5
- kiln_ai/tools/mcp_session_manager.py +50 -24
- kiln_ai/tools/rag_tools.py +17 -6
- kiln_ai/tools/test_kiln_task_tool.py +527 -0
- kiln_ai/tools/test_mcp_server_tool.py +4 -15
- kiln_ai/tools/test_mcp_session_manager.py +186 -226
- kiln_ai/tools/test_rag_tools.py +86 -5
- kiln_ai/tools/test_tool_registry.py +199 -5
- kiln_ai/tools/tool_registry.py +49 -17
- kiln_ai/utils/filesystem.py +4 -4
- kiln_ai/utils/open_ai_types.py +19 -2
- kiln_ai/utils/pdf_utils.py +21 -0
- kiln_ai/utils/test_open_ai_types.py +88 -12
- kiln_ai/utils/test_pdf_utils.py +14 -1
- {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/METADATA +79 -1
- {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/RECORD +53 -45
- {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
The extracted hunks below are grouped by source file.

kiln_ai/adapters/model_adapters/litellm_adapter.py:

@@ -13,7 +13,6 @@ from litellm.types.utils import (
 )
 from litellm.types.utils import Message as LiteLLMMessage
 from litellm.types.utils import Usage as LiteLlmUsage
-from openai.types.chat import ChatCompletionToolMessageParam
 from openai.types.chat.chat_completion_message_tool_call_param import (
     ChatCompletionMessageToolCallParam,
 )
@@ -32,12 +31,18 @@ from kiln_ai.adapters.model_adapters.base_adapter import (
 )
 from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
 from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
-from kiln_ai.tools.base_tool import
+from kiln_ai.tools.base_tool import (
+    KilnToolInterface,
+    ToolCallContext,
+    ToolCallDefinition,
+)
+from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 from kiln_ai.utils.litellm import get_litellm_provider_info
 from kiln_ai.utils.open_ai_types import (
     ChatCompletionAssistantMessageParamWrapper,
     ChatCompletionMessageParam,
+    ChatCompletionToolMessageParamWrapper,
 )
 
 MAX_CALLS_PER_TURN = 10
@@ -488,6 +493,21 @@ class LiteLlmAdapter(BaseAdapter):
             completion_kwargs["tools"] = tool_calls
             completion_kwargs["tool_choice"] = "auto"
 
+        # Special condition for Claude Opus 4.1 and Sonnet 4.5, where we can only specify top_p or temp, not both.
+        # Remove default values (1.0) prioritizing anything the user customized, then error with helpful message if they are both custom.
+        if provider.temp_top_p_exclusive:
+            if "top_p" in completion_kwargs and completion_kwargs["top_p"] == 1.0:
+                del completion_kwargs["top_p"]
+            if (
+                "temperature" in completion_kwargs
+                and completion_kwargs["temperature"] == 1.0
+            ):
+                del completion_kwargs["temperature"]
+            if "top_p" in completion_kwargs and "temperature" in completion_kwargs:
+                raise ValueError(
+                    "top_p and temperature can not both have custom values for this model. This is a restriction from the model provider. Please set only one of them to a custom value (not 1.0)."
+                )
+
         if not skip_response_format:
             # Response format: json_schema, json_instructions, json_mode, function_calling, etc
             response_format_options = await self.response_format_options()
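The added block can be read in isolation: a value of 1.0 is treated as "not customized" and dropped, and an error is raised only when both knobs survive the pruning. A minimal sketch of the same behavior on a plain dict (`prune_temp_top_p` is a hypothetical helper, not a kiln_ai API):

```python
# Minimal sketch of the pruning behavior added above; prune_temp_top_p is a
# hypothetical helper, not part of kiln_ai.
def prune_temp_top_p(completion_kwargs: dict) -> dict:
    # 1.0 is the provider default, so it is treated as "not customized".
    if completion_kwargs.get("top_p") == 1.0:
        del completion_kwargs["top_p"]
    if completion_kwargs.get("temperature") == 1.0:
        del completion_kwargs["temperature"]
    # Only a genuinely custom pair is an error.
    if "top_p" in completion_kwargs and "temperature" in completion_kwargs:
        raise ValueError(
            "top_p and temperature can not both have custom values for this model."
        )
    return completion_kwargs


assert prune_temp_top_p({"temperature": 1.0, "top_p": 0.9}) == {"top_p": 0.9}
assert prune_temp_top_p({"temperature": 0.7, "top_p": 1.0}) == {"temperature": 0.7}
```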
@@ -544,7 +564,7 @@ class LiteLlmAdapter(BaseAdapter):
             self._cached_available_tools = await self.available_tools()
         return self._cached_available_tools
 
-    async def litellm_tools(self) -> list[
+    async def litellm_tools(self) -> list[ToolCallDefinition]:
         available_tools = await self.cached_available_tools()
 
         # LiteLLM takes the standard OpenAI-compatible tool call format
@@ -552,12 +572,12 @@ class LiteLlmAdapter(BaseAdapter):
 
     async def process_tool_calls(
         self, tool_calls: list[ChatCompletionMessageToolCall] | None
-    ) -> tuple[str | None, list[
+    ) -> tuple[str | None, list[ChatCompletionToolMessageParamWrapper]]:
        if tool_calls is None:
             return None, []
 
         assistant_output_from_toolcall: str | None = None
-        tool_call_response_messages: list[
+        tool_call_response_messages: list[ChatCompletionToolMessageParamWrapper] = []
 
         for tool_call in tool_calls:
             # Kiln "task_response" tool is used for returning structured output via tool calls.
@@ -594,13 +614,24 @@ class LiteLlmAdapter(BaseAdapter):
                     f"Failed to validate arguments for tool '{tool_name}'. The arguments didn't match the tool's schema. The arguments were: {parsed_args}\n The error was: {e}"
                 ) from e
 
-
+            # Create context with the calling task's allow_saving setting
+            context = ToolCallContext(
+                allow_saving=self.base_adapter_config.allow_saving
+            )
+            result = await tool.run(context, **parsed_args)
+            if isinstance(result, KilnTaskToolResult):
+                content = result.output
+                kiln_task_tool_data = result.kiln_task_tool_data
+            else:
+                content = result
+                kiln_task_tool_data = None
 
             tool_call_response_messages.append(
-
+                ChatCompletionToolMessageParamWrapper(
                     role="tool",
                     tool_call_id=tool_call.id,
-                    content=
+                    content=content,
+                    kiln_task_tool_data=kiln_task_tool_data,
                 )
             )
 
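The new branch separates the two result shapes a tool can return: plain tools return a string, while Kiln task tools return a `KilnTaskToolResult` carrying both the output text and a composite id linking the tool message back to the subtask run. A self-contained sketch of that dispatch, using a stand-in dataclass with the field names shown in the diff:

```python
from dataclasses import dataclass


@dataclass
class KilnTaskToolResult:
    # Stand-in for kiln_ai.tools.kiln_task_tool.KilnTaskToolResult;
    # field names are taken from the diff above.
    output: str
    kiln_task_tool_data: str


def unpack_tool_result(result) -> tuple[str, str | None]:
    # Hypothetical helper mirroring the isinstance branch in process_tool_calls.
    if isinstance(result, KilnTaskToolResult):
        return result.output, result.kiln_task_tool_data
    return result, None


assert unpack_tool_result("42") == ("42", None)
assert unpack_tool_result(
    KilnTaskToolResult(output="done", kiln_task_tool_data="proj:::tool:::task:::run")
) == ("done", "proj:::tool:::task:::run")
```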
kiln_ai/adapters/model_adapters/test_litellm_adapter.py:

@@ -405,6 +405,7 @@ async def test_build_completion_kwargs_custom_temperature_top_p(config, mock_tas
 
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -446,6 +447,7 @@ async def test_build_completion_kwargs(
     """Test build_completion_kwargs with various configurations"""
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -613,6 +615,7 @@ async def test_build_completion_kwargs_includes_tools(
     """Test build_completion_kwargs includes tools when available_tools has tools"""
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
@@ -666,6 +669,7 @@ async def test_build_completion_kwargs_raises_error_with_tools_conflict(
     config.run_config_properties.structured_output_mode = structured_output_mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
     mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
     messages = [{"role": "user", "content": "Hello"}]
 
     with (
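Each of these tests now pins `mock_provider.temp_top_p_exclusive = False` explicitly. That is needed because an unset attribute on a `Mock` auto-creates a child `Mock`, which is truthy, so the new exclusivity branch would otherwise fire. A quick stdlib illustration:

```python
from unittest.mock import Mock

provider = Mock()
# Unset attributes on a Mock are auto-created child Mocks, and Mocks are truthy:
assert bool(provider.temp_top_p_exclusive) is True

# So the tests pin the flag to keep the pre-existing code path:
provider.temp_top_p_exclusive = False
assert not provider.temp_top_p_exclusive
```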
@@ -976,3 +980,77 @@ def test_build_extra_body_enable_thinking(config, mock_task, enable_thinking):
     extra_body = adapter.build_extra_body(provider)
 
     assert extra_body["enable_thinking"] == enable_thinking
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "temperature,top_p,should_raise,expected_temp,expected_top_p",
+    [
+        (1.0, 1.0, False, None, None),
+        (0.7, 1.0, False, 0.7, None),
+        (1.0, 0.9, False, None, 0.9),
+        (0.7, 0.9, True, None, None),
+        (0.5, 0.5, True, None, None),
+    ],
+)
+async def test_build_completion_kwargs_temp_top_p_exclusive(
+    config, mock_task, temperature, top_p, should_raise, expected_temp, expected_top_p
+):
+    """Test build_completion_kwargs with temp_top_p_exclusive provider flag"""
+    config.run_config_properties.temperature = temperature
+    config.run_config_properties.top_p = top_p
+
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+    mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = True
+    messages = [{"role": "user", "content": "Hello"}]
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch.object(adapter, "litellm_model_id", return_value="anthropic/test-model"),
+        patch.object(adapter, "build_extra_body", return_value={}),
+        patch.object(adapter, "response_format_options", return_value={}),
+    ):
+        if should_raise:
+            with pytest.raises(
+                ValueError,
+                match="top_p and temperature can not both have custom values",
+            ):
+                await adapter.build_completion_kwargs(mock_provider, messages, None)
+        else:
+            kwargs = await adapter.build_completion_kwargs(
+                mock_provider, messages, None
+            )
+
+            if expected_temp is None:
+                assert "temperature" not in kwargs
+            else:
+                assert kwargs["temperature"] == expected_temp
+
+            if expected_top_p is None:
+                assert "top_p" not in kwargs
+            else:
+                assert kwargs["top_p"] == expected_top_p
+
+
+@pytest.mark.asyncio
+async def test_build_completion_kwargs_temp_top_p_not_exclusive(config, mock_task):
+    """Test build_completion_kwargs with temp_top_p_exclusive=False allows both params"""
+    config.run_config_properties.temperature = 0.7
+    config.run_config_properties.top_p = 0.9
+
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+    mock_provider = Mock()
+    mock_provider.temp_top_p_exclusive = False
+    messages = [{"role": "user", "content": "Hello"}]
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch.object(adapter, "litellm_model_id", return_value="openai/test-model"),
+        patch.object(adapter, "build_extra_body", return_value={}),
+        patch.object(adapter, "response_format_options", return_value={}),
+    ):
+        kwargs = await adapter.build_completion_kwargs(mock_provider, messages, None)
+
+    assert kwargs["temperature"] == 0.7
+    assert kwargs["top_p"] == 0.9
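The parametrize matrix encodes the contract: 1.0 counts as the default and is dropped, and only two genuinely custom values raise. On the caller side that means leaving one knob at 1.0 when targeting an exclusive provider. A hedged sketch, reusing only the `RunConfigProperties` field names and values that appear in these tests:

```python
from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
from kiln_ai.datamodel.task import RunConfigProperties

# Sketch only; values are illustrative and copied from the tests above.
run_config = RunConfigProperties(
    structured_output_mode=StructuredOutputMode.json_schema,
    model_name="gpt_4_1_mini",
    model_provider_name=ModelProviderName.openai,
    prompt_id="simple_prompt_builder",
    temperature=0.7,  # the one customized knob
    top_p=1.0,        # left at default; dropped for temp_top_p_exclusive providers
)
```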
kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py:

@@ -18,12 +18,15 @@ from kiln_ai.adapters.test_prompt_adaptors import get_all_models_and_providers
 from kiln_ai.datamodel import PromptId
 from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
 from kiln_ai.datamodel.task import RunConfigProperties
+from kiln_ai.datamodel.tool_id import ToolId
+from kiln_ai.tools.base_tool import ToolCallContext
 from kiln_ai.tools.built_in_tools.math_tools import (
     AddTool,
     DivideTool,
     MultiplyTool,
     SubtractTool,
 )
+from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult
 from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
 
 
@@ -91,6 +94,7 @@ async def run_simple_task_with_tools(
     # Verify that AddTool.run was called with correct parameters
     add_spy.run.assert_called()
     add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
     add_kwargs = add_call_args.kwargs
     assert add_kwargs.get("a") == 2
     assert add_kwargs.get("b") == 2
@@ -126,6 +130,9 @@ async def run_simple_task_with_tools(
     # Verify that MultiplyTool.run was called with correct parameters
     multiply_spy.run.assert_called()
     multiply_call_args = multiply_spy.run.call_args
+    assert multiply_call_args.args[
+        0
+    ].allow_saving  # First arg is ToolCallContext
     multiply_kwargs = multiply_call_args.kwargs
     # Check that multiply was called with a=6, b=10 (or vice versa)
     assert (
@@ -137,6 +144,7 @@ async def run_simple_task_with_tools(
     # Verify that AddTool.run was called with correct parameters
     add_spy.run.assert_called()
     add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
     add_kwargs = add_call_args.kwargs
     # Check that add was called with a=60, b=4 (or vice versa)
     assert (add_kwargs.get("a") == 60 and add_kwargs.get("b") == 4) or (
@@ -482,8 +490,16 @@ async def test_run_model_turn_parallel_tools(tmp_path):
     )
 
     # Verify both tools were called in parallel
-
-
+    # The context is passed as the first positional argument, not as a keyword argument
+    multiply_spy.run.assert_called_once()
+    multiply_call_args = multiply_spy.run.call_args
+    assert multiply_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert multiply_call_args.kwargs == {"a": 6, "b": 10}
+
+    add_spy.run.assert_called_once()
+    add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert add_call_args.kwargs == {"a": 2, "b": 3}
 
     # Verify the result structure
     assert isinstance(result, ModelTurnResult)
@@ -596,8 +612,16 @@ async def test_run_model_turn_sequential_tools(tmp_path):
     )
 
     # Verify tools were called sequentially
-
-
+    # The context is passed as the first positional argument, not as a keyword argument
+    multiply_spy.run.assert_called_once()
+    multiply_call_args = multiply_spy.run.call_args
+    assert multiply_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert multiply_call_args.kwargs == {"a": 6, "b": 10}
+
+    add_spy.run.assert_called_once()
+    add_call_args = add_spy.run.call_args
+    assert add_call_args.args[0].allow_saving  # First arg is ToolCallContext
+    assert add_call_args.kwargs == {"a": 60, "b": 4}
 
     # Verify the result structure
     assert isinstance(result, ModelTurnResult)
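The rewritten assertions read the context from `call_args.args[0]` because `run` now receives the `ToolCallContext` as its first positional argument, while the tool arguments still arrive as keywords. The stdlib mechanics, independent of kiln_ai:

```python
import asyncio
from unittest.mock import AsyncMock

spy = AsyncMock(return_value="60")
asyncio.run(spy(object(), a=6, b=10))  # positional context, keyword tool args

call = spy.call_args
assert len(call.args) == 1               # the context landed in .args
assert call.kwargs == {"a": 6, "b": 10}  # the tool arguments landed in .kwargs
```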
@@ -756,11 +780,59 @@ class MockTool:
             }
         }
 
-    async def run(self, **kwargs) -> str:
+    async def run(self, context: ToolCallContext | None = None, **kwargs) -> str:
         if self._raise_on_run:
             raise self._raise_on_run
         return self._return_value
 
+    async def id(self) -> ToolId:
+        """Mock implementation of id for testing."""
+        return f"mock_tool_{self._name}"
+
+
+class MockKilnTaskTool:
+    """Mock tool class that returns KilnTaskToolResult for testing"""
+
+    def __init__(
+        self,
+        name: str,
+        raise_on_run: Exception | None = None,
+        output: str = "kiln_task_output",
+        kiln_task_tool_data: str = "project_id:::tool_id:::task_id:::run_id",
+    ):
+        self._name = name
+        self._raise_on_run = raise_on_run
+        self._output = output
+        self._kiln_task_tool_data = kiln_task_tool_data
+
+    async def name(self) -> str:
+        return self._name
+
+    async def toolcall_definition(self) -> dict:
+        return {
+            "function": {
+                "parameters": {
+                    "type": "object",
+                    "properties": {"input": {"type": "string"}},
+                    "required": ["input"],
+                }
+            }
+        }
+
+    async def run(
+        self, context: ToolCallContext | None = None, **kwargs
+    ) -> KilnTaskToolResult:
+        if self._raise_on_run:
+            raise self._raise_on_run
+        return KilnTaskToolResult(
+            output=self._output,
+            kiln_task_tool_data=self._kiln_task_tool_data,
+        )
+
+    async def id(self) -> ToolId:
+        """Mock implementation of id for testing."""
+        return f"mock_kiln_task_tool_{self._name}"
+
 
 async def test_process_tool_calls_none_input(tmp_path):
     """Test process_tool_calls with None input"""
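The mock's default `kiln_task_tool_data` shows the shape of the composite id: four segments joined by `:::`. How kiln_ai itself consumes this string is not shown in this diff; the split below is purely illustrative:

```python
# Illustrative parse of the ':::'-joined composite id used by MockKilnTaskTool.
data = "project_id:::tool_id:::task_id:::run_id"
project_id, tool_id, task_id, run_id = data.split(":::")
assert (project_id, run_id) == ("project_id", "run_id")
```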
@@ -879,6 +951,7 @@ async def test_process_tool_calls_normal_tool_success(tmp_path):
         "role": "tool",
         "tool_call_id": "call_1",
         "content": "5",
+        "kiln_task_tool_data": None,
     }
 
 
@@ -915,8 +988,10 @@ async def test_process_tool_calls_multiple_normal_tools(tmp_path):
     assert len(tool_messages) == 2
     assert tool_messages[0]["tool_call_id"] == "call_1"
     assert tool_messages[0]["content"] == "5"
+    assert tool_messages[0].get("kiln_task_tool_data") is None
     assert tool_messages[1]["tool_call_id"] == "call_2"
     assert tool_messages[1]["content"] == "6"
+    assert tool_messages[1].get("kiln_task_tool_data") is None
 
 
 async def test_process_tool_calls_tool_not_found(tmp_path):
@@ -1072,6 +1147,7 @@ async def test_process_tool_calls_complex_result(tmp_path):
     assert assistant_output is None
     assert len(tool_messages) == 1
     assert tool_messages[0]["content"] == complex_result
+    assert tool_messages[0].get("kiln_task_tool_data") is None
 
 
 async def test_process_tool_calls_task_response_with_normal_tools_error(tmp_path):
@@ -1101,3 +1177,41 @@ async def test_process_tool_calls_task_response_with_normal_tools_error(tmp_path
         match="task_response tool call and other tool calls were both provided",
     ):
         await litellm_adapter.process_tool_calls(tool_calls)  # type: ignore
+
+
+async def test_process_tool_calls_kiln_task_tool_result(tmp_path):
+    """Test process_tool_calls with KilnTaskToolResult - tests the new if statement branch"""
+    task = build_test_task(tmp_path)
+    config = LiteLlmConfig(
+        run_config_properties=RunConfigProperties(
+            structured_output_mode=StructuredOutputMode.json_schema,
+            model_name="gpt_4_1_mini",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+        )
+    )
+    litellm_adapter = LiteLlmAdapter(config=config, kiln_task=task)
+
+    mock_kiln_task_tool = MockKilnTaskTool(
+        "kiln_task_tool",
+        output="Task completed successfully",
+        kiln_task_tool_data="proj123:::tool456:::task789:::run101",
+    )
+    tool_calls = [MockToolCall("call_1", "kiln_task_tool", '{"input": "test input"}')]
+
+    with patch.object(
+        litellm_adapter, "cached_available_tools", return_value=[mock_kiln_task_tool]
+    ):
+        assistant_output, tool_messages = await litellm_adapter.process_tool_calls(
+            tool_calls  # type: ignore
+        )
+
+    assert assistant_output is None
+    assert len(tool_messages) == 1
+    assert tool_messages[0]["role"] == "tool"
+    assert tool_messages[0]["tool_call_id"] == "call_1"
+    assert tool_messages[0]["content"] == "Task completed successfully"
+    assert (
+        tool_messages[0].get("kiln_task_tool_data")
+        == "proj123:::tool456:::task789:::run101"
+    )
kiln_ai/adapters/model_adapters/test_saving_adapter_results.py:

@@ -60,7 +60,9 @@ def test_save_run_isolation(test_task, adapter):
     )
 
     task_run = adapter.generate_run(
-        input=input_data,
+        input=input_data,
+        input_source=None,
+        run_output=run_output,
     )
     task_run.save_to_file()
 
@@ -146,7 +148,9 @@ def test_generate_run_non_ascii(test_task, adapter):
     )
 
     task_run = adapter.generate_run(
-        input=input_data,
+        input=input_data,
+        input_source=None,
+        run_output=run_output,
     )
     task_run.save_to_file()
 
@@ -256,7 +260,9 @@ def test_properties_for_task_output_custom_values(test_task):
     run_output = RunOutput(output=output_data, intermediate_outputs=None)
 
     task_run = adapter.generate_run(
-        input=input_data,
+        input=input_data,
+        input_source=None,
+        run_output=run_output,
    )
     task_run.save_to_file()
 
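All three call sites gain an explicit `input_source=None`, which suggests `generate_run` now expects an `input_source` argument (the tests build their inputs inline, so there is no source to record). A hedged sketch of the updated call shape, with the parameter names taken from the tests above:

```python
def save_generated_run(adapter, input_data, run_output) -> None:
    # Hypothetical wrapper showing the new generate_run call shape; the
    # explicit input_source=None mirrors the updated tests.
    task_run = adapter.generate_run(
        input=input_data,
        input_source=None,
        run_output=run_output,
    )
    task_run.save_to_file()
```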
kiln_ai/adapters/model_adapters/test_structured_output.py:

@@ -175,15 +175,12 @@ async def run_structured_output_test(tmp_path: Path, model_name: str, provider:
 
     # Check reasoning models
     assert a._model_provider is not None
-    if
-
-
-
-
-
-    else:
-        assert "reasoning" in run.intermediate_outputs
-        assert isinstance(run.intermediate_outputs["reasoning"], str)
+    if (
+        a._model_provider.reasoning_capable
+        and not a._model_provider.reasoning_optional_for_structured_output
+    ):
+        assert "reasoning" in run.intermediate_outputs
+        assert isinstance(run.intermediate_outputs["reasoning"], str)
 
 
 def build_structured_input_test_task(tmp_path: Path):