PyPI - kiln-ai - Versions diffs - 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl - Mend

kiln-ai 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kiln-ai might be problematic. Click here for more details.

Files changed (45)

kiln_ai/adapters/extractors/litellm_extractor.py +52 -32
kiln_ai/adapters/extractors/test_litellm_extractor.py +169 -71
kiln_ai/adapters/ml_embedding_model_list.py +330 -28
kiln_ai/adapters/ml_model_list.py +503 -23
kiln_ai/adapters/model_adapters/litellm_adapter.py +34 -7
kiln_ai/adapters/model_adapters/test_litellm_adapter.py +78 -0
kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
kiln_ai/adapters/model_adapters/test_structured_output.py +6 -9
kiln_ai/adapters/test_ml_embedding_model_list.py +89 -279
kiln_ai/adapters/test_ml_model_list.py +0 -10
kiln_ai/datamodel/basemodel.py +31 -3
kiln_ai/datamodel/external_tool_server.py +206 -54
kiln_ai/datamodel/extraction.py +14 -0
kiln_ai/datamodel/task.py +5 -0
kiln_ai/datamodel/task_output.py +41 -11
kiln_ai/datamodel/test_attachment.py +3 -3
kiln_ai/datamodel/test_basemodel.py +269 -13
kiln_ai/datamodel/test_datasource.py +50 -0
kiln_ai/datamodel/test_external_tool_server.py +534 -152
kiln_ai/datamodel/test_extraction_model.py +31 -0
kiln_ai/datamodel/test_task.py +35 -1
kiln_ai/datamodel/test_tool_id.py +106 -1
kiln_ai/datamodel/tool_id.py +36 -0
kiln_ai/tools/base_tool.py +12 -3
kiln_ai/tools/built_in_tools/math_tools.py +12 -4
kiln_ai/tools/kiln_task_tool.py +158 -0
kiln_ai/tools/mcp_server_tool.py +2 -2
kiln_ai/tools/mcp_session_manager.py +50 -24
kiln_ai/tools/rag_tools.py +12 -5
kiln_ai/tools/test_kiln_task_tool.py +527 -0
kiln_ai/tools/test_mcp_server_tool.py +4 -15
kiln_ai/tools/test_mcp_session_manager.py +186 -226
kiln_ai/tools/test_rag_tools.py +86 -5
kiln_ai/tools/test_tool_registry.py +199 -5
kiln_ai/tools/tool_registry.py +49 -17
kiln_ai/utils/filesystem.py +4 -4
kiln_ai/utils/open_ai_types.py +19 -2
kiln_ai/utils/pdf_utils.py +21 -0
kiln_ai/utils/test_open_ai_types.py +88 -12
kiln_ai/utils/test_pdf_utils.py +14 -1
{kiln_ai-0.21.0.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +3 -1
{kiln_ai-0.21.0.dist-info → kiln_ai-0.22.0.dist-info}/RECORD +45 -43
{kiln_ai-0.21.0.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
{kiln_ai-0.21.0.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/datamodel/test_extraction_model.py CHANGED Viewed

@@ -468,3 +468,34 @@ def test_document_invalid_mime_type(
 )
 def test_get_kind_from_mime_type(mime_type, expected_kind):
     assert get_kind_from_mime_type(mime_type) == expected_kind
+def test_document_friendly_name(mock_project, mock_attachment_factory):
+    name = f"Test Document {uuid.uuid4()!s}"
+    document = Document(
+        name=name,
+        description=f"Test description {uuid.uuid4()!s}",
+        kind=Kind.DOCUMENT,
+        original_file=FileInfo(
+            filename=f"test_{name}.txt",
+            size=100,
+            mime_type="text/plain",
+            attachment=mock_attachment_factory(),
+        ),
+        parent=mock_project,
+    )
+    document.save_to_file()
+    # backward compatibility: old documents did not have name_override
+    assert document.name_override is None
+    assert document.friendly_name == name
+    # new documents have name_override
+    document.name_override = "Test Document Override"
+    assert document.friendly_name == "Test Document Override"
+    document.save_to_file()
+    document = Document.from_id_and_parent_path(str(document.id), mock_project.path)
+    assert document is not None
+    assert document.friendly_name == "Test Document Override"

kiln_ai/datamodel/test_task.py CHANGED Viewed

@@ -254,7 +254,7 @@ def test_run_config_upgrade_old_entries():
         },
         "prompt": {
             "name": "Dazzling Unicorn",
-            "description": "Frozen copy of prompt 'simple_prompt_builder', created for evaluations.",
+            "description": "Frozen copy of prompt 'simple_prompt_builder'.",
             "generator_id": "simple_prompt_builder",
             "prompt": "Generate a joke, given a theme. The theme will be provided as a word or phrase as the input to the model. The assistant should output a joke that is funny and relevant to the theme. If a style is provided, the joke should be in that style. The output should include a setup and punchline.\n\nYour response should respect the following requirements:\n1) Keep the joke on topic. If the user specifies a theme, the joke must be related to that theme.\n2) Avoid any jokes that are offensive or inappropriate. Keep the joke clean and appropriate for all audiences.\n3) Make the joke funny and engaging. It should be something that someone would want to tell to their friends. Something clever, not just a simple pun.\n",
             "chain_of_thought_instructions": None,
@@ -296,3 +296,37 @@ def test_run_config_upgrade_old_entries():
 def test_task_name_unicode_name():
     task = Task(name="你好", instruction="Do something")
     assert task.name == "你好"
+def test_task_default_run_config_id_property(tmp_path):
+    """Test that default_run_config_id can be set and retrieved."""
+    # Create a task
+    task = Task(
+        name="Test Task", instruction="Test instruction", path=tmp_path / "task.kiln"
+    )
+    task.save_to_file()
+    # Create a run config for the task
+    run_config = TaskRunConfig(
+        name="Test Config",
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id=PromptGenerators.SIMPLE,
+            structured_output_mode=StructuredOutputMode.json_schema,
+        ),
+        parent=task,
+    )
+    run_config.save_to_file()
+    # Test None default (should be valid)
+    assert task.default_run_config_id is None
+    # Test setting a valid ID
+    task.default_run_config_id = "123456789012"
+    assert task.default_run_config_id == "123456789012"
+    # Test setting back to None
+    task.default_run_config_id = None
+    assert task.default_run_config_id is None

kiln_ai/datamodel/test_tool_id.py CHANGED Viewed

@@ -8,6 +8,7 @@ from kiln_ai.datamodel.tool_id import (
     KilnBuiltInToolId,
     ToolId,
     _check_tool_id,
+    kiln_task_server_id_from_tool_id,
     mcp_server_and_tool_name_from_id,
     rag_config_id_from_id,
 )
@@ -145,6 +146,39 @@ class TestCheckToolId:
         with pytest.raises(ValueError, match="Invalid RAG tool ID"):
             _check_tool_id("kiln_tool::rag::")
+    def test_valid_kiln_task_tools(self):
+        """Test validation of valid Kiln task tools."""
+        valid_ids = [
+            "kiln_task::server1",
+            "kiln_task::my_server",
+            "kiln_task::test_server_123",
+            "kiln_task::server_with_underscores",
+            "kiln_task::server-with-dashes",
+            "kiln_task::server.with.dots",
+        ]
+        for tool_id in valid_ids:
+            result = _check_tool_id(tool_id)
+            assert result == tool_id
+    def test_invalid_kiln_task_format(self):
+        """Test validation fails for invalid Kiln task tool formats."""
+        # These IDs start with the Kiln task prefix but have invalid formats
+        kiln_task_invalid_ids = [
+            "kiln_task::",  # Missing server ID
+            "kiln_task::server::extra",  # Too many parts
+            "kiln_task::server::tool::extra",  # Too many parts
+        ]
+        for invalid_id in kiln_task_invalid_ids:
+            with pytest.raises(ValueError, match="Invalid Kiln task tool ID"):
+                _check_tool_id(invalid_id)
+    def test_kiln_task_tool_empty_server_id(self):
+        """Test that Kiln task tool with empty server ID is handled properly."""
+        # This tests the case where kiln_task_server_id_from_tool_id returns empty string which should raise an error
+        with pytest.raises(ValueError, match="Invalid Kiln task tool ID"):
+            _check_tool_id("kiln_task::")
 class TestMcpServerAndToolNameFromId:
     """Test the mcp_server_and_tool_name_from_id function."""
@@ -220,7 +254,7 @@ class TestToolIdPydanticType:
             model = self._ModelWithToolId(tool_id=tool_id.value)
             assert model.tool_id == tool_id.value
-    def test_valid_mcp_tools(self):
+    def test_valid_tool_ids(self):
         """Test ToolId validates MCP remote and local tools."""
         valid_ids = [
             # Remote MCP tools
@@ -232,6 +266,9 @@ class TestToolIdPydanticType:
             # RAG tools
             "kiln_tool::rag::config1",
             "kiln_tool::rag::my_rag_config",
+            # Kiln task tools
+            "kiln_task::server1",
+            "kiln_task::my_server",
         ]
         for tool_id in valid_ids:
@@ -249,6 +286,8 @@ class TestToolIdPydanticType:
             "mcp::local::server",
             "kiln_tool::rag::",
             "kiln_tool::rag::config::extra",
+            "kiln_task::",
+            "kiln_task::server::extra",
         ]
         for invalid_id in invalid_ids:
@@ -318,3 +357,69 @@ class TestRagConfigIdFromId:
         # The validation for empty config ID happens in _check_tool_id
         result = rag_config_id_from_id("kiln_tool::rag::")
         assert result == ""
+class TestKilnTaskServerIdFromToolId:
+    """Test the kiln_task_server_id_from_tool_id function."""
+    def test_valid_kiln_task_ids(self):
+        """Test parsing valid Kiln task tool IDs."""
+        test_cases = [
+            ("kiln_task::server1", "server1"),
+            ("kiln_task::my_server", "my_server"),
+            ("kiln_task::test_server_123", "test_server_123"),
+            ("kiln_task::a", "a"),  # Minimal valid case
+            ("kiln_task::server_with_underscores", "server_with_underscores"),
+            ("kiln_task::server-with-dashes", "server-with-dashes"),
+            ("kiln_task::server.with.dots", "server.with.dots"),
+        ]
+        for tool_id, expected in test_cases:
+            result = kiln_task_server_id_from_tool_id(tool_id)
+            assert result == expected
+    def test_invalid_kiln_task_ids(self):
+        """Test parsing fails for invalid Kiln task tool IDs."""
+        # Test various invalid formats
+        invalid_ids = [
+            "kiln_task::",  # Empty server ID
+            "kiln_task::server::extra",  # Too many parts (3 parts)
+            "kiln_task::server::tool::extra",  # Too many parts (4 parts)
+            "wrong::server",  # Wrong prefix
+            "kiln_wrong::server",  # Wrong prefix
+            "task::server",  # Too few parts (2 parts)
+            "",  # Empty string
+            "single_part",  # Only 1 part
+            "kiln_task",  # Missing server ID
+        ]
+        for invalid_id in invalid_ids:
+            with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
+                kiln_task_server_id_from_tool_id(invalid_id)
+    def test_kiln_task_id_with_empty_server_id(self):
+        """Test that Kiln task tool ID with empty server ID raises error."""
+        with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
+            kiln_task_server_id_from_tool_id("kiln_task::")
+    def test_kiln_task_id_with_whitespace_server_id(self):
+        """Test that Kiln task tool ID with whitespace-only server ID raises error."""
+        with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
+            kiln_task_server_id_from_tool_id("kiln_task::")
+    def test_kiln_task_id_with_multiple_colons(self):
+        """Test that Kiln task tool ID with multiple colons raises error."""
+        with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
+            kiln_task_server_id_from_tool_id("kiln_task::server::extra")
+    def test_kiln_task_id_case_sensitivity(self):
+        """Test that Kiln task tool IDs are case sensitive."""
+        # These should work
+        result1 = kiln_task_server_id_from_tool_id("kiln_task::Server")
+        assert result1 == "Server"
+        result2 = kiln_task_server_id_from_tool_id("kiln_task::SERVER")
+        assert result2 == "SERVER"
+        result3 = kiln_task_server_id_from_tool_id("kiln_task::server")
+        assert result3 == "server"

kiln_ai/datamodel/tool_id.py CHANGED Viewed

@@ -14,6 +14,7 @@ Tool IDs can be one of:
 - A kiln built-in tool name: kiln_tool::add_numbers
 - A remote MCP tool: mcp::remote::<server_id>::<tool_name>
 - A local MCP tool: mcp::local::<server_id>::<tool_name>
+- A Kiln task tool: kiln_task::<server_id>
 - More coming soon like kiln_project_tool::rag::RAG_CONFIG_ID
 """
@@ -28,6 +29,7 @@ class KilnBuiltInToolId(str, Enum):
 MCP_REMOTE_TOOL_ID_PREFIX = "mcp::remote::"
 RAG_TOOL_ID_PREFIX = "kiln_tool::rag::"
 MCP_LOCAL_TOOL_ID_PREFIX = "mcp::local::"
+KILN_TASK_TOOL_ID_PREFIX = "kiln_task::"
 def _check_tool_id(id: str) -> str:
@@ -68,6 +70,15 @@ def _check_tool_id(id: str) -> str:
             )
         return id
+    # Kiln task tools must have format: kiln_task::<server_id>
+    if id.startswith(KILN_TASK_TOOL_ID_PREFIX):
+        server_id = kiln_task_server_id_from_tool_id(id)
+        if not server_id:
+            raise ValueError(
+                f"Invalid Kiln task tool ID: {id}. Expected format: 'kiln_task::<server_id>'."
+            )
+        return id
     raise ValueError(f"Invalid tool ID: {id}")
@@ -103,3 +114,28 @@ def rag_config_id_from_id(id: str) -> str:
             f"Invalid RAG tool ID: {id}. Expected format: 'kiln_tool::rag::<rag_config_id>'."
         )
     return parts[2]
+def kiln_task_server_id_from_tool_id(tool_id: str) -> str:
+    """
+    Get the server ID from the tool ID.
+    """
+    if not tool_id.startswith(KILN_TASK_TOOL_ID_PREFIX):
+        raise ValueError(
+            f"Invalid Kiln task tool ID format: {tool_id}. Expected format: 'kiln_task::<server_id>'."
+        )
+    # Remove prefix and split on ::
+    remaining = tool_id[len(KILN_TASK_TOOL_ID_PREFIX) :]
+    if not remaining:
+        raise ValueError(
+            f"Invalid Kiln task tool ID format: {tool_id}. Expected format: 'kiln_task::<server_id>'."
+        )
+    parts = remaining.split("::")
+    if len(parts) != 1 or not parts[0].strip():
+        raise ValueError(
+            f"Invalid Kiln task tool ID format: {tool_id}. Expected format: 'kiln_task::<server_id>'."
+        )
+    return parts[0]  # server_id

kiln_ai/tools/base_tool.py CHANGED Viewed

@@ -1,10 +1,19 @@
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from typing import Any, Dict
 from kiln_ai.datamodel.json_schema import validate_schema_dict
 from kiln_ai.datamodel.tool_id import KilnBuiltInToolId, ToolId
+@dataclass
+class ToolCallContext:
+    """Context passed to tools when they are called, containing information from the calling task."""
+    """Used for Kiln Tasks as Tools, to know if the tool call should save the task run it invoked to that task's Dataset."""
+    allow_saving: bool = True
 class KilnToolInterface(ABC):
     """
     Abstract interface defining the core API that all Kiln tools must implement.
@@ -12,8 +21,8 @@ class KilnToolInterface(ABC):
     """
     @abstractmethod
-    async def run(self, **kwargs) -> Any:
-        """Execute the tool with the given parameters."""
+    async def run(self, context: ToolCallContext | None = None, **kwargs) -> Any:
+        """Execute the tool with the given parameters and calling context if provided."""
         pass
     @abstractmethod
@@ -77,6 +86,6 @@ class KilnTool(KilnToolInterface):
         }
     @abstractmethod
-    async def run(self, **kwargs) -> str:
+    async def run(self, context: ToolCallContext | None = None, **kwargs) -> Any:
         """Subclasses must implement the actual tool logic."""
         pass

kiln_ai/tools/built_in_tools/math_tools.py CHANGED Viewed

@@ -27,7 +27,9 @@ class AddTool(KilnTool):
             parameters_schema=parameters_schema,
         )
-    async def run(self, a: Union[int, float], b: Union[int, float]) -> str:
+    async def run(
+        self, context=None, *, a: Union[int, float], b: Union[int, float]
+    ) -> str:
         """Add two numbers and return the result."""
         return str(a + b)
@@ -57,7 +59,9 @@ class SubtractTool(KilnTool):
             parameters_schema=parameters_schema,
         )
-    async def run(self, a: Union[int, float], b: Union[int, float]) -> str:
+    async def run(
+        self, context=None, *, a: Union[int, float], b: Union[int, float]
+    ) -> str:
         """Subtract b from a and return the result."""
         return str(a - b)
@@ -84,7 +88,9 @@ class MultiplyTool(KilnTool):
             parameters_schema=parameters_schema,
         )
-    async def run(self, a: Union[int, float], b: Union[int, float]) -> str:
+    async def run(
+        self, context=None, *, a: Union[int, float], b: Union[int, float]
+    ) -> str:
         """Multiply two numbers and return the result."""
         return str(a * b)
@@ -117,7 +123,9 @@ class DivideTool(KilnTool):
             parameters_schema=parameters_schema,
         )
-    async def run(self, a: Union[int, float], b: Union[int, float]) -> str:
+    async def run(
+        self, context=None, *, a: Union[int, float], b: Union[int, float]
+    ) -> str:
         """Divide a by b and return the result."""
         if b == 0:
             raise ZeroDivisionError("Cannot divide by zero")

kiln_ai/tools/kiln_task_tool.py ADDED Viewed

@@ -0,0 +1,158 @@
+from dataclasses import dataclass
+from functools import cached_property
+from typing import Any, Dict
+from kiln_ai.datamodel import Task
+from kiln_ai.datamodel.external_tool_server import ExternalToolServer
+from kiln_ai.datamodel.task import TaskRunConfig
+from kiln_ai.datamodel.task_output import DataSource, DataSourceType
+from kiln_ai.datamodel.tool_id import ToolId
+from kiln_ai.tools.base_tool import KilnToolInterface, ToolCallContext
+from kiln_ai.utils.project_utils import project_from_id
+@dataclass
+class KilnTaskToolResult:
+    output: str
+    kiln_task_tool_data: str
+class KilnTaskTool(KilnToolInterface):
+    """
+    A tool that wraps a Kiln task, allowing it to be called as a function.
+    This tool loads a task by ID and executes it using the specified run configuration.
+    """
+    def __init__(
+        self,
+        project_id: str,
+        tool_id: str,
+        data_model: ExternalToolServer,
+    ):
+        self._project_id = project_id
+        self._tool_server_model = data_model
+        self._tool_id = tool_id
+        self._name = data_model.properties.get("name", "")
+        self._description = data_model.properties.get("description", "")
+        self._task_id = data_model.properties.get("task_id", "")
+        self._run_config_id = data_model.properties.get("run_config_id", "")
+    async def id(self) -> ToolId:
+        return self._tool_id
+    async def name(self) -> str:
+        return self._name
+    async def description(self) -> str:
+        return self._description
+    async def toolcall_definition(self) -> Dict[str, Any]:
+        """Generate OpenAI-compatible tool definition."""
+        return {
+            "type": "function",
+            "function": {
+                "name": await self.name(),
+                "description": await self.description(),
+                "parameters": self.parameters_schema,
+            },
+        }
+    async def run(
+        self, context: ToolCallContext | None = None, **kwargs
+    ) -> KilnTaskToolResult:
+        """Execute the wrapped Kiln task with the given parameters and calling context."""
+        if context is None:
+            raise ValueError("Context is required for running a KilnTaskTool.")
+        # Determine the input format
+        if self._task.input_json_schema:
+            # Structured input - pass kwargs directly
+            input = kwargs
+        else:
+            # Plaintext input - extract from 'input' parameter
+            if "input" in kwargs:
+                input = kwargs["input"]
+            else:
+                raise ValueError(f"Input not found in kwargs: {kwargs}")
+        # These imports are here to avoid circular chains
+        from kiln_ai.adapters.adapter_registry import adapter_for_task
+        from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
+        # Create adapter and run the task using the calling task's allow_saving setting
+        adapter = adapter_for_task(
+            self._task,
+            run_config_properties=self._run_config.run_config_properties,
+            base_adapter_config=AdapterConfig(
+                allow_saving=context.allow_saving,
+                default_tags=["tool_call"],
+            ),
+        )
+        task_run = await adapter.invoke(
+            input,
+            input_source=DataSource(
+                type=DataSourceType.tool_call,
+                run_config=self._run_config.run_config_properties,
+            ),
+        )
+        return KilnTaskToolResult(
+            output=task_run.output.output,
+            kiln_task_tool_data=f"{self._project_id}:::{self._tool_id}:::{self._task.id}:::{task_run.id}",
+        )
+    @cached_property
+    def _task(self) -> Task:
+        # Load the project first
+        project = project_from_id(self._project_id)
+        if project is None:
+            raise ValueError(f"Project not found: {self._project_id}")
+        # Load the task from the project
+        task = Task.from_id_and_parent_path(self._task_id, project.path)
+        if task is None:
+            raise ValueError(
+                f"Task not found: {self._task_id} in project {self._project_id}"
+            )
+        return task
+    @cached_property
+    def _run_config(self) -> TaskRunConfig:
+        run_config = next(
+            (
+                run_config
+                for run_config in self._task.run_configs(readonly=True)
+                if run_config.id == self._run_config_id
+            ),
+            None,
+        )
+        if run_config is None:
+            raise ValueError(
+                f"Task run config not found: {self._run_config_id} for task {self._task_id} in project {self._project_id}"
+            )
+        return run_config
+    @cached_property
+    def parameters_schema(self) -> Dict[str, Any]:
+        if self._task.input_json_schema:
+            # Use the task's input schema directly if it exists
+            parameters_schema = self._task.input_schema()
+        else:
+            # For plaintext tasks, create a simple string input parameter
+            parameters_schema = {
+                "type": "object",
+                "properties": {
+                    "input": {
+                        "type": "string",
+                        "description": "Plaintext input for the tool.",
+                    }
+                },
+                "required": ["input"],
+            }
+        if parameters_schema is None:
+            raise ValueError(
+                f"Failed to create parameters schema for tool_id {self._tool_id}"
+            )
+        return parameters_schema

kiln_ai/tools/mcp_server_tool.py CHANGED Viewed

@@ -5,7 +5,7 @@ from mcp.types import Tool as MCPTool
 from kiln_ai.datamodel.external_tool_server import ExternalToolServer
 from kiln_ai.datamodel.tool_id import MCP_REMOTE_TOOL_ID_PREFIX, ToolId
-from kiln_ai.tools.base_tool import KilnToolInterface
+from kiln_ai.tools.base_tool import KilnToolInterface, ToolCallContext
 from kiln_ai.tools.mcp_session_manager import MCPSessionManager
@@ -38,7 +38,7 @@ class MCPServerTool(KilnToolInterface):
             },
         }
-    async def run(self, **kwargs) -> Any:
+    async def run(self, context: ToolCallContext | None = None, **kwargs) -> str:
         result = await self._call_tool(**kwargs)
         if result.isError:

kiln_ai/tools/mcp_session_manager.py CHANGED Viewed

@@ -2,6 +2,7 @@ import logging
 import os
 import subprocess
 import sys
+import tempfile
 from contextlib import asynccontextmanager
 from datetime import timedelta
 from typing import AsyncGenerator
@@ -19,6 +20,8 @@ from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 logger = logging.getLogger(__name__)
+LOCAL_MCP_ERROR_INSTRUCTION = "Please verify your command, arguments, and environment variables, and consult the server's documentation for the correct setup."
 class MCPSessionManager:
     """
@@ -51,6 +54,8 @@ class MCPSessionManager:
             case ToolServerType.local_mcp:
                 async with self._create_local_mcp_session(tool_server) as session:
                     yield session
+            case ToolServerType.kiln_task:
+                raise ValueError("Kiln task tools are not available from an MCP server")
             case _:
                 raise_exhaustive_enum_error(tool_server.type)
@@ -164,35 +169,56 @@ class MCPSessionManager:
             env_vars["PATH"] = self._get_path()
         # Set the server parameters
+        cwd = os.path.join(Config.settings_dir(), "cache", "mcp_cache")
+        os.makedirs(cwd, exist_ok=True)
         server_params = StdioServerParameters(
-            command=command,
-            args=args,
-            env=env_vars,
+            command=command, args=args, env=env_vars, cwd=cwd
         )
-        try:
-            async with stdio_client(server_params) as (read, write):
-                async with ClientSession(
-                    read, write, read_timeout_seconds=timedelta(seconds=8)
-                ) as session:
-                    await session.initialize()
-                    yield session
-        except Exception as e:
-            # Check for MCP errors. Things like wrong arguments would fall here.
-            mcp_error = self._extract_first_exception(e, McpError)
-            if mcp_error and isinstance(mcp_error, McpError):
-                self._raise_local_mcp_error(mcp_error)
-            # Re-raise the original error but with a friendlier message
-            self._raise_local_mcp_error(e)
-    def _raise_local_mcp_error(self, e: Exception):
+        # Create temporary file to capture MCP server stderr
+        # Use errors="replace" to handle non-UTF-8 bytes gracefully
+        with tempfile.TemporaryFile(
+            mode="w+", encoding="utf-8", errors="replace"
+        ) as err_log:
+            try:
+                async with stdio_client(server_params, errlog=err_log) as (
+                    read,
+                    write,
+                ):
+                    async with ClientSession(
+                        read, write, read_timeout_seconds=timedelta(seconds=30)
+                    ) as session:
+                        await session.initialize()
+                        yield session
+            except Exception as e:
+                # Read stderr content from temporary file for debugging
+                err_log.seek(0)  # Read from the start of the file
+                stderr_content = err_log.read()
+                if stderr_content:
+                    logger.error(
+                        f"MCP server '{tool_server.name}' stderr output: {stderr_content}"
+                    )
+                # Check for MCP errors. Things like wrong arguments would fall here.
+                mcp_error = self._extract_first_exception(e, McpError)
+                if mcp_error and isinstance(mcp_error, McpError):
+                    self._raise_local_mcp_error(mcp_error, stderr_content)
+                # Re-raise the original error but with a friendlier message
+                self._raise_local_mcp_error(e, stderr_content)
+    def _raise_local_mcp_error(self, e: Exception, stderr: str):
         """
-        Raise a ValueError with a friendlier message for local MCP errors.
+        Raise a RuntimeError with a friendlier message for local MCP errors.
         """
-        raise RuntimeError(
-            f"MCP server failed to start. Please verify your command, arguments, and environment variables, and consult the server's documentation for the correct setup. Original error: {e}"
-        ) from e
+        error_msg = f"'{e}'"
+        if stderr:
+            error_msg += f"\nMCP server error: {stderr}"
+        error_msg += f"\n{LOCAL_MCP_ERROR_INSTRUCTION}"
+        raise RuntimeError(error_msg) from e
     def _get_path(self) -> str:
         """

kiln-ai 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

Potentially problematic release.

kiln-ai 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl