PyPI - kiln-ai - Versions diffs - 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl - Mend

kiln-ai 0.20.1py3-none-any.whl → 0.22.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kiln-ai might be problematic. Click here for more details.

Files changed (133) hide show

kiln_ai/adapters/__init__.py +6 -0
kiln_ai/adapters/adapter_registry.py +43 -226
kiln_ai/adapters/chunkers/__init__.py +13 -0
kiln_ai/adapters/chunkers/base_chunker.py +42 -0
kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
kiln_ai/adapters/chunkers/helpers.py +23 -0
kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
kiln_ai/adapters/chunkers/test_helpers.py +75 -0
kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
kiln_ai/adapters/embedding/__init__.py +0 -0
kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
kiln_ai/adapters/embedding/embedding_registry.py +32 -0
kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
kiln_ai/adapters/eval/eval_runner.py +6 -2
kiln_ai/adapters/eval/test_base_eval.py +1 -3
kiln_ai/adapters/eval/test_g_eval.py +1 -1
kiln_ai/adapters/extractors/__init__.py +18 -0
kiln_ai/adapters/extractors/base_extractor.py +72 -0
kiln_ai/adapters/extractors/encoding.py +20 -0
kiln_ai/adapters/extractors/extractor_registry.py +44 -0
kiln_ai/adapters/extractors/extractor_runner.py +112 -0
kiln_ai/adapters/extractors/litellm_extractor.py +406 -0
kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
kiln_ai/adapters/extractors/test_encoding.py +54 -0
kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
kiln_ai/adapters/extractors/test_litellm_extractor.py +1290 -0
kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
kiln_ai/adapters/ml_embedding_model_list.py +494 -0
kiln_ai/adapters/ml_model_list.py +876 -18
kiln_ai/adapters/model_adapters/litellm_adapter.py +40 -75
kiln_ai/adapters/model_adapters/test_litellm_adapter.py +79 -1
kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
kiln_ai/adapters/model_adapters/test_structured_output.py +9 -10
kiln_ai/adapters/ollama_tools.py +69 -12
kiln_ai/adapters/provider_tools.py +190 -46
kiln_ai/adapters/rag/deduplication.py +49 -0
kiln_ai/adapters/rag/progress.py +252 -0
kiln_ai/adapters/rag/rag_runners.py +844 -0
kiln_ai/adapters/rag/test_deduplication.py +195 -0
kiln_ai/adapters/rag/test_progress.py +785 -0
kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
kiln_ai/adapters/remote_config.py +80 -8
kiln_ai/adapters/test_adapter_registry.py +579 -86
kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
kiln_ai/adapters/test_ml_model_list.py +202 -0
kiln_ai/adapters/test_ollama_tools.py +340 -1
kiln_ai/adapters/test_prompt_builders.py +1 -1
kiln_ai/adapters/test_provider_tools.py +199 -8
kiln_ai/adapters/test_remote_config.py +551 -56
kiln_ai/adapters/vector_store/__init__.py +1 -0
kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
kiln_ai/datamodel/__init__.py +16 -13
kiln_ai/datamodel/basemodel.py +201 -4
kiln_ai/datamodel/chunk.py +158 -0
kiln_ai/datamodel/datamodel_enums.py +27 -0
kiln_ai/datamodel/embedding.py +64 -0
kiln_ai/datamodel/external_tool_server.py +206 -54
kiln_ai/datamodel/extraction.py +317 -0
kiln_ai/datamodel/project.py +33 -1
kiln_ai/datamodel/rag.py +79 -0
kiln_ai/datamodel/task.py +5 -0
kiln_ai/datamodel/task_output.py +41 -11
kiln_ai/datamodel/test_attachment.py +649 -0
kiln_ai/datamodel/test_basemodel.py +270 -14
kiln_ai/datamodel/test_chunk_models.py +317 -0
kiln_ai/datamodel/test_dataset_split.py +1 -1
kiln_ai/datamodel/test_datasource.py +50 -0
kiln_ai/datamodel/test_embedding_models.py +448 -0
kiln_ai/datamodel/test_eval_model.py +6 -6
kiln_ai/datamodel/test_external_tool_server.py +534 -152
kiln_ai/datamodel/test_extraction_chunk.py +206 -0
kiln_ai/datamodel/test_extraction_model.py +501 -0
kiln_ai/datamodel/test_rag.py +641 -0
kiln_ai/datamodel/test_task.py +35 -1
kiln_ai/datamodel/test_tool_id.py +187 -1
kiln_ai/datamodel/test_vector_store.py +320 -0
kiln_ai/datamodel/tool_id.py +58 -0
kiln_ai/datamodel/vector_store.py +141 -0
kiln_ai/tools/base_tool.py +12 -3
kiln_ai/tools/built_in_tools/math_tools.py +12 -4
kiln_ai/tools/kiln_task_tool.py +158 -0
kiln_ai/tools/mcp_server_tool.py +2 -2
kiln_ai/tools/mcp_session_manager.py +51 -22
kiln_ai/tools/rag_tools.py +164 -0
kiln_ai/tools/test_kiln_task_tool.py +527 -0
kiln_ai/tools/test_mcp_server_tool.py +4 -15
kiln_ai/tools/test_mcp_session_manager.py +187 -227
kiln_ai/tools/test_rag_tools.py +929 -0
kiln_ai/tools/test_tool_registry.py +290 -7
kiln_ai/tools/tool_registry.py +69 -16
kiln_ai/utils/__init__.py +3 -0
kiln_ai/utils/async_job_runner.py +62 -17
kiln_ai/utils/config.py +2 -2
kiln_ai/utils/env.py +15 -0
kiln_ai/utils/filesystem.py +14 -0
kiln_ai/utils/filesystem_cache.py +60 -0
kiln_ai/utils/litellm.py +94 -0
kiln_ai/utils/lock.py +100 -0
kiln_ai/utils/mime_type.py +38 -0
kiln_ai/utils/open_ai_types.py +19 -2
kiln_ai/utils/pdf_utils.py +59 -0
kiln_ai/utils/test_async_job_runner.py +151 -35
kiln_ai/utils/test_env.py +142 -0
kiln_ai/utils/test_filesystem_cache.py +316 -0
kiln_ai/utils/test_litellm.py +206 -0
kiln_ai/utils/test_lock.py +185 -0
kiln_ai/utils/test_mime_type.py +66 -0
kiln_ai/utils/test_open_ai_types.py +88 -12
kiln_ai/utils/test_pdf_utils.py +86 -0
kiln_ai/utils/test_uuid.py +111 -0
kiln_ai/utils/test_validation.py +524 -0
kiln_ai/utils/uuid.py +9 -0
kiln_ai/utils/validation.py +90 -0
{kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +9 -1
kiln_ai-0.22.0.dist-info/RECORD +213 -0
kiln_ai-0.20.1.dist-info/RECORD +0 -138
{kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
{kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/datamodel/chunk.py ADDED Viewed

@@ -0,0 +1,158 @@
+import logging
+from enum import Enum
+from typing import TYPE_CHECKING, List, Union
+import anyio
+from pydantic import (
+    BaseModel,
+    Field,
+    SerializationInfo,
+    ValidationInfo,
+    field_serializer,
+    field_validator,
+)
+from kiln_ai.datamodel.basemodel import (
+    ID_TYPE,
+    FilenameString,
+    KilnAttachmentModel,
+    KilnParentedModel,
+    KilnParentModel,
+)
+from kiln_ai.datamodel.embedding import ChunkEmbeddings
+logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from kiln_ai.datamodel.extraction import Extraction
+    from kiln_ai.datamodel.project import Project
+def validate_fixed_window_chunker_properties(
+    properties: dict[str, str | int | float | bool],
+) -> dict[str, str | int | float | bool]:
+    """Validate the properties for the fixed window chunker.
+    Both ``chunk_overlap`` and ``chunk_size`` must be present; no defaults are
+    filled in. ``chunk_size`` must be an integer > 0 and ``chunk_overlap`` an
+    integer >= 0 that is strictly smaller than ``chunk_size``.
+    Returns:
+        The same ``properties`` dict that was passed in, unmodified.
+    Raises:
+        ValueError: if a key is missing, is not an integer, or is out of range.
+    """
+    chunk_overlap = properties.get("chunk_overlap")
+    if chunk_overlap is None:
+        raise ValueError("Chunk overlap is required.")
+    chunk_size = properties.get("chunk_size")
+    if chunk_size is None:
+        raise ValueError("Chunk size is required.")
+    # bool is a subclass of int, so it has to be rejected explicitly; otherwise
+    # chunk_overlap=False / chunk_size=True would be accepted as 0 / 1.
+    if isinstance(chunk_overlap, bool) or not isinstance(chunk_overlap, int):
+        raise ValueError("Chunk overlap must be an integer.")
+    if chunk_overlap < 0:
+        raise ValueError("Chunk overlap must be greater than or equal to 0.")
+    if isinstance(chunk_size, bool) or not isinstance(chunk_size, int):
+        raise ValueError("Chunk size must be an integer.")
+    if chunk_size <= 0:
+        raise ValueError("Chunk size must be greater than 0.")
+    if chunk_overlap >= chunk_size:
+        raise ValueError("Chunk overlap must be less than chunk size.")
+    return properties
+class ChunkerType(str, Enum):
+    # Identifies which chunking strategy a ChunkerConfig refers to; the string
+    # value is what gets stored. Only one strategy is defined here.
+    FIXED_WINDOW = "fixed_window"
+class ChunkerConfig(KilnParentedModel):
+    # Stored settings for one chunker. `properties` is interpreted according to
+    # `chunker_type`, which is declared first so the properties validator can
+    # read it from the already-validated data.
+    name: FilenameString = Field(description="A name to identify the chunker config.")
+    description: str | None = Field(
+        description="The description of the chunker config",
+        default=None,
+    )
+    chunker_type: ChunkerType = Field(
+        description="This is used to determine the type of chunker to use."
+    )
+    properties: dict[str, str | int | float | bool] = Field(
+        description="Properties to be used to execute the chunker config. This is chunker_type specific and should serialize to a json dict."
+    )
+    # Workaround to return typed parent without importing Project
+    def parent_project(self) -> Union["Project", None]:
+        """Return the parent when it is a ``Project``; otherwise ``None``."""
+        owner = self.parent
+        if owner is not None and owner.__class__.__name__ == "Project":
+            return owner  # type: ignore
+        return None
+    @field_validator("properties")
+    @classmethod
+    def validate_properties(
+        cls, properties: dict[str, str | int | float | bool], info: ValidationInfo
+    ) -> dict[str, str | int | float | bool]:
+        """Apply the chunker-type specific checks to ``properties``."""
+        if info.data.get("chunker_type") != ChunkerType.FIXED_WINDOW:
+            return properties
+        # hand back the checked dict directly so properties are not revalidated
+        return validate_fixed_window_chunker_properties(properties)
+    def chunk_size(self) -> int | None:
+        """Return the ``chunk_size`` property, or ``None`` when it is not set."""
+        size = self.properties.get("chunk_size")
+        if size is None:
+            return None
+        if isinstance(size, int):
+            return size
+        raise ValueError("Chunk size must be an integer.")
+    def chunk_overlap(self) -> int | None:
+        """Return the ``chunk_overlap`` property, or ``None`` when it is not set."""
+        overlap = self.properties.get("chunk_overlap")
+        if overlap is None:
+            return None
+        if isinstance(overlap, int):
+            return overlap
+        raise ValueError("Chunk overlap must be an integer.")
+class Chunk(BaseModel):
+    # One chunk of a document. The chunk body is held as an attachment rather
+    # than as an inline string.
+    content: KilnAttachmentModel = Field(
+        description="The content of the chunk, stored as an attachment."
+    )
+    @field_serializer("content")
+    def serialize_content(
+        self, content: KilnAttachmentModel, info: SerializationInfo
+    ) -> dict:
+        """Serialize the attachment with ``model_dump(mode="json")``, passing along the serialization context with ``filename_prefix`` set to ``"content"``."""
+        # NOTE(review): when the caller supplies a non-empty context dict, that
+        # same object is reused and mutated below (no copy is made) — confirm
+        # this leak of "filename_prefix" into the caller's context is intended.
+        context = info.context or {}
+        context["filename_prefix"] = "content"
+        return content.model_dump(mode="json", context=context)
+class ChunkedDocument(
+    KilnParentedModel, KilnParentModel, parent_of={"chunk_embeddings": ChunkEmbeddings}
+):
+    # The result of running one chunker config over a document: an ordered list
+    # of chunks, with ChunkEmbeddings stored as children.
+    chunker_config_id: ID_TYPE = Field(
+        description="The ID of the chunker config used to chunk the document."
+    )
+    chunks: List[Chunk] = Field(description="The chunks of the document.")
+    def parent_extraction(self) -> Union["Extraction", None]:
+        """Return the parent when it is an ``Extraction``; otherwise ``None``."""
+        owner = self.parent
+        if owner is not None and owner.__class__.__name__ == "Extraction":
+            return owner  # type: ignore
+        return None
+    def chunk_embeddings(self, readonly: bool = False) -> list[ChunkEmbeddings]:
+        """Typed pass-through to the parent-model accessor of the same name."""
+        return super().chunk_embeddings(readonly=readonly)  # type: ignore
+    async def load_chunks_text(self) -> list[str]:
+        """Read every chunk's content attachment as UTF-8 text, in chunk order.
+        Raises:
+            ValueError: if this document has no path, or an attachment cannot be read.
+        """
+        if not self.path:
+            raise ValueError(
+                "Failed to resolve the path of chunk content attachment because the chunk does not have a path."
+            )
+        base_dir = self.path.parent
+        loaded: list[str] = []
+        for item in self.chunks:
+            full_path = item.content.resolve_path(base_dir)
+            try:
+                text = await anyio.Path(full_path).read_text(encoding="utf-8")
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to read chunk content for {full_path}: {e}"
+                ) from e
+            loaded.append(text)
+        return loaded

kiln_ai/datamodel/datamodel_enums.py CHANGED Viewed

@@ -100,3 +100,30 @@ class ModelProviderName(str, Enum):
     siliconflow_cn = "siliconflow_cn"
     cerebras = "cerebras"
     docker_model_runner = "docker_model_runner"
+class KilnMimeType(str, Enum):
+    """
+    Enumeration of supported mime types.
+    """
+    # Members are str-valued, so each one compares equal to its mime string.
+    # documents
+    PDF = "application/pdf"
+    CSV = "text/csv"
+    TXT = "text/plain"
+    HTML = "text/html"
+    MD = "text/markdown"
+    # images
+    PNG = "image/png"
+    JPG = "image/jpeg"
+    # Same value as JPG: Enum treats this as an alias, so KilnMimeType.JPEG is
+    # KilnMimeType.JPG and iterating the enum yields JPG only.
+    JPEG = "image/jpeg"
+    # audio
+    MP3 = "audio/mpeg"
+    WAV = "audio/wav"
+    OGG = "audio/ogg"
+    # video
+    MP4 = "video/mp4"
+    MOV = "video/quicktime"

kiln_ai/datamodel/embedding.py ADDED Viewed

@@ -0,0 +1,64 @@
+from typing import TYPE_CHECKING, List, Union
+from pydantic import BaseModel, Field, model_validator
+from kiln_ai.datamodel.basemodel import ID_TYPE, FilenameString, KilnParentedModel
+from kiln_ai.datamodel.datamodel_enums import ModelProviderName
+if TYPE_CHECKING:
+    from kiln_ai.datamodel.chunk import ChunkedDocument
+    from kiln_ai.datamodel.project import Project
+class EmbeddingConfig(KilnParentedModel):
+    # Stored settings for generating embeddings: which provider/model to call
+    # and any extra properties (currently only "dimensions" is validated).
+    name: FilenameString = Field(
+        description="A name to identify the embedding config.",
+    )
+    description: str | None = Field(
+        default=None,
+        description="A description for your reference, not shared with embedding models.",
+    )
+    model_provider_name: ModelProviderName = Field(
+        description="The provider to use to generate embeddings.",
+    )
+    model_name: str = Field(
+        description="The model to use to generate embeddings.",
+    )
+    properties: dict[str, str | int | float | bool] = Field(
+        description="Properties to be used to execute the embedding config.",
+    )
+    # Workaround to return typed parent without importing Project
+    def parent_project(self) -> Union["Project", None]:
+        """Return the parent when it is a ``Project``; otherwise ``None``."""
+        if self.parent is None or self.parent.__class__.__name__ != "Project":
+            return None
+        return self.parent  # type: ignore
+    @model_validator(mode="after")
+    def validate_properties(self):
+        """Check the optional ``dimensions`` property: when present it must be a positive integer.
+        Raises:
+            ValueError: if ``dimensions`` is present but is not an int > 0.
+        """
+        if "dimensions" in self.properties:
+            dimensions = self.properties["dimensions"]
+            # bool is a subclass of int, so True would otherwise pass as 1.
+            if (
+                isinstance(dimensions, bool)
+                or not isinstance(dimensions, int)
+                or dimensions <= 0
+            ):
+                raise ValueError("Dimensions must be a positive integer")
+        return self
+class Embedding(BaseModel):
+    # A single embedding, held as a plain list of floats.
+    vector: List[float] = Field(description="The vector of the embedding.")
+class ChunkEmbeddings(KilnParentedModel):
+    # Embeddings produced by one embedding config for a chunked document;
+    # positions line up with the parent's chunk list.
+    embedding_config_id: ID_TYPE = Field(
+        description="The ID of the embedding config used to generate the embeddings."
+    )
+    embeddings: List[Embedding] = Field(
+        description="The embeddings of the chunks. The embedding at index i corresponds to the chunk at index i in the parent chunked document.",
+    )
+    def parent_chunked_document(self) -> Union["ChunkedDocument", None]:
+        """Return the parent when it is a ``ChunkedDocument``; otherwise ``None``."""
+        owner = self.parent
+        if owner is not None and owner.__class__.__name__ == "ChunkedDocument":
+            return owner  # type: ignore
+        return None

kiln_ai/datamodel/external_tool_server.py CHANGED Viewed

@@ -1,7 +1,10 @@
+import re
 from enum import Enum
-from typing import Any, Dict
+from typing import Any
+from urllib.parse import urlparse
 from pydantic import Field, PrivateAttr, model_validator
+from typing_extensions import NotRequired, TypedDict
 from kiln_ai.datamodel.basemodel import (
     FilenameString,
@@ -9,6 +12,7 @@ from kiln_ai.datamodel.basemodel import (
 )
 from kiln_ai.utils.config import MCP_SECRETS_KEY, Config
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+from kiln_ai.utils.validation import tool_name_validator, validate_return_dict_prop
 class ToolServerType(str, Enum):
@@ -18,6 +22,28 @@ class ToolServerType(str, Enum):
     remote_mcp = "remote_mcp"
     local_mcp = "local_mcp"
+    kiln_task = "kiln_task"
+class LocalServerProperties(TypedDict, total=True):
+    # Shape of `properties` for a `local_mcp` tool server. Only `command` is
+    # mandatory; the NotRequired keys may be omitted entirely.
+    command: str
+    args: NotRequired[list[str]]
+    env_vars: NotRequired[dict[str, str]]
+    # Names of entries in env_vars that are treated as secrets.
+    secret_env_var_keys: NotRequired[list[str]]
+class RemoteServerProperties(TypedDict, total=True):
+    # Shape of `properties` for a `remote_mcp` tool server. Only `server_url`
+    # is mandatory; the NotRequired keys may be omitted entirely.
+    server_url: str
+    headers: NotRequired[dict[str, str]]
+    # Names of entries in headers that are treated as secrets.
+    secret_header_keys: NotRequired[list[str]]
+class KilnTaskServerProperties(TypedDict, total=True):
+    # Shape of `properties` for a `kiln_task` tool server; every key is required.
+    task_id: str
+    run_config_id: str
+    # Checked with tool_name_validator when the server is validated.
+    name: str
+    # Limited to 128 characters when the server is validated.
+    description: str
+    is_archived: bool
 class ExternalToolServer(KilnParentedModel):
@@ -36,8 +62,10 @@ class ExternalToolServer(KilnParentedModel):
         default=None,
         description="A description of the external tool for you and your team. Will not be used in prompts/training/validation.",
     )
-    properties: Dict[str, Any] = Field(
-        default={},
+    properties: (
+        LocalServerProperties | RemoteServerProperties | KilnTaskServerProperties
+    ) = Field(
         description="Configuration properties specific to the tool type.",
     )
@@ -80,6 +108,9 @@ class ExternalToolServer(KilnParentedModel):
                         # Remove from env_vars immediately so they are not saved to file
                         del env_vars[key_name]
+            case ToolServerType.kiln_task:
+                pass
             case _:
                 raise_exhaustive_enum_error(self.type)
@@ -93,76 +124,195 @@ class ExternalToolServer(KilnParentedModel):
         if name == "properties":
             self._process_secrets_from_properties()
-    @model_validator(mode="after")
-    def validate_required_fields(self) -> "ExternalToolServer":
+    # Validation Helpers
+    @classmethod
+    def check_server_url(cls, server_url: str) -> None:
+        """Ensure ``server_url`` is an http(s) URL with a host and no leading whitespace.
+        Raises:
+            ValueError: if the value is not a string, starts with whitespace,
+                has no network location, or uses a scheme other than http/https.
+        """
+        if not isinstance(server_url, str):
+            raise ValueError("Server URL must be a string")
+        # If left-stripping changes the string, it began with whitespace
+        left_trimmed = server_url.lstrip()
+        if left_trimmed != server_url:
+            raise ValueError("Server URL must not have leading whitespace")
+        parts = urlparse(server_url)
+        if parts.netloc == "":
+            raise ValueError("Server URL is not a valid URL")
+        if parts.scheme not in ("http", "https"):
+            raise ValueError("Server URL must start with http:// or https://")
+    @classmethod
+    def check_headers(cls, headers: dict) -> None:
+        """Validate HTTP header names and values.
+        Every name must be a non-empty token made of the characters allowed by
+        the pattern below, every value must be non-empty, and neither may
+        contain CR or LF.
+        Raises:
+            ValueError: if ``headers`` is not a dict or any entry is invalid.
+        """
+        if not isinstance(headers, dict):
+            raise ValueError("headers must be a dictionary")
+        # Compiled once per call instead of once per header.
+        token_re = re.compile(r"^[!#$%&'*+.^_`|~0-9A-Za-z-]+$")
+        line_break_re = re.compile(r"\r|\n")
+        for key, value in headers.items():
+            if not key:
+                raise ValueError("Header name is required")
+            if not value:
+                raise ValueError("Header value is required")
+            # Non-string entries would make the regex calls below raise
+            # TypeError instead of the ValueError this validator promises.
+            if not isinstance(key, str) or not isinstance(value, str):
+                raise ValueError("Header names and values must be strings")
+            # Reject invalid header names and CR/LF in names/values
+            if not token_re.match(key):
+                raise ValueError(f'Invalid header name: "{key}"')
+            if line_break_re.search(key) or line_break_re.search(value):
+                raise ValueError(
+                    "Header names/values must not contain invalid characters"
+                )
+    @classmethod
+    def check_secret_keys(
+        cls, secret_keys: list, key_type: str, tool_type: str
+    ) -> None:
+        """Validate a list of secret key names (shared by header and env var keys).
+        ``key_type`` and ``tool_type`` are only used to build the error messages.
+        Raises:
+            ValueError: if ``secret_keys`` is not a list, holds a non-string, or holds an empty string.
+        """
+        if not isinstance(secret_keys, list):
+            raise ValueError(
+                f"{key_type} must be a list for external tools of type '{tool_type}'"
+            )
+        # Type check runs over the whole list before the emptiness check
+        if any(not isinstance(entry, str) for entry in secret_keys):
+            raise ValueError(f"{key_type} must contain only strings")
+        if any(not entry for entry in secret_keys):
+            raise ValueError("Secret key is required")
+    @classmethod
+    def check_env_vars(cls, env_vars: dict) -> None:
+        """Validate that every key of ``env_vars`` is a well-formed variable name.
+        Raises:
+            ValueError: if ``env_vars`` is not a dict or a key is malformed.
+        """
+        if not isinstance(env_vars, dict):
+            raise ValueError("environment variables must be a dictionary")
+        # Names follow the POSIX convention: the first character is an ASCII
+        # letter or underscore, the rest are ASCII letters, digits or underscores.
+        for key in env_vars:
+            starts_ok = bool(key) and (
+                key[0].isascii() and (key[0].isalpha() or key[0] == "_")
+            )
+            if not starts_ok:
+                raise ValueError(
+                    f"Invalid environment variable key: {key}. Must start with a letter or underscore."
+                )
+            if any(not (ch.isascii() and (ch.isalnum() or ch == "_")) for ch in key):
+                raise ValueError(
+                    f"Invalid environment variable key: {key}. Can only contain letters, digits, and underscores."
+                )
+    @classmethod
+    def type_from_data(cls, data: dict) -> ToolServerType:
+        """Get the tool server type from the raw input data, for use by the validators.
+        Raises:
+            ValueError: if ``type`` is missing or is not a valid ToolServerType value.
+        """
+        raw_type = data.get("type")
+        if raw_type is None:
+            raise ValueError("type is required")
+        try:
+            return ToolServerType(raw_type)
+        except ValueError as err:
+            # Loop variable is not named `type`, so the builtin is not shadowed.
+            valid_types = ", ".join(member.value for member in ToolServerType)
+            raise ValueError(f"type must be one of: {valid_types}") from err
+    @model_validator(mode="before")
+    def validate_required_fields(cls, data: dict) -> dict:
         """Validate that each tool type has the required configuration."""
-        match self.type:
+        server_type = ExternalToolServer.type_from_data(data)
+        properties = data.get("properties", {})
+        match server_type:
             case ToolServerType.remote_mcp:
-                server_url = self.properties.get("server_url", None)
-                if not isinstance(server_url, str):
-                    raise ValueError(
-                        "server_url must be a string for external tools of type 'remote_mcp'"
-                    )
-                if not server_url:
+                server_url = properties.get("server_url", None)
+                if server_url is None:
                     raise ValueError(
-                        "server_url is required to connect to a remote MCP server"
+                        "Server URL is required to connect to a remote MCP server"
                     )
+                ExternalToolServer.check_server_url(server_url)
-                headers = self.properties.get("headers", None)
-                if headers is None:
-                    raise ValueError("headers must be set when type is 'remote_mcp'")
-                if not isinstance(headers, dict):
+            case ToolServerType.local_mcp:
+                command = properties.get("command", None)
+                if command is None:
+                    raise ValueError("command is required to start a local MCP server")
+                if not isinstance(command, str):
                     raise ValueError(
-                        "headers must be a dictionary for external tools of type 'remote_mcp'"
+                        "command must be a string to start a local MCP server"
                     )
+                # Reject empty/whitespace-only command strings
+                if command.strip() == "":
+                    raise ValueError("command must be a non-empty string")
-                secret_header_keys = self.properties.get("secret_header_keys", None)
-                # Secret header keys are optional, but if they are set, they must be a list of strings
-                if secret_header_keys is not None:
-                    if not isinstance(secret_header_keys, list):
+                args = properties.get("args", None)
+                if args is not None:
+                    if not isinstance(args, list):
                         raise ValueError(
-                            "secret_header_keys must be a list for external tools of type 'remote_mcp'"
+                            "arguments must be a list to start a local MCP server"
                         )
-                    if not all(isinstance(k, str) for k in secret_header_keys):
-                        raise ValueError("secret_header_keys must contain only strings")
-            case ToolServerType.local_mcp:
-                command = self.properties.get("command", None)
-                if not isinstance(command, str):
-                    raise ValueError(
-                        "command must be a string to start a local MCP server"
-                    )
-                if not command.strip():
-                    raise ValueError("command is required to start a local MCP server")
+            case ToolServerType.kiln_task:
+                tool_name_validator(properties.get("name", ""))
+                err_msg_prefix = "Kiln task server properties:"
+                validate_return_dict_prop(
+                    properties, "description", str, err_msg_prefix
+                )
+                description = properties.get("description", "")
+                if len(description) > 128:
+                    raise ValueError("description must be 128 characters or less")
+                validate_return_dict_prop(
+                    properties, "is_archived", bool, err_msg_prefix
+                )
+                validate_return_dict_prop(properties, "task_id", str, err_msg_prefix)
+                validate_return_dict_prop(
+                    properties, "run_config_id", str, err_msg_prefix
+                )
-                args = self.properties.get("args", None)
-                if not isinstance(args, list):
-                    raise ValueError(
-                        "arguments must be a list to start a local MCP server"
-                    )
+            case _:
+                # Type checking will catch missing cases
+                raise_exhaustive_enum_error(server_type)
+        return data
-                env_vars = self.properties.get("env_vars", {})
-                if not isinstance(env_vars, dict):
-                    raise ValueError(
-                        "environment variables must be a dictionary for external tools of type 'local_mcp'"
+    @model_validator(mode="before")
+    def validate_headers_and_env_vars(cls, data: dict) -> dict:
+        """
+        Validate secrets. These need to be validated before model initialization because secrets will be processed and stripped.
+        """
+        type = ExternalToolServer.type_from_data(data)
+        properties = data.get("properties", {})
+        if properties is None:
+            raise ValueError("properties is required")
+        match type:
+            case ToolServerType.remote_mcp:
+                # Validate headers
+                headers = properties.get("headers", None)
+                if headers is not None:
+                    ExternalToolServer.check_headers(headers)
+                # Secret header keys are optional, validate if they are set
+                secret_header_keys = properties.get("secret_header_keys", None)
+                if secret_header_keys is not None:
+                    ExternalToolServer.check_secret_keys(
+                        secret_header_keys, "secret_header_keys", "remote_mcp"
                     )
-                secret_env_var_keys = self.properties.get("secret_env_var_keys", None)
+            case ToolServerType.local_mcp:
+                # Validate secret environment variable keys
+                env_vars = properties.get("env_vars", {})
+                if env_vars is not None:
+                    ExternalToolServer.check_env_vars(env_vars)
                 # Secret env var keys are optional, but if they are set, they must be a list of strings
+                secret_env_var_keys = properties.get("secret_env_var_keys", None)
                 if secret_env_var_keys is not None:
-                    if not isinstance(secret_env_var_keys, list):
-                        raise ValueError(
-                            "secret_env_var_keys must be a list for external tools of type 'local_mcp'"
-                        )
-                    if not all(isinstance(k, str) for k in secret_env_var_keys):
-                        raise ValueError(
-                            "secret_env_var_keys must contain only strings"
-                        )
+                    ExternalToolServer.check_secret_keys(
+                        secret_env_var_keys, "secret_env_var_keys", "local_mcp"
+                    )
+            case ToolServerType.kiln_task:
+                pass
             case _:
-                # Type checking will catch missing cases
-                raise_exhaustive_enum_error(self.type)
-        return self
+                raise_exhaustive_enum_error(type)
+        return data
     def get_secret_keys(self) -> list[str]:
         """
@@ -176,6 +326,8 @@ class ExternalToolServer(KilnParentedModel):
                 return self.properties.get("secret_header_keys", [])
             case ToolServerType.local_mcp:
                 return self.properties.get("secret_env_var_keys", [])
+            case ToolServerType.kiln_task:
+                return []
             case _:
                 raise_exhaustive_enum_error(self.type)

kiln-ai 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

Potentially problematic release.

kiln-ai 0.20.1py3-none-any.whl → 0.22.0py3-none-any.whl