PyPI - kiln-ai - Versions diffs - 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl - Mend

kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kiln-ai might be problematic. Click here for more details.

Files changed (54)

kiln_ai/adapters/__init__.py +2 -0
kiln_ai/adapters/adapter_registry.py +22 -44
kiln_ai/adapters/chat/__init__.py +8 -0
kiln_ai/adapters/chat/chat_formatter.py +234 -0
kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
kiln_ai/adapters/eval/base_eval.py +8 -6
kiln_ai/adapters/eval/eval_runner.py +4 -1
kiln_ai/adapters/eval/g_eval.py +23 -5
kiln_ai/adapters/eval/test_base_eval.py +166 -15
kiln_ai/adapters/eval/test_eval_runner.py +3 -0
kiln_ai/adapters/eval/test_g_eval.py +1 -0
kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
kiln_ai/adapters/fine_tune/dataset_formatter.py +138 -272
kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +3 -3
kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
kiln_ai/adapters/ml_model_list.py +80 -43
kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
kiln_ai/adapters/model_adapters/litellm_adapter.py +79 -97
kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -60
kiln_ai/adapters/model_adapters/test_litellm_adapter.py +56 -21
kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
kiln_ai/adapters/prompt_builders.py +0 -16
kiln_ai/adapters/provider_tools.py +27 -9
kiln_ai/adapters/repair/test_repair_task.py +24 -3
kiln_ai/adapters/test_adapter_registry.py +88 -28
kiln_ai/adapters/test_ml_model_list.py +158 -0
kiln_ai/adapters/test_prompt_adaptors.py +17 -3
kiln_ai/adapters/test_prompt_builders.py +3 -16
kiln_ai/adapters/test_provider_tools.py +69 -20
kiln_ai/datamodel/__init__.py +0 -2
kiln_ai/datamodel/datamodel_enums.py +38 -13
kiln_ai/datamodel/finetune.py +12 -7
kiln_ai/datamodel/task.py +68 -7
kiln_ai/datamodel/test_basemodel.py +2 -1
kiln_ai/datamodel/test_dataset_split.py +0 -8
kiln_ai/datamodel/test_models.py +33 -10
kiln_ai/datamodel/test_task.py +168 -2
kiln_ai/utils/config.py +3 -2
kiln_ai/utils/dataset_import.py +1 -1
kiln_ai/utils/logging.py +165 -0
kiln_ai/utils/test_config.py +23 -0
kiln_ai/utils/test_dataset_import.py +30 -0
{kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
{kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/RECORD +54 -49
{kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
{kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/adapters/test_provider_tools.py CHANGED Viewed

@@ -18,7 +18,7 @@ from kiln_ai.adapters.provider_tools import (
     finetune_provider_model,
     get_model_and_provider,
     kiln_model_provider_from,
-    lite_llm_config,
+    lite_llm_config_for_openai_compatible,
     lite_llm_provider_model,
     parse_custom_model_id,
     provider_enabled,
@@ -27,10 +27,11 @@ from kiln_ai.adapters.provider_tools import (
 )
 from kiln_ai.datamodel import (
     Finetune,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
+from kiln_ai.datamodel.task import RunConfigProperties
 @pytest.fixture(autouse=True)
@@ -71,7 +72,7 @@ def mock_finetune():
         finetune.provider = ModelProviderName.openai
         finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = FinetuneDataStrategy.final_only
+        finetune.data_strategy = ChatStrategy.single_turn
         mock.return_value = finetune
         yield mock
@@ -83,7 +84,7 @@ def mock_finetune_final_and_intermediate():
         finetune.provider = ModelProviderName.openai
         finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = FinetuneDataStrategy.final_and_intermediate
+        finetune.data_strategy = ChatStrategy.two_message_cot
         mock.return_value = finetune
         yield mock
@@ -95,9 +96,7 @@ def mock_finetune_r1_compatible():
         finetune.provider = ModelProviderName.ollama
         finetune.fine_tune_model_id = "ft:deepseek-r1:671b:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = (
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible
-        )
+        finetune.data_strategy = ChatStrategy.single_turn_r1_thinking
         mock.return_value = finetune
         yield mock
@@ -357,6 +356,7 @@ async def test_kiln_model_provider_from_custom_model_valid(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "custom_model"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions
 @pytest.mark.asyncio
@@ -374,6 +374,7 @@ async def test_kiln_model_provider_from_custom_registry(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "gpt-4-turbo"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions
 @pytest.mark.asyncio
@@ -474,7 +475,7 @@ def test_finetune_provider_model_success_final_and_intermediate(
     assert provider.name == ModelProviderName.openai
     assert provider.model_id == "ft:gpt-3.5-turbo:custom:model-123"
     assert provider.structured_output_mode == StructuredOutputMode.json_schema
-    assert provider.reasoning_capable is True
+    assert provider.reasoning_capable is False
     assert provider.parser == None
@@ -580,7 +581,7 @@ def test_finetune_provider_model_structured_mode(
     finetune.provider = provider_name
     finetune.fine_tune_model_id = "fireworks-model-123"
     finetune.structured_output_mode = structured_output_mode
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune
     provider = finetune_provider_model("project-123::task-456::finetune-789")
@@ -596,10 +597,20 @@ def test_openai_compatible_provider_config(mock_shared_config):
     """Test successful creation of an OpenAI compatible provider"""
     model_id = "test_provider::gpt-4"
-    config = lite_llm_config(model_id)
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai_compatible,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )
-    assert config.provider_name == ModelProviderName.openai_compatible
-    assert config.model_name == "gpt-4"
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "test-key"}
     assert config.base_url == "https://api.test.com"
@@ -621,10 +632,20 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
     """Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
     model_id = "no_key_provider::gpt-4"
-    config = lite_llm_config(model_id)
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )
-    assert config.provider_name == ModelProviderName.openai_compatible
-    assert config.model_name == "gpt-4"
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "NA"}
     assert config.base_url == "https://api.nokey.com"
@@ -632,7 +653,14 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
 def test_lite_llm_config_invalid_id():
     """Test handling of invalid model ID format"""
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("invalid-id-format")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="invalid-id-format",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "Invalid openai compatible model ID: invalid-id-format"
     )
@@ -643,14 +671,28 @@ def test_lite_llm_config_no_providers(mock_shared_config):
     mock_shared_config.return_value.openai_compatible_providers = None
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("test_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert str(exc_info.value) == "OpenAI compatible provider test_provider not found"
 def test_lite_llm_config_provider_not_found(mock_shared_config):
     """Test handling of non-existent provider"""
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("unknown_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="unknown_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "OpenAI compatible provider unknown_provider not found"
     )
@@ -666,7 +708,14 @@ def test_lite_llm_config_no_base_url(mock_shared_config):
     ]
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("test_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value)
         == "OpenAI compatible provider test_provider has no base URL"
@@ -867,7 +916,7 @@ def test_finetune_provider_model_vertex_ai(mock_project, mock_task, mock_finetun
     finetune.provider = ModelProviderName.vertex
     finetune.fine_tune_model_id = "projects/123/locations/us-central1/endpoints/456"
     finetune.structured_output_mode = StructuredOutputMode.json_mode
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune
     provider = finetune_provider_model("project-123::task-456::finetune-789")

kiln_ai/datamodel/__init__.py CHANGED Viewed

@@ -13,7 +13,6 @@ from __future__ import annotations
 from kiln_ai.datamodel import dataset_split, eval, strict_mode
 from kiln_ai.datamodel.datamodel_enums import (
-    FinetuneDataStrategy,
     FineTuneStatusType,
     Priority,
     StructuredOutputMode,
@@ -71,7 +70,6 @@ __all__ = [
     "Prompt",
     "TaskOutputRating",
     "StructuredOutputMode",
-    "FinetuneDataStrategy",
     "PromptId",
     "PromptGenerators",
     "prompt_generator_values",

kiln_ai/datamodel/datamodel_enums.py CHANGED Viewed

@@ -24,13 +24,14 @@ class StructuredOutputMode(str, Enum):
     """
     Enumeration of supported structured output modes.
-    - default: let the adapter decide
     - json_schema: request json using API capabilities for json_schema
     - function_calling: request json using API capabilities for function calling
     - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
     - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
     - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
     - json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
+    - default: let the adapter decide (legacy, do not use for new use cases)
+    - unknown: used for cases where the structured output mode is not known (on old models where it wasn't saved). Should lookup best option at runtime.
     """
     default = "default"
@@ -41,6 +42,7 @@ class StructuredOutputMode(str, Enum):
     json_instructions = "json_instructions"
     json_instruction_and_object = "json_instruction_and_object"
     json_custom_instructions = "json_custom_instructions"
+    unknown = "unknown"
 class FineTuneStatusType(str, Enum):
@@ -55,20 +57,43 @@ class FineTuneStatusType(str, Enum):
     failed = "failed"
-class FinetuneDataStrategy(str, Enum):
-    """Strategy for what data to include when fine-tuning a model."""
+class ChatStrategy(str, Enum):
+    """Strategy for how a chat is structured."""
-    # Only train on the final response, ignoring any intermediate steps or chain of thought
-    final_only = "final_only"
+    # Single turn, immediately return the answer
+    single_turn = "final_only"
+    # Two turn, first turn is the thinking, second turn is the answer. Legacy format - used for old fine tunes but not new trains.
+    two_message_cot_legacy = "final_and_intermediate"
+    # Two turn, first turn is the thinking, second turn is the answer. New format - used for new trains.
+    two_message_cot = "two_message_cot"
+    # Single turn, with both the thinking and the answer in the same message, using R1-style thinking format in <think> tags
+    single_turn_r1_thinking = "final_and_intermediate_r1_compatible"
-    # Train on both the final response and any intermediate steps/chain of thought
-    final_and_intermediate = "final_and_intermediate"
-    # Train using R1-style thinking format, which includes the reasoning in <think> tags in the message
-    final_and_intermediate_r1_compatible = "final_and_intermediate_r1_compatible"
+THINKING_DATA_STRATEGIES: list[ChatStrategy] = [
+    ChatStrategy.two_message_cot_legacy,
+    ChatStrategy.single_turn_r1_thinking,
+    ChatStrategy.two_message_cot,
+]
-THINKING_DATA_STRATEGIES: list[FinetuneDataStrategy] = [
-    FinetuneDataStrategy.final_and_intermediate,
-    FinetuneDataStrategy.final_and_intermediate_r1_compatible,
-]
+class ModelProviderName(str, Enum):
+    """
+    Enumeration of supported AI model providers.
+    """
+    openai = "openai"
+    groq = "groq"
+    amazon_bedrock = "amazon_bedrock"
+    ollama = "ollama"
+    openrouter = "openrouter"
+    fireworks_ai = "fireworks_ai"
+    kiln_fine_tune = "kiln_fine_tune"
+    kiln_custom_registry = "kiln_custom_registry"
+    openai_compatible = "openai_compatible"
+    anthropic = "anthropic"
+    gemini_api = "gemini_api"
+    azure_openai = "azure_openai"
+    huggingface = "huggingface"
+    vertex = "vertex"
+    together_ai = "together_ai"

kiln_ai/datamodel/finetune.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing_extensions import Self
 from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
 from kiln_ai.datamodel.datamodel_enums import (
     THINKING_DATA_STRATEGIES,
-    FinetuneDataStrategy,
+    ChatStrategy,
     FineTuneStatusType,
     StructuredOutputMode,
 )
@@ -14,6 +14,11 @@ from kiln_ai.datamodel.datamodel_enums import (
 if TYPE_CHECKING:
     from kiln_ai.datamodel.task import Task
+DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS = [
+    ChatStrategy.two_message_cot_legacy,
+    ChatStrategy.two_message_cot,
+]
 class Finetune(KilnParentedModel):
     """
@@ -76,8 +81,8 @@ class Finetune(KilnParentedModel):
         default={},
         description="Properties of the fine-tune. Different providers may use different properties.",
     )
-    data_strategy: FinetuneDataStrategy = Field(
-        default=FinetuneDataStrategy.final_only,
+    data_strategy: ChatStrategy = Field(
+        default=ChatStrategy.single_turn,
         description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
     )
@@ -91,16 +96,16 @@ class Finetune(KilnParentedModel):
     def validate_thinking_instructions(self) -> Self:
         if (
             self.thinking_instructions is not None
-            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
+            and self.data_strategy not in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
         ):
             raise ValueError(
-                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
+                f"Thinking instructions can only be used when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
             )
         if (
             self.thinking_instructions is None
-            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
+            and self.data_strategy in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
         ):
             raise ValueError(
-                "Thinking instructions are required when data_strategy is final_and_intermediate"
+                f"Thinking instructions are required when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
             )
         return self

kiln_ai/datamodel/task.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Dict, List, Union
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ValidationInfo, model_validator
+from typing_extensions import Self
 from kiln_ai.datamodel import Finetune
 from kiln_ai.datamodel.basemodel import (
@@ -11,7 +12,12 @@ from kiln_ai.datamodel.basemodel import (
     KilnParentedModel,
     KilnParentModel,
 )
-from kiln_ai.datamodel.datamodel_enums import Priority, TaskOutputRatingType
+from kiln_ai.datamodel.datamodel_enums import (
+    ModelProviderName,
+    Priority,
+    StructuredOutputMode,
+    TaskOutputRatingType,
+)
 from kiln_ai.datamodel.dataset_split import DatasetSplit
 from kiln_ai.datamodel.eval import Eval
 from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
@@ -47,12 +53,33 @@ class RunConfigProperties(BaseModel):
     """
     model_name: str = Field(description="The model to use for this run config.")
-    model_provider_name: str = Field(
+    model_provider_name: ModelProviderName = Field(
         description="The provider to use for this run config."
     )
     prompt_id: PromptId = Field(
         description="The prompt to use for this run config. Defaults to building a simple prompt from the task if not provided.",
     )
+    top_p: float = Field(
+        default=1.0,
+        description="The top-p value to use for this run config. Defaults to 1.0.",
+    )
+    temperature: float = Field(
+        default=1.0,
+        description="The temperature to use for this run config. Defaults to 1.0.",
+    )
+    structured_output_mode: StructuredOutputMode = Field(
+        description="The structured output mode to use for this run config.",
+    )
+    @model_validator(mode="after")
+    def validate_required_fields(self) -> Self:
+        if not (0 <= self.top_p <= 1):
+            raise ValueError("top_p must be between 0 and 1")
+        elif self.temperature < 0 or self.temperature > 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return self
 class RunConfig(RunConfigProperties):
@@ -101,12 +128,46 @@ class TaskRunConfig(KilnParentedModel):
         parent_task = self.parent_task()
         if parent_task is None:
             raise ValueError("Run config must be parented to a task")
-        return RunConfig(
+        return run_config_from_run_config_properties(
             task=parent_task,
-            model_name=self.run_config_properties.model_name,
-            model_provider_name=self.run_config_properties.model_provider_name,
-            prompt_id=self.run_config_properties.prompt_id,
+            run_config_properties=self.run_config_properties,
+        )
+    # Previously we didn't store structured_output_mode in the run_config_properties. Updgrade old models when loading from file.
+    @model_validator(mode="before")
+    def upgrade_old_entries(cls, data: dict, info: ValidationInfo) -> dict:
+        if not info.context or not info.context.get("loading_from_file", False):
+            # Not loading from file, so no need to upgrade
+            return data
+        if not isinstance(data, dict):
+            return data
+        structured_output_mode = data.get("run_config_properties", {}).get(
+            "structured_output_mode", None
         )
+        if structured_output_mode is None and "run_config_properties" in data:
+            # Default to unknown. Adapter will have to guess at runtime.
+            data["run_config_properties"]["structured_output_mode"] = (
+                StructuredOutputMode.unknown
+            )
+        return data
+def run_config_from_run_config_properties(
+    task: "Task",
+    run_config_properties: RunConfigProperties,
+) -> RunConfig:
+    return RunConfig(
+        task=task,
+        model_name=run_config_properties.model_name,
+        model_provider_name=run_config_properties.model_provider_name,
+        prompt_id=run_config_properties.prompt_id,
+        top_p=run_config_properties.top_p,
+        temperature=run_config_properties.temperature,
+        structured_output_mode=run_config_properties.structured_output_mode,
+    )
 class Task(

kiln_ai/datamodel/test_basemodel.py CHANGED Viewed

@@ -500,8 +500,9 @@ def adapter(base_task):
         run_config=RunConfig(
             task=base_task,
             model_name="test_model",
-            model_provider_name="test_provider",
+            model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )

kiln_ai/datamodel/test_dataset_split.py CHANGED Viewed

@@ -72,14 +72,6 @@ def sample_task_runs(sample_task):
     return task_runs
-@pytest.fixture
-def standard_splitstandard_splitss():
-    return [
-        DatasetSplitDefinition(name="train", percentage=0.8),
-        DatasetSplitDefinition(name="test", percentage=0.2),
-    ]
 @pytest.fixture
 def task_run():
     return TaskRun(

kiln_ai/datamodel/test_models.py CHANGED Viewed

@@ -9,13 +9,13 @@ from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
     Finetune,
-    FinetuneDataStrategy,
     Project,
     Prompt,
     Task,
     TaskOutput,
     TaskRun,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.test_json_schema import json_joke_schema
@@ -536,44 +536,58 @@ def test_prompt_parent_task():
         # Test 1: Valid case - no thinking instructions with final_only
         (
             None,
-            FinetuneDataStrategy.final_only,
+            ChatStrategy.single_turn,
             False,
             None,
         ),
         # Test 2: Valid case - thinking instructions with final_and_intermediate
         (
             "Think step by step",
-            FinetuneDataStrategy.final_and_intermediate,
+            ChatStrategy.two_message_cot_legacy,
             False,
             None,
         ),
         # Test 3: Valid case - no thinking instructions with final_and_intermediate_r1_compatible
         (
             None,
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible,
+            ChatStrategy.single_turn_r1_thinking,
             False,
             None,
         ),
         # Test 4: Invalid case - thinking instructions with final_only
         (
             "Think step by step",
-            FinetuneDataStrategy.final_only,
+            ChatStrategy.single_turn,
             True,
-            "Thinking instructions can only be used when data_strategy is final_and_intermediate",
+            "Thinking instructions can only be used when data_strategy is",
         ),
         # Test 5: Invalid case - no thinking instructions with final_and_intermediate
         (
             None,
-            FinetuneDataStrategy.final_and_intermediate,
+            ChatStrategy.two_message_cot_legacy,
             True,
-            "Thinking instructions are required when data_strategy is final_and_intermediate",
+            "Thinking instructions are required when data_strategy is",
         ),
         # Test 6: Invalid case - thinking instructions with final_and_intermediate_r1_compatible
         (
             "Think step by step",
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible,
+            ChatStrategy.single_turn_r1_thinking,
             True,
-            "Thinking instructions can only be used when data_strategy is final_and_intermediate",
+            "Thinking instructions can only be used when data_strategy is",
+        ),
+        # Test 7: new COT format
+        (
+            "Think step by step",
+            ChatStrategy.two_message_cot,
+            False,
+            None,
+        ),
+        # Test 8: new COT format
+        (
+            None,
+            ChatStrategy.two_message_cot,
+            True,
+            "Thinking instructions are required when data_strategy is",
         ),
     ],
 )
@@ -665,3 +679,12 @@ def test_task_run_thinking_training_data(intermediate_outputs, expected):
         intermediate_outputs=intermediate_outputs,
     )
     assert task_run.thinking_training_data() == expected
+def test_chat_strategy_enum():
+    # This has to align to the old FinetuneDataStrategy enum
+    assert ChatStrategy.single_turn == "final_only"
+    assert ChatStrategy.two_message_cot_legacy == "final_and_intermediate"
+    assert (
+        ChatStrategy.single_turn_r1_thinking == "final_and_intermediate_r1_compatible"
+    )

kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

Potentially problematic release.

kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl