kiln-ai 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/eval/eval_runner.py +5 -64
- kiln_ai/adapters/eval/g_eval.py +3 -3
- kiln_ai/adapters/fine_tune/dataset_formatter.py +124 -34
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +264 -7
- kiln_ai/adapters/ml_model_list.py +478 -4
- kiln_ai/adapters/model_adapters/base_adapter.py +26 -8
- kiln_ai/adapters/model_adapters/litellm_adapter.py +41 -7
- kiln_ai/adapters/model_adapters/test_base_adapter.py +74 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +65 -1
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +3 -2
- kiln_ai/adapters/model_adapters/test_structured_output.py +4 -6
- kiln_ai/adapters/parsers/base_parser.py +0 -3
- kiln_ai/adapters/parsers/parser_registry.py +5 -3
- kiln_ai/adapters/parsers/r1_parser.py +17 -2
- kiln_ai/adapters/parsers/request_formatters.py +40 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +2 -2
- kiln_ai/adapters/parsers/test_r1_parser.py +44 -1
- kiln_ai/adapters/parsers/test_request_formatters.py +76 -0
- kiln_ai/adapters/prompt_builders.py +14 -1
- kiln_ai/adapters/provider_tools.py +18 -1
- kiln_ai/adapters/repair/test_repair_task.py +3 -2
- kiln_ai/adapters/test_prompt_builders.py +24 -3
- kiln_ai/adapters/test_provider_tools.py +70 -1
- kiln_ai/datamodel/__init__.py +2 -0
- kiln_ai/datamodel/datamodel_enums.py +14 -0
- kiln_ai/datamodel/dataset_filters.py +69 -1
- kiln_ai/datamodel/dataset_split.py +4 -0
- kiln_ai/datamodel/eval.py +8 -0
- kiln_ai/datamodel/finetune.py +1 -0
- kiln_ai/datamodel/prompt_id.py +1 -0
- kiln_ai/datamodel/task_output.py +1 -1
- kiln_ai/datamodel/task_run.py +39 -7
- kiln_ai/datamodel/test_basemodel.py +3 -7
- kiln_ai/datamodel/test_dataset_filters.py +82 -0
- kiln_ai/datamodel/test_dataset_split.py +2 -0
- kiln_ai/datamodel/test_example_models.py +54 -0
- kiln_ai/datamodel/test_models.py +50 -2
- kiln_ai/utils/async_job_runner.py +106 -0
- kiln_ai/utils/dataset_import.py +80 -18
- kiln_ai/utils/test_async_job_runner.py +199 -0
- kiln_ai/utils/test_dataset_import.py +242 -10
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/METADATA +1 -1
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/RECORD +45 -41
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/model_adapters/litellm_adapter.py:

```diff
@@ -1,7 +1,9 @@
+import logging
 from typing import Any, Dict
 
 import litellm
 from litellm.types.utils import ChoiceLogprobs, Choices, ModelResponse
+from litellm.types.utils import Usage as LiteLlmUsage
 
 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.ml_model_list import (
@@ -14,14 +16,15 @@ from kiln_ai.adapters.model_adapters.base_adapter import (
     AdapterConfig,
     BaseAdapter,
     RunOutput,
+    Usage,
 )
-from kiln_ai.adapters.model_adapters.litellm_config import (
-    LiteLlmConfig,
-)
+from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
 from kiln_ai.datamodel import PromptGenerators, PromptId
 from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 
+logger = logging.getLogger(__name__)
+
 
 class LiteLlmAdapter(BaseAdapter):
     def __init__(
@@ -49,7 +52,7 @@ class LiteLlmAdapter(BaseAdapter):
             config=base_adapter_config,
         )
 
-    async def _run(self, input: Dict | str) -> RunOutput:
+    async def _run(self, input: Dict | str) -> tuple[RunOutput, Usage | None]:
         provider = self.model_provider()
         if not provider.model_id:
             raise ValueError("Model ID is required for OpenAI compatible models")
@@ -139,8 +142,12 @@ class LiteLlmAdapter(BaseAdapter):
             raise RuntimeError("Logprobs were required, but no logprobs were returned.")
 
         # Save reasoning if it exists and was parsed by LiteLLM (or openrouter, or anyone upstream)
-        if
-
+        if (
+            hasattr(message, "reasoning_content")
+            and message.reasoning_content
+            and len(message.reasoning_content.strip()) > 0
+        ):
+            intermediate_outputs["reasoning"] = message.reasoning_content.strip()
 
         # the string content of the response
         response_content = message.content
@@ -169,7 +176,7 @@ class LiteLlmAdapter(BaseAdapter):
             output=response_content,
             intermediate_outputs=intermediate_outputs,
             output_logprobs=logprobs,
-        )
+        ), self.usage_from_response(response)
 
     def adapter_name(self) -> str:
         return "kiln_openai_compatible_adapter"
@@ -394,3 +401,30 @@ class LiteLlmAdapter(BaseAdapter):
             completion_kwargs["top_logprobs"] = top_logprobs
 
         return completion_kwargs
+
+    def usage_from_response(self, response: ModelResponse) -> Usage | None:
+        litellm_usage = response.get("usage", None)
+        cost = response._hidden_params.get("response_cost", None)
+        if not litellm_usage and not cost:
+            return None
+
+        usage = Usage()
+
+        if litellm_usage and isinstance(litellm_usage, LiteLlmUsage):
+            usage.input_tokens = litellm_usage.get("prompt_tokens", None)
+            usage.output_tokens = litellm_usage.get("completion_tokens", None)
+            usage.total_tokens = litellm_usage.get("total_tokens", None)
+        else:
+            logger.warning(
+                f"Unexpected usage format from litellm: {litellm_usage}. Expected Usage object, got {type(litellm_usage)}"
+            )
+
+        if isinstance(cost, float):
+            usage.cost = cost
+        elif cost is not None:
+            # None is allowed, but no other types are expected
+            logger.warning(
+                f"Unexpected cost format from litellm: {cost}. Expected float, got {type(cost)}"
+            )
+
+        return usage
```
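The heart of this change is the new `usage_from_response` helper, which maps LiteLLM's token counts and `response_cost` onto kiln's `Usage` model and ignores unexpected types instead of raising. Below is a minimal sketch of that field mapping, assuming only the `Usage` keyword arguments that the test diff later on this page constructs; it is illustrative, not the adapter's own code path.

```python
# Illustrative sketch of the mapping usage_from_response performs.
# Assumes only what the diffs show: kiln's Usage accepts input_tokens,
# output_tokens, total_tokens and cost as keyword arguments.
from kiln_ai.datamodel import Usage

litellm_counts = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
usage = Usage(
    input_tokens=litellm_counts["prompt_tokens"],       # LiteLLM "prompt" -> kiln "input"
    output_tokens=litellm_counts["completion_tokens"],  # LiteLLM "completion" -> kiln "output"
    total_tokens=litellm_counts["total_tokens"],
    cost=0.5,  # LiteLLM's response_cost; the adapter only records it when it is a float
)
print(usage)
```

With `_run` now returning a `tuple[RunOutput, Usage | None]`, this usage record travels back alongside the model output rather than being recomputed later.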
kiln_ai/adapters/model_adapters/test_base_adapter.py:

```diff
@@ -3,7 +3,8 @@ from unittest.mock import MagicMock, patch
 import pytest
 
 from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
-from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
+from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
 from kiln_ai.datamodel import Task
 from kiln_ai.datamodel.task import RunConfig
 
@@ -12,7 +13,7 @@ class MockAdapter(BaseAdapter):
     """Concrete implementation of BaseAdapter for testing"""
 
     async def _run(self, input):
-        return None
+        return None, None
 
     def adapter_name(self) -> str:
         return "test"
@@ -42,6 +43,22 @@ def adapter(base_task):
     )
 
 
+@pytest.fixture
+def mock_formatter():
+    formatter = MagicMock()
+    formatter.format_input.return_value = {"formatted": "input"}
+    return formatter
+
+
+@pytest.fixture
+def mock_parser():
+    parser = MagicMock()
+    parser.parse_output.return_value = RunOutput(
+        output="test output", intermediate_outputs={}
+    )
+    return parser
+
+
 async def test_model_provider_uses_cache(adapter, mock_provider):
     """Test that cached provider is returned if it exists"""
     # Set up cached provider
@@ -197,3 +214,58 @@ async def test_run_strategy(
     # Test
     result = adapter.run_strategy()
     assert result == expected
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "formatter_id,expected_input,expected_calls",
+    [
+        (None, {"original": "input"}, 0),  # No formatter
+        ("test_formatter", {"formatted": "input"}, 1),  # With formatter
+    ],
+)
+async def test_input_formatting(
+    adapter, mock_formatter, mock_parser, formatter_id, expected_input, expected_calls
+):
+    """Test that input formatting is handled correctly based on formatter configuration"""
+    # Mock the model provider to return our formatter ID and parser
+    provider = MagicMock()
+    provider.formatter = formatter_id
+    provider.parser = "test_parser"
+    provider.reasoning_capable = False
+    adapter.model_provider = MagicMock(return_value=provider)
+
+    # Mock the formatter factory and parser factory
+    with (
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.request_formatter_from_id"
+        ) as mock_factory,
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id"
+        ) as mock_parser_factory,
+    ):
+        mock_factory.return_value = mock_formatter
+        mock_parser_factory.return_value = mock_parser
+
+        # Mock the _run method to capture the input
+        captured_input = None
+
+        async def mock_run(input):
+            nonlocal captured_input
+            captured_input = input
+            return RunOutput(output="test output", intermediate_outputs={}), None
+
+        adapter._run = mock_run
+
+        # Run the adapter
+        original_input = {"original": "input"}
+        await adapter.invoke_returning_run_output(original_input)
+
+        # Verify formatter was called correctly
+        assert captured_input == expected_input
+        assert mock_factory.call_count == (1 if formatter_id else 0)
+        assert mock_formatter.format_input.call_count == expected_calls
+
+        # Verify original input was preserved in the run
+        if formatter_id:
+            mock_formatter.format_input.assert_called_once_with(original_input)
```
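These tests exercise the new per-provider formatter and parser hooks in `BaseAdapter`. For orientation, here is a hedged sketch of the same format-then-parse flow driven directly, using the `Qwen3StyleNoThinkFormatter` and `R1ThinkingParser` added elsewhere in this release; the import paths are taken from the diffs on this page, and the snippet is not an official usage guide.

```python
# Sketch of the format-then-parse pipeline the base adapter wires up per provider.
from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
from kiln_ai.adapters.parsers.request_formatters import Qwen3StyleNoThinkFormatter
from kiln_ai.adapters.run_output import RunOutput

# 1. A request formatter rewrites the input before the model call.
prompt = Qwen3StyleNoThinkFormatter().format_input({"question": "2 + 2?"})
print(prompt)  # JSON-rendered input followed by "\n\n/no_think"

# 2. A parser rewrites the raw model output afterwards.
raw = RunOutput(output="<think>simple arithmetic</think>4", intermediate_outputs=None)
parsed = R1ThinkingParser().parse_output(raw)
print(parsed.output)                # "4"
print(parsed.intermediate_outputs)  # {"reasoning": "simple arithmetic"}
```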
kiln_ai/adapters/model_adapters/test_litellm_adapter.py:

```diff
@@ -1,6 +1,7 @@
 import json
 from unittest.mock import Mock, patch
 
+import litellm
 import pytest
 
 from kiln_ai.adapters.ml_model_list import ModelProviderName, StructuredOutputMode
@@ -9,7 +10,7 @@ from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
 from kiln_ai.adapters.model_adapters.litellm_config import (
     LiteLlmConfig,
 )
-from kiln_ai.datamodel import Project, Task
+from kiln_ai.datamodel import Project, Task, Usage
 
 
 @pytest.fixture
@@ -405,3 +406,66 @@ async def test_build_completion_kwargs(
     # Verify extra body is included
     for key, value in extra_body.items():
         assert kwargs[key] == value
+
+
+@pytest.mark.parametrize(
+    "litellm_usage,cost,expected_usage",
+    [
+        # No usage data
+        (None, None, None),
+        # Only cost
+        (None, 0.5, Usage(cost=0.5)),
+        # Only token counts
+        (
+            litellm.types.utils.Usage(
+                prompt_tokens=10,
+                completion_tokens=20,
+                total_tokens=30,
+            ),
+            None,
+            Usage(input_tokens=10, output_tokens=20, total_tokens=30),
+        ),
+        # Both cost and token counts
+        (
+            litellm.types.utils.Usage(
+                prompt_tokens=10,
+                completion_tokens=20,
+                total_tokens=30,
+            ),
+            0.5,
+            Usage(input_tokens=10, output_tokens=20, total_tokens=30, cost=0.5),
+        ),
+        # Invalid usage type (should be ignored)
+        ({"prompt_tokens": 10}, None, None),
+        # Invalid cost type (should be ignored)
+        (None, "0.5", None),
+    ],
+)
+def test_usage_from_response(config, mock_task, litellm_usage, cost, expected_usage):
+    """Test usage_from_response with various combinations of usage data and cost"""
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+
+    # Create a mock response
+    response = Mock(spec=litellm.types.utils.ModelResponse)
+    response.get.return_value = litellm_usage
+    response._hidden_params = {"response_cost": cost}
+
+    # Call the method
+    result = adapter.usage_from_response(response)
+
+    # Verify the result
+    if expected_usage is None:
+        if result is not None:
+            assert result.input_tokens is None
+            assert result.output_tokens is None
+            assert result.total_tokens is None
+            assert result.cost is None
+    else:
+        assert result is not None
+        assert result.input_tokens == expected_usage.input_tokens
+        assert result.output_tokens == expected_usage.output_tokens
+        assert result.total_tokens == expected_usage.total_tokens
+        assert result.cost == expected_usage.cost
+
+    # Verify the response was queried correctly
+    response.get.assert_called_once_with("usage", None)
```
kiln_ai/adapters/model_adapters/test_saving_adapter_results.py:

```diff
@@ -11,14 +11,15 @@ from kiln_ai.datamodel import (
     DataSourceType,
     Project,
     Task,
+    Usage,
 )
 from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.config import Config
 
 
 class MockAdapter(BaseAdapter):
-    async def _run(self, input: dict | str) -> RunOutput:
-        return RunOutput(output="Test output", intermediate_outputs=None)
+    async def _run(self, input: dict | str) -> tuple[RunOutput, Usage | None]:
+        return RunOutput(output="Test output", intermediate_outputs=None), None
 
     def adapter_name(self) -> str:
         return "mock_adapter"
```
kiln_ai/adapters/model_adapters/test_structured_output.py:

```diff
@@ -12,6 +12,7 @@ from kiln_ai.adapters.ml_model_list import (
 from kiln_ai.adapters.model_adapters.base_adapter import (
     BaseAdapter,
     RunOutput,
+    Usage,
 )
 from kiln_ai.adapters.ollama_tools import ollama_online
 from kiln_ai.adapters.test_prompt_adaptors import get_all_models_and_providers
@@ -54,8 +55,8 @@ class MockAdapter(BaseAdapter):
         )
         self.response = response
 
-    async def _run(self, input: str) -> RunOutput:
-        return RunOutput(output=self.response, intermediate_outputs=None)
+    async def _run(self, input: str) -> tuple[RunOutput, Usage | None]:
+        return RunOutput(output=self.response, intermediate_outputs=None), None
 
     def adapter_name(self) -> str:
         return "mock_adapter"
@@ -223,10 +224,7 @@ async def run_structured_input_task(
     with pytest.raises(ValueError):
         # not structured input in dictionary
         await a.invoke("a=1, b=2, c=3")
-    with pytest.raises(
-        ValueError,
-        match="This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema.",
-    ):
+    with pytest.raises(ValueError, match="This task requires a specific input"):
         # invalid structured input
         await a.invoke({"a": 1, "b": 2, "d": 3})
 
```
kiln_ai/adapters/parsers/base_parser.py:

```diff
@@ -2,9 +2,6 @@ from kiln_ai.adapters.run_output import RunOutput
 
 
 class BaseParser:
-    def __init__(self, structured_output: bool = False):
-        self.structured_output = structured_output
-
     def parse_output(self, original_output: RunOutput) -> RunOutput:
         """
         Method for parsing the output of a model. Typically overridden by subclasses.
```
kiln_ai/adapters/parsers/parser_registry.py:

```diff
@@ -6,14 +6,16 @@ from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 
 
-def model_parser_from_id(parser_id: ModelParserID | None) ->
+def model_parser_from_id(parser_id: ModelParserID | None) -> BaseParser:
     """
     Get a model parser from its ID.
     """
     match parser_id:
         case None:
-            return BaseParser
+            return BaseParser()
         case ModelParserID.r1_thinking:
-            return R1ThinkingParser
+            return R1ThinkingParser()
+        case ModelParserID.optional_r1_thinking:
+            return R1ThinkingParser(allow_missing_thinking=True)
         case _:
             raise_exhaustive_enum_error(parser_id)
```
kiln_ai/adapters/parsers/r1_parser.py:

```diff
@@ -7,6 +7,9 @@ class R1ThinkingParser(BaseParser):
     START_TAG = "<think>"
     END_TAG = "</think>"
 
+    def __init__(self, allow_missing_thinking: bool = False):
+        self.allow_missing_thinking = allow_missing_thinking
+
     def parse_output(self, original_output: RunOutput) -> RunOutput:
         """
         Parse the <think> </think> tags from the response into the intermediate and final outputs.
@@ -27,6 +30,14 @@ class R1ThinkingParser(BaseParser):
             original_output.intermediate_outputs is not None
             and "reasoning" in original_output.intermediate_outputs
         ):
+            # sometimes the output and reasoning are wrapped in newlines
+            if isinstance(original_output.output, str):
+                original_output.output = original_output.output.strip()
+
+            original_output.intermediate_outputs["reasoning"] = (
+                original_output.intermediate_outputs["reasoning"].strip()
+            )
+
             return original_output
 
         # This parser only works for strings
@@ -39,7 +50,10 @@ class R1ThinkingParser(BaseParser):
         # Find the thinking tags
         think_end = cleaned_response.find(self.END_TAG)
         if think_end == -1:
-            raise ValueError("Missing </think> tag")
+            if self.allow_missing_thinking:
+                return original_output
+            else:
+                raise ValueError("Missing </think> tag")
 
         think_tag_start = cleaned_response.find(self.START_TAG)
         if think_tag_start == -1:
@@ -66,7 +80,8 @@ class R1ThinkingParser(BaseParser):
 
         # Add thinking content to intermediate outputs if it exists
         intermediate_outputs = original_output.intermediate_outputs or {}
-        intermediate_outputs["reasoning"] = thinking_content
+        if thinking_content is not None and len(thinking_content) > 0:
+            intermediate_outputs["reasoning"] = thinking_content
 
         return RunOutput(
             output=result,
```
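A short illustrative sketch of what the new `allow_missing_thinking` flag changes, using only classes that appear in this diff; treat it as a sketch rather than canonical usage.

```python
# With allow_missing_thinking=True, output without a </think> tag passes through
# unchanged; the default (strict) parser still raises for the same input.
from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
from kiln_ai.adapters.run_output import RunOutput

untagged = RunOutput(output="final answer only", intermediate_outputs=None)

lenient = R1ThinkingParser(allow_missing_thinking=True)
print(lenient.parse_output(untagged).output)  # "final answer only"

try:
    R1ThinkingParser().parse_output(untagged)
except ValueError as err:
    print(err)  # Missing </think> tag
```

This lenient mode is what the registry now returns for the new `optional_r1_thinking` parser ID.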
kiln_ai/adapters/parsers/request_formatters.py (new file):

```diff
@@ -0,0 +1,40 @@
+import json
+from typing import Dict, Protocol
+
+from kiln_ai.adapters.ml_model_list import ModelFormatterID
+from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+
+
+class RequestFormatter(Protocol):
+    def format_input(self, original_input: Dict | str) -> Dict | str:
+        """
+        Method for formatting the input to a model.
+        """
+        ...
+
+
+class Qwen3StyleNoThinkFormatter:
+    def format_input(self, original_input: Dict | str) -> Dict | str:
+        """
+        Format the input to a model for Qwen3 /no_think instruction
+        """
+        formatted_input = (
+            original_input
+            if isinstance(original_input, str)
+            else json.dumps(original_input, indent=2)
+        )
+
+        return formatted_input + "\n\n/no_think"
+
+
+def request_formatter_from_id(
+    formatter_id: ModelFormatterID,
+) -> RequestFormatter:
+    """
+    Get a model parser from its ID.
+    """
+    match formatter_id:
+        case ModelFormatterID.qwen3_style_no_think:
+            return Qwen3StyleNoThinkFormatter()
+        case _:
+            raise_exhaustive_enum_error(formatter_id)
```
kiln_ai/adapters/parsers/test_parser_registry.py:

```diff
@@ -28,5 +28,5 @@ def test_model_parser_from_id_invalid():
 )
 def test_model_parser_from_id_parametrized(parser_id, expected_class):
     """Test all valid parser IDs using parametrize."""
-
-    assert
+    parser = model_parser_from_id(parser_id)
+    assert isinstance(parser, expected_class)
```
kiln_ai/adapters/parsers/test_r1_parser.py:

```diff
@@ -46,6 +46,21 @@ def test_response_with_whitespace(parser):
     assert parsed.output.strip() == "This is the result"
 
 
+def test_empty_thinking_content(parser):
+    response = RunOutput(
+        output="""
+<think>
+
+</think>
+This is the result
+""",
+        intermediate_outputs=None,
+    )
+    parsed = parser.parse_output(response)
+    assert "reasoning" not in parsed.intermediate_outputs
+    assert parsed.output.strip() == "This is the result"
+
+
 def test_missing_start_tag(parser):
     parsed = parser.parse_output(
         RunOutput(output="Some content</think>result", intermediate_outputs=None)
@@ -86,7 +101,7 @@ def test_empty_thinking_content(parser):
         output="<think></think>This is the result", intermediate_outputs=None
     )
     parsed = parser.parse_output(response)
-    assert
+    assert "reasoning" not in parsed.intermediate_outputs
     assert parsed.output == "This is the result"
 
 
@@ -154,3 +169,31 @@ def test_intermediate_outputs(parser):
         )
     )
     assert out.intermediate_outputs["reasoning"] == "Some content"
+
+
+def test_strip_newlines(parser):
+    # certain providers via LiteLLM for example, add newlines to the output
+    # and to the reasoning. This tests that we strip those newlines.
+    response = RunOutput(
+        output="\n\nSome content",
+        intermediate_outputs={
+            "reasoning": "\n\nSome thinking\n\n",
+        },
+    )
+    parsed = parser.parse_output(response)
+    assert parsed.output == "Some content"
+    assert parsed.intermediate_outputs["reasoning"] == "Some thinking"
+
+
+def test_strip_newlines_with_structured_output(parser):
+    # certain providers via LiteLLM for example, add newlines to the output
+    # and to the reasoning. This tests that we strip those newlines.
+    response = RunOutput(
+        output={"some_key": "Some content"},
+        intermediate_outputs={
+            "reasoning": "\n\nSome thinking\n\n",
+        },
+    )
+    parsed = parser.parse_output(response)
+    assert parsed.output == {"some_key": "Some content"}
+    assert parsed.intermediate_outputs["reasoning"] == "Some thinking"
```
kiln_ai/adapters/parsers/test_request_formatters.py (new file):

```diff
@@ -0,0 +1,76 @@
+import pytest
+
+from kiln_ai.adapters.ml_model_list import ModelFormatterID
+from kiln_ai.adapters.parsers.request_formatters import (
+    Qwen3StyleNoThinkFormatter,
+    request_formatter_from_id,
+)
+
+
+@pytest.fixture
+def qwen_formatter():
+    return Qwen3StyleNoThinkFormatter()
+
+
+def test_qwen_formatter_string_input(qwen_formatter):
+    input_text = "Hello world"
+    formatted = qwen_formatter.format_input(input_text)
+    assert formatted == "Hello world\n\n/no_think"
+
+
+def test_qwen_formatter_dict_input(qwen_formatter):
+    input_dict = {"key": "value", "nested": {"inner": "data"}}
+    formatted = qwen_formatter.format_input(input_dict)
+    expected = """{
+  "key": "value",
+  "nested": {
+    "inner": "data"
+  }
+}
+
+/no_think"""
+    assert formatted == expected
+
+
+def test_qwen_formatter_empty_input(qwen_formatter):
+    # Test empty string
+    assert qwen_formatter.format_input("") == "\n\n/no_think"
+
+    # Test empty dict
+    assert qwen_formatter.format_input({}) == "{}\n\n/no_think"
+
+
+def test_qwen_formatter_special_characters(qwen_formatter):
+    input_text = "Special chars: !@#$%^&*()_+思"
+    formatted = qwen_formatter.format_input(input_text)
+    assert formatted == "Special chars: !@#$%^&*()_+思\n\n/no_think"
+
+
+def test_qwen_formatter_multiline_string(qwen_formatter):
+    input_text = """Line 1
+Line 2
+Line 3"""
+    formatted = qwen_formatter.format_input(input_text)
+    assert (
+        formatted
+        == """Line 1
+Line 2
+Line 3
+
+/no_think"""
+    )
+
+
+def test_request_formatter_factory():
+    # Test valid formatter ID
+    formatter = request_formatter_from_id(ModelFormatterID.qwen3_style_no_think)
+    assert isinstance(formatter, Qwen3StyleNoThinkFormatter)
+
+    # Test that the formatter works
+    assert formatter.format_input("test") == "test\n\n/no_think"
+
+
+def test_request_formatter_factory_invalid_id():
+    # Test with an invalid enum value by using a string that doesn't exist in the enum
+    with pytest.raises(ValueError, match="Unhandled enum value"):
+        request_formatter_from_id("invalid_formatter_id")  # type: ignore
```
kiln_ai/adapters/prompt_builders.py:

```diff
@@ -101,7 +101,6 @@ class SimplePromptBuilder(BasePromptBuilder):
         """
         base_prompt = self.task.instruction
 
-        # TODO: this is just a quick version. Formatting and best practices TBD
         if len(self.task.requirements) > 0:
             base_prompt += (
                 "\n\nYour response should respect the following requirements:\n"
@@ -113,6 +112,18 @@ class SimplePromptBuilder(BasePromptBuilder):
         return base_prompt
 
 
+class ShortPromptBuilder(BasePromptBuilder):
+    """A prompt builder that includes a the base prompt but excludes the requirements."""
+
+    def build_base_prompt(self) -> str:
+        """Build a short prompt with just the base prompt, no requirements.
+
+        Returns:
+            str: The constructed prompt string.
+        """
+        return self.task.instruction
+
+
 class MultiShotPromptBuilder(BasePromptBuilder):
     """A prompt builder that includes multiple examples in the prompt."""
 
@@ -414,6 +425,8 @@ def prompt_builder_from_id(prompt_id: PromptId, task: Task) -> BasePromptBuilder
     match typed_prompt_generator:
         case PromptGenerators.SIMPLE:
             return SimplePromptBuilder(task)
+        case PromptGenerators.SHORT:
+            return ShortPromptBuilder(task)
         case PromptGenerators.FEW_SHOT:
             return FewShotPromptBuilder(task)
         case PromptGenerators.MULTI_SHOT:
```
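A hedged sketch of the new `ShortPromptBuilder` next to `SimplePromptBuilder`; the in-memory `Task(name=..., instruction=...)` construction below is an assumption made purely for illustration and may differ from how projects normally create tasks.

```python
# ShortPromptBuilder emits only the task instruction, while SimplePromptBuilder
# appends any task requirements after it.
from kiln_ai.adapters.prompt_builders import ShortPromptBuilder, SimplePromptBuilder
from kiln_ai.datamodel import Task

# Assumed constructor arguments for illustration only.
task = Task(name="Summarize", instruction="Summarize the input in one sentence.")
print(ShortPromptBuilder(task).build_base_prompt())   # instruction only
print(SimplePromptBuilder(task).build_base_prompt())  # instruction, plus requirements when present
```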
kiln_ai/adapters/provider_tools.py:

```diff
@@ -5,6 +5,7 @@ from kiln_ai.adapters.ml_model_list import (
     KilnModel,
     KilnModelProvider,
     ModelName,
+    ModelParserID,
     ModelProviderName,
     StructuredOutputMode,
     built_in_models,
@@ -15,7 +16,7 @@ from kiln_ai.adapters.model_adapters.litellm_config import (
 from kiln_ai.adapters.ollama_tools import (
     get_ollama_connection,
 )
-from kiln_ai.datamodel import Finetune, Task
+from kiln_ai.datamodel import Finetune, FinetuneDataStrategy, Task
 from kiln_ai.datamodel.registry import project_from_id
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
@@ -257,6 +258,14 @@ def finetune_from_id(model_id: str) -> Finetune:
     return fine_tune
 
 
+def parser_from_data_strategy(
+    data_strategy: FinetuneDataStrategy,
+) -> ModelParserID | None:
+    if data_strategy == FinetuneDataStrategy.final_and_intermediate_r1_compatible:
+        return ModelParserID.r1_thinking
+    return None
+
+
 def finetune_provider_model(
     model_id: str,
 ) -> KilnModelProvider:
@@ -266,6 +275,14 @@ def finetune_provider_model(
     model_provider = KilnModelProvider(
         name=provider,
         model_id=fine_tune.fine_tune_model_id,
+        parser=parser_from_data_strategy(fine_tune.data_strategy),
+        reasoning_capable=(
+            fine_tune.data_strategy
+            in [
+                FinetuneDataStrategy.final_and_intermediate,
+                FinetuneDataStrategy.final_and_intermediate_r1_compatible,
+            ]
+        ),
     )
 
     if provider == ModelProviderName.vertex and fine_tune.fine_tune_model_id:
```