kiln-ai 0.8.0__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.

This version of kiln-ai might be problematic.

Files changed (57)
  1. kiln_ai/adapters/__init__.py +7 -7
  2. kiln_ai/adapters/adapter_registry.py +77 -5
  3. kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
  4. kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
  5. kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
  6. kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
  7. kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
  8. kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
  9. kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
  10. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
  11. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +113 -21
  12. kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
  13. kiln_ai/adapters/ml_model_list.py +323 -94
  14. kiln_ai/adapters/model_adapters/__init__.py +18 -0
  15. kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
  16. kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
  17. kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
  18. kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
  19. kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
  20. kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
  21. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
  22. kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
  23. kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
  24. kiln_ai/adapters/parsers/__init__.py +10 -0
  25. kiln_ai/adapters/parsers/base_parser.py +12 -0
  26. kiln_ai/adapters/parsers/json_parser.py +37 -0
  27. kiln_ai/adapters/parsers/parser_registry.py +19 -0
  28. kiln_ai/adapters/parsers/r1_parser.py +69 -0
  29. kiln_ai/adapters/parsers/test_json_parser.py +81 -0
  30. kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
  31. kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
  32. kiln_ai/adapters/prompt_builders.py +126 -20
  33. kiln_ai/adapters/provider_tools.py +91 -36
  34. kiln_ai/adapters/repair/repair_task.py +17 -6
  35. kiln_ai/adapters/repair/test_repair_task.py +4 -4
  36. kiln_ai/adapters/run_output.py +8 -0
  37. kiln_ai/adapters/test_adapter_registry.py +177 -0
  38. kiln_ai/adapters/test_generate_docs.py +69 -0
  39. kiln_ai/adapters/test_prompt_adaptors.py +8 -4
  40. kiln_ai/adapters/test_prompt_builders.py +190 -29
  41. kiln_ai/adapters/test_provider_tools.py +268 -46
  42. kiln_ai/datamodel/__init__.py +199 -12
  43. kiln_ai/datamodel/basemodel.py +31 -11
  44. kiln_ai/datamodel/json_schema.py +8 -3
  45. kiln_ai/datamodel/model_cache.py +8 -3
  46. kiln_ai/datamodel/test_basemodel.py +81 -2
  47. kiln_ai/datamodel/test_dataset_split.py +100 -3
  48. kiln_ai/datamodel/test_example_models.py +25 -4
  49. kiln_ai/datamodel/test_model_cache.py +24 -0
  50. kiln_ai/datamodel/test_model_perf.py +125 -0
  51. kiln_ai/datamodel/test_models.py +129 -0
  52. kiln_ai/utils/exhaustive_error.py +6 -0
  53. {kiln_ai-0.8.0.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
  54. kiln_ai-0.11.1.dist-info/RECORD +76 -0
  55. kiln_ai-0.8.0.dist-info/RECORD +0 -58
  56. {kiln_ai-0.8.0.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
  57. {kiln_ai-0.8.0.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py}

@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 from typing import Dict
 
@@ -7,10 +8,14 @@ import pytest
 
 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.adapter_registry import adapter_for_task
-from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter, RunOutput
 from kiln_ai.adapters.ml_model_list import (
     built_in_models,
 )
+from kiln_ai.adapters.model_adapters.base_adapter import (
+    AdapterInfo,
+    BaseAdapter,
+    RunOutput,
+)
 from kiln_ai.adapters.ollama_tools import ollama_online
 from kiln_ai.adapters.prompt_builders import (
     BasePromptBuilder,
@@ -44,7 +49,7 @@ async def test_structured_output_ollama_llama(tmp_path, model_name):
 
 class MockAdapter(BaseAdapter):
     def __init__(self, kiln_task: datamodel.Task, response: Dict | str | None):
-        super().__init__(kiln_task)
+        super().__init__(kiln_task, model_name="phi_3_5", model_provider_name="ollama")
         self.response = response
 
     async def _run(self, input: str) -> RunOutput:
@@ -93,19 +98,10 @@ async def test_mock_unstructred_response(tmp_path):
     answer = await adapter.invoke("You are a mock, send me the response!")
 
 
-@pytest.mark.paid
-@pytest.mark.ollama
-@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
-async def test_all_built_in_models_structured_output(
-    tmp_path, model_name, provider_name
-):
+def check_supports_structured_output(model_name: str, provider_name: str):
     for model in built_in_models:
         if model.name != model_name:
             continue
-        if not model.supports_structured_output:
-            pytest.skip(
-                f"Skipping {model.name} because it does not support structured output"
-            )
         for provider in model.providers:
             if provider.name != provider_name:
                 continue
@@ -113,11 +109,20 @@ async def test_all_built_in_models_structured_output(
                 pytest.skip(
                     f"Skipping {model.name} {provider.name} because it does not support structured output"
                 )
-            await run_structured_output_test(tmp_path, model.name, provider.name)
             return
     raise RuntimeError(f"No model {model_name} {provider_name} found")
 
 
+@pytest.mark.paid
+@pytest.mark.ollama
+@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
+async def test_all_built_in_models_structured_output(
+    tmp_path, model_name, provider_name
+):
+    check_supports_structured_output(model_name, provider_name)
+    await run_structured_output_test(tmp_path, model_name, provider_name)
+
+
 def build_structured_output_test_task(tmp_path: Path):
     project = datamodel.Project(name="test", path=tmp_path / "test.kiln")
     project.save_to_file()
@@ -140,7 +145,14 @@ def build_structured_output_test_task(tmp_path: Path):
 async def run_structured_output_test(tmp_path: Path, model_name: str, provider: str):
     task = build_structured_output_test_task(tmp_path)
     a = adapter_for_task(task, model_name=model_name, provider=provider)
-    parsed = await a.invoke_returning_raw("Cows")  # a joke about cows
+    try:
+        parsed = await a.invoke_returning_raw("Cows")  # a joke about cows
+    except ValueError as e:
+        if str(e) == "Failed to connect to Ollama. Ensure Ollama is running.":
+            pytest.skip(
+                f"Skipping {model_name} {provider} because Ollama is not running"
+            )
+        raise e
     if parsed is None or not isinstance(parsed, Dict):
         raise RuntimeError(f"structured response is not a dict: {parsed}")
     assert parsed["setup"] is not None
@@ -161,6 +173,7 @@ def build_structured_input_test_task(tmp_path: Path):
         parent=project,
         name="test task",
         instruction="You are an assistant which classifies a triangle given the lengths of its sides. If all sides are of equal length, the triangle is equilateral. If two sides are equal, the triangle is isosceles. Otherwise, it is scalene.\n\nAt the end of your response return the result in double square brackets. It should be plain text. It should be exactly one of the three following strings: '[[equilateral]]', or '[[isosceles]]', or '[[scalene]]'.",
+        thinking_prompt="Think step by step.",
     )
     task.input_json_schema = json_triangle_schema
     schema = task.input_schema()
@@ -177,7 +190,14 @@
 
 async def run_structured_input_test(tmp_path: Path, model_name: str, provider: str):
     task = build_structured_input_test_task(tmp_path)
-    await run_structured_input_task(task, model_name, provider)
+    try:
+        await run_structured_input_task(task, model_name, provider)
+    except ValueError as e:
+        if str(e) == "Failed to connect to Ollama. Ensure Ollama is running.":
+            pytest.skip(
+                f"Skipping {model_name} {provider} because Ollama is not running"
+            )
+        raise e
 
 
 async def run_structured_input_task(
@@ -196,10 +216,19 @@ async def run_structured_input_task(
         # invalid structured input
         await a.invoke({"a": 1, "b": 2, "d": 3})
 
-    response = await a.invoke_returning_raw({"a": 2, "b": 2, "c": 2})
+    try:
+        response = await a.invoke_returning_raw({"a": 2, "b": 2, "c": 2})
+    except ValueError as e:
+        if str(e) == "Failed to connect to Ollama. Ensure Ollama is running.":
+            pytest.skip(
+                f"Skipping {model_name} {provider} because Ollama is not running"
+            )
+        raise e
     assert response is not None
-    assert isinstance(response, str)
-    assert "[[equilateral]]" in response
+    if isinstance(response, str):
+        assert "[[equilateral]]" in response
+    else:
+        assert response["is_equilateral"] is True
     adapter_info = a.adapter_info()
     expected_pb_name = "simple_prompt_builder"
     if pb is not None:
@@ -207,7 +236,6 @@ async def run_structured_input_task(
     assert adapter_info.prompt_builder_name == expected_pb_name
     assert adapter_info.model_name == model_name
     assert adapter_info.model_provider == provider
-    assert adapter_info.adapter_name == "kiln_langchain_adapter"
 
 
 @pytest.mark.paid
@@ -227,7 +255,52 @@ async def test_all_built_in_models_structured_input(
 @pytest.mark.paid
 @pytest.mark.ollama
 @pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
-async def test_structured_cot_prompt_builder(tmp_path, model_name, provider_name):
+async def test_structured_input_cot_prompt_builder(tmp_path, model_name, provider_name):
     task = build_structured_input_test_task(tmp_path)
     pb = SimpleChainOfThoughtPromptBuilder(task)
     await run_structured_input_task(task, model_name, provider_name, pb)
+
+
+@pytest.mark.paid
+@pytest.mark.ollama
+@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
+async def test_structured_output_cot_prompt_builder(
+    tmp_path, model_name, provider_name
+):
+    check_supports_structured_output(model_name, provider_name)
+    triangle_schema = {
+        "type": "object",
+        "properties": {
+            "is_equilateral": {
+                "type": "boolean",
+                "description": "True if all sides of the triangle are equal in length",
+            },
+            "is_scalene": {
+                "type": "boolean",
+                "description": "True if all sides of the triangle have different lengths",
+            },
+            "is_obtuse": {
+                "type": "boolean",
+                "description": "True if one of the angles is greater than 90 degrees",
+            },
+        },
+        "required": ["is_equilateral", "is_scalene", "is_obtuse"],
+        "additionalProperties": False,
+    }
+    task = build_structured_input_test_task(tmp_path)
+    task.instruction = """
+You are an assistant which classifies a triangle given the lengths of its sides. If all sides are of equal length, the triangle is equilateral. If two sides are equal, the triangle is isosceles. Otherwise, it is scalene.\n\n"
+
+When asked for a final result, this is the format (for an equilateral example):
+```json
+{
+    "is_equilateral": true,
+    "is_scalene": false,
+    "is_obtuse": false
+}
+```
+"""
+    task.output_json_schema = json.dumps(triangle_schema)
+    task.save_to_file()
+    pb = SimpleChainOfThoughtPromptBuilder(task)
+    await run_structured_input_task(task, model_name, provider_name, pb)
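
The same Ollama-connection guard now appears at three call sites above. A minimal sketch of how it could be factored into one helper — the name invoke_or_skip and its placement are hypothetical, not part of this release:

```python
import pytest

OLLAMA_DOWN_MSG = "Failed to connect to Ollama. Ensure Ollama is running."


async def invoke_or_skip(awaitable, model_name: str, provider: str):
    # Await the adapter call; convert an "Ollama offline" ValueError into a skip.
    try:
        return await awaitable
    except ValueError as e:
        if str(e) == OLLAMA_DOWN_MSG:
            pytest.skip(f"Skipping {model_name} {provider} because Ollama is not running")
        raise
```

A call site would then read `parsed = await invoke_or_skip(a.invoke_returning_raw("Cows"), model_name, provider)`.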

kiln_ai/adapters/parsers/__init__.py

@@ -0,0 +1,10 @@
+"""
+# Parsers
+
+Parsing utilities for JSON and models with custom output formats (R1, etc.)
+
+"""
+
+from . import base_parser, json_parser, r1_parser
+
+__all__ = ["r1_parser", "base_parser", "json_parser"]

kiln_ai/adapters/parsers/base_parser.py

@@ -0,0 +1,12 @@
+from kiln_ai.adapters.run_output import RunOutput
+
+
+class BaseParser:
+    def __init__(self, structured_output: bool = False):
+        self.structured_output = structured_output
+
+    def parse_output(self, original_output: RunOutput) -> RunOutput:
+        """
+        Method for parsing the output of a model. Typically overridden by subclasses.
+        """
+        return original_output
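
BaseParser is an identity pass-through: it returns the RunOutput unchanged, so a subclass only needs to override parse_output. A minimal sketch of a custom parser (the class below is illustrative, not shipped in the package):

```python
from kiln_ai.adapters.parsers.base_parser import BaseParser
from kiln_ai.adapters.run_output import RunOutput


class UppercaseParser(BaseParser):
    # Illustrative subclass: uppercase string outputs, pass everything else through.
    def parse_output(self, original_output: RunOutput) -> RunOutput:
        if not isinstance(original_output.output, str):
            return original_output
        return RunOutput(
            output=original_output.output.upper(),
            intermediate_outputs=original_output.intermediate_outputs,
        )
```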

kiln_ai/adapters/parsers/json_parser.py

@@ -0,0 +1,37 @@
+import json
+from typing import Any, Dict
+
+
+def parse_json_string(json_string: str) -> Dict[str, Any]:
+    """
+    Parse a JSON string into a dictionary. Handles multiple formats:
+    - Plain JSON
+    - JSON wrapped in ```json code blocks
+    - JSON wrapped in ``` code blocks
+
+    Args:
+        json_string: String containing JSON data, possibly wrapped in code blocks
+
+    Returns:
+        Dict containing parsed JSON data
+
+    Raises:
+        ValueError: If JSON parsing fails
+    """
+    # Remove code block markers if present
+    cleaned_string = json_string.strip()
+    if cleaned_string.startswith("```"):
+        # Split by newlines and remove first/last lines if they contain ```
+        lines = cleaned_string.split("\n")
+        if lines[0].startswith("```"):
+            lines = lines[1:]
+        if lines and lines[-1].strip() == "```":
+            lines = lines[:-1]
+        cleaned_string = "\n".join(lines)
+
+    try:
+        return json.loads(cleaned_string)
+    except json.JSONDecodeError as e:
+        raise ValueError(
+            f"This task requires JSON output but the model didn't return valid JSON. Search 'Troubleshooting Structured Data Issues' in our docs for more information. The model produced the following: {cleaned_string}"
+        ) from e
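
A usage sketch for parse_json_string, assuming kiln-ai 0.11.1 is installed: plain JSON and fence-wrapped JSON parse to the same dict, because the fence markers are stripped before json.loads runs.

```python
from kiln_ai.adapters.parsers.json_parser import parse_json_string

plain = '{"setup": "Why did the cow cross the road?", "punchline": "Moo."}'
fenced = "```json\n" + plain + "\n```"  # the same payload wrapped in a code fence

assert parse_json_string(plain) == parse_json_string(fenced)
assert parse_json_string(fenced)["punchline"] == "Moo."
```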

kiln_ai/adapters/parsers/parser_registry.py

@@ -0,0 +1,19 @@
+from typing import Type
+
+from kiln_ai.adapters.ml_model_list import ModelParserID
+from kiln_ai.adapters.parsers.base_parser import BaseParser
+from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
+from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+
+
+def model_parser_from_id(parser_id: ModelParserID | None) -> Type[BaseParser]:
+    """
+    Get a model parser from its ID.
+    """
+    match parser_id:
+        case None:
+            return BaseParser
+        case ModelParserID.r1_thinking:
+            return R1ThinkingParser
+        case _:
+            raise_exhaustive_enum_error(parser_id)
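
A sketch of the registry in use (again assuming 0.11.1): the lookup returns a class, which the caller instantiates, choosing structured_output per task.

```python
from kiln_ai.adapters.ml_model_list import ModelParserID
from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
from kiln_ai.adapters.run_output import RunOutput

# Resolve the parser class for R1-style models, then instantiate it.
parser_class = model_parser_from_id(ModelParserID.r1_thinking)
parser = parser_class(structured_output=False)

run = parser.parse_output(
    RunOutput(output="<think>compare sides</think>scalene", intermediate_outputs=None)
)
assert run.output == "scalene"
assert run.intermediate_outputs["reasoning"] == "compare sides"
```

Returning the class rather than an instance keeps the registry free of per-task state, and the exhaustive match means a new ModelParserID without a registered parser fails loudly at lookup time.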

kiln_ai/adapters/parsers/r1_parser.py

@@ -0,0 +1,69 @@
+from kiln_ai.adapters.parsers.base_parser import BaseParser
+from kiln_ai.adapters.parsers.json_parser import parse_json_string
+from kiln_ai.adapters.run_output import RunOutput
+
+
+class R1ThinkingParser(BaseParser):
+    START_TAG = "<think>"
+    END_TAG = "</think>"
+
+    def parse_output(self, original_output: RunOutput) -> RunOutput:
+        """
+        Parse the <think> </think> tags from the response into the intermediate and final outputs.
+
+        Args:
+            original_output: RunOutput containing the raw response string
+
+        Returns:
+            ParsedOutput containing the intermediate content (thinking content) and final result
+
+        Raises:
+            ValueError: If response format is invalid (missing tags, multiple tags, or no content after closing tag)
+        """
+        # This parser only works for strings
+        if not isinstance(original_output.output, str):
+            raise ValueError("Response must be a string for R1 parser")
+
+        # Strip whitespace and validate basic structure
+        cleaned_response = original_output.output.strip()
+        if not cleaned_response.startswith(self.START_TAG):
+            raise ValueError("Response must start with <think> tag")
+
+        # Find the thinking tags
+        think_start = cleaned_response.find(self.START_TAG)
+        think_end = cleaned_response.find(self.END_TAG)
+
+        if think_start == -1 or think_end == -1:
+            raise ValueError("Missing thinking tags")
+
+        # Check for multiple tags
+        if (
+            cleaned_response.count(self.START_TAG) > 1
+            or cleaned_response.count(self.END_TAG) > 1
+        ):
+            raise ValueError("Multiple thinking tags found")
+
+        # Extract thinking content
+        thinking_content = cleaned_response[
+            think_start + len(self.START_TAG) : think_end
+        ].strip()
+
+        # Extract result (everything after </think>)
+        result = cleaned_response[think_end + len(self.END_TAG) :].strip()
+
+        if not result or len(result) == 0:
+            raise ValueError("No content found after </think> tag")
+
+        # Parse JSON if needed
+        output = result
+        if self.structured_output:
+            output = parse_json_string(result)
+
+        # Add thinking content to intermediate outputs if it exists
+        intermediate_outputs = original_output.intermediate_outputs or {}
+        intermediate_outputs["reasoning"] = thinking_content
+
+        return RunOutput(
+            output=output,
+            intermediate_outputs=intermediate_outputs,
+        )
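
With structured_output=True, everything after </think> is handed to parse_json_string, so an R1-style reasoning model can still satisfy a JSON output schema. A sketch, assuming 0.11.1:

```python
from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
from kiln_ai.adapters.run_output import RunOutput

parser = R1ThinkingParser(structured_output=True)
run = parser.parse_output(
    RunOutput(
        output='<think>All sides are 2, so equal.</think>{"is_equilateral": true}',
        intermediate_outputs=None,
    )
)
# The JSON tail becomes a dict; the thinking content lands in "reasoning".
assert run.output == {"is_equilateral": True}
assert run.intermediate_outputs["reasoning"] == "All sides are 2, so equal."
```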

kiln_ai/adapters/parsers/test_json_parser.py

@@ -0,0 +1,81 @@
+import pytest
+
+from kiln_ai.adapters.parsers.json_parser import parse_json_string
+
+
+def test_parse_plain_json():
+    json_str = '{"key": "value", "number": 42}'
+    result = parse_json_string(json_str)
+    assert result == {"key": "value", "number": 42}
+
+
+def test_parse_json_with_code_block():
+    json_str = """```
+    {"key": "value", "number": 42}
+    ```"""
+    result = parse_json_string(json_str)
+    assert result == {"key": "value", "number": 42}
+
+
+def test_parse_json_with_language_block():
+    json_str = """```json
+    {"key": "value", "number": 42}
+    ```"""
+    result = parse_json_string(json_str)
+    assert result == {"key": "value", "number": 42}
+
+
+def test_parse_json_with_whitespace():
+    json_str = """
+    {
+        "key": "value",
+        "number": 42
+    }
+    """
+    result = parse_json_string(json_str)
+    assert result == {"key": "value", "number": 42}
+
+
+def test_parse_invalid_json():
+    json_str = '{"key": "value", invalid}'
+    with pytest.raises(ValueError) as exc_info:
+        parse_json_string(json_str)
+    assert (
+        "This task requires JSON output but the model didn't return valid JSON."
+        in str(exc_info.value)
+    )
+
+
+def test_parse_empty_code_block():
+    json_str = """```json
+    ```"""
+    with pytest.raises(ValueError) as exc_info:
+        parse_json_string(json_str)
+    assert (
+        "This task requires JSON output but the model didn't return valid JSON."
+        in str(exc_info.value)
+    )
+
+
+def test_parse_complex_json():
+    json_str = """```json
+    {
+        "string": "hello",
+        "number": 42,
+        "bool": true,
+        "null": null,
+        "array": [1, 2, 3],
+        "nested": {
+            "inner": "value"
+        }
+    }
+    ```"""
+    result = parse_json_string(json_str)
+    assert result == {
+        "string": "hello",
+        "number": 42,
+        "bool": True,
+        "null": None,
+        "array": [1, 2, 3],
+        "nested": {"inner": "value"},
+    }

kiln_ai/adapters/parsers/test_parser_registry.py

@@ -0,0 +1,32 @@
+import pytest
+
+from kiln_ai.adapters.ml_model_list import ModelParserID
+from kiln_ai.adapters.parsers.base_parser import BaseParser
+from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
+from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
+
+
+def test_model_parser_from_id_invalid():
+    """Test that invalid parser ID raises ValueError."""
+
+    # Create a mock enum value that isn't handled
+    class MockModelParserID:
+        mock_value = "mock_value"
+
+    with pytest.raises(ValueError) as exc_info:
+        model_parser_from_id(MockModelParserID.mock_value)  # type: ignore
+
+    assert "Unhandled enum value" in str(exc_info.value)
+
+
+@pytest.mark.parametrize(
+    "parser_id,expected_class",
+    [
+        (None, BaseParser),
+        (ModelParserID.r1_thinking, R1ThinkingParser),
+    ],
+)
+def test_model_parser_from_id_parametrized(parser_id, expected_class):
+    """Test all valid parser IDs using parametrize."""
+    parser_class = model_parser_from_id(parser_id)
+    assert parser_class == expected_class

kiln_ai/adapters/parsers/test_r1_parser.py

@@ -0,0 +1,144 @@
+import pytest
+
+from kiln_ai.adapters.parsers.r1_parser import R1ThinkingParser
+from kiln_ai.adapters.run_output import RunOutput
+
+
+@pytest.fixture
+def parser():
+    return R1ThinkingParser()
+
+
+def test_valid_response(parser):
+    response = RunOutput(
+        output="<think>This is thinking content</think>This is the result",
+        intermediate_outputs=None,
+    )
+    parsed = parser.parse_output(response)
+    assert parsed.intermediate_outputs["reasoning"] == "This is thinking content"
+    assert parsed.output == "This is the result"
+
+
+def test_response_with_whitespace(parser):
+    response = RunOutput(
+        output="""
+        <think>
+        This is thinking content
+        </think>
+        This is the result
+        """,
+        intermediate_outputs=None,
+    )
+    parsed = parser.parse_output(response)
+    assert (
+        parsed.intermediate_outputs["reasoning"].strip() == "This is thinking content"
+    )
+    assert parsed.output.strip() == "This is the result"
+
+
+def test_missing_start_tag(parser):
+    with pytest.raises(ValueError, match="Response must start with <think> tag"):
+        parser.parse_output(
+            RunOutput(output="Some content</think>result", intermediate_outputs=None)
+        )
+
+
+def test_missing_end_tag(parser):
+    with pytest.raises(ValueError, match="Missing thinking tags"):
+        parser.parse_output(
+            RunOutput(output="<think>Some content", intermediate_outputs=None)
+        )
+
+
+def test_multiple_start_tags(parser):
+    with pytest.raises(ValueError, match="Multiple thinking tags found"):
+        parser.parse_output(
+            RunOutput(
+                output="<think>content1<think>content2</think>result",
+                intermediate_outputs=None,
+            )
+        )
+
+
+def test_multiple_end_tags(parser):
+    with pytest.raises(ValueError, match="Multiple thinking tags found"):
+        parser.parse_output(
+            RunOutput(
+                output="<think>content</think></think>result", intermediate_outputs=None
+            )
+        )
+
+
+def test_empty_thinking_content(parser):
+    response = RunOutput(
+        output="<think></think>This is the result", intermediate_outputs=None
+    )
+    parsed = parser.parse_output(response)
+    assert parsed.intermediate_outputs == {"reasoning": ""}
+    assert parsed.output == "This is the result"
+
+
+def test_missing_result(parser):
+    with pytest.raises(ValueError, match="No content found after </think> tag"):
+        parser.parse_output(
+            RunOutput(output="<think>Some content</think>", intermediate_outputs=None)
+        )
+
+
+def test_multiline_content(parser):
+    response = RunOutput(
+        output="""<think>Line 1
+        Line 2
+        Line 3</think>Final result""",
+        intermediate_outputs=None,
+    )
+    parsed = parser.parse_output(response)
+    assert "Line 1" in parsed.intermediate_outputs["reasoning"]
+    assert "Line 2" in parsed.intermediate_outputs["reasoning"]
+    assert "Line 3" in parsed.intermediate_outputs["reasoning"]
+    assert parsed.output == "Final result"
+
+
+def test_special_characters(parser):
+    response = RunOutput(
+        output="<think>Content with: !@#$%^&*思()</think>Result with: !@#$%^&*思()",
+        intermediate_outputs=None,
+    )
+    parsed = parser.parse_output(response)
+    assert parsed.intermediate_outputs["reasoning"] == "Content with: !@#$%^&*思()"
+    assert parsed.output == "Result with: !@#$%^&*思()"
+
+
+def test_non_string_input(parser):
+    with pytest.raises(ValueError, match="Response must be a string for R1 parser"):
+        parser.parse_output(RunOutput(output={}, intermediate_outputs=None))
+
+
+def test_intermediate_outputs(parser):
+    # append to existing intermediate outputs
+    out = parser.parse_output(
+        RunOutput(
+            output="<think>Some content</think>result",
+            intermediate_outputs={"existing": "data"},
+        )
+    )
+    assert out.intermediate_outputs["reasoning"] == "Some content"
+    assert out.intermediate_outputs["existing"] == "data"
+
+    # empty dict is allowed
+    out = parser.parse_output(
+        RunOutput(
+            output="<think>Some content</think>result",
+            intermediate_outputs={},
+        )
+    )
+    assert out.intermediate_outputs["reasoning"] == "Some content"
+
+    # None is allowed
+    out = parser.parse_output(
+        RunOutput(
+            output="<think>Some content</think>result",
+            intermediate_outputs=None,
+        )
+    )
+    assert out.intermediate_outputs["reasoning"] == "Some content"