kiln-ai 0.8.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +7 -7
- kiln_ai/adapters/adapter_registry.py +81 -10
- kiln_ai/adapters/data_gen/data_gen_task.py +21 -3
- kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
- kiln_ai/adapters/eval/base_eval.py +164 -0
- kiln_ai/adapters/eval/eval_runner.py +267 -0
- kiln_ai/adapters/eval/g_eval.py +367 -0
- kiln_ai/adapters/eval/registry.py +16 -0
- kiln_ai/adapters/eval/test_base_eval.py +324 -0
- kiln_ai/adapters/eval/test_eval_runner.py +640 -0
- kiln_ai/adapters/eval/test_g_eval.py +497 -0
- kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
- kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
- kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
- kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +472 -129
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +114 -22
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
- kiln_ai/adapters/ml_model_list.py +434 -93
- kiln_ai/adapters/model_adapters/__init__.py +18 -0
- kiln_ai/adapters/model_adapters/base_adapter.py +250 -0
- kiln_ai/adapters/model_adapters/langchain_adapters.py +309 -0
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +10 -0
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +289 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +199 -0
- kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +105 -97
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +216 -0
- kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +80 -30
- kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +125 -46
- kiln_ai/adapters/ollama_tools.py +0 -1
- kiln_ai/adapters/parsers/__init__.py +10 -0
- kiln_ai/adapters/parsers/base_parser.py +12 -0
- kiln_ai/adapters/parsers/json_parser.py +37 -0
- kiln_ai/adapters/parsers/parser_registry.py +19 -0
- kiln_ai/adapters/parsers/r1_parser.py +69 -0
- kiln_ai/adapters/parsers/test_json_parser.py +81 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
- kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
- kiln_ai/adapters/prompt_builders.py +193 -49
- kiln_ai/adapters/provider_tools.py +91 -36
- kiln_ai/adapters/repair/repair_task.py +18 -19
- kiln_ai/adapters/repair/test_repair_task.py +7 -7
- kiln_ai/adapters/run_output.py +11 -0
- kiln_ai/adapters/test_adapter_registry.py +177 -0
- kiln_ai/adapters/test_generate_docs.py +69 -0
- kiln_ai/adapters/test_ollama_tools.py +0 -1
- kiln_ai/adapters/test_prompt_adaptors.py +25 -18
- kiln_ai/adapters/test_prompt_builders.py +265 -44
- kiln_ai/adapters/test_provider_tools.py +268 -46
- kiln_ai/datamodel/__init__.py +51 -772
- kiln_ai/datamodel/basemodel.py +31 -11
- kiln_ai/datamodel/datamodel_enums.py +58 -0
- kiln_ai/datamodel/dataset_filters.py +114 -0
- kiln_ai/datamodel/dataset_split.py +170 -0
- kiln_ai/datamodel/eval.py +298 -0
- kiln_ai/datamodel/finetune.py +105 -0
- kiln_ai/datamodel/json_schema.py +14 -3
- kiln_ai/datamodel/model_cache.py +8 -3
- kiln_ai/datamodel/project.py +23 -0
- kiln_ai/datamodel/prompt.py +37 -0
- kiln_ai/datamodel/prompt_id.py +83 -0
- kiln_ai/datamodel/strict_mode.py +24 -0
- kiln_ai/datamodel/task.py +181 -0
- kiln_ai/datamodel/task_output.py +321 -0
- kiln_ai/datamodel/task_run.py +164 -0
- kiln_ai/datamodel/test_basemodel.py +80 -2
- kiln_ai/datamodel/test_dataset_filters.py +71 -0
- kiln_ai/datamodel/test_dataset_split.py +127 -6
- kiln_ai/datamodel/test_datasource.py +3 -2
- kiln_ai/datamodel/test_eval_model.py +635 -0
- kiln_ai/datamodel/test_example_models.py +34 -17
- kiln_ai/datamodel/test_json_schema.py +23 -0
- kiln_ai/datamodel/test_model_cache.py +24 -0
- kiln_ai/datamodel/test_model_perf.py +125 -0
- kiln_ai/datamodel/test_models.py +131 -2
- kiln_ai/datamodel/test_prompt_id.py +129 -0
- kiln_ai/datamodel/test_task.py +159 -0
- kiln_ai/utils/config.py +6 -1
- kiln_ai/utils/exhaustive_error.py +6 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +45 -7
- kiln_ai-0.12.0.dist-info/RECORD +100 -0
- kiln_ai/adapters/base_adapter.py +0 -191
- kiln_ai/adapters/langchain_adapters.py +0 -256
- kiln_ai-0.8.1.dist-info/RECORD +0 -58
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0
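
Highlights of the API changes covered by this diff: adapters were split into model_adapters and parsers packages, an eval subsystem was added, and the fine-tune dataset formatter grew chain-of-thought support. Concretely, DatasetFormatter.dump_to_file now takes a DatasetFormat plus a data_strategy, and the formatter accepts optional thinking_instructions. A minimal sketch of the new call shape, inferred from the updated tests below; the dataset object is assumed to be a real kiln_ai.datamodel.DatasetSplit (the tests substitute mocks):

from kiln_ai.adapters.fine_tune.dataset_formatter import (
    DatasetFormat,
    DatasetFormatter,
)
from kiln_ai.datamodel import FinetuneDataStrategy

# `dataset` is assumed to be an existing DatasetSplit with a "train" split.
formatter = DatasetFormatter(
    dataset,
    "system message",
    thinking_instructions="Think step by step.",  # only used by COT strategies
)

# final_only trains on final answers alone; final_and_intermediate also writes
# the thinking/chain-of-thought turns captured in each run's intermediate_outputs.
jsonl_path = formatter.dump_to_file(
    "train",
    DatasetFormat.OPENAI_CHAT_JSONL,
    data_strategy=FinetuneDataStrategy.final_and_intermediate,
)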
kiln_ai/adapters/fine_tune/test_dataset_formatter.py

@@ -1,4 +1,5 @@
 import json
+import logging
 import tempfile
 from pathlib import Path
 from unittest.mock import Mock
@@ -8,49 +9,84 @@ import pytest
 from kiln_ai.adapters.fine_tune.dataset_formatter import (
     DatasetFormat,
     DatasetFormatter,
+    ModelTrainingData,
+    build_training_data,
     generate_chat_message_response,
     generate_chat_message_toolcall,
     generate_huggingface_chat_template,
     generate_huggingface_chat_template_toolcall,
+    generate_vertex_gemini_1_5,
 )
+from kiln_ai.adapters.model_adapters.base_adapter import COT_FINAL_ANSWER_PROMPT
 from kiln_ai.datamodel import (
     DatasetSplit,
     DataSource,
     DataSourceType,
+    FinetuneDataStrategy,
     Task,
     TaskOutput,
     TaskRun,
 )
 
+logger = logging.getLogger(__name__)
+
 
 @pytest.fixture
 def mock_task():
-    task = Mock(spec=Task)
+    task = Mock(spec=Task, thinking_instruction=None)
     task_runs = [
-        …
+        Mock(
+            spec=TaskRun,
+            **{
+                "id": f"run{i}",
+                "input": '{"test": "input 你好"}',
+                "repaired_output": None,
+                "intermediate_outputs": {},
+                "input_source": Mock(
+                    spec=DataSource,
+                    **{
+                        "type": DataSourceType.human,
+                        "properties": {"created_by": "test"},
+                    },
+                ),
+                "output": Mock(
+                    spec=TaskOutput,
+                    **{
+                        "output": '{"test": "output 你好"}',
+                        "source": Mock(
+                            spec=DataSource,
+                            **{
+                                "type": DataSourceType.synthetic,
+                                "properties": {
+                                    "model_name": "test",
+                                    "model_provider": "test",
+                                    "adapter_name": "test",
+                                },
+                            },
+                        ),
                     },
                 ),
-        …
+            },
         )
         for i in range(1, 4)
     ]
+
+    # Set up parent_task reference for each TaskRun
+    for run in task_runs:
+        run.parent_task = Mock(return_value=task)
+
     task.runs.return_value = task_runs
     return task
 
 
+@pytest.fixture
+def mock_intermediate_outputs(mock_task):
+    for run in mock_task.runs():
+        run.intermediate_outputs = {"reasoning": "thinking output"}
+    mock_task.thinking_instruction = "thinking instructions"
+    return mock_task
+
+
 @pytest.fixture
 def mock_dataset(mock_task):
     dataset = Mock(spec=DatasetSplit)
@@ -61,26 +97,13 @@ def mock_dataset(mock_task):
 
 
 def test_generate_chat_message_response():
-    …
-        id="run1",
+    thinking_data = ModelTrainingData(
         input="test input",
-        …
-        ),
-        output=TaskOutput(
-            output="test output",
-            source=DataSource(
-                type=DataSourceType.synthetic,
-                properties={
-                    "model_name": "test",
-                    "model_provider": "test",
-                    "adapter_name": "test",
-                },
-            ),
-        ),
+        system_message="system message",
+        final_output="test output",
     )
 
-    result = generate_chat_message_response(
+    result = generate_chat_message_response(thinking_data)
 
     assert result == {
         "messages": [
@@ -91,32 +114,80 @@
     }
 
 
+def test_generate_chat_message_response_thinking():
+    thinking_data = ModelTrainingData(
+        input="test input",
+        system_message="system message",
+        final_output="test output",
+        thinking="thinking output",
+        thinking_instructions="thinking instructions",
+        thinking_final_answer_prompt="thinking final answer prompt",
+    )
+
+    result = generate_chat_message_response(thinking_data)
+
+    assert result == {
+        "messages": [
+            {"role": "system", "content": "system message"},
+            {"role": "user", "content": "test input"},
+            {"role": "user", "content": "thinking instructions"},
+            {"role": "assistant", "content": "thinking output"},
+            {"role": "user", "content": "thinking final answer prompt"},
+            {"role": "assistant", "content": "test output"},
+        ]
+    }
+
+
 def test_generate_chat_message_toolcall():
-    …
+    training_data = ModelTrainingData(
+        input="test input 你好",
+        system_message="system message 你好",
+        final_output='{"key": "value 你好"}',
+    )
+
+    result = generate_chat_message_toolcall(training_data)
+
+    assert result == {
+        "messages": [
+            {"role": "system", "content": "system message 你好"},
+            {"role": "user", "content": "test input 你好"},
+            {
+                "role": "assistant",
+                "content": None,
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {
+                            "name": "task_response",
+                            "arguments": '{"key": "value 你好"}',
+                        },
+                    }
+                ],
+            },
+        ]
+    }
+
+
+def test_generate_chat_message_toolcall_thinking():
+    training_data = ModelTrainingData(
         input="test input",
-        …
-        source=DataSource(
-            type=DataSourceType.synthetic,
-            properties={
-                "model_name": "test",
-                "model_provider": "test",
-                "adapter_name": "test",
-            },
-        ),
-        ),
+        system_message="system message",
+        final_output='{"key": "value"}',
+        thinking="thinking output",
+        thinking_instructions="thinking instructions",
+        thinking_final_answer_prompt="thinking final answer prompt",
     )
 
-    result = generate_chat_message_toolcall(
+    result = generate_chat_message_toolcall(training_data)
 
     assert result == {
         "messages": [
             {"role": "system", "content": "system message"},
             {"role": "user", "content": "test input"},
+            {"role": "user", "content": "thinking instructions"},
+            {"role": "assistant", "content": "thinking output"},
+            {"role": "user", "content": "thinking final answer prompt"},
             {
                 "role": "assistant",
                 "content": None,
@@ -136,27 +207,14 @@ def test_generate_chat_message_toolcall():
 
 
 def test_generate_chat_message_toolcall_invalid_json():
-    …
-        id="run1",
+    training_data = ModelTrainingData(
         input="test input",
-        …
-        ),
-        output=TaskOutput(
-            output="invalid json",
-            source=DataSource(
-                type=DataSourceType.synthetic,
-                properties={
-                    "model_name": "test",
-                    "model_provider": "test",
-                    "adapter_name": "test",
-                },
-            ),
-        ),
+        system_message="system message",
+        final_output="invalid json",
     )
 
     with pytest.raises(ValueError, match="Invalid JSON in for tool call"):
-        generate_chat_message_toolcall(
+        generate_chat_message_toolcall(training_data)
 
 
 def test_dataset_formatter_init_no_parent_task(mock_dataset):
@@ -170,14 +228,20 @@ def test_dataset_formatter_dump_invalid_format(mock_dataset):
     formatter = DatasetFormatter(mock_dataset, "system message")
 
     with pytest.raises(ValueError, match="Unsupported format"):
-        formatter.dump_to_file(
+        formatter.dump_to_file(
+            "train", "invalid_format", FinetuneDataStrategy.final_only
+        )  # type: ignore
 
 
 def test_dataset_formatter_dump_invalid_split(mock_dataset):
     formatter = DatasetFormatter(mock_dataset, "system message")
 
     with pytest.raises(ValueError, match="Split invalid_split not found in dataset"):
-        formatter.dump_to_file(
+        formatter.dump_to_file(
+            "invalid_split",
+            DatasetFormat.OPENAI_CHAT_JSONL,
+            FinetuneDataStrategy.final_only,
+        )
 
 
 def test_dataset_formatter_dump_to_file(mock_dataset, tmp_path):
@@ -185,7 +249,10 @@ def test_dataset_formatter_dump_to_file(mock_dataset, tmp_path):
     output_path = tmp_path / "output.jsonl"
 
     result_path = formatter.dump_to_file(
-        "train",
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSONL,
+        path=output_path,
+        data_strategy=FinetuneDataStrategy.final_only,
     )
 
     assert result_path == output_path
@@ -200,23 +267,38 @@ def test_dataset_formatter_dump_to_file(mock_dataset, tmp_path):
            assert "messages" in data
            assert len(data["messages"]) == 3
            assert data["messages"][0]["content"] == "system message"
-           assert data["messages"][1]["content"] == '{"test": "input"}'
-           …
+           assert data["messages"][1]["content"] == '{"test": "input 你好"}'
+           # Raw chat doesn't fix json issues, like extra spaces
+           assert data["messages"][2]["content"] == '{"test": "output 你好"}'
 
 
 def test_dataset_formatter_dump_to_temp_file(mock_dataset):
-    formatter = DatasetFormatter(mock_dataset, "system message")
+    formatter = DatasetFormatter(mock_dataset, "system message 你好")
 
-    result_path = formatter.dump_to_file(
+    result_path = formatter.dump_to_file(
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSONL,
+        data_strategy=FinetuneDataStrategy.final_only,
+    )
 
     assert result_path.exists()
     assert result_path.parent == Path(tempfile.gettempdir())
-    …
+    # Test our nice naming
+    assert result_path.name.startswith(
+        "test_dataset -- split-train -- format-openai_chat_jsonl -- no-cot.jsonl"
+    )
     assert result_path.name.endswith(".jsonl")
     # Verify file contents
     with open(result_path) as f:
         lines = f.readlines()
         assert len(lines) == 2
+        # check non-ascii characters are not escaped
+        assert "你好" in lines[0]
+        assert "你好" in lines[1]
+
+        # confirm didn't use COT for final_only
+        assert "thinking output" not in lines[0]
+        assert "thinking instructions" not in lines[0]
 
 
 def test_dataset_formatter_dump_to_file_tool_format(mock_dataset, tmp_path):
@@ -224,7 +306,10 @@ def test_dataset_formatter_dump_to_file_tool_format(mock_dataset, tmp_path):
     output_path = tmp_path / "output.jsonl"
 
     result_path = formatter.dump_to_file(
-        "train",
+        "train",
+        DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL,
+        path=output_path,
+        data_strategy=FinetuneDataStrategy.final_only,
    )
 
     assert result_path == output_path
@@ -240,7 +325,7 @@ def test_dataset_formatter_dump_to_file_tool_format(mock_dataset, tmp_path):
            assert len(data["messages"]) == 3
            # Check system and user messages
            assert data["messages"][0]["content"] == "system message"
-           assert data["messages"][1]["content"] == '{"test": "input"}'
+           assert data["messages"][1]["content"] == '{"test": "input 你好"}'
            # Check tool call format
            assistant_msg = data["messages"][2]
            assert assistant_msg["content"] is None
@@ -249,61 +334,178 @@ def test_dataset_formatter_dump_to_file_tool_format(mock_dataset, tmp_path):
            tool_call = assistant_msg["tool_calls"][0]
            assert tool_call["type"] == "function"
            assert tool_call["function"]["name"] == "task_response"
-           assert tool_call["function"]["arguments"] == '{"test": "output"}'
+           assert tool_call["function"]["arguments"] == '{"test": "output 你好"}'
+
+
+def test_dataset_formatter_dump_with_intermediate_data(
+    mock_dataset, mock_intermediate_outputs
+):
+    formatter = DatasetFormatter(
+        mock_dataset,
+        "system message 你好",
+        thinking_instructions="thinking instructions",
+    )
+
+    result_path = formatter.dump_to_file(
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSONL,
+        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+    )
+
+    assert result_path.exists()
+    assert result_path.parent == Path(tempfile.gettempdir())
+    # Test our nice naming, with cot
+    assert (
+        result_path.name
+        == "test_dataset -- split-train -- format-openai_chat_jsonl -- cot.jsonl"
+    )
+    # Verify file contents
+    with open(result_path) as f:
+        lines = f.readlines()
+        assert len(lines) == 2
+        for line in lines:
+            assert "thinking output" in line
+            assert "thinking instructions" in line
+
+
+def test_dataset_formatter_dump_with_intermediate_data_custom_instructions(
+    mock_dataset, mock_intermediate_outputs
+):
+    formatter = DatasetFormatter(
+        mock_dataset, "custom system message 你好", "custom thinking instructions"
+    )
+
+    result_path = formatter.dump_to_file(
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSONL,
+        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+    )
+
+    assert result_path.exists()
+    assert result_path.parent == Path(tempfile.gettempdir())
+    # Test our nice naming, with cot
+    assert (
+        result_path.name
+        == "test_dataset -- split-train -- format-openai_chat_jsonl -- cot.jsonl"
+    )
+    # Verify file contents
+    with open(result_path) as f:
+        lines = f.readlines()
+        assert len(lines) == 2
+        for line in lines:
+            assert "custom system message 你好" in line
+            assert "custom thinking instructions" in line
+            assert "thinking output" in line
 
 
 def test_generate_huggingface_chat_template():
-    …
-        id="run1",
+    training_data = ModelTrainingData(
         input="test input",
-        …
+        system_message="system message",
+        final_output="test output",
+    )
+
+    result = generate_huggingface_chat_template(training_data)
+
+    assert result == {
+        "conversations": [
+            {"role": "system", "content": "system message"},
+            {"role": "user", "content": "test input"},
+            {"role": "assistant", "content": "test output"},
+        ]
+    }
+
+
+def test_generate_huggingface_chat_template_thinking():
+    training_data = ModelTrainingData(
+        input="test input",
+        system_message="system message",
+        final_output="test output",
+        thinking="thinking output",
+        thinking_instructions="thinking instructions",
+        thinking_final_answer_prompt="thinking final answer prompt",
     )
 
-    result = generate_huggingface_chat_template(
+    result = generate_huggingface_chat_template(training_data)
 
     assert result == {
         "conversations": [
             {"role": "system", "content": "system message"},
             {"role": "user", "content": "test input"},
+            {"role": "user", "content": "thinking instructions"},
+            {"role": "assistant", "content": "thinking output"},
+            {"role": "user", "content": "thinking final answer prompt"},
             {"role": "assistant", "content": "test output"},
         ]
     }
 
 
+def test_generate_vertex_template():
+    training_data = ModelTrainingData(
+        input="test input",
+        system_message="system message",
+        final_output="test output",
+    )
+
+    result = generate_vertex_gemini_1_5(training_data)
+
+    assert result == {
+        "systemInstruction": {
+            "role": "system",
+            "parts": [
+                {
+                    "text": "system message",
+                }
+            ],
+        },
+        "contents": [
+            {"role": "user", "parts": [{"text": "test input"}]},
+            {"role": "model", "parts": [{"text": "test output"}]},
+        ],
+    }
+
+
+def test_generate_vertex_template_thinking():
+    training_data = ModelTrainingData(
+        input="test input",
+        system_message="system message",
+        final_output="test output",
+        thinking="thinking output",
+        thinking_instructions="thinking instructions",
+        thinking_final_answer_prompt="thinking final answer prompt",
+    )
+
+    result = generate_vertex_gemini_1_5(training_data)
+
+    logger.info(result)
+
+    assert result == {
+        "systemInstruction": {
+            "role": "system",
+            "parts": [
+                {
+                    "text": "system message",
+                }
+            ],
+        },
+        "contents": [
+            {"role": "user", "parts": [{"text": "test input"}]},
+            {"role": "user", "parts": [{"text": "thinking instructions"}]},
+            {"role": "model", "parts": [{"text": "thinking output"}]},
+            {"role": "user", "parts": [{"text": "thinking final answer prompt"}]},
+            {"role": "model", "parts": [{"text": "test output"}]},
+        ],
+    }
+
+
 def test_generate_huggingface_chat_template_toolcall():
-    …
-        id="run1",
+    training_data = ModelTrainingData(
         input="test input",
-        …
-        ),
-        output=TaskOutput(
-            output='{"key": "value"}',
-            source=DataSource(
-                type=DataSourceType.synthetic,
-                properties={
-                    "model_name": "test",
-                    "model_provider": "test",
-                    "adapter_name": "test",
-                },
-            ),
-        ),
+        system_message="system message",
+        final_output='{"key": "value"}',
     )
 
-    result = generate_huggingface_chat_template_toolcall(
+    result = generate_huggingface_chat_template_toolcall(training_data)
 
     assert result["conversations"][0] == {"role": "system", "content": "system message"}
     assert result["conversations"][1] == {"role": "user", "content": "test input"}
@@ -318,25 +520,166 @@ def test_generate_huggingface_chat_template_toolcall():
     assert tool_call["function"]["arguments"] == {"key": "value"}
 
 
+def test_generate_huggingface_chat_template_toolcall_thinking():
+    training_data = ModelTrainingData(
+        input="test input",
+        system_message="system message",
+        final_output='{"key": "value"}',
+        thinking="thinking output",
+        thinking_instructions="thinking instructions",
+        thinking_final_answer_prompt="thinking final answer prompt",
+    )
+
+    result = generate_huggingface_chat_template_toolcall(training_data)
+
+    assert result["conversations"][0] == {"role": "system", "content": "system message"}
+    assert result["conversations"][1] == {"role": "user", "content": "test input"}
+    assert result["conversations"][2] == {
+        "role": "user",
+        "content": "thinking instructions",
+    }
+    assert result["conversations"][3] == {
+        "role": "assistant",
+        "content": "thinking output",
+    }
+    assert result["conversations"][4] == {
+        "role": "user",
+        "content": "thinking final answer prompt",
+    }
+
+    assistant_msg = result["conversations"][5]
+    assert assistant_msg["role"] == "assistant"
+    assert len(assistant_msg["tool_calls"]) == 1
+    tool_call = assistant_msg["tool_calls"][0]
+    assert tool_call["type"] == "function"
+    assert tool_call["function"]["name"] == "task_response"
+    assert len(tool_call["function"]["id"]) == 9  # UUID is truncated to 9 chars
+    assert tool_call["function"]["id"].isalnum()  # Check ID is alphanumeric
+    assert tool_call["function"]["arguments"] == {"key": "value"}
+
+
 def test_generate_huggingface_chat_template_toolcall_invalid_json():
-    …
-        id="run1",
+    training_data = ModelTrainingData(
         input="test input",
-        …
-        ),
-        output=TaskOutput(
-            output="invalid json",
-            source=DataSource(
-                type=DataSourceType.synthetic,
-                properties={
-                    "model_name": "test",
-                    "model_provider": "test",
-                    "adapter_name": "test",
-                },
-            ),
-        ),
+        system_message="system message",
+        final_output="invalid json",
     )
 
     with pytest.raises(ValueError, match="Invalid JSON in for tool call"):
-        generate_huggingface_chat_template_toolcall(
+        generate_huggingface_chat_template_toolcall(training_data)
+
+
+def test_build_training_data(mock_task):
+    # Non repaired should use original output
+    mock_task_run = mock_task.runs()[0]
+    training_data_output = build_training_data(mock_task_run, "system message", False)
+    assert training_data_output.final_output == '{"test": "output 你好"}'
+    assert training_data_output.thinking is None
+    assert training_data_output.thinking_instructions is None
+    assert training_data_output.thinking_final_answer_prompt is None
+    assert training_data_output.input == '{"test": "input 你好"}'
+    assert training_data_output.system_message == "system message"
+    assert not training_data_output.supports_cot()
+
+
+def test_build_training_data_with_COT(mock_task):
+    # Setup with needed fields for thinking
+    mock_task_run = mock_task.runs()[0]
+    assert mock_task_run.parent_task() == mock_task
+    mock_task_run.intermediate_outputs = {"chain_of_thought": "cot output"}
+
+    training_data_output = build_training_data(
+        mock_task_run,
+        "system message",
+        True,
+        thinking_instructions="thinking instructions",
+    )
+    assert training_data_output.final_output == '{"test": "output 你好"}'
+    assert training_data_output.thinking == "cot output"
+    assert training_data_output.thinking_instructions == "thinking instructions"
+    assert training_data_output.thinking_final_answer_prompt == COT_FINAL_ANSWER_PROMPT
+    assert training_data_output.input == '{"test": "input 你好"}'
+    assert training_data_output.system_message == "system message"
+    assert training_data_output.supports_cot()
+
+
+def test_build_training_data_with_thinking(mock_task):
+    # Setup with needed fields for thinking
+    mock_task_run = mock_task.runs()[0]
+    assert mock_task_run.parent_task() == mock_task
+    # It should just use the reasoning output if both thinking and chain_of_thought are present
+    mock_task_run.intermediate_outputs = {
+        "reasoning": "thinking output",
+        "chain_of_thought": "cot output",
+    }
+    mock_task.thinking_instruction = "thinking instructions"
+    assert mock_task.thinking_instruction == "thinking instructions"
+
+    training_data_output = build_training_data(
+        mock_task_run,
+        "system message",
+        True,
+        thinking_instructions="thinking instructions",
+    )
+    assert training_data_output.final_output == '{"test": "output 你好"}'
+    assert training_data_output.thinking == "thinking output"
+    assert training_data_output.thinking_instructions == "thinking instructions"
+    assert training_data_output.thinking_final_answer_prompt == COT_FINAL_ANSWER_PROMPT
+    assert training_data_output.input == '{"test": "input 你好"}'
+    assert training_data_output.system_message == "system message"
+    assert training_data_output.supports_cot()
+
+
+def test_build_training_data_with_repaired_output(mock_task):
+    # use repaired output if available
+    mock_task_run = mock_task.runs()[0]
+    mock_task_run.repair_instructions = "repair instructions"
+    mock_task_run.repaired_output = TaskOutput(
+        output='{"test": "repaired output"}',
+        source=DataSource(
+            type=DataSourceType.human,
+            properties={"created_by": "test-user"},
+        ),
+    )
+
+    training_data_output = build_training_data(mock_task_run, "system message", False)
+    assert training_data_output.final_output == '{"test": "repaired output"}'
+    assert training_data_output.thinking is None
+    assert training_data_output.thinking_instructions is None
+    assert training_data_output.thinking_final_answer_prompt is None
+    assert training_data_output.input == '{"test": "input 你好"}'
+    assert training_data_output.system_message == "system message"
+
+
+def test_dataset_formatter_dump_to_file_json_schema_format(mock_dataset, tmp_path):
+    formatter = DatasetFormatter(mock_dataset, "system message")
+    output_path = tmp_path / "output.jsonl"
+
+    result_path = formatter.dump_to_file(
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+        path=output_path,
+        data_strategy=FinetuneDataStrategy.final_only,
+    )
+
+    assert result_path == output_path
+    assert output_path.exists()
+
+    # Verify file contents
+    with open(output_path) as f:
+        lines = f.readlines()
+        assert len(lines) == 2  # Should have 2 entries for train split
+        for line in lines:
+            data = json.loads(line)
+            assert "messages" in data
+            assert len(data["messages"]) == 3
+            # Check system and user messages
+            assert data["messages"][0]["content"] == "system message"
+            assert data["messages"][1]["content"] == '{"test": "input 你好"}'
+            # Check JSON format
+            assistant_msg = data["messages"][2]
+            assert assistant_msg["role"] == "assistant"
+            # Verify the content is valid JSON
+            assert assistant_msg["content"] == '{"test": "output 你好"}'
+            json_content = json.loads(assistant_msg["content"])
+            assert json_content == {"test": "output 你好"}
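
For reference, the thinking-enabled formats asserted above all emit the same six-turn layout: system message, user input, thinking instructions, assistant thinking, final-answer prompt, assistant final output. A small sketch using the same helpers the tests exercise; the field values here are illustrative, not from the diff:

from kiln_ai.adapters.fine_tune.dataset_formatter import (
    ModelTrainingData,
    generate_chat_message_response,
)

row = ModelTrainingData(
    input="What is 2 + 2?",
    system_message="You are a careful math tutor.",
    thinking="Adding two and two gives four.",
    thinking_instructions="Think step by step before answering.",
    thinking_final_answer_prompt="Now give your final answer.",
    final_output="4",
)

# Yields {"messages": [...]} with the six-turn layout described above;
# omitting the thinking fields collapses it to system/user/assistant.
print(generate_chat_message_response(row))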
|