kiln-ai 0.15.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic.

Files changed (72)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +234 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
  7. kiln_ai/adapters/eval/base_eval.py +8 -6
  8. kiln_ai/adapters/eval/eval_runner.py +9 -65
  9. kiln_ai/adapters/eval/g_eval.py +26 -8
  10. kiln_ai/adapters/eval/test_base_eval.py +166 -15
  11. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  12. kiln_ai/adapters/eval/test_g_eval.py +1 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +153 -197
  15. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  16. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +402 -211
  17. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  18. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  19. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  20. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
  21. kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
  22. kiln_ai/adapters/ml_model_list.py +556 -45
  23. kiln_ai/adapters/model_adapters/base_adapter.py +100 -35
  24. kiln_ai/adapters/model_adapters/litellm_adapter.py +116 -100
  25. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  26. kiln_ai/adapters/model_adapters/test_base_adapter.py +299 -52
  27. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +121 -22
  28. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +44 -2
  29. kiln_ai/adapters/model_adapters/test_structured_output.py +48 -18
  30. kiln_ai/adapters/parsers/base_parser.py +0 -3
  31. kiln_ai/adapters/parsers/parser_registry.py +5 -3
  32. kiln_ai/adapters/parsers/r1_parser.py +17 -2
  33. kiln_ai/adapters/parsers/request_formatters.py +40 -0
  34. kiln_ai/adapters/parsers/test_parser_registry.py +2 -2
  35. kiln_ai/adapters/parsers/test_r1_parser.py +44 -1
  36. kiln_ai/adapters/parsers/test_request_formatters.py +76 -0
  37. kiln_ai/adapters/prompt_builders.py +14 -17
  38. kiln_ai/adapters/provider_tools.py +39 -4
  39. kiln_ai/adapters/repair/test_repair_task.py +27 -5
  40. kiln_ai/adapters/test_adapter_registry.py +88 -28
  41. kiln_ai/adapters/test_ml_model_list.py +158 -0
  42. kiln_ai/adapters/test_prompt_adaptors.py +17 -3
  43. kiln_ai/adapters/test_prompt_builders.py +27 -19
  44. kiln_ai/adapters/test_provider_tools.py +130 -12
  45. kiln_ai/datamodel/__init__.py +2 -2
  46. kiln_ai/datamodel/datamodel_enums.py +43 -4
  47. kiln_ai/datamodel/dataset_filters.py +69 -1
  48. kiln_ai/datamodel/dataset_split.py +4 -0
  49. kiln_ai/datamodel/eval.py +8 -0
  50. kiln_ai/datamodel/finetune.py +13 -7
  51. kiln_ai/datamodel/prompt_id.py +1 -0
  52. kiln_ai/datamodel/task.py +68 -7
  53. kiln_ai/datamodel/task_output.py +1 -1
  54. kiln_ai/datamodel/task_run.py +39 -7
  55. kiln_ai/datamodel/test_basemodel.py +5 -8
  56. kiln_ai/datamodel/test_dataset_filters.py +82 -0
  57. kiln_ai/datamodel/test_dataset_split.py +2 -8
  58. kiln_ai/datamodel/test_example_models.py +54 -0
  59. kiln_ai/datamodel/test_models.py +80 -9
  60. kiln_ai/datamodel/test_task.py +168 -2
  61. kiln_ai/utils/async_job_runner.py +106 -0
  62. kiln_ai/utils/config.py +3 -2
  63. kiln_ai/utils/dataset_import.py +81 -19
  64. kiln_ai/utils/logging.py +165 -0
  65. kiln_ai/utils/test_async_job_runner.py +199 -0
  66. kiln_ai/utils/test_config.py +23 -0
  67. kiln_ai/utils/test_dataset_import.py +272 -10
  68. {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
  69. kiln_ai-0.17.0.dist-info/RECORD +113 -0
  70. kiln_ai-0.15.0.dist-info/RECORD +0 -104
  71. {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
  72. {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
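Of these, the deepest change is the fine-tuning data pipeline, visible in the diff of kiln_ai/adapters/fine_tune/test_dataset_formatter.py below: ModelTrainingData and build_training_data are replaced by build_training_chat, which returns a list of ChatMessage objects from the new kiln_ai.adapters.chat package, and the FinetuneDataStrategy enum is superseded by ChatStrategy (single_turn, two_message_cot, two_message_cot_legacy, single_turn_r1_thinking). A minimal sketch of the new call shape, inferred from the updated tests; the task_run value is a hypothetical TaskRun loaded elsewhere:

from kiln_ai.adapters.fine_tune.dataset_formatter import build_training_chat
from kiln_ai.datamodel.datamodel_enums import ChatStrategy

# task_run is a hypothetical kiln_ai TaskRun loaded elsewhere.
messages = build_training_chat(
    task_run,
    "system message",
    data_strategy=ChatStrategy.single_turn,  # was FinetuneDataStrategy.final_only
)
for message in messages:
    # Plain ChatMessage objects (role + content) replace the old
    # ModelTrainingData bundle of thinking/final-output fields.
    print(message.role, message.content)

Modeling each training example as an explicit chat transcript lets every formatter in the diff (OpenAI chat, tool calls, HuggingFace templates, Vertex Gemini) consume the same message list instead of re-deriving turns from a struct.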
kiln_ai/adapters/fine_tune/test_dataset_formatter.py
@@ -1,32 +1,34 @@
 import json
 import logging
+import re
 import tempfile
 from pathlib import Path
 from unittest.mock import Mock

 import pytest

+from kiln_ai.adapters.chat.chat_formatter import COT_FINAL_ANSWER_PROMPT, ChatMessage
 from kiln_ai.adapters.fine_tune.dataset_formatter import (
+    VERTEX_GEMINI_ROLE_MAP,
     DatasetFormat,
     DatasetFormatter,
-    ModelTrainingData,
-    build_training_data,
+    build_training_chat,
     generate_chat_message_response,
     generate_chat_message_toolcall,
     generate_huggingface_chat_template,
     generate_huggingface_chat_template_toolcall,
     generate_vertex_gemini,
+    serialize_r1_style_message,
 )
-from kiln_ai.adapters.model_adapters.base_adapter import COT_FINAL_ANSWER_PROMPT
 from kiln_ai.datamodel import (
     DatasetSplit,
     DataSource,
     DataSourceType,
-    FinetuneDataStrategy,
     Task,
     TaskOutput,
     TaskRun,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy

 logger = logging.getLogger(__name__)

@@ -42,6 +44,7 @@ def mock_task():
             "input": '{"test": "input 你好"}',
             "repaired_output": None,
             "intermediate_outputs": {},
+            "thinking_training_data": Mock(return_value=None),
             "input_source": Mock(
                 spec=DataSource,
                 **{
@@ -83,6 +86,7 @@ def mock_task():
 def mock_intermediate_outputs(mock_task):
     for run in mock_task.runs():
         run.intermediate_outputs = {"reasoning": "thinking output"}
+        run.thinking_training_data.return_value = "thinking output"
     mock_task.thinking_instruction = "thinking instructions"
     return mock_task

@@ -96,41 +100,56 @@ def mock_dataset(mock_task):
     return dataset


-def test_generate_chat_message_response():
-    thinking_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="test output",
-    )
+@pytest.fixture
+def mock_training_chat_short():
+    return [
+        ChatMessage(role="system", content="system message"),
+        ChatMessage(
+            role="user",
+            content="test input",
+        ),
+        ChatMessage(role="assistant", content="test output"),
+    ]

-    result = generate_chat_message_response(thinking_data)

-    assert result == {
-        "messages": [
-            {"role": "system", "content": "system message"},
-            {"role": "user", "content": "test input"},
-            {"role": "assistant", "content": "test output"},
-        ]
-    }
+@pytest.fixture
+def mock_training_chat_two_step_plaintext():
+    return [
+        ChatMessage(role="system", content="system message"),
+        ChatMessage(
+            role="user",
+            content="The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
+        ),
+        ChatMessage(role="assistant", content="thinking output"),
+        ChatMessage(role="user", content="thinking final answer prompt"),
+        ChatMessage(role="assistant", content="test output"),
+    ]


-def test_generate_chat_message_response_thinking():
-    thinking_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="test output",
-        thinking="thinking output",
-        thinking_instructions="thinking instructions",
-        thinking_final_answer_prompt="thinking final answer prompt",
-    )
+@pytest.fixture
+def mock_training_chat_two_step_json():
+    return [
+        ChatMessage(role="system", content="system message"),
+        ChatMessage(
+            role="user",
+            content="The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
+        ),
+        ChatMessage(role="assistant", content="thinking output"),
+        ChatMessage(role="user", content="thinking final answer prompt"),
+        ChatMessage(role="assistant", content='{"a":"你好"}'),
+    ]

-    result = generate_chat_message_response(thinking_data)
+
+def test_generate_chat_message_response(mock_training_chat_two_step_plaintext):
+    result = generate_chat_message_response(mock_training_chat_two_step_plaintext)

     assert result == {
         "messages": [
             {"role": "system", "content": "system message"},
-            {"role": "user", "content": "test input"},
-            {"role": "user", "content": "thinking instructions"},
+            {
+                "role": "user",
+                "content": "The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
+            },
             {"role": "assistant", "content": "thinking output"},
             {"role": "user", "content": "thinking final answer prompt"},
             {"role": "assistant", "content": "test output"},
@@ -138,54 +157,33 @@ def test_generate_chat_message_response_thinking():
     }


-def test_generate_chat_message_toolcall():
-    training_data = ModelTrainingData(
-        input="test input 你好",
-        system_message="system message 你好",
-        final_output='{"key": "value 你好"}',
-    )
-
-    result = generate_chat_message_toolcall(training_data)
+def test_generate_chat_message_response_json(mock_training_chat_two_step_json):
+    result = generate_chat_message_response(mock_training_chat_two_step_json)

     assert result == {
         "messages": [
-            {"role": "system", "content": "system message 你好"},
-            {"role": "user", "content": "test input 你好"},
+            {"role": "system", "content": "system message"},
             {
-                "role": "assistant",
-                "content": None,
-                "tool_calls": [
-                    {
-                        "id": "call_1",
-                        "type": "function",
-                        "function": {
-                            "name": "task_response",
-                            "arguments": '{"key": "value 你好"}',
-                        },
-                    }
-                ],
+                "role": "user",
+                "content": "The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
             },
+            {"role": "assistant", "content": "thinking output"},
+            {"role": "user", "content": "thinking final answer prompt"},
+            {"role": "assistant", "content": '{"a":"你好"}'},
         ]
     }


-def test_generate_chat_message_toolcall_thinking():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output='{"key": "value"}',
-        thinking="thinking output",
-        thinking_instructions="thinking instructions",
-        thinking_final_answer_prompt="thinking final answer prompt",
-    )
-
-    result = generate_chat_message_toolcall(training_data)
+def test_generate_chat_message_toolcall(mock_training_chat_two_step_json):
+    result = generate_chat_message_toolcall(mock_training_chat_two_step_json)

     assert result == {
         "messages": [
             {"role": "system", "content": "system message"},
-            {"role": "user", "content": "test input"},
-            {"role": "user", "content": "thinking instructions"},
+            {
+                "role": "user",
+                "content": "The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
+            },
             {"role": "assistant", "content": "thinking output"},
             {"role": "user", "content": "thinking final answer prompt"},
             {
@@ -197,7 +195,7 @@ def test_generate_chat_message_toolcall_thinking():
                     "type": "function",
                     "function": {
                         "name": "task_response",
-                        "arguments": '{"key": "value"}',
+                        "arguments": '{"a": "你好"}',
                     },
                 }
             ],
@@ -206,31 +204,17 @@ def test_generate_chat_message_toolcall_thinking():
     }


-def test_generate_chat_message_toolcall_invalid_json():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="invalid json",
-    )
-
-    with pytest.raises(ValueError, match="Invalid JSON in for tool call"):
-        generate_chat_message_toolcall(training_data)
-
-
-def test_dataset_formatter_init_no_parent_task(mock_dataset):
-    mock_dataset.parent_task.return_value = None
-
-    with pytest.raises(ValueError, match="Dataset has no parent task"):
-        DatasetFormatter(mock_dataset, "system message")
+def test_generate_chat_message_toolcall_invalid_json(mock_training_chat_two_step_json):
+    mock_training_chat_two_step_json[-1].content = "invalid json"
+    with pytest.raises(ValueError, match="^Last message is not JSON"):
+        generate_chat_message_toolcall(mock_training_chat_two_step_json)


 def test_dataset_formatter_dump_invalid_format(mock_dataset):
     formatter = DatasetFormatter(mock_dataset, "system message")

     with pytest.raises(ValueError, match="Unsupported format"):
-        formatter.dump_to_file(
-            "train", "invalid_format", FinetuneDataStrategy.final_only
-        )  # type: ignore
+        formatter.dump_to_file("train", "invalid_format", ChatStrategy.single_turn)


 def test_dataset_formatter_dump_invalid_split(mock_dataset):
@@ -240,7 +224,7 @@ def test_dataset_formatter_dump_invalid_split(mock_dataset):
         formatter.dump_to_file(
             "invalid_split",
             DatasetFormat.OPENAI_CHAT_JSONL,
-            FinetuneDataStrategy.final_only,
+            ChatStrategy.single_turn,
         )


@@ -252,7 +236,7 @@ def test_dataset_formatter_dump_to_file(mock_dataset, tmp_path):
         "train",
         DatasetFormat.OPENAI_CHAT_JSONL,
         path=output_path,
-        data_strategy=FinetuneDataStrategy.final_only,
+        data_strategy=ChatStrategy.single_turn,
     )

     assert result_path == output_path
@@ -278,7 +262,7 @@ def test_dataset_formatter_dump_to_temp_file(mock_dataset):
     result_path = formatter.dump_to_file(
         "train",
         DatasetFormat.OPENAI_CHAT_JSONL,
-        data_strategy=FinetuneDataStrategy.final_only,
+        data_strategy=ChatStrategy.single_turn,
     )

     assert result_path.exists()
@@ -309,7 +293,7 @@ def test_dataset_formatter_dump_to_file_tool_format(mock_dataset, tmp_path):
         "train",
         DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL,
         path=output_path,
-        data_strategy=FinetuneDataStrategy.final_only,
+        data_strategy=ChatStrategy.single_turn,
     )

     assert result_path == output_path
@@ -349,7 +333,7 @@ def test_dataset_formatter_dump_with_intermediate_data(
     result_path = formatter.dump_to_file(
         "train",
         DatasetFormat.OPENAI_CHAT_JSONL,
-        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+        data_strategy=ChatStrategy.two_message_cot_legacy,
     )

     assert result_path.exists()
@@ -368,17 +352,19 @@
         assert "thinking instructions" in line


-def test_dataset_formatter_dump_with_intermediate_data_custom_instructions(
+def test_dataset_formatter_dump_with_intermediate_data_r1_style(
     mock_dataset, mock_intermediate_outputs
 ):
     formatter = DatasetFormatter(
-        mock_dataset, "custom system message 你好", "custom thinking instructions"
+        mock_dataset,
+        "system message 你好",
+        thinking_instructions=None,
     )

     result_path = formatter.dump_to_file(
         "train",
         DatasetFormat.OPENAI_CHAT_JSONL,
-        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+        data_strategy=ChatStrategy.single_turn_r1_thinking,
     )

     assert result_path.exists()
@@ -393,46 +379,50 @@
         lines = f.readlines()
         assert len(lines) == 2
         for line in lines:
-            assert "custom system message 你好" in line
-            assert "custom thinking instructions" in line
-            assert "thinking output" in line
+            assert "<think>" in line
+            assert "</think>" in line


-def test_generate_huggingface_chat_template():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="test output",
+def test_dataset_formatter_dump_with_intermediate_data_custom_instructions(
+    mock_dataset, mock_intermediate_outputs
+):
+    formatter = DatasetFormatter(
+        mock_dataset, "custom system message 你好", "custom thinking instructions"
     )

-    result = generate_huggingface_chat_template(training_data)
-
-    assert result == {
-        "conversations": [
-            {"role": "system", "content": "system message"},
-            {"role": "user", "content": "test input"},
-            {"role": "assistant", "content": "test output"},
-        ]
-    }
-
+    result_path = formatter.dump_to_file(
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSONL,
+        data_strategy=ChatStrategy.two_message_cot_legacy,
+    )

-def test_generate_huggingface_chat_template_thinking():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="test output",
-        thinking="thinking output",
-        thinking_instructions="thinking instructions",
-        thinking_final_answer_prompt="thinking final answer prompt",
+    assert result_path.exists()
+    assert result_path.parent == Path(tempfile.gettempdir())
+    # Test our nice naming, with cot
+    assert (
+        result_path.name
+        == "test_dataset -- split-train -- format-openai_chat_jsonl -- cot.jsonl"
     )
+    # Verify file contents
+    with open(result_path) as f:
+        lines = f.readlines()
+        assert len(lines) == 2
+        for line in lines:
+            assert "custom system message 你好" in line
+            assert "custom thinking instructions" in line
+            assert "thinking output" in line

-    result = generate_huggingface_chat_template(training_data)
+
+def test_generate_huggingface_chat_template(mock_training_chat_two_step_plaintext):
+    result = generate_huggingface_chat_template(mock_training_chat_two_step_plaintext)

     assert result == {
         "conversations": [
             {"role": "system", "content": "system message"},
-            {"role": "user", "content": "test input"},
-            {"role": "user", "content": "thinking instructions"},
+            {
+                "role": "user",
+                "content": "The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
+            },
             {"role": "assistant", "content": "thinking output"},
             {"role": "user", "content": "thinking final answer prompt"},
             {"role": "assistant", "content": "test output"},
@@ -440,14 +430,8 @@ def test_generate_huggingface_chat_template_thinking():
     }


-def test_generate_vertex_template():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="test output",
-    )
-
-    result = generate_vertex_gemini(training_data)
+def test_generate_vertex_template(mock_training_chat_short):
+    result = generate_vertex_gemini(mock_training_chat_short)

     assert result == {
         "systemInstruction": {
@@ -465,19 +449,8 @@ def test_generate_vertex_template():
     }


-def test_generate_vertex_template_thinking():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="test output",
-        thinking="thinking output",
-        thinking_instructions="thinking instructions",
-        thinking_final_answer_prompt="thinking final answer prompt",
-    )
-
-    result = generate_vertex_gemini(training_data)
-
-    logger.info(result)
+def test_generate_vertex_template_thinking(mock_training_chat_two_step_plaintext):
+    result = generate_vertex_gemini(mock_training_chat_two_step_plaintext)

     assert result == {
         "systemInstruction": {
@@ -489,8 +462,14 @@
             ],
         },
         "contents": [
-            {"role": "user", "parts": [{"text": "test input"}]},
-            {"role": "user", "parts": [{"text": "thinking instructions"}]},
+            {
+                "role": "user",
+                "parts": [
+                    {
+                        "text": "The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
+                    }
+                ],
+            },
             {"role": "model", "parts": [{"text": "thinking output"}]},
             {"role": "user", "parts": [{"text": "thinking final answer prompt"}]},
             {"role": "model", "parts": [{"text": "test output"}]},
@@ -499,13 +478,13 @@

 def test_generate_huggingface_chat_template_toolcall():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output='{"key": "value"}',
-    )
+    messages = [
+        ChatMessage("system", "system message"),
+        ChatMessage("user", "test input"),
+        ChatMessage("assistant", '{"key":"value"}'),
+    ]

-    result = generate_huggingface_chat_template_toolcall(training_data)
+    result = generate_huggingface_chat_template_toolcall(messages)

     assert result["conversations"][0] == {"role": "system", "content": "system message"}
     assert result["conversations"][1] == {"role": "user", "content": "test input"}
@@ -520,34 +499,28 @@
     assert tool_call["function"]["arguments"] == {"key": "value"}


-def test_generate_huggingface_chat_template_toolcall_thinking():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output='{"key": "value"}',
-        thinking="thinking output",
-        thinking_instructions="thinking instructions",
-        thinking_final_answer_prompt="thinking final answer prompt",
+def test_generate_huggingface_chat_template_toolcall_thinking(
+    mock_training_chat_two_step_json,
+):
+    result = generate_huggingface_chat_template_toolcall(
+        mock_training_chat_two_step_json
     )

-    result = generate_huggingface_chat_template_toolcall(training_data)
-
     assert result["conversations"][0] == {"role": "system", "content": "system message"}
-    assert result["conversations"][1] == {"role": "user", "content": "test input"}
-    assert result["conversations"][2] == {
+    assert result["conversations"][1] == {
         "role": "user",
-        "content": "thinking instructions",
+        "content": "The input is:\n<user_input>\ntest input\n</user_input>\n\nthinking instructions",
     }
-    assert result["conversations"][3] == {
+    assert result["conversations"][2] == {
         "role": "assistant",
         "content": "thinking output",
     }
-    assert result["conversations"][4] == {
+    assert result["conversations"][3] == {
         "role": "user",
         "content": "thinking final answer prompt",
     }

-    assistant_msg = result["conversations"][5]
+    assistant_msg = result["conversations"][4]
     assert assistant_msg["role"] == "assistant"
     assert len(assistant_msg["tool_calls"]) == 1
     tool_call = assistant_msg["tool_calls"][0]
@@ -555,31 +528,39 @@ def test_generate_huggingface_chat_template_toolcall_thinking():
     assert tool_call["function"]["name"] == "task_response"
     assert len(tool_call["function"]["id"]) == 9  # UUID is truncated to 9 chars
     assert tool_call["function"]["id"].isalnum()  # Check ID is alphanumeric
-    assert tool_call["function"]["arguments"] == {"key": "value"}
+    assert tool_call["function"]["arguments"] == {"a": "你好"}


-def test_generate_huggingface_chat_template_toolcall_invalid_json():
-    training_data = ModelTrainingData(
-        input="test input",
-        system_message="system message",
-        final_output="invalid json",
-    )
+def test_generate_huggingface_chat_template_toolcall_invalid_json(
+    mock_training_chat_two_step_json,
+):
+    mock_training_chat_two_step_json[-1].content = "invalid json"

-    with pytest.raises(ValueError, match="Invalid JSON in for tool call"):
-        generate_huggingface_chat_template_toolcall(training_data)
+    with pytest.raises(ValueError, match="^Last message is not JSON"):
+        generate_huggingface_chat_template_toolcall(mock_training_chat_two_step_json)


-def test_build_training_data(mock_task):
+def test_build_training_chat(mock_task):
     # Non repaired should use original output
     mock_task_run = mock_task.runs()[0]
-    training_data_output = build_training_data(mock_task_run, "system message", False)
-    assert training_data_output.final_output == '{"test": "output 你好"}'
-    assert training_data_output.thinking is None
-    assert training_data_output.thinking_instructions is None
-    assert training_data_output.thinking_final_answer_prompt is None
-    assert training_data_output.input == '{"test": "input 你好"}'
-    assert training_data_output.system_message == "system message"
-    assert not training_data_output.supports_cot()
+    messages = build_training_chat(
+        mock_task_run,
+        "system message",
+        data_strategy=ChatStrategy.single_turn,
+    )
+
+    assert len(messages) == 3
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert user_msg.content == '{"test": "input 你好"}'
+
+    final_msg = messages[2]
+    assert final_msg.role == "assistant"
+    assert final_msg.content == '{"test": "output 你好"}'


 def test_build_training_data_with_COT(mock_task):
@@ -587,20 +568,109 @@ def test_build_training_data_with_COT(mock_task):
     mock_task_run = mock_task.runs()[0]
     assert mock_task_run.parent_task() == mock_task
     mock_task_run.intermediate_outputs = {"chain_of_thought": "cot output"}
+    mock_task_run.thinking_training_data.return_value = "cot output"

-    training_data_output = build_training_data(
+    messages = build_training_chat(
         mock_task_run,
         "system message",
-        True,
+        data_strategy=ChatStrategy.two_message_cot,
         thinking_instructions="thinking instructions",
     )
-    assert training_data_output.final_output == '{"test": "output 你好"}'
-    assert training_data_output.thinking == "cot output"
-    assert training_data_output.thinking_instructions == "thinking instructions"
-    assert training_data_output.thinking_final_answer_prompt == COT_FINAL_ANSWER_PROMPT
-    assert training_data_output.input == '{"test": "input 你好"}'
-    assert training_data_output.system_message == "system message"
-    assert training_data_output.supports_cot()
+
+    assert len(messages) == 5
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert (
+        user_msg.content
+        == 'The input is:\n<user_input>\n{"test": "input 你好"}\n</user_input>\n\nthinking instructions'
+    )
+
+    assistant_msg = messages[2]
+    assert assistant_msg.role == "assistant"
+    assert assistant_msg.content == "cot output"
+
+    final_answer_prompt_msg = messages[3]
+    assert final_answer_prompt_msg.role == "user"
+    assert final_answer_prompt_msg.content == COT_FINAL_ANSWER_PROMPT
+
+    final_msg = messages[4]
+    assert final_msg.role == "assistant"
+    assert final_msg.content == '{"test": "output 你好"}'
+
+
+def test_build_training_data_with_COT_legacy(mock_task):
+    # Setup with needed fields for thinking
+    mock_task_run = mock_task.runs()[0]
+    assert mock_task_run.parent_task() == mock_task
+    mock_task_run.intermediate_outputs = {"chain_of_thought": "cot output"}
+    mock_task_run.thinking_training_data.return_value = "cot output"
+
+    messages = build_training_chat(
+        mock_task_run,
+        "system message",
+        data_strategy=ChatStrategy.two_message_cot_legacy,
+        thinking_instructions="thinking instructions",
+    )
+
+    assert len(messages) == 6
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert user_msg.content == '{"test": "input 你好"}'
+
+    cot_msg = messages[2]
+    assert cot_msg.role == "system"
+    assert cot_msg.content == "thinking instructions"
+
+    assistant_msg = messages[3]
+    assert assistant_msg.role == "assistant"
+    assert assistant_msg.content == "cot output"
+
+    final_answer_prompt_msg = messages[4]
+    assert final_answer_prompt_msg.role == "user"
+    assert final_answer_prompt_msg.content == COT_FINAL_ANSWER_PROMPT
+
+    final_msg = messages[5]
+    assert final_msg.role == "assistant"
+    assert final_msg.content == '{"test": "output 你好"}'
+
+
+def test_build_training_data_with_COT_r1_style(mock_task):
+    # Setup with needed fields for thinking
+    mock_task_run = mock_task.runs()[0]
+    assert mock_task_run.parent_task() == mock_task
+    mock_task_run.intermediate_outputs = {"chain_of_thought": "cot output"}
+    mock_task_run.thinking_training_data.return_value = "cot output"
+
+    messages = build_training_chat(
+        mock_task_run,
+        "system message",
+        data_strategy=ChatStrategy.single_turn_r1_thinking,
+        thinking_instructions=None,
+    )
+
+    assert len(messages) == 3
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert user_msg.content == '{"test": "input 你好"}'
+
+    final_msg = messages[2]
+    assert final_msg.role == "assistant"
+    assert (
+        final_msg.content
+        == '<think>\ncot output\n</think>\n\n{"test": "output 你好"}'
+    )


 def test_build_training_data_with_thinking(mock_task):
@@ -612,22 +682,78 @@ def test_build_training_data_with_thinking(mock_task):
         "reasoning": "thinking output",
         "chain_of_thought": "cot output",
     }
+    mock_task_run.thinking_training_data.return_value = "thinking output"
     mock_task.thinking_instruction = "thinking instructions"
     assert mock_task.thinking_instruction == "thinking instructions"

-    training_data_output = build_training_data(
+    messages = build_training_chat(
         mock_task_run,
         "system message",
-        True,
+        ChatStrategy.two_message_cot,
         thinking_instructions="thinking instructions",
     )
-    assert training_data_output.final_output == '{"test": "output 你好"}'
-    assert training_data_output.thinking == "thinking output"
-    assert training_data_output.thinking_instructions == "thinking instructions"
-    assert training_data_output.thinking_final_answer_prompt == COT_FINAL_ANSWER_PROMPT
-    assert training_data_output.input == '{"test": "input 你好"}'
-    assert training_data_output.system_message == "system message"
-    assert training_data_output.supports_cot()
+
+    assert len(messages) == 5
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert (
+        user_msg.content
+        == 'The input is:\n<user_input>\n{"test": "input 你好"}\n</user_input>\n\nthinking instructions'
+    )
+
+    assistant_msg = messages[2]
+    assert assistant_msg.role == "assistant"
+    assert assistant_msg.content == "thinking output"
+
+    final_answer_prompt_msg = messages[3]
+    assert final_answer_prompt_msg.role == "user"
+    assert final_answer_prompt_msg.content == COT_FINAL_ANSWER_PROMPT
+
+    final_msg = messages[4]
+    assert final_msg.role == "assistant"
+    assert final_msg.content == '{"test": "output 你好"}'
+
+
+def test_build_training_data_with_thinking_r1_style(mock_task):
+    # Setup with needed fields for thinking
+    mock_task_run = mock_task.runs()[0]
+    assert mock_task_run.parent_task() == mock_task
+    # It should just use the reasoning output if both thinking and chain_of_thought are present
+    mock_task_run.intermediate_outputs = {
+        "reasoning": "thinking output",
+        "chain_of_thought": "cot output",
+    }
+    mock_task_run.thinking_training_data.return_value = "thinking output"
+    mock_task.thinking_instruction = "thinking instructions"
+
+    assert mock_task.thinking_instruction == "thinking instructions"
+
+    messages = build_training_chat(
+        mock_task_run,
+        "system message",
+        ChatStrategy.single_turn_r1_thinking,
+        thinking_instructions=None,
+    )
+
+    assert len(messages) == 3
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert user_msg.content == '{"test": "input 你好"}'
+
+    final_msg = messages[2]
+    assert final_msg.role == "assistant"
+    assert (
+        final_msg.content
+        == '<think>\nthinking output\n</think>\n\n{"test": "output 你好"}'
    )


 def test_build_training_data_with_repaired_output(mock_task):
@@ -642,13 +768,25 @@ def test_build_training_data_with_repaired_output(mock_task):
         ),
     )

-    training_data_output = build_training_data(mock_task_run, "system message", False)
-    assert training_data_output.final_output == '{"test": "repaired output"}'
-    assert training_data_output.thinking is None
-    assert training_data_output.thinking_instructions is None
-    assert training_data_output.thinking_final_answer_prompt is None
-    assert training_data_output.input == '{"test": "input 你好"}'
-    assert training_data_output.system_message == "system message"
+    messages = build_training_chat(
+        mock_task_run,
+        "system message",
+        data_strategy=ChatStrategy.single_turn,
+    )
+
+    assert len(messages) == 3
+    system_msg = messages[0]
+    assert system_msg.role == "system"
+    assert system_msg.content == "system message"
+
+    user_msg = messages[1]
+    assert user_msg.role == "user"
+    assert user_msg.content == '{"test": "input 你好"}'
+
+    final_msg = messages[2]
+    assert final_msg.role == "assistant"
+    # Note we re-format the json
+    assert final_msg.content == '{"test": "repaired output"}'


 def test_dataset_formatter_dump_to_file_json_schema_format(mock_dataset, tmp_path):
@@ -659,7 +797,7 @@ def test_dataset_formatter_dump_to_file_json_schema_format(mock_dataset, tmp_path):
         "train",
         DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
         path=output_path,
-        data_strategy=FinetuneDataStrategy.final_only,
+        data_strategy=ChatStrategy.single_turn,
     )

     assert result_path == output_path
@@ -683,3 +821,56 @@ def test_dataset_formatter_dump_to_file_json_schema_format(mock_dataset, tmp_path):
     assert assistant_msg["content"] == '{"test": "output 你好"}'
     json_content = json.loads(assistant_msg["content"])
     assert json_content == {"test": "output 你好"}
+
+
+@pytest.mark.parametrize(
+    "thinking,final_output,expected_output",
+    [
+        ("thinking", "final output", "<think>\nthinking\n</think>\n\nfinal output"),
+        ("thinking", '{"name":"joe"}', '<think>\nthinking\n</think>\n\n{"name":"joe"}'),
+    ],
+)
+def test_serialize_r1_style_message(thinking, final_output, expected_output):
+    assert (
+        serialize_r1_style_message(thinking=thinking, final_output=final_output)
+        == expected_output
+    )
+
+
+@pytest.mark.parametrize(
+    "thinking,final_output",
+    [
+        (None, "final output"),
+        ("", "final output"),
+        (" ", "final output"),
+    ],
+)
+def test_serialize_r1_style_message_missing_thinking(thinking, final_output):
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            "Thinking data is required when fine-tuning thinking models (R1, QwQ, etc). Please ensure your fine-tuning dataset contains reasoning or chain of thought output for every entry."
+        ),
+    ):
+        serialize_r1_style_message(thinking=thinking, final_output=final_output)
+
+
+def test_vertex_gemini_role_map_coverage():
+    """Test that VERTEX_GEMINI_ROLE_MAP covers all possible ChatMessage.role values"""
+    from typing import Literal, get_type_hints
+
+    # Get the Literal type from ChatMessage.role
+    role_type = get_type_hints(ChatMessage)["role"]
+    # Extract the possible values from the Literal type
+    possible_roles = role_type.__args__  # type: ignore
+
+    # Check that every possible role is in the map
+    for role in possible_roles:
+        assert role in VERTEX_GEMINI_ROLE_MAP, (
+            f"Role {role} is not mapped in VERTEX_GEMINI_ROLE_MAP"
+        )
+
+    # Check that there are no extra mappings
+    assert set(VERTEX_GEMINI_ROLE_MAP.keys()) == set(possible_roles), (
+        "VERTEX_GEMINI_ROLE_MAP has extra mappings"
+    )