kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.


Files changed (54)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +234 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
  7. kiln_ai/adapters/eval/base_eval.py +8 -6
  8. kiln_ai/adapters/eval/eval_runner.py +4 -1
  9. kiln_ai/adapters/eval/g_eval.py +23 -5
  10. kiln_ai/adapters/eval/test_base_eval.py +166 -15
  11. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  12. kiln_ai/adapters/eval/test_g_eval.py +1 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +138 -272
  15. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  16. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
  17. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  18. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  19. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  20. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
  21. kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
  22. kiln_ai/adapters/ml_model_list.py +80 -43
  23. kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
  24. kiln_ai/adapters/model_adapters/litellm_adapter.py +79 -97
  25. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  26. kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -60
  27. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +56 -21
  28. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
  29. kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
  30. kiln_ai/adapters/prompt_builders.py +0 -16
  31. kiln_ai/adapters/provider_tools.py +27 -9
  32. kiln_ai/adapters/repair/test_repair_task.py +24 -3
  33. kiln_ai/adapters/test_adapter_registry.py +88 -28
  34. kiln_ai/adapters/test_ml_model_list.py +158 -0
  35. kiln_ai/adapters/test_prompt_adaptors.py +17 -3
  36. kiln_ai/adapters/test_prompt_builders.py +3 -16
  37. kiln_ai/adapters/test_provider_tools.py +69 -20
  38. kiln_ai/datamodel/__init__.py +0 -2
  39. kiln_ai/datamodel/datamodel_enums.py +38 -13
  40. kiln_ai/datamodel/finetune.py +12 -7
  41. kiln_ai/datamodel/task.py +68 -7
  42. kiln_ai/datamodel/test_basemodel.py +2 -1
  43. kiln_ai/datamodel/test_dataset_split.py +0 -8
  44. kiln_ai/datamodel/test_models.py +33 -10
  45. kiln_ai/datamodel/test_task.py +168 -2
  46. kiln_ai/utils/config.py +3 -2
  47. kiln_ai/utils/dataset_import.py +1 -1
  48. kiln_ai/utils/logging.py +165 -0
  49. kiln_ai/utils/test_config.py +23 -0
  50. kiln_ai/utils/test_dataset_import.py +30 -0
  51. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
  52. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/RECORD +54 -49
  53. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
  54. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/test_provider_tools.py CHANGED
@@ -18,7 +18,7 @@ from kiln_ai.adapters.provider_tools import (
     finetune_provider_model,
     get_model_and_provider,
     kiln_model_provider_from,
-    lite_llm_config,
+    lite_llm_config_for_openai_compatible,
     lite_llm_provider_model,
     parse_custom_model_id,
     provider_enabled,
@@ -27,10 +27,11 @@ from kiln_ai.adapters.provider_tools import (
 )
 from kiln_ai.datamodel import (
     Finetune,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 @pytest.fixture(autouse=True)
@@ -71,7 +72,7 @@ def mock_finetune():
     finetune.provider = ModelProviderName.openai
     finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
     finetune.structured_output_mode = StructuredOutputMode.json_schema
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock.return_value = finetune
     yield mock
 
@@ -83,7 +84,7 @@ def mock_finetune_final_and_intermediate():
     finetune.provider = ModelProviderName.openai
     finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
     finetune.structured_output_mode = StructuredOutputMode.json_schema
-    finetune.data_strategy = FinetuneDataStrategy.final_and_intermediate
+    finetune.data_strategy = ChatStrategy.two_message_cot
     mock.return_value = finetune
     yield mock
 
@@ -95,9 +96,7 @@ def mock_finetune_r1_compatible():
     finetune.provider = ModelProviderName.ollama
     finetune.fine_tune_model_id = "ft:deepseek-r1:671b:custom:model-123"
     finetune.structured_output_mode = StructuredOutputMode.json_schema
-    finetune.data_strategy = (
-        FinetuneDataStrategy.final_and_intermediate_r1_compatible
-    )
+    finetune.data_strategy = ChatStrategy.single_turn_r1_thinking
     mock.return_value = finetune
     yield mock
 
@@ -357,6 +356,7 @@ async def test_kiln_model_provider_from_custom_model_valid(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "custom_model"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions
 
 
 @pytest.mark.asyncio
@@ -374,6 +374,7 @@ async def test_kiln_model_provider_from_custom_registry(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "gpt-4-turbo"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions
 
 
 @pytest.mark.asyncio
@@ -474,7 +475,7 @@ def test_finetune_provider_model_success_final_and_intermediate(
     assert provider.name == ModelProviderName.openai
     assert provider.model_id == "ft:gpt-3.5-turbo:custom:model-123"
     assert provider.structured_output_mode == StructuredOutputMode.json_schema
-    assert provider.reasoning_capable is True
+    assert provider.reasoning_capable is False
     assert provider.parser == None
 
 
@@ -580,7 +581,7 @@ def test_finetune_provider_model_structured_mode(
     finetune.provider = provider_name
     finetune.fine_tune_model_id = "fireworks-model-123"
     finetune.structured_output_mode = structured_output_mode
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune
 
     provider = finetune_provider_model("project-123::task-456::finetune-789")
@@ -596,10 +597,20 @@ def test_openai_compatible_provider_config(mock_shared_config):
     """Test successful creation of an OpenAI compatible provider"""
     model_id = "test_provider::gpt-4"
 
-    config = lite_llm_config(model_id)
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai_compatible,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )
 
-    assert config.provider_name == ModelProviderName.openai_compatible
-    assert config.model_name == "gpt-4"
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "test-key"}
     assert config.base_url == "https://api.test.com"
 
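As the tests above show, lite_llm_config_for_openai_compatible now takes a full RunConfigProperties rather than a bare model ID string. A minimal sketch of the new call shape, mirroring the tests ("my_provider" is a hypothetical name that must match an OpenAI-compatible provider saved in Kiln's settings):

from kiln_ai.adapters.provider_tools import lite_llm_config_for_openai_compatible
from kiln_ai.datamodel.datamodel_enums import ModelProviderName
from kiln_ai.datamodel.task import RunConfigProperties

# For openai_compatible, model_name encodes "<provider name>::<model id>".
config = lite_llm_config_for_openai_compatible(
    RunConfigProperties(
        model_name="my_provider::gpt-4",
        model_provider_name=ModelProviderName.openai_compatible,
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",
    )
)
# The parsed provider and model now live on config.run_config_properties.
print(config.base_url, config.run_config_properties.model_name)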
@@ -621,10 +632,20 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
     """Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
     model_id = "no_key_provider::gpt-4"
 
-    config = lite_llm_config(model_id)
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )
 
-    assert config.provider_name == ModelProviderName.openai_compatible
-    assert config.model_name == "gpt-4"
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "NA"}
     assert config.base_url == "https://api.nokey.com"
 
@@ -632,7 +653,14 @@ def test_lite_llm_config_invalid_id():
 def test_lite_llm_config_invalid_id():
     """Test handling of invalid model ID format"""
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("invalid-id-format")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="invalid-id-format",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "Invalid openai compatible model ID: invalid-id-format"
     )
@@ -643,14 +671,28 @@ def test_lite_llm_config_no_providers(mock_shared_config):
     mock_shared_config.return_value.openai_compatible_providers = None
 
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("test_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert str(exc_info.value) == "OpenAI compatible provider test_provider not found"
 
 
 def test_lite_llm_config_provider_not_found(mock_shared_config):
     """Test handling of non-existent provider"""
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("unknown_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="unknown_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "OpenAI compatible provider unknown_provider not found"
     )
@@ -666,7 +708,14 @@ def test_lite_llm_config_no_base_url(mock_shared_config):
     ]
 
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("test_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value)
         == "OpenAI compatible provider test_provider has no base URL"
@@ -867,7 +916,7 @@ def test_finetune_provider_model_vertex_ai(mock_project, mock_task, mock_finetun
     finetune.provider = ModelProviderName.vertex
     finetune.fine_tune_model_id = "projects/123/locations/us-central1/endpoints/456"
     finetune.structured_output_mode = StructuredOutputMode.json_mode
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune
 
     provider = finetune_provider_model("project-123::task-456::finetune-789")
kiln_ai/datamodel/__init__.py CHANGED
@@ -13,7 +13,6 @@ from __future__ import annotations
 
 from kiln_ai.datamodel import dataset_split, eval, strict_mode
 from kiln_ai.datamodel.datamodel_enums import (
-    FinetuneDataStrategy,
     FineTuneStatusType,
     Priority,
     StructuredOutputMode,
@@ -71,7 +70,6 @@ __all__ = [
     "Prompt",
     "TaskOutputRating",
     "StructuredOutputMode",
-    "FinetuneDataStrategy",
     "PromptId",
     "PromptGenerators",
     "prompt_generator_values",
kiln_ai/datamodel/datamodel_enums.py CHANGED
@@ -24,13 +24,14 @@ class StructuredOutputMode(str, Enum):
     """
     Enumeration of supported structured output modes.
 
-    - default: let the adapter decide
     - json_schema: request json using API capabilities for json_schema
     - function_calling: request json using API capabilities for function calling
     - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
     - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
     - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
     - json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
+    - default: let the adapter decide (legacy, do not use for new use cases)
+    - unknown: used for cases where the structured output mode is not known (on old models where it wasn't saved). Should lookup best option at runtime.
     """
 
     default = "default"
@@ -41,6 +42,7 @@ class StructuredOutputMode(str, Enum):
     json_instructions = "json_instructions"
     json_instruction_and_object = "json_instruction_and_object"
     json_custom_instructions = "json_custom_instructions"
+    unknown = "unknown"
 
 
 class FineTuneStatusType(str, Enum):
@@ -55,20 +57,43 @@ class FineTuneStatusType(str, Enum):
     failed = "failed"
 
 
-class FinetuneDataStrategy(str, Enum):
-    """Strategy for what data to include when fine-tuning a model."""
+class ChatStrategy(str, Enum):
+    """Strategy for how a chat is structured."""
 
-    # Only train on the final response, ignoring any intermediate steps or chain of thought
-    final_only = "final_only"
+    # Single turn, immediately return the answer
+    single_turn = "final_only"
+    # Two turn, first turn is the thinking, second turn is the answer. Legacy format - used for old fine tunes but not new trains.
+    two_message_cot_legacy = "final_and_intermediate"
+    # Two turn, first turn is the thinking, second turn is the answer. New format - used for new trains.
+    two_message_cot = "two_message_cot"
+    # Single turn, with both the thinking and the answer in the same message, using R1-style thinking format in <think> tags
+    single_turn_r1_thinking = "final_and_intermediate_r1_compatible"
 
-    # Train on both the final response and any intermediate steps/chain of thought
-    final_and_intermediate = "final_and_intermediate"
 
-    # Train using R1-style thinking format, which includes the reasoning in <think> tags in the message
-    final_and_intermediate_r1_compatible = "final_and_intermediate_r1_compatible"
+THINKING_DATA_STRATEGIES: list[ChatStrategy] = [
+    ChatStrategy.two_message_cot_legacy,
+    ChatStrategy.single_turn_r1_thinking,
+    ChatStrategy.two_message_cot,
+]
 
 
-THINKING_DATA_STRATEGIES: list[FinetuneDataStrategy] = [
-    FinetuneDataStrategy.final_and_intermediate,
-    FinetuneDataStrategy.final_and_intermediate_r1_compatible,
-]
+class ModelProviderName(str, Enum):
+    """
+    Enumeration of supported AI model providers.
+    """
+
+    openai = "openai"
+    groq = "groq"
+    amazon_bedrock = "amazon_bedrock"
+    ollama = "ollama"
+    openrouter = "openrouter"
+    fireworks_ai = "fireworks_ai"
+    kiln_fine_tune = "kiln_fine_tune"
+    kiln_custom_registry = "kiln_custom_registry"
+    openai_compatible = "openai_compatible"
+    anthropic = "anthropic"
+    gemini_api = "gemini_api"
+    azure_openai = "azure_openai"
+    huggingface = "huggingface"
+    vertex = "vertex"
+    together_ai = "together_ai"
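Note that the renamed ChatStrategy members keep the serialized string values of the old FinetuneDataStrategy members, so records written by 0.16.0 still deserialize. A quick sketch of what that guarantees (mirroring test_chat_strategy_enum at the end of this diff):

from kiln_ai.datamodel.datamodel_enums import ChatStrategy

# A 0.16.0 file that stored "final_only" resolves to the renamed member,
# because str-valued enums look up members by value.
assert ChatStrategy("final_only") is ChatStrategy.single_turn
assert ChatStrategy.two_message_cot_legacy == "final_and_intermediate"
assert ChatStrategy.single_turn_r1_thinking == "final_and_intermediate_r1_compatible"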
kiln_ai/datamodel/finetune.py CHANGED
@@ -6,7 +6,7 @@ from typing_extensions import Self
 from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
 from kiln_ai.datamodel.datamodel_enums import (
     THINKING_DATA_STRATEGIES,
-    FinetuneDataStrategy,
+    ChatStrategy,
     FineTuneStatusType,
     StructuredOutputMode,
 )
@@ -14,6 +14,11 @@ from kiln_ai.datamodel.datamodel_enums import (
 if TYPE_CHECKING:
     from kiln_ai.datamodel.task import Task
 
+DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS = [
+    ChatStrategy.two_message_cot_legacy,
+    ChatStrategy.two_message_cot,
+]
+
 
 class Finetune(KilnParentedModel):
     """
@@ -76,8 +81,8 @@ class Finetune(KilnParentedModel):
         default={},
         description="Properties of the fine-tune. Different providers may use different properties.",
     )
-    data_strategy: FinetuneDataStrategy = Field(
-        default=FinetuneDataStrategy.final_only,
+    data_strategy: ChatStrategy = Field(
+        default=ChatStrategy.single_turn,
         description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
     )
 
@@ -91,16 +96,16 @@ def validate_thinking_instructions(self) -> Self:
     def validate_thinking_instructions(self) -> Self:
         if (
             self.thinking_instructions is not None
-            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
+            and self.data_strategy not in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
         ):
             raise ValueError(
-                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
+                f"Thinking instructions can only be used when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
             )
         if (
             self.thinking_instructions is None
-            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
+            and self.data_strategy in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
         ):
             raise ValueError(
-                "Thinking instructions are required when data_strategy is final_and_intermediate"
+                f"Thinking instructions are required when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
             )
         return self
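The validator now gates thinking_instructions on list membership instead of a single enum value: both two-message chain-of-thought formats require instructions, while the R1-style strategy embeds reasoning in <think> tags and needs none. A minimal sketch of the resulting rule, using the names from the diff above:

from kiln_ai.datamodel.datamodel_enums import ChatStrategy
from kiln_ai.datamodel.finetune import (
    DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS,
)

# Both CoT strategies require thinking_instructions on a Finetune...
assert ChatStrategy.two_message_cot in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
assert (
    ChatStrategy.two_message_cot_legacy
    in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
)
# ...while the R1-style single-turn strategy must not set them.
assert (
    ChatStrategy.single_turn_r1_thinking
    not in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
)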
kiln_ai/datamodel/task.py CHANGED
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Dict, List, Union
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ValidationInfo, model_validator
+from typing_extensions import Self
 
 from kiln_ai.datamodel import Finetune
 from kiln_ai.datamodel.basemodel import (
@@ -11,7 +12,12 @@ from kiln_ai.datamodel.basemodel import (
     KilnParentedModel,
     KilnParentModel,
 )
-from kiln_ai.datamodel.datamodel_enums import Priority, TaskOutputRatingType
+from kiln_ai.datamodel.datamodel_enums import (
+    ModelProviderName,
+    Priority,
+    StructuredOutputMode,
+    TaskOutputRatingType,
+)
 from kiln_ai.datamodel.dataset_split import DatasetSplit
 from kiln_ai.datamodel.eval import Eval
 from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
@@ -47,12 +53,33 @@ class RunConfigProperties(BaseModel):
     """
 
     model_name: str = Field(description="The model to use for this run config.")
-    model_provider_name: str = Field(
+    model_provider_name: ModelProviderName = Field(
         description="The provider to use for this run config."
     )
     prompt_id: PromptId = Field(
         description="The prompt to use for this run config. Defaults to building a simple prompt from the task if not provided.",
     )
+    top_p: float = Field(
+        default=1.0,
+        description="The top-p value to use for this run config. Defaults to 1.0.",
+    )
+    temperature: float = Field(
+        default=1.0,
+        description="The temperature to use for this run config. Defaults to 1.0.",
+    )
+    structured_output_mode: StructuredOutputMode = Field(
+        description="The structured output mode to use for this run config.",
+    )
+
+    @model_validator(mode="after")
+    def validate_required_fields(self) -> Self:
+        if not (0 <= self.top_p <= 1):
+            raise ValueError("top_p must be between 0 and 1")
+
+        elif self.temperature < 0 or self.temperature > 2:
+            raise ValueError("temperature must be between 0 and 2")
+
+        return self
 
 
 class RunConfig(RunConfigProperties):
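RunConfigProperties now validates the new sampling parameters at construction time. A minimal sketch of that behavior, assuming the fields shown above are the only required ones:

from pydantic import ValidationError

from kiln_ai.datamodel.datamodel_enums import ModelProviderName
from kiln_ai.datamodel.task import RunConfigProperties

try:
    RunConfigProperties(
        model_name="gpt-4",
        model_provider_name=ModelProviderName.openai,
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",
        top_p=1.5,  # outside [0, 1], so the after-validator raises
    )
except ValidationError as exc:
    print(exc)  # "top_p must be between 0 and 1"

temperature gets the same treatment, with an allowed range of [0, 2].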
@@ -101,12 +128,46 @@ class TaskRunConfig(KilnParentedModel):
         parent_task = self.parent_task()
         if parent_task is None:
             raise ValueError("Run config must be parented to a task")
-        return RunConfig(
+        return run_config_from_run_config_properties(
             task=parent_task,
-            model_name=self.run_config_properties.model_name,
-            model_provider_name=self.run_config_properties.model_provider_name,
-            prompt_id=self.run_config_properties.prompt_id,
+            run_config_properties=self.run_config_properties,
+        )
+
+    # Previously we didn't store structured_output_mode in the run_config_properties. Updgrade old models when loading from file.
+    @model_validator(mode="before")
+    def upgrade_old_entries(cls, data: dict, info: ValidationInfo) -> dict:
+        if not info.context or not info.context.get("loading_from_file", False):
+            # Not loading from file, so no need to upgrade
+            return data
+
+        if not isinstance(data, dict):
+            return data
+
+        structured_output_mode = data.get("run_config_properties", {}).get(
+            "structured_output_mode", None
         )
+        if structured_output_mode is None and "run_config_properties" in data:
+            # Default to unknown. Adapter will have to guess at runtime.
+            data["run_config_properties"]["structured_output_mode"] = (
+                StructuredOutputMode.unknown
+            )
+
+        return data
+
+
+def run_config_from_run_config_properties(
+    task: "Task",
+    run_config_properties: RunConfigProperties,
+) -> RunConfig:
+    return RunConfig(
+        task=task,
+        model_name=run_config_properties.model_name,
+        model_provider_name=run_config_properties.model_provider_name,
+        prompt_id=run_config_properties.prompt_id,
+        top_p=run_config_properties.top_p,
+        temperature=run_config_properties.temperature,
+        structured_output_mode=run_config_properties.structured_output_mode,
+    )
 
 
 class Task(
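The before-validator backfills structured_output_mode on pre-0.17 records, but only when a loading_from_file context is supplied (which Kiln's file loader is assumed to pass). A sketch of that upgrade path, assuming a TaskRunConfig can be validated standalone from a plain dict:

from kiln_ai.datamodel.task import TaskRunConfig

# Hypothetical 0.16.0 on-disk record: no structured_output_mode was saved.
old_record = {
    "name": "my run config",
    "run_config_properties": {
        "model_name": "gpt-4",
        "model_provider_name": "openai",
        "prompt_id": "simple_prompt_builder",
    },
}
config = TaskRunConfig.model_validate(
    old_record, context={"loading_from_file": True}
)
# The missing mode is backfilled as "unknown"; adapters pick a real mode at runtime.
assert config.run_config_properties.structured_output_mode == "unknown"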
@@ -500,8 +500,9 @@ def adapter(base_task):
         run_config=RunConfig(
             task=base_task,
             model_name="test_model",
-            model_provider_name="test_provider",
+            model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
 
kiln_ai/datamodel/test_dataset_split.py CHANGED
@@ -72,14 +72,6 @@ def sample_task_runs(sample_task):
     return task_runs
 
 
-@pytest.fixture
-def standard_splits():
-    return [
-        DatasetSplitDefinition(name="train", percentage=0.8),
-        DatasetSplitDefinition(name="test", percentage=0.2),
-    ]
-
-
 @pytest.fixture
 def task_run():
     return TaskRun(
kiln_ai/datamodel/test_models.py CHANGED
@@ -9,13 +9,13 @@ from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
     Finetune,
-    FinetuneDataStrategy,
     Project,
     Prompt,
     Task,
     TaskOutput,
     TaskRun,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.test_json_schema import json_joke_schema
 
 
@@ -536,44 +536,58 @@ def test_prompt_parent_task():
         # Test 1: Valid case - no thinking instructions with final_only
         (
             None,
-            FinetuneDataStrategy.final_only,
+            ChatStrategy.single_turn,
             False,
             None,
         ),
         # Test 2: Valid case - thinking instructions with final_and_intermediate
         (
             "Think step by step",
-            FinetuneDataStrategy.final_and_intermediate,
+            ChatStrategy.two_message_cot_legacy,
             False,
             None,
         ),
         # Test 3: Valid case - no thinking instructions with final_and_intermediate_r1_compatible
         (
             None,
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible,
+            ChatStrategy.single_turn_r1_thinking,
             False,
             None,
         ),
         # Test 4: Invalid case - thinking instructions with final_only
         (
             "Think step by step",
-            FinetuneDataStrategy.final_only,
+            ChatStrategy.single_turn,
             True,
-            "Thinking instructions can only be used when data_strategy is final_and_intermediate",
+            "Thinking instructions can only be used when data_strategy is",
         ),
         # Test 5: Invalid case - no thinking instructions with final_and_intermediate
         (
             None,
-            FinetuneDataStrategy.final_and_intermediate,
+            ChatStrategy.two_message_cot_legacy,
             True,
-            "Thinking instructions are required when data_strategy is final_and_intermediate",
+            "Thinking instructions are required when data_strategy is",
         ),
         # Test 6: Invalid case - thinking instructions with final_and_intermediate_r1_compatible
         (
             "Think step by step",
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible,
+            ChatStrategy.single_turn_r1_thinking,
             True,
-            "Thinking instructions can only be used when data_strategy is final_and_intermediate",
+            "Thinking instructions can only be used when data_strategy is",
+        ),
+        # Test 7: new COT format
+        (
+            "Think step by step",
+            ChatStrategy.two_message_cot,
+            False,
+            None,
+        ),
+        # Test 8: new COT format
+        (
+            None,
+            ChatStrategy.two_message_cot,
+            True,
+            "Thinking instructions are required when data_strategy is",
         ),
     ],
 )
@@ -665,3 +679,12 @@ def test_task_run_thinking_training_data(intermediate_outputs, expected):
         intermediate_outputs=intermediate_outputs,
     )
     assert task_run.thinking_training_data() == expected
+
+
+def test_chat_strategy_enum():
+    # This has to align to the old FinetuneDataStrategy enum
+    assert ChatStrategy.single_turn == "final_only"
+    assert ChatStrategy.two_message_cot_legacy == "final_and_intermediate"
+    assert (
+        ChatStrategy.single_turn_r1_thinking == "final_and_intermediate_r1_compatible"
+    )