kiln-ai 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +233 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
  7. kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
  8. kiln_ai/adapters/data_gen/test_data_gen_task.py +330 -40
  9. kiln_ai/adapters/eval/base_eval.py +7 -6
  10. kiln_ai/adapters/eval/eval_runner.py +9 -2
  11. kiln_ai/adapters/eval/g_eval.py +40 -17
  12. kiln_ai/adapters/eval/test_base_eval.py +174 -17
  13. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  14. kiln_ai/adapters/eval/test_g_eval.py +116 -5
  15. kiln_ai/adapters/fine_tune/base_finetune.py +3 -8
  16. kiln_ai/adapters/fine_tune/dataset_formatter.py +135 -273
  17. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  18. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
  19. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  20. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  21. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  22. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +6 -11
  23. kiln_ai/adapters/fine_tune/together_finetune.py +13 -2
  24. kiln_ai/adapters/ml_model_list.py +370 -84
  25. kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
  26. kiln_ai/adapters/model_adapters/litellm_adapter.py +88 -97
  27. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  28. kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -61
  29. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +104 -21
  30. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
  31. kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
  32. kiln_ai/adapters/parsers/parser_registry.py +0 -2
  33. kiln_ai/adapters/parsers/r1_parser.py +0 -1
  34. kiln_ai/adapters/prompt_builders.py +0 -16
  35. kiln_ai/adapters/provider_tools.py +27 -9
  36. kiln_ai/adapters/remote_config.py +66 -0
  37. kiln_ai/adapters/repair/repair_task.py +1 -6
  38. kiln_ai/adapters/repair/test_repair_task.py +24 -3
  39. kiln_ai/adapters/test_adapter_registry.py +88 -28
  40. kiln_ai/adapters/test_ml_model_list.py +176 -0
  41. kiln_ai/adapters/test_prompt_adaptors.py +17 -7
  42. kiln_ai/adapters/test_prompt_builders.py +3 -16
  43. kiln_ai/adapters/test_provider_tools.py +69 -20
  44. kiln_ai/adapters/test_remote_config.py +100 -0
  45. kiln_ai/datamodel/__init__.py +0 -2
  46. kiln_ai/datamodel/datamodel_enums.py +38 -13
  47. kiln_ai/datamodel/eval.py +32 -0
  48. kiln_ai/datamodel/finetune.py +12 -8
  49. kiln_ai/datamodel/task.py +68 -7
  50. kiln_ai/datamodel/task_output.py +0 -2
  51. kiln_ai/datamodel/task_run.py +0 -2
  52. kiln_ai/datamodel/test_basemodel.py +2 -1
  53. kiln_ai/datamodel/test_dataset_split.py +0 -8
  54. kiln_ai/datamodel/test_eval_model.py +146 -4
  55. kiln_ai/datamodel/test_models.py +33 -10
  56. kiln_ai/datamodel/test_task.py +168 -2
  57. kiln_ai/utils/config.py +3 -2
  58. kiln_ai/utils/dataset_import.py +1 -1
  59. kiln_ai/utils/logging.py +166 -0
  60. kiln_ai/utils/test_config.py +23 -0
  61. kiln_ai/utils/test_dataset_import.py +30 -0
  62. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
  63. kiln_ai-0.18.0.dist-info/RECORD +115 -0
  64. kiln_ai-0.16.0.dist-info/RECORD +0 -108
  65. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
  66. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,176 @@
+import pytest
+
+from kiln_ai.adapters.ml_model_list import (
+    ModelName,
+    default_structured_output_mode_for_model_provider,
+    get_model_by_name,
+)
+from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
+
+
+class TestDefaultStructuredOutputModeForModelProvider:
+    """Test cases for default_structured_output_mode_for_model_provider function"""
+
+    def test_valid_model_and_provider_returns_provider_mode(self):
+        """Test that valid model and provider returns the provider's structured output mode"""
+        # GPT 4.1 has OpenAI provider with json_schema mode
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_valid_model_different_provider_modes(self):
+        """Test that different providers for the same model return different modes"""
+        # Claude 3.5 Sonnet has different modes for different providers
+        # Anthropic provider uses function_calling
+        result_anthropic = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.anthropic,
+        )
+        assert result_anthropic == StructuredOutputMode.function_calling
+
+        # Vertex provider uses function_calling_weak
+        result_vertex = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.vertex,
+        )
+        assert result_vertex == StructuredOutputMode.function_calling_weak
+
+    def test_invalid_model_name_returns_default(self):
+        """Test that invalid model name returns the default value"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_invalid_model_name_returns_custom_default(self):
+        """Test that invalid model name returns custom default when specified"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+        )
+        assert result == custom_default
+
+    def test_valid_model_invalid_provider_returns_default(self):
+        """Test that valid model but invalid provider returns default"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.gemini_api,  # GPT 4.1 doesn't have gemini_api provider
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_returns_default(self):
+        """Test that when provider's mode is in disallowed_modes, returns default"""
+        # GPT 4.1 OpenAI provider uses json_schema, but we disallow it
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_with_custom_default(self):
+        """Test disallowed modes with custom default value"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == custom_default
+
+    def test_empty_disallowed_modes_list(self):
+        """Test that empty disallowed_modes list works correctly"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[],
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_multiple_disallowed_modes(self):
+        """Test with multiple disallowed modes"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[
+                StructuredOutputMode.json_schema,
+                StructuredOutputMode.function_calling,
+            ],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_reasoning_model_with_different_providers(self):
+        """Test reasoning models that have different structured output modes"""
+        # DeepSeek R1 uses json_instructions for reasoning
+        result = default_structured_output_mode_for_model_provider(
+            model_name="deepseek_r1",
+            provider=ModelProviderName.openrouter,
+        )
+        assert result == StructuredOutputMode.json_instructions
+
+    @pytest.mark.parametrize(
+        "model_name,provider,expected_mode",
+        [
+            ("gpt_4o", ModelProviderName.openai, StructuredOutputMode.json_schema),
+            (
+                "claude_3_5_haiku",
+                ModelProviderName.anthropic,
+                StructuredOutputMode.function_calling,
+            ),
+            (
+                "gemini_2_5_pro",
+                ModelProviderName.gemini_api,
+                StructuredOutputMode.json_schema,
+            ),
+            ("llama_3_1_8b", ModelProviderName.groq, StructuredOutputMode.default),
+            (
+                "qwq_32b",
+                ModelProviderName.fireworks_ai,
+                StructuredOutputMode.json_instructions,
+            ),
+        ],
+    )
+    def test_parametrized_valid_combinations(self, model_name, provider, expected_mode):
+        """Test multiple valid model/provider combinations"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name=model_name,
+            provider=provider,
+        )
+        assert result == expected_mode
+
+    def test_model_with_single_provider(self):
+        """Test model that only has one provider"""
+        # Find a model with only one provider for this test
+        model = get_model_by_name(ModelName.gpt_4_1_nano)
+        assert len(model.providers) >= 1  # Verify it has providers
+
+        first_provider = model.providers[0]
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1_nano",
+            provider=first_provider.name,
+        )
+        assert result == first_provider.structured_output_mode
+
+
+def test_uncensored():
+    """Test that uncensored is set correctly"""
+    model = get_model_by_name(ModelName.grok_3_mini)
+    for provider in model.providers:
+        assert provider.uncensored
+        assert not provider.suggested_for_uncensored_data_gen
+
+    model = get_model_by_name(ModelName.gpt_4_1_nano)
+    for provider in model.providers:
+        assert not provider.uncensored
+        assert not provider.suggested_for_uncensored_data_gen
+
+    model = get_model_by_name(ModelName.grok_4)
+    for provider in model.providers:
+        assert provider.uncensored
+        assert provider.suggested_for_uncensored_data_gen
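
Note: the tests above exercise the new default_structured_output_mode_for_model_provider helper added in this release. A minimal usage sketch, relying only on the call shape the tests demonstrate (model_name, provider, optional default and disallowed_modes):

from kiln_ai.adapters.ml_model_list import (
    default_structured_output_mode_for_model_provider,
)
from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode

# Ask for the preferred structured output mode of a model/provider pair.
# If the pair is unknown, or its mode is in disallowed_modes, the default is returned.
mode = default_structured_output_mode_for_model_provider(
    model_name="gpt_4_1",
    provider=ModelProviderName.openai,
    default=StructuredOutputMode.json_instructions,
    disallowed_modes=[StructuredOutputMode.function_calling],
)
# Per the tests above, this returns StructuredOutputMode.json_schema for GPT 4.1 on OpenAI.
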
@@ -13,11 +13,8 @@ from kiln_ai.adapters.model_adapters.litellm_adapter import (
     LiteLlmConfig,
 )
 from kiln_ai.adapters.ollama_tools import ollama_online
-from kiln_ai.adapters.prompt_builders import (
-    BasePromptBuilder,
-    SimpleChainOfThoughtPromptBuilder,
-)
 from kiln_ai.datamodel import PromptId
+from kiln_ai.datamodel.task import RunConfigProperties


 def get_all_models_and_providers():
@@ -124,8 +121,12 @@ async def test_mock_returning_run(tmp_path):

     adapter = LiteLlmAdapter(
         config=LiteLlmConfig(
-            model_name="custom_model",
-            provider_name="ollama",
+            run_config_properties=RunConfigProperties(
+                model_name="custom_model",
+                model_provider_name="ollama",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
             base_url="http://localhost:11434",
             additional_body_options={"api_key": "test_key"},
         ),
@@ -145,6 +146,9 @@ async def test_mock_returning_run(tmp_path):
         "model_name": "custom_model",
         "model_provider": "ollama",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "json_schema",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }


@@ -212,7 +216,13 @@ async def run_simple_task(
     prompt_id: PromptId | None = None,
 ) -> datamodel.TaskRun:
     adapter = adapter_for_task(
-        task, model_name=model_name, provider=provider, prompt_id=prompt_id
+        task,
+        RunConfigProperties(
+            structured_output_mode="json_schema",
+            model_name=model_name,
+            model_provider_name=provider,
+            prompt_id=prompt_id or "simple_prompt_builder",
+        ),
     )

     run = await adapter.invoke(
@@ -27,7 +27,6 @@ from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
     Finetune,
-    FinetuneDataStrategy,
     Project,
     Prompt,
     Task,
@@ -36,6 +35,7 @@ from kiln_ai.datamodel import (
     TaskRun,
     Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig

 logger = logging.getLogger(__name__)
@@ -54,9 +54,6 @@ def test_simple_prompt_builder(tmp_path):
     assert "1) " + task.requirements[0].instruction in prompt
     assert "2) " + task.requirements[1].instruction in prompt
     assert "3) " + task.requirements[2].instruction in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt


@@ -93,20 +90,9 @@ def test_simple_prompt_builder_structured_output(tmp_path):
     input = "Cows"
     prompt = builder.build_prompt(include_json_instructions=False)
     assert "You are an assistant which tells a joke, given a subject." in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt


-def test_simple_prompt_builder_structured_input_non_ascii(tmp_path):
-    task = build_structured_output_test_task(tmp_path)
-    builder = SimplePromptBuilder(task=task)
-    input = {"key": "你好👋"}
-    user_msg = builder.build_user_message(input)
-    assert "你好👋" in user_msg
-
-
 @pytest.fixture
 def task_with_examples(tmp_path):
     # Create a project and task hierarchy
@@ -404,7 +390,7 @@ def test_prompt_builder_from_id(task_with_examples):
         base_model_id="test_base_model_id",
         dataset_split_id="asdf",
         provider="test_provider",
-        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+        data_strategy=ChatStrategy.two_message_cot,
     )
     finetune.save_to_file()
     nested_fine_tune_id = (
@@ -619,6 +605,7 @@ def test_task_run_config_prompt_builder(tmp_path):
             model_name="gpt-4",
             model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
         prompt=Prompt(
             name="test prompt name",
@@ -18,7 +18,7 @@ from kiln_ai.adapters.provider_tools import (
     finetune_provider_model,
     get_model_and_provider,
     kiln_model_provider_from,
-    lite_llm_config,
+    lite_llm_config_for_openai_compatible,
     lite_llm_provider_model,
     parse_custom_model_id,
     provider_enabled,
@@ -27,10 +27,11 @@ from kiln_ai.adapters.provider_tools import (
 )
 from kiln_ai.datamodel import (
     Finetune,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
+from kiln_ai.datamodel.task import RunConfigProperties


 @pytest.fixture(autouse=True)
@@ -71,7 +72,7 @@ def mock_finetune():
         finetune.provider = ModelProviderName.openai
         finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = FinetuneDataStrategy.final_only
+        finetune.data_strategy = ChatStrategy.single_turn
         mock.return_value = finetune
         yield mock

@@ -83,7 +84,7 @@ def mock_finetune_final_and_intermediate():
         finetune.provider = ModelProviderName.openai
         finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = FinetuneDataStrategy.final_and_intermediate
+        finetune.data_strategy = ChatStrategy.two_message_cot
         mock.return_value = finetune
         yield mock

@@ -95,9 +96,7 @@ def mock_finetune_r1_compatible():
         finetune.provider = ModelProviderName.ollama
         finetune.fine_tune_model_id = "ft:deepseek-r1:671b:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = (
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible
-        )
+        finetune.data_strategy = ChatStrategy.single_turn_r1_thinking
         mock.return_value = finetune
         yield mock

@@ -357,6 +356,7 @@ async def test_kiln_model_provider_from_custom_model_valid(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "custom_model"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions


 @pytest.mark.asyncio
@@ -374,6 +374,7 @@ async def test_kiln_model_provider_from_custom_registry(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "gpt-4-turbo"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions


 @pytest.mark.asyncio
@@ -474,7 +475,7 @@ def test_finetune_provider_model_success_final_and_intermediate(
     assert provider.name == ModelProviderName.openai
     assert provider.model_id == "ft:gpt-3.5-turbo:custom:model-123"
     assert provider.structured_output_mode == StructuredOutputMode.json_schema
-    assert provider.reasoning_capable is True
+    assert provider.reasoning_capable is False
     assert provider.parser == None


@@ -580,7 +581,7 @@ def test_finetune_provider_model_structured_mode(
     finetune.provider = provider_name
     finetune.fine_tune_model_id = "fireworks-model-123"
     finetune.structured_output_mode = structured_output_mode
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune

     provider = finetune_provider_model("project-123::task-456::finetune-789")
@@ -596,10 +597,20 @@ def test_openai_compatible_provider_config(mock_shared_config):
     """Test successful creation of an OpenAI compatible provider"""
     model_id = "test_provider::gpt-4"

-    config = lite_llm_config(model_id)
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai_compatible,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )

-    assert config.provider_name == ModelProviderName.openai_compatible
-    assert config.model_name == "gpt-4"
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "test-key"}
     assert config.base_url == "https://api.test.com"

@@ -621,10 +632,20 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
     """Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
     model_id = "no_key_provider::gpt-4"

-    config = lite_llm_config(model_id)
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )

-    assert config.provider_name == ModelProviderName.openai_compatible
-    assert config.model_name == "gpt-4"
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "NA"}
     assert config.base_url == "https://api.nokey.com"

@@ -632,7 +653,14 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
 def test_lite_llm_config_invalid_id():
     """Test handling of invalid model ID format"""
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("invalid-id-format")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="invalid-id-format",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "Invalid openai compatible model ID: invalid-id-format"
     )
@@ -643,14 +671,28 @@ def test_lite_llm_config_no_providers(mock_shared_config):
     mock_shared_config.return_value.openai_compatible_providers = None

     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("test_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert str(exc_info.value) == "OpenAI compatible provider test_provider not found"


 def test_lite_llm_config_provider_not_found(mock_shared_config):
     """Test handling of non-existent provider"""
     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("unknown_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="unknown_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "OpenAI compatible provider unknown_provider not found"
     )
@@ -666,7 +708,14 @@ def test_lite_llm_config_no_base_url(mock_shared_config):
     ]

     with pytest.raises(ValueError) as exc_info:
-        lite_llm_config("test_provider::gpt-4")
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value)
         == "OpenAI compatible provider test_provider has no base URL"
@@ -867,7 +916,7 @@ def test_finetune_provider_model_vertex_ai(mock_project, mock_task, mock_finetun
     finetune.provider = ModelProviderName.vertex
     finetune.fine_tune_model_id = "projects/123/locations/us-central1/endpoints/456"
     finetune.structured_output_mode = StructuredOutputMode.json_mode
-    finetune.data_strategy = FinetuneDataStrategy.final_only
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune

     provider = finetune_provider_model("project-123::task-456::finetune-789")
@@ -0,0 +1,100 @@
+import asyncio
+import os
+from unittest.mock import patch
+
+import pytest
+
+from kiln_ai.adapters.ml_model_list import built_in_models
+from kiln_ai.adapters.remote_config import (
+    deserialize_config,
+    dump_builtin_config,
+    load_from_url,
+    load_remote_models,
+    serialize_config,
+)
+
+
+def test_round_trip(tmp_path):
+    path = tmp_path / "models.json"
+    serialize_config(built_in_models, path)
+    loaded = deserialize_config(path)
+    assert [m.model_dump(mode="json") for m in loaded] == [
+        m.model_dump(mode="json") for m in built_in_models
+    ]
+
+
+def test_load_from_url():
+    sample = [built_in_models[0].model_dump(mode="json")]
+
+    class FakeResponse:
+        def raise_for_status(self):
+            pass
+
+        def json(self):
+            return {"model_list": sample}
+
+    with patch(
+        "kiln_ai.adapters.remote_config.requests.get", return_value=FakeResponse()
+    ):
+        models = load_from_url("http://example.com/models.json")
+        assert [m.model_dump(mode="json") for m in models] == sample
+
+
+def test_dump_builtin_config(tmp_path):
+    path = tmp_path / "out.json"
+    dump_builtin_config(path)
+    loaded = deserialize_config(path)
+    assert [m.model_dump(mode="json") for m in loaded] == [
+        m.model_dump(mode="json") for m in built_in_models
+    ]
+
+
+@pytest.mark.asyncio
+async def test_load_remote_models_success(monkeypatch):
+    del os.environ["KILN_SKIP_REMOTE_MODEL_LIST"]
+    original = built_in_models.copy()
+    sample_models = [built_in_models[0]]
+
+    def fake_fetch(url):
+        return sample_models
+
+    monkeypatch.setattr("kiln_ai.adapters.remote_config.load_from_url", fake_fetch)
+
+    load_remote_models("http://example.com/models.json")
+    await asyncio.sleep(0.01)
+    assert built_in_models == sample_models
+    built_in_models[:] = original
+
+
+@pytest.mark.asyncio
+async def test_load_remote_models_failure(monkeypatch):
+    original = built_in_models.copy()
+
+    def fake_fetch(url):
+        raise RuntimeError("fail")
+
+    monkeypatch.setattr("kiln_ai.adapters.remote_config.load_from_url", fake_fetch)
+
+    load_remote_models("http://example.com/models.json")
+    await asyncio.sleep(0.01)
+    assert built_in_models == original
+
+
+def test_deserialize_config_with_extra_keys(tmp_path):
+    # Take a valid model and add an extra key, ensure it is ignored and still loads
+    import json
+
+    from kiln_ai.adapters.ml_model_list import built_in_models
+
+    model_dict = built_in_models[0].model_dump(mode="json")
+    model_dict["extra_key"] = "should be ignored or error"
+    model_dict["providers"][0]["extra_key"] = "should be ignored or error"
+    data = {"model_list": [model_dict]}
+    path = tmp_path / "extra.json"
+    path.write_text(json.dumps(data))
+    # Should NOT raise, and extra key should be ignored
+    models = deserialize_config(path)
+    assert hasattr(models[0], "family")
+    assert not hasattr(models[0], "extra_key")
+    assert hasattr(models[0], "providers")
+    assert not hasattr(models[0].providers[0], "extra_key")
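
Note: the tests above cover the new kiln_ai.adapters.remote_config module, which can serialize the built-in model list and refresh it from a remote JSON file. A minimal sketch using only the call shapes shown in the tests (the URL is illustrative):

from pathlib import Path

from kiln_ai.adapters.ml_model_list import built_in_models
from kiln_ai.adapters.remote_config import (
    deserialize_config,
    dump_builtin_config,
    load_remote_models,
    serialize_config,
)

# Round-trip the built-in model list through a JSON file on disk.
path = Path("models.json")
serialize_config(built_in_models, path)
models = deserialize_config(path)

# One-step dump of the built-in list; per the tests it round-trips identically.
dump_builtin_config(path)

# Kick off a background refresh from a remote model list; per the tests, built_in_models
# is replaced on success and left untouched on failure, and the fetch is skipped when
# the KILN_SKIP_REMOTE_MODEL_LIST environment variable is set.
load_remote_models("https://example.com/models.json")  # illustrative URL
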
@@ -13,7 +13,6 @@ from __future__ import annotations

 from kiln_ai.datamodel import dataset_split, eval, strict_mode
 from kiln_ai.datamodel.datamodel_enums import (
-    FinetuneDataStrategy,
     FineTuneStatusType,
     Priority,
     StructuredOutputMode,
@@ -71,7 +70,6 @@ __all__ = [
     "Prompt",
     "TaskOutputRating",
     "StructuredOutputMode",
-    "FinetuneDataStrategy",
     "PromptId",
     "PromptGenerators",
     "prompt_generator_values",