kiln-ai 0.15.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic.

Files changed (72)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +234 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
  7. kiln_ai/adapters/eval/base_eval.py +8 -6
  8. kiln_ai/adapters/eval/eval_runner.py +9 -65
  9. kiln_ai/adapters/eval/g_eval.py +26 -8
  10. kiln_ai/adapters/eval/test_base_eval.py +166 -15
  11. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  12. kiln_ai/adapters/eval/test_g_eval.py +1 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +153 -197
  15. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  16. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +402 -211
  17. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  18. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  19. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  20. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
  21. kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
  22. kiln_ai/adapters/ml_model_list.py +556 -45
  23. kiln_ai/adapters/model_adapters/base_adapter.py +100 -35
  24. kiln_ai/adapters/model_adapters/litellm_adapter.py +116 -100
  25. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  26. kiln_ai/adapters/model_adapters/test_base_adapter.py +299 -52
  27. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +121 -22
  28. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +44 -2
  29. kiln_ai/adapters/model_adapters/test_structured_output.py +48 -18
  30. kiln_ai/adapters/parsers/base_parser.py +0 -3
  31. kiln_ai/adapters/parsers/parser_registry.py +5 -3
  32. kiln_ai/adapters/parsers/r1_parser.py +17 -2
  33. kiln_ai/adapters/parsers/request_formatters.py +40 -0
  34. kiln_ai/adapters/parsers/test_parser_registry.py +2 -2
  35. kiln_ai/adapters/parsers/test_r1_parser.py +44 -1
  36. kiln_ai/adapters/parsers/test_request_formatters.py +76 -0
  37. kiln_ai/adapters/prompt_builders.py +14 -17
  38. kiln_ai/adapters/provider_tools.py +39 -4
  39. kiln_ai/adapters/repair/test_repair_task.py +27 -5
  40. kiln_ai/adapters/test_adapter_registry.py +88 -28
  41. kiln_ai/adapters/test_ml_model_list.py +158 -0
  42. kiln_ai/adapters/test_prompt_adaptors.py +17 -3
  43. kiln_ai/adapters/test_prompt_builders.py +27 -19
  44. kiln_ai/adapters/test_provider_tools.py +130 -12
  45. kiln_ai/datamodel/__init__.py +2 -2
  46. kiln_ai/datamodel/datamodel_enums.py +43 -4
  47. kiln_ai/datamodel/dataset_filters.py +69 -1
  48. kiln_ai/datamodel/dataset_split.py +4 -0
  49. kiln_ai/datamodel/eval.py +8 -0
  50. kiln_ai/datamodel/finetune.py +13 -7
  51. kiln_ai/datamodel/prompt_id.py +1 -0
  52. kiln_ai/datamodel/task.py +68 -7
  53. kiln_ai/datamodel/task_output.py +1 -1
  54. kiln_ai/datamodel/task_run.py +39 -7
  55. kiln_ai/datamodel/test_basemodel.py +5 -8
  56. kiln_ai/datamodel/test_dataset_filters.py +82 -0
  57. kiln_ai/datamodel/test_dataset_split.py +2 -8
  58. kiln_ai/datamodel/test_example_models.py +54 -0
  59. kiln_ai/datamodel/test_models.py +80 -9
  60. kiln_ai/datamodel/test_task.py +168 -2
  61. kiln_ai/utils/async_job_runner.py +106 -0
  62. kiln_ai/utils/config.py +3 -2
  63. kiln_ai/utils/dataset_import.py +81 -19
  64. kiln_ai/utils/logging.py +165 -0
  65. kiln_ai/utils/test_async_job_runner.py +199 -0
  66. kiln_ai/utils/test_config.py +23 -0
  67. kiln_ai/utils/test_dataset_import.py +272 -10
  68. {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
  69. kiln_ai-0.17.0.dist-info/RECORD +113 -0
  70. kiln_ai-0.15.0.dist-info/RECORD +0 -104
  71. {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
  72. {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/repair/test_repair_task.py

@@ -21,6 +21,7 @@ from kiln_ai.datamodel import (
     TaskRequirement,
     TaskRun,
 )
+from kiln_ai.datamodel.task import RunConfigProperties

 json_joke_schema = """{
   "type": "object",
@@ -189,7 +190,15 @@ async def test_live_run(sample_task, sample_task_run, sample_repair_data):
     repair_task_input = RepairTaskRun.build_repair_task_input(**sample_repair_data)
     assert isinstance(repair_task_input, RepairTaskInput)

-    adapter = adapter_for_task(repair_task, model_name="llama_3_1_8b", provider="groq")
+    adapter = adapter_for_task(
+        repair_task,
+        RunConfigProperties(
+            model_name="llama_3_1_8b",
+            model_provider_name="groq",
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="default",
+        ),
+    )

     run = await adapter.invoke(repair_task_input.model_dump())
     assert run is not None
@@ -198,10 +207,13 @@ async def test_live_run(sample_task, sample_task_run, sample_repair_data):
     assert "setup" in parsed_output
     assert "punchline" in parsed_output
     assert run.output.source.properties == {
-        "adapter_name": "kiln_langchain_adapter",
+        "adapter_name": "kiln_openai_compatible_adapter",
         "model_name": "llama_3_1_8b",
         "model_provider": "groq",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "default",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }


@@ -218,12 +230,19 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
     }

     with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
-        mock_run.return_value = RunOutput(
-            output=mocked_output, intermediate_outputs=None
+        mock_run.return_value = (
+            RunOutput(output=mocked_output, intermediate_outputs=None),
+            None,
         )

         adapter = adapter_for_task(
-            repair_task, model_name="llama_3_1_8b", provider="ollama"
+            repair_task,
+            RunConfigProperties(
+                model_name="llama_3_1_8b",
+                model_provider_name="ollama",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
         )

         run = await adapter.invoke(repair_task_input.model_dump())
@@ -239,6 +258,9 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
         "model_name": "llama_3_1_8b",
         "model_provider": "ollama",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "json_schema",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }
     assert run.input_source.type == DataSourceType.human
     assert "created_by" in run.input_source.properties
kiln_ai/adapters/test_adapter_registry.py

@@ -7,8 +7,8 @@ from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.ml_model_list import ModelProviderName
 from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
 from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
-from kiln_ai.adapters.prompt_builders import BasePromptBuilder
 from kiln_ai.adapters.provider_tools import kiln_model_provider_from
+from kiln_ai.datamodel.task import RunConfigProperties


 @pytest.fixture
@@ -35,18 +35,28 @@ def mock_finetune_from_id():
     with patch("kiln_ai.adapters.provider_tools.finetune_from_id") as mock:
         mock.return_value.provider = ModelProviderName.openai
         mock.return_value.fine_tune_model_id = "test-model"
+        mock.return_value.data_strategy = "final_only"
         yield mock


 def test_openai_adapter_creation(mock_config, basic_task):
     adapter = adapter_for_task(
-        kiln_task=basic_task, model_name="gpt-4", provider=ModelProviderName.openai
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.model_name == "gpt-4"
+    assert adapter.config.run_config_properties.model_name == "gpt-4"
     assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
-    assert adapter.config.provider_name == ModelProviderName.openai
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.openai
+    )
     assert adapter.config.base_url is None  # OpenAI url is default
     assert adapter.config.default_headers is None

@@ -54,14 +64,21 @@ def test_openai_adapter_creation(mock_config, basic_task):
 def test_openrouter_adapter_creation(mock_config, basic_task):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-        model_name="anthropic/claude-3-opus",
-        provider=ModelProviderName.openrouter,
+        run_config_properties=RunConfigProperties(
+            model_name="anthropic/claude-3-opus",
+            model_provider_name=ModelProviderName.openrouter,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.model_name == "anthropic/claude-3-opus"
+    assert adapter.config.run_config_properties.model_name == "anthropic/claude-3-opus"
     assert adapter.config.additional_body_options == {"api_key": "test-openrouter-key"}
-    assert adapter.config.provider_name == ModelProviderName.openrouter
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.openrouter
+    )
     assert adapter.config.default_headers == {
         "HTTP-Referer": "https://getkiln.ai/openrouter",
         "X-Title": "KilnAI",
@@ -79,7 +96,13 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
 )
 def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
     adapter = adapter_for_task(
-        kiln_task=basic_task, model_name="test-model", provider=provider
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="test-model",
+            model_provider_name=provider,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     assert isinstance(adapter, LiteLlmAdapter)
@@ -90,9 +113,12 @@ def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
 def test_custom_prompt_builder(mock_config, basic_task):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-        model_name="gpt-4",
-        provider=ModelProviderName.openai,
-        prompt_id="simple_chain_of_thought_prompt_builder",
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_chain_of_thought_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"
@@ -103,8 +129,12 @@ def test_tags_passed_through(mock_config, basic_task):
     tags = ["test-tag-1", "test-tag-2"]
     adapter = adapter_for_task(
         kiln_task=basic_task,
-        model_name="gpt-4",
-        provider=ModelProviderName.openai,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
         base_adapter_config=AdapterConfig(
             default_tags=tags,
         ),
@@ -114,13 +144,19 @@


 def test_invalid_provider(mock_config, basic_task):
-    with pytest.raises(ValueError, match="Unhandled enum value"):
+    with pytest.raises(ValueError, match="Input should be"):
         adapter_for_task(
-            kiln_task=basic_task, model_name="test-model", provider="invalid"
+            kiln_task=basic_task,
+            run_config_properties=RunConfigProperties(
+                model_name="test-model",
+                model_provider_name="invalid",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
         )


-@patch("kiln_ai.adapters.adapter_registry.lite_llm_config")
+@patch("kiln_ai.adapters.adapter_registry.lite_llm_config_for_openai_compatible")
 def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
     mock_compatible_config.return_value.model_name = "test-model"
     mock_compatible_config.return_value.additional_body_options = {
@@ -128,44 +164,68 @@ def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_ta
     }
     mock_compatible_config.return_value.base_url = "https://test.com/v1"
     mock_compatible_config.return_value.provider_name = "CustomProvider99"
+    mock_compatible_config.return_value.run_config_properties = RunConfigProperties(
+        model_name="provider::test-model",
+        model_provider_name=ModelProviderName.openai_compatible,
+        prompt_id="simple_prompt_builder",
+        structured_output_mode="json_schema",
+    )

     adapter = adapter_for_task(
         kiln_task=basic_task,
-        model_name="provider::test-model",
-        provider=ModelProviderName.openai_compatible,
+        run_config_properties=RunConfigProperties(
+            model_name="provider::test-model",
+            model_provider_name=ModelProviderName.openai_compatible,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     assert isinstance(adapter, LiteLlmAdapter)
-    mock_compatible_config.assert_called_once_with("provider::test-model")
+    mock_compatible_config.assert_called_once()
     assert adapter.config == mock_compatible_config.return_value


 def test_custom_openai_compatible_provider(mock_config, basic_task):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-        model_name="openai::test-model",
-        provider=ModelProviderName.kiln_custom_registry,
+        run_config_properties=RunConfigProperties(
+            model_name="openai::test-model",
+            model_provider_name=ModelProviderName.kiln_custom_registry,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.model_name == "openai::test-model"
+    assert adapter.config.run_config_properties.model_name == "openai::test-model"
     assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
     assert adapter.config.base_url is None  # openai is none
-    assert adapter.config.provider_name == ModelProviderName.kiln_custom_registry
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.kiln_custom_registry
+    )


 async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-        model_name="proj::task::tune",
-        provider=ModelProviderName.kiln_fine_tune,
+        run_config_properties=RunConfigProperties(
+            model_name="proj::task::tune",
+            model_provider_name=ModelProviderName.kiln_fine_tune,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )

     mock_finetune_from_id.assert_called_once_with("proj::task::tune")
     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.provider_name == ModelProviderName.kiln_fine_tune
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.kiln_fine_tune
+    )
     # Kiln model name here, but the underlying openai model id below
-    assert adapter.config.model_name == "proj::task::tune"
+    assert adapter.config.run_config_properties.model_name == "proj::task::tune"

     provider = kiln_model_provider_from(
         "proj::task::tune", provider_name=ModelProviderName.kiln_fine_tune
kiln_ai/adapters/test_ml_model_list.py (new file)

@@ -0,0 +1,158 @@
+import pytest
+
+from kiln_ai.adapters.ml_model_list import (
+    ModelName,
+    default_structured_output_mode_for_model_provider,
+    get_model_by_name,
+)
+from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
+
+
+class TestDefaultStructuredOutputModeForModelProvider:
+    """Test cases for default_structured_output_mode_for_model_provider function"""
+
+    def test_valid_model_and_provider_returns_provider_mode(self):
+        """Test that valid model and provider returns the provider's structured output mode"""
+        # GPT 4.1 has OpenAI provider with json_schema mode
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_valid_model_different_provider_modes(self):
+        """Test that different providers for the same model return different modes"""
+        # Claude 3.5 Sonnet has different modes for different providers
+        # Anthropic provider uses function_calling
+        result_anthropic = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.anthropic,
+        )
+        assert result_anthropic == StructuredOutputMode.function_calling
+
+        # Vertex provider uses function_calling_weak
+        result_vertex = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.vertex,
+        )
+        assert result_vertex == StructuredOutputMode.function_calling_weak
+
+    def test_invalid_model_name_returns_default(self):
+        """Test that invalid model name returns the default value"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_invalid_model_name_returns_custom_default(self):
+        """Test that invalid model name returns custom default when specified"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+        )
+        assert result == custom_default
+
+    def test_valid_model_invalid_provider_returns_default(self):
+        """Test that valid model but invalid provider returns default"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.gemini_api,  # GPT 4.1 doesn't have gemini_api provider
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_returns_default(self):
+        """Test that when provider's mode is in disallowed_modes, returns default"""
+        # GPT 4.1 OpenAI provider uses json_schema, but we disallow it
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_with_custom_default(self):
+        """Test disallowed modes with custom default value"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == custom_default
+
+    def test_empty_disallowed_modes_list(self):
+        """Test that empty disallowed_modes list works correctly"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[],
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_multiple_disallowed_modes(self):
+        """Test with multiple disallowed modes"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[
+                StructuredOutputMode.json_schema,
+                StructuredOutputMode.function_calling,
+            ],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_reasoning_model_with_different_providers(self):
+        """Test reasoning models that have different structured output modes"""
+        # DeepSeek R1 uses json_instructions for reasoning
+        result = default_structured_output_mode_for_model_provider(
+            model_name="deepseek_r1",
+            provider=ModelProviderName.openrouter,
+        )
+        assert result == StructuredOutputMode.json_instructions
+
+    @pytest.mark.parametrize(
+        "model_name,provider,expected_mode",
+        [
+            ("gpt_4o", ModelProviderName.openai, StructuredOutputMode.json_schema),
+            (
+                "claude_3_5_haiku",
+                ModelProviderName.anthropic,
+                StructuredOutputMode.function_calling,
+            ),
+            (
+                "gemini_2_5_pro",
+                ModelProviderName.gemini_api,
+                StructuredOutputMode.json_schema,
+            ),
+            ("llama_3_1_8b", ModelProviderName.groq, StructuredOutputMode.default),
+            (
+                "qwq_32b",
+                ModelProviderName.fireworks_ai,
+                StructuredOutputMode.json_instructions,
+            ),
+        ],
+    )
+    def test_parametrized_valid_combinations(self, model_name, provider, expected_mode):
+        """Test multiple valid model/provider combinations"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name=model_name,
+            provider=provider,
+        )
+        assert result == expected_mode
+
+    def test_model_with_single_provider(self):
+        """Test model that only has one provider"""
+        # Find a model with only one provider for this test
+        model = get_model_by_name(ModelName.gpt_4_1_nano)
+        assert len(model.providers) >= 1  # Verify it has providers
+
+        first_provider = model.providers[0]
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1_nano",
+            provider=first_provider.name,
+        )
+        assert result == first_provider.structured_output_mode
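
This new test module covers default_structured_output_mode_for_model_provider, a helper added to ml_model_list that resolves a model provider's preferred structured output mode, with an optional fallback and an optional list of modes to refuse. A short usage sketch drawn directly from the cases above:

from kiln_ai.adapters.ml_model_list import (
    default_structured_output_mode_for_model_provider,
)
from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode

# GPT-4.1 on OpenAI normally resolves to json_schema; refusing that mode here
# falls back to the supplied default, mirroring the disallowed_modes tests.
mode = default_structured_output_mode_for_model_provider(
    model_name="gpt_4_1",
    provider=ModelProviderName.openai,
    default=StructuredOutputMode.json_instructions,
    disallowed_modes=[StructuredOutputMode.json_schema],
)
assert mode == StructuredOutputMode.json_instructions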
kiln_ai/adapters/test_prompt_adaptors.py

@@ -18,6 +18,7 @@ from kiln_ai.adapters.prompt_builders import (
     SimpleChainOfThoughtPromptBuilder,
 )
 from kiln_ai.datamodel import PromptId
+from kiln_ai.datamodel.task import RunConfigProperties


 def get_all_models_and_providers():
@@ -124,8 +125,12 @@ async def test_mock_returning_run(tmp_path):

     adapter = LiteLlmAdapter(
         config=LiteLlmConfig(
-            model_name="custom_model",
-            provider_name="ollama",
+            run_config_properties=RunConfigProperties(
+                model_name="custom_model",
+                model_provider_name="ollama",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
             base_url="http://localhost:11434",
             additional_body_options={"api_key": "test_key"},
         ),
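
This hunk shows that LiteLlmConfig now nests the run settings inside run_config_properties next to the connection details. A minimal construction sketch mirroring the test; the import path for LiteLlmConfig is assumed from the litellm_config module in the file list, and the Ollama URL and api_key are the test's placeholder values:

from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
from kiln_ai.datamodel.task import RunConfigProperties

config = LiteLlmConfig(
    run_config_properties=RunConfigProperties(
        model_name="custom_model",
        model_provider_name="ollama",
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",
    ),
    base_url="http://localhost:11434",  # local Ollama endpoint used by the test
    additional_body_options={"api_key": "test_key"},
)
# The test then passes this config to LiteLlmAdapter(config=...); the remaining
# constructor arguments fall outside this hunk.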
@@ -145,6 +150,9 @@
         "model_name": "custom_model",
         "model_provider": "ollama",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "json_schema",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }


@@ -212,7 +220,13 @@ async def run_simple_task(
     prompt_id: PromptId | None = None,
 ) -> datamodel.TaskRun:
     adapter = adapter_for_task(
-        task, model_name=model_name, provider=provider, prompt_id=prompt_id
+        task,
+        RunConfigProperties(
+            structured_output_mode="json_schema",
+            model_name=model_name,
+            model_provider_name=provider,
+            prompt_id=prompt_id or "simple_prompt_builder",
+        ),
     )

     run = await adapter.invoke(
kiln_ai/adapters/test_prompt_builders.py

@@ -3,7 +3,7 @@ import logging

 import pytest

-from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
 from kiln_ai.adapters.model_adapters.test_structured_output import (
     build_structured_output_test_task,
 )
@@ -15,6 +15,7 @@ from kiln_ai.adapters.prompt_builders import (
     MultiShotPromptBuilder,
     RepairsPromptBuilder,
     SavedPromptBuilder,
+    ShortPromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
     TaskRunConfigPromptBuilder,
@@ -26,14 +27,15 @@ from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
     Finetune,
-    FinetuneDataStrategy,
     Project,
     Prompt,
     Task,
     TaskOutput,
     TaskOutputRating,
     TaskRun,
+    Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig

 logger = logging.getLogger(__name__)
@@ -52,15 +54,31 @@ def test_simple_prompt_builder(tmp_path):
     assert "1) " + task.requirements[0].instruction in prompt
     assert "2) " + task.requirements[1].instruction in prompt
     assert "3) " + task.requirements[2].instruction in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt


+def test_short_prompt_builder(tmp_path):
+    task = build_test_task(tmp_path)
+    builder = ShortPromptBuilder(task=task)
+    prompt = builder.build_prompt(include_json_instructions=False)
+
+    # Should only include the instruction, not requirements
+    assert task.instruction == prompt
+    assert task.requirements[0].instruction not in prompt
+    assert task.requirements[1].instruction not in prompt
+    assert task.requirements[2].instruction not in prompt
+
+    # Should handle JSON instructions correctly
+    prompt_with_json = builder.build_prompt(include_json_instructions=True)
+    assert task.instruction in prompt_with_json
+    if task.output_schema():
+        assert "# Format Instructions" in prompt_with_json
+        assert task.output_schema() in prompt_with_json
+
+
 class MockAdapter(BaseAdapter):
-    def _run(self, input: str) -> str:
-        return "mock response"
+    async def _run(self, input: str) -> tuple[RunOutput, Usage | None]:
+        return RunOutput(output="mock response", intermediate_outputs=None), None

     def adapter_name(self) -> str:
         return "mock_adapter"
@@ -72,20 +90,9 @@ def test_simple_prompt_builder_structured_output(tmp_path):
     input = "Cows"
     prompt = builder.build_prompt(include_json_instructions=False)
     assert "You are an assistant which tells a joke, given a subject." in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt


-def test_simple_prompt_builder_structured_input_non_ascii(tmp_path):
-    task = build_structured_output_test_task(tmp_path)
-    builder = SimplePromptBuilder(task=task)
-    input = {"key": "你好👋"}
-    user_msg = builder.build_user_message(input)
-    assert "你好👋" in user_msg
-
-
 @pytest.fixture
 def task_with_examples(tmp_path):
     # Create a project and task hierarchy
@@ -383,7 +390,7 @@ def test_prompt_builder_from_id(task_with_examples):
         base_model_id="test_base_model_id",
         dataset_split_id="asdf",
         provider="test_provider",
-        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+        data_strategy=ChatStrategy.two_message_cot,
     )
     finetune.save_to_file()
     nested_fine_tune_id = (
@@ -598,6 +605,7 @@ def test_task_run_config_prompt_builder(tmp_path):
             model_name="gpt-4",
             model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
         prompt=Prompt(
             name="test prompt name",