kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +234 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
  7. kiln_ai/adapters/eval/base_eval.py +8 -6
  8. kiln_ai/adapters/eval/eval_runner.py +4 -1
  9. kiln_ai/adapters/eval/g_eval.py +23 -5
  10. kiln_ai/adapters/eval/test_base_eval.py +166 -15
  11. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  12. kiln_ai/adapters/eval/test_g_eval.py +1 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +138 -272
  15. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  16. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
  17. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  18. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  19. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  20. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
  21. kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
  22. kiln_ai/adapters/ml_model_list.py +80 -43
  23. kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
  24. kiln_ai/adapters/model_adapters/litellm_adapter.py +79 -97
  25. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  26. kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -60
  27. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +56 -21
  28. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
  29. kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
  30. kiln_ai/adapters/prompt_builders.py +0 -16
  31. kiln_ai/adapters/provider_tools.py +27 -9
  32. kiln_ai/adapters/repair/test_repair_task.py +24 -3
  33. kiln_ai/adapters/test_adapter_registry.py +88 -28
  34. kiln_ai/adapters/test_ml_model_list.py +158 -0
  35. kiln_ai/adapters/test_prompt_adaptors.py +17 -3
  36. kiln_ai/adapters/test_prompt_builders.py +3 -16
  37. kiln_ai/adapters/test_provider_tools.py +69 -20
  38. kiln_ai/datamodel/__init__.py +0 -2
  39. kiln_ai/datamodel/datamodel_enums.py +38 -13
  40. kiln_ai/datamodel/finetune.py +12 -7
  41. kiln_ai/datamodel/task.py +68 -7
  42. kiln_ai/datamodel/test_basemodel.py +2 -1
  43. kiln_ai/datamodel/test_dataset_split.py +0 -8
  44. kiln_ai/datamodel/test_models.py +33 -10
  45. kiln_ai/datamodel/test_task.py +168 -2
  46. kiln_ai/utils/config.py +3 -2
  47. kiln_ai/utils/dataset_import.py +1 -1
  48. kiln_ai/utils/logging.py +165 -0
  49. kiln_ai/utils/test_config.py +23 -0
  50. kiln_ai/utils/test_dataset_import.py +30 -0
  51. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
  52. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/RECORD +54 -49
  53. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
  54. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -7,8 +7,8 @@ from kiln_ai.adapters.adapter_registry import adapter_for_task
  from kiln_ai.adapters.ml_model_list import ModelProviderName
  from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
  from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
- from kiln_ai.adapters.prompt_builders import BasePromptBuilder
  from kiln_ai.adapters.provider_tools import kiln_model_provider_from
+ from kiln_ai.datamodel.task import RunConfigProperties


  @pytest.fixture
@@ -35,18 +35,28 @@ def mock_finetune_from_id():
  with patch("kiln_ai.adapters.provider_tools.finetune_from_id") as mock:
  mock.return_value.provider = ModelProviderName.openai
  mock.return_value.fine_tune_model_id = "test-model"
+ mock.return_value.data_strategy = "final_only"
  yield mock


  def test_openai_adapter_creation(mock_config, basic_task):
  adapter = adapter_for_task(
- kiln_task=basic_task, model_name="gpt-4", provider=ModelProviderName.openai
+ kiln_task=basic_task,
+ run_config_properties=RunConfigProperties(
+ model_name="gpt-4",
+ model_provider_name=ModelProviderName.openai,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  assert isinstance(adapter, LiteLlmAdapter)
- assert adapter.config.model_name == "gpt-4"
+ assert adapter.config.run_config_properties.model_name == "gpt-4"
  assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
- assert adapter.config.provider_name == ModelProviderName.openai
+ assert (
+ adapter.config.run_config_properties.model_provider_name
+ == ModelProviderName.openai
+ )
  assert adapter.config.base_url is None  # OpenAI url is default
  assert adapter.config.default_headers is None

@@ -54,14 +64,21 @@ def test_openai_adapter_creation(mock_config, basic_task):
  def test_openrouter_adapter_creation(mock_config, basic_task):
  adapter = adapter_for_task(
  kiln_task=basic_task,
- model_name="anthropic/claude-3-opus",
- provider=ModelProviderName.openrouter,
+ run_config_properties=RunConfigProperties(
+ model_name="anthropic/claude-3-opus",
+ model_provider_name=ModelProviderName.openrouter,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  assert isinstance(adapter, LiteLlmAdapter)
- assert adapter.config.model_name == "anthropic/claude-3-opus"
+ assert adapter.config.run_config_properties.model_name == "anthropic/claude-3-opus"
  assert adapter.config.additional_body_options == {"api_key": "test-openrouter-key"}
- assert adapter.config.provider_name == ModelProviderName.openrouter
+ assert (
+ adapter.config.run_config_properties.model_provider_name
+ == ModelProviderName.openrouter
+ )
  assert adapter.config.default_headers == {
  "HTTP-Referer": "https://getkiln.ai/openrouter",
  "X-Title": "KilnAI",
@@ -79,7 +96,13 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
  )
  def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
  adapter = adapter_for_task(
- kiln_task=basic_task, model_name="test-model", provider=provider
+ kiln_task=basic_task,
+ run_config_properties=RunConfigProperties(
+ model_name="test-model",
+ model_provider_name=provider,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  assert isinstance(adapter, LiteLlmAdapter)
@@ -90,9 +113,12 @@ def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
  def test_custom_prompt_builder(mock_config, basic_task):
  adapter = adapter_for_task(
  kiln_task=basic_task,
- model_name="gpt-4",
- provider=ModelProviderName.openai,
- prompt_id="simple_chain_of_thought_prompt_builder",
+ run_config_properties=RunConfigProperties(
+ model_name="gpt-4",
+ model_provider_name=ModelProviderName.openai,
+ prompt_id="simple_chain_of_thought_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"
@@ -103,8 +129,12 @@ def test_tags_passed_through(mock_config, basic_task):
  tags = ["test-tag-1", "test-tag-2"]
  adapter = adapter_for_task(
  kiln_task=basic_task,
- model_name="gpt-4",
- provider=ModelProviderName.openai,
+ run_config_properties=RunConfigProperties(
+ model_name="gpt-4",
+ model_provider_name=ModelProviderName.openai,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  base_adapter_config=AdapterConfig(
  default_tags=tags,
  ),
@@ -114,13 +144,19 @@ def test_tags_passed_through(mock_config, basic_task):


  def test_invalid_provider(mock_config, basic_task):
- with pytest.raises(ValueError, match="Unhandled enum value"):
+ with pytest.raises(ValueError, match="Input should be"):
  adapter_for_task(
- kiln_task=basic_task, model_name="test-model", provider="invalid"
+ kiln_task=basic_task,
+ run_config_properties=RunConfigProperties(
+ model_name="test-model",
+ model_provider_name="invalid",
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )


- @patch("kiln_ai.adapters.adapter_registry.lite_llm_config")
+ @patch("kiln_ai.adapters.adapter_registry.lite_llm_config_for_openai_compatible")
  def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
  mock_compatible_config.return_value.model_name = "test-model"
  mock_compatible_config.return_value.additional_body_options = {
@@ -128,44 +164,68 @@ def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
  }
  mock_compatible_config.return_value.base_url = "https://test.com/v1"
  mock_compatible_config.return_value.provider_name = "CustomProvider99"
+ mock_compatible_config.return_value.run_config_properties = RunConfigProperties(
+ model_name="provider::test-model",
+ model_provider_name=ModelProviderName.openai_compatible,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ )

  adapter = adapter_for_task(
  kiln_task=basic_task,
- model_name="provider::test-model",
- provider=ModelProviderName.openai_compatible,
+ run_config_properties=RunConfigProperties(
+ model_name="provider::test-model",
+ model_provider_name=ModelProviderName.openai_compatible,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  assert isinstance(adapter, LiteLlmAdapter)
- mock_compatible_config.assert_called_once_with("provider::test-model")
+ mock_compatible_config.assert_called_once()
  assert adapter.config == mock_compatible_config.return_value


  def test_custom_openai_compatible_provider(mock_config, basic_task):
  adapter = adapter_for_task(
  kiln_task=basic_task,
- model_name="openai::test-model",
- provider=ModelProviderName.kiln_custom_registry,
+ run_config_properties=RunConfigProperties(
+ model_name="openai::test-model",
+ model_provider_name=ModelProviderName.kiln_custom_registry,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  assert isinstance(adapter, LiteLlmAdapter)
- assert adapter.config.model_name == "openai::test-model"
+ assert adapter.config.run_config_properties.model_name == "openai::test-model"
  assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
  assert adapter.config.base_url is None  # openai is none
- assert adapter.config.provider_name == ModelProviderName.kiln_custom_registry
+ assert (
+ adapter.config.run_config_properties.model_provider_name
+ == ModelProviderName.kiln_custom_registry
+ )


  async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id):
  adapter = adapter_for_task(
  kiln_task=basic_task,
- model_name="proj::task::tune",
- provider=ModelProviderName.kiln_fine_tune,
+ run_config_properties=RunConfigProperties(
+ model_name="proj::task::tune",
+ model_provider_name=ModelProviderName.kiln_fine_tune,
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  )

  mock_finetune_from_id.assert_called_once_with("proj::task::tune")
  assert isinstance(adapter, LiteLlmAdapter)
- assert adapter.config.provider_name == ModelProviderName.kiln_fine_tune
+ assert (
+ adapter.config.run_config_properties.model_provider_name
+ == ModelProviderName.kiln_fine_tune
+ )
  # Kiln model name here, but the underlying openai model id below
- assert adapter.config.model_name == "proj::task::tune"
+ assert adapter.config.run_config_properties.model_name == "proj::task::tune"

  provider = kiln_model_provider_from(
  "proj::task::tune", provider_name=ModelProviderName.kiln_fine_tune
@@ -0,0 +1,158 @@
+ import pytest
+
+ from kiln_ai.adapters.ml_model_list import (
+ ModelName,
+ default_structured_output_mode_for_model_provider,
+ get_model_by_name,
+ )
+ from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
+
+
+ class TestDefaultStructuredOutputModeForModelProvider:
+ """Test cases for default_structured_output_mode_for_model_provider function"""
+
+ def test_valid_model_and_provider_returns_provider_mode(self):
+ """Test that valid model and provider returns the provider's structured output mode"""
+ # GPT 4.1 has OpenAI provider with json_schema mode
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1",
+ provider=ModelProviderName.openai,
+ )
+ assert result == StructuredOutputMode.json_schema
+
+ def test_valid_model_different_provider_modes(self):
+ """Test that different providers for the same model return different modes"""
+ # Claude 3.5 Sonnet has different modes for different providers
+ # Anthropic provider uses function_calling
+ result_anthropic = default_structured_output_mode_for_model_provider(
+ model_name="claude_3_5_sonnet",
+ provider=ModelProviderName.anthropic,
+ )
+ assert result_anthropic == StructuredOutputMode.function_calling
+
+ # Vertex provider uses function_calling_weak
+ result_vertex = default_structured_output_mode_for_model_provider(
+ model_name="claude_3_5_sonnet",
+ provider=ModelProviderName.vertex,
+ )
+ assert result_vertex == StructuredOutputMode.function_calling_weak
+
+ def test_invalid_model_name_returns_default(self):
+ """Test that invalid model name returns the default value"""
+ result = default_structured_output_mode_for_model_provider(
+ model_name="invalid_model_name",
+ provider=ModelProviderName.openai,
+ )
+ assert result == StructuredOutputMode.default
+
+ def test_invalid_model_name_returns_custom_default(self):
+ """Test that invalid model name returns custom default when specified"""
+ custom_default = StructuredOutputMode.json_instructions
+ result = default_structured_output_mode_for_model_provider(
+ model_name="invalid_model_name",
+ provider=ModelProviderName.openai,
+ default=custom_default,
+ )
+ assert result == custom_default
+
+ def test_valid_model_invalid_provider_returns_default(self):
+ """Test that valid model but invalid provider returns default"""
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1",
+ provider=ModelProviderName.gemini_api,  # GPT 4.1 doesn't have gemini_api provider
+ )
+ assert result == StructuredOutputMode.default
+
+ def test_disallowed_modes_returns_default(self):
+ """Test that when provider's mode is in disallowed_modes, returns default"""
+ # GPT 4.1 OpenAI provider uses json_schema, but we disallow it
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1",
+ provider=ModelProviderName.openai,
+ disallowed_modes=[StructuredOutputMode.json_schema],
+ )
+ assert result == StructuredOutputMode.default
+
+ def test_disallowed_modes_with_custom_default(self):
+ """Test disallowed modes with custom default value"""
+ custom_default = StructuredOutputMode.json_instructions
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1",
+ provider=ModelProviderName.openai,
+ default=custom_default,
+ disallowed_modes=[StructuredOutputMode.json_schema],
+ )
+ assert result == custom_default
+
+ def test_empty_disallowed_modes_list(self):
+ """Test that empty disallowed_modes list works correctly"""
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1",
+ provider=ModelProviderName.openai,
+ disallowed_modes=[],
+ )
+ assert result == StructuredOutputMode.json_schema
+
+ def test_multiple_disallowed_modes(self):
+ """Test with multiple disallowed modes"""
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1",
+ provider=ModelProviderName.openai,
+ disallowed_modes=[
+ StructuredOutputMode.json_schema,
+ StructuredOutputMode.function_calling,
+ ],
+ )
+ assert result == StructuredOutputMode.default
+
+ def test_reasoning_model_with_different_providers(self):
+ """Test reasoning models that have different structured output modes"""
+ # DeepSeek R1 uses json_instructions for reasoning
+ result = default_structured_output_mode_for_model_provider(
+ model_name="deepseek_r1",
+ provider=ModelProviderName.openrouter,
+ )
+ assert result == StructuredOutputMode.json_instructions
+
+ @pytest.mark.parametrize(
+ "model_name,provider,expected_mode",
+ [
+ ("gpt_4o", ModelProviderName.openai, StructuredOutputMode.json_schema),
+ (
+ "claude_3_5_haiku",
+ ModelProviderName.anthropic,
+ StructuredOutputMode.function_calling,
+ ),
+ (
+ "gemini_2_5_pro",
+ ModelProviderName.gemini_api,
+ StructuredOutputMode.json_schema,
+ ),
+ ("llama_3_1_8b", ModelProviderName.groq, StructuredOutputMode.default),
+ (
+ "qwq_32b",
+ ModelProviderName.fireworks_ai,
+ StructuredOutputMode.json_instructions,
+ ),
+ ],
+ )
+ def test_parametrized_valid_combinations(self, model_name, provider, expected_mode):
+ """Test multiple valid model/provider combinations"""
+ result = default_structured_output_mode_for_model_provider(
+ model_name=model_name,
+ provider=provider,
+ )
+ assert result == expected_mode
+
+ def test_model_with_single_provider(self):
+ """Test model that only has one provider"""
+ # Find a model with only one provider for this test
+ model = get_model_by_name(ModelName.gpt_4_1_nano)
+ assert len(model.providers) >= 1  # Verify it has providers
+
+ first_provider = model.providers[0]
+ result = default_structured_output_mode_for_model_provider(
+ model_name="gpt_4_1_nano",
+ provider=first_provider.name,
+ )
+ assert result == first_provider.structured_output_mode
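
The new file above (test_ml_model_list.py in the file list) exercises a lookup helper added to ml_model_list. A short usage sketch, assuming only the parameter names and fallback behaviour the tests demonstrate:

from kiln_ai.adapters.ml_model_list import (
    default_structured_output_mode_for_model_provider,
)
from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode

# Default structured output mode a provider advertises for a known model.
mode = default_structured_output_mode_for_model_provider(
    model_name="gpt_4_1",
    provider=ModelProviderName.openai,
)  # json_schema, per the tests above

# Unknown model/provider combinations fall back to `default`, and any mode
# listed in `disallowed_modes` is also replaced by that default.
fallback = default_structured_output_mode_for_model_provider(
    model_name="gpt_4_1",
    provider=ModelProviderName.openai,
    default=StructuredOutputMode.json_instructions,
    disallowed_modes=[StructuredOutputMode.json_schema],
)  # json_instructions
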
@@ -18,6 +18,7 @@ from kiln_ai.adapters.prompt_builders import (
  SimpleChainOfThoughtPromptBuilder,
  )
  from kiln_ai.datamodel import PromptId
+ from kiln_ai.datamodel.task import RunConfigProperties


  def get_all_models_and_providers():
@@ -124,8 +125,12 @@ async def test_mock_returning_run(tmp_path):

  adapter = LiteLlmAdapter(
  config=LiteLlmConfig(
- model_name="custom_model",
- provider_name="ollama",
+ run_config_properties=RunConfigProperties(
+ model_name="custom_model",
+ model_provider_name="ollama",
+ prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
+ ),
  base_url="http://localhost:11434",
  additional_body_options={"api_key": "test_key"},
  ),
@@ -145,6 +150,9 @@ async def test_mock_returning_run(tmp_path):
  "model_name": "custom_model",
  "model_provider": "ollama",
  "prompt_id": "simple_prompt_builder",
+ "structured_output_mode": "json_schema",
+ "temperature": 1.0,
+ "top_p": 1.0,
  }


@@ -212,7 +220,13 @@ async def run_simple_task(
  prompt_id: PromptId | None = None,
  ) -> datamodel.TaskRun:
  adapter = adapter_for_task(
- task, model_name=model_name, provider=provider, prompt_id=prompt_id
+ task,
+ RunConfigProperties(
+ structured_output_mode="json_schema",
+ model_name=model_name,
+ model_provider_name=provider,
+ prompt_id=prompt_id or "simple_prompt_builder",
+ ),
  )

  run = await adapter.invoke(
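
The same RunConfigProperties migration appears when constructing a LiteLlmAdapter directly: LiteLlmConfig now nests the run configuration instead of holding bare model_name / provider_name fields, and the recorded run properties gain structured_output_mode, temperature, and top_p. A rough sketch of the new config shape based on the hunk above (the import path for LiteLlmConfig is an assumption; the test only shows the class name):

from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig  # assumed module path
from kiln_ai.datamodel.task import RunConfigProperties

config = LiteLlmConfig(
    run_config_properties=RunConfigProperties(
        model_name="custom_model",
        model_provider_name="ollama",
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",
    ),
    base_url="http://localhost:11434",  # local Ollama endpoint used in the test
    additional_body_options={"api_key": "test_key"},
)
# The config is then passed to LiteLlmAdapter(config=config, ...) as in the test above.
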
@@ -27,7 +27,6 @@ from kiln_ai.datamodel import (
  DataSource,
  DataSourceType,
  Finetune,
- FinetuneDataStrategy,
  Project,
  Prompt,
  Task,
@@ -36,6 +35,7 @@ from kiln_ai.datamodel import (
  TaskRun,
  Usage,
  )
+ from kiln_ai.datamodel.datamodel_enums import ChatStrategy
  from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig

  logger = logging.getLogger(__name__)
@@ -54,9 +54,6 @@ def test_simple_prompt_builder(tmp_path):
  assert "1) " + task.requirements[0].instruction in prompt
  assert "2) " + task.requirements[1].instruction in prompt
  assert "3) " + task.requirements[2].instruction in prompt
-
- user_msg = builder.build_user_message(input)
- assert input in user_msg
  assert input not in prompt


@@ -93,20 +90,9 @@ def test_simple_prompt_builder_structured_output(tmp_path):
  input = "Cows"
  prompt = builder.build_prompt(include_json_instructions=False)
  assert "You are an assistant which tells a joke, given a subject." in prompt
-
- user_msg = builder.build_user_message(input)
- assert input in user_msg
  assert input not in prompt


- def test_simple_prompt_builder_structured_input_non_ascii(tmp_path):
- task = build_structured_output_test_task(tmp_path)
- builder = SimplePromptBuilder(task=task)
- input = {"key": "你好👋"}
- user_msg = builder.build_user_message(input)
- assert "你好👋" in user_msg
-
-
  @pytest.fixture
  def task_with_examples(tmp_path):
  # Create a project and task hierarchy
@@ -404,7 +390,7 @@ def test_prompt_builder_from_id(task_with_examples):
  base_model_id="test_base_model_id",
  dataset_split_id="asdf",
  provider="test_provider",
- data_strategy=FinetuneDataStrategy.final_and_intermediate,
+ data_strategy=ChatStrategy.two_message_cot,
  )
  finetune.save_to_file()
  nested_fine_tune_id = (
@@ -619,6 +605,7 @@ def test_task_run_config_prompt_builder(tmp_path):
  model_name="gpt-4",
  model_provider_name="openai",
  prompt_id="simple_prompt_builder",
+ structured_output_mode="json_schema",
  ),
  prompt=Prompt(
  name="test prompt name",