kiln-ai 0.15.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +22 -44
- kiln_ai/adapters/chat/__init__.py +8 -0
- kiln_ai/adapters/chat/chat_formatter.py +234 -0
- kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
- kiln_ai/adapters/eval/base_eval.py +8 -6
- kiln_ai/adapters/eval/eval_runner.py +9 -65
- kiln_ai/adapters/eval/g_eval.py +26 -8
- kiln_ai/adapters/eval/test_base_eval.py +166 -15
- kiln_ai/adapters/eval/test_eval_runner.py +3 -0
- kiln_ai/adapters/eval/test_g_eval.py +1 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
- kiln_ai/adapters/fine_tune/dataset_formatter.py +153 -197
- kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +402 -211
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
- kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
- kiln_ai/adapters/ml_model_list.py +556 -45
- kiln_ai/adapters/model_adapters/base_adapter.py +100 -35
- kiln_ai/adapters/model_adapters/litellm_adapter.py +116 -100
- kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
- kiln_ai/adapters/model_adapters/test_base_adapter.py +299 -52
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +121 -22
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +44 -2
- kiln_ai/adapters/model_adapters/test_structured_output.py +48 -18
- kiln_ai/adapters/parsers/base_parser.py +0 -3
- kiln_ai/adapters/parsers/parser_registry.py +5 -3
- kiln_ai/adapters/parsers/r1_parser.py +17 -2
- kiln_ai/adapters/parsers/request_formatters.py +40 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +2 -2
- kiln_ai/adapters/parsers/test_r1_parser.py +44 -1
- kiln_ai/adapters/parsers/test_request_formatters.py +76 -0
- kiln_ai/adapters/prompt_builders.py +14 -17
- kiln_ai/adapters/provider_tools.py +39 -4
- kiln_ai/adapters/repair/test_repair_task.py +27 -5
- kiln_ai/adapters/test_adapter_registry.py +88 -28
- kiln_ai/adapters/test_ml_model_list.py +158 -0
- kiln_ai/adapters/test_prompt_adaptors.py +17 -3
- kiln_ai/adapters/test_prompt_builders.py +27 -19
- kiln_ai/adapters/test_provider_tools.py +130 -12
- kiln_ai/datamodel/__init__.py +2 -2
- kiln_ai/datamodel/datamodel_enums.py +43 -4
- kiln_ai/datamodel/dataset_filters.py +69 -1
- kiln_ai/datamodel/dataset_split.py +4 -0
- kiln_ai/datamodel/eval.py +8 -0
- kiln_ai/datamodel/finetune.py +13 -7
- kiln_ai/datamodel/prompt_id.py +1 -0
- kiln_ai/datamodel/task.py +68 -7
- kiln_ai/datamodel/task_output.py +1 -1
- kiln_ai/datamodel/task_run.py +39 -7
- kiln_ai/datamodel/test_basemodel.py +5 -8
- kiln_ai/datamodel/test_dataset_filters.py +82 -0
- kiln_ai/datamodel/test_dataset_split.py +2 -8
- kiln_ai/datamodel/test_example_models.py +54 -0
- kiln_ai/datamodel/test_models.py +80 -9
- kiln_ai/datamodel/test_task.py +168 -2
- kiln_ai/utils/async_job_runner.py +106 -0
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/dataset_import.py +81 -19
- kiln_ai/utils/logging.py +165 -0
- kiln_ai/utils/test_async_job_runner.py +199 -0
- kiln_ai/utils/test_config.py +23 -0
- kiln_ai/utils/test_dataset_import.py +272 -10
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
- kiln_ai-0.17.0.dist-info/RECORD +113 -0
- kiln_ai-0.15.0.dist-info/RECORD +0 -104
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/repair/test_repair_task.py (+27 -5)

@@ -21,6 +21,7 @@ from kiln_ai.datamodel import (
     TaskRequirement,
     TaskRun,
 )
+from kiln_ai.datamodel.task import RunConfigProperties
 
 json_joke_schema = """{
   "type": "object",

@@ -189,7 +190,15 @@ async def test_live_run(sample_task, sample_task_run, sample_repair_data):
     repair_task_input = RepairTaskRun.build_repair_task_input(**sample_repair_data)
     assert isinstance(repair_task_input, RepairTaskInput)
 
-    adapter = adapter_for_task(
+    adapter = adapter_for_task(
+        repair_task,
+        RunConfigProperties(
+            model_name="llama_3_1_8b",
+            model_provider_name="groq",
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="default",
+        ),
+    )
 
     run = await adapter.invoke(repair_task_input.model_dump())
     assert run is not None

@@ -198,10 +207,13 @@ async def test_live_run(sample_task, sample_task_run, sample_repair_data):
     assert "setup" in parsed_output
     assert "punchline" in parsed_output
     assert run.output.source.properties == {
-        "adapter_name": "
+        "adapter_name": "kiln_openai_compatible_adapter",
         "model_name": "llama_3_1_8b",
         "model_provider": "groq",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "default",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }
 
 

@@ -218,12 +230,19 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repair_data):
     }
 
     with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
-        mock_run.return_value =
-            output=mocked_output, intermediate_outputs=None
+        mock_run.return_value = (
+            RunOutput(output=mocked_output, intermediate_outputs=None),
+            None,
         )
 
         adapter = adapter_for_task(
-            repair_task,
+            repair_task,
+            RunConfigProperties(
+                model_name="llama_3_1_8b",
+                model_provider_name="ollama",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
         )
 
         run = await adapter.invoke(repair_task_input.model_dump())

@@ -239,6 +258,9 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repair_data):
             "model_name": "llama_3_1_8b",
            "model_provider": "ollama",
            "prompt_id": "simple_prompt_builder",
+            "structured_output_mode": "json_schema",
+            "temperature": 1.0,
+            "top_p": 1.0,
         }
         assert run.input_source.type == DataSourceType.human
         assert "created_by" in run.input_source.properties
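Taken together, these hunks show the headline API change in 0.17.0: adapter_for_task no longer takes loose model/provider keyword arguments and instead receives a RunConfigProperties bundle, and saved runs now record structured_output_mode, temperature, and top_p in output.source.properties. A minimal sketch of the new call shape, using only names that appear in the hunks above (the task object and input dict are placeholders):

import asyncio

from kiln_ai.adapters.adapter_registry import adapter_for_task
from kiln_ai.datamodel.task import RunConfigProperties


async def run_task(task, task_input: dict):
    # Model, provider, prompt, and structured-output settings are now bundled
    # into a single RunConfigProperties object.
    adapter = adapter_for_task(
        task,
        RunConfigProperties(
            model_name="llama_3_1_8b",
            model_provider_name="groq",
            prompt_id="simple_prompt_builder",
            structured_output_mode="default",
        ),
    )
    return await adapter.invoke(task_input)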
kiln_ai/adapters/test_adapter_registry.py (+88 -28)

@@ -7,8 +7,8 @@ from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.ml_model_list import ModelProviderName
 from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
 from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
-from kiln_ai.adapters.prompt_builders import BasePromptBuilder
 from kiln_ai.adapters.provider_tools import kiln_model_provider_from
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 @pytest.fixture

@@ -35,18 +35,28 @@ def mock_finetune_from_id():
     with patch("kiln_ai.adapters.provider_tools.finetune_from_id") as mock:
         mock.return_value.provider = ModelProviderName.openai
         mock.return_value.fine_tune_model_id = "test-model"
+        mock.return_value.data_strategy = "final_only"
         yield mock
 
 
 def test_openai_adapter_creation(mock_config, basic_task):
     adapter = adapter_for_task(
-        kiln_task=basic_task,
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )
 
     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.model_name == "gpt-4"
+    assert adapter.config.run_config_properties.model_name == "gpt-4"
     assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
-    assert
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.openai
+    )
     assert adapter.config.base_url is None  # OpenAI url is default
     assert adapter.config.default_headers is None
 

@@ -54,14 +64,21 @@ def test_openai_adapter_creation(mock_config, basic_task):
 def test_openrouter_adapter_creation(mock_config, basic_task):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="anthropic/claude-3-opus",
+            model_provider_name=ModelProviderName.openrouter,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )
 
     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.model_name == "anthropic/claude-3-opus"
+    assert adapter.config.run_config_properties.model_name == "anthropic/claude-3-opus"
     assert adapter.config.additional_body_options == {"api_key": "test-openrouter-key"}
-    assert
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.openrouter
+    )
     assert adapter.config.default_headers == {
         "HTTP-Referer": "https://getkiln.ai/openrouter",
         "X-Title": "KilnAI",

@@ -79,7 +96,13 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
 )
 def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
     adapter = adapter_for_task(
-        kiln_task=basic_task,
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="test-model",
+            model_provider_name=provider,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )
 
     assert isinstance(adapter, LiteLlmAdapter)

@@ -90,9 +113,12 @@ def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
 def test_custom_prompt_builder(mock_config, basic_task):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_chain_of_thought_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )
 
     assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"

@@ -103,8 +129,12 @@ def test_tags_passed_through(mock_config, basic_task):
     tags = ["test-tag-1", "test-tag-2"]
     adapter = adapter_for_task(
         kiln_task=basic_task,
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
         base_adapter_config=AdapterConfig(
             default_tags=tags,
         ),

@@ -114,13 +144,19 @@ def test_tags_passed_through(mock_config, basic_task):
 
 
 def test_invalid_provider(mock_config, basic_task):
-    with pytest.raises(ValueError, match="
+    with pytest.raises(ValueError, match="Input should be"):
         adapter_for_task(
-            kiln_task=basic_task,
+            kiln_task=basic_task,
+            run_config_properties=RunConfigProperties(
+                model_name="test-model",
+                model_provider_name="invalid",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
         )
 
 
-@patch("kiln_ai.adapters.adapter_registry.
+@patch("kiln_ai.adapters.adapter_registry.lite_llm_config_for_openai_compatible")
 def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
     mock_compatible_config.return_value.model_name = "test-model"
     mock_compatible_config.return_value.additional_body_options = {

@@ -128,44 +164,68 @@ def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
     }
     mock_compatible_config.return_value.base_url = "https://test.com/v1"
     mock_compatible_config.return_value.provider_name = "CustomProvider99"
+    mock_compatible_config.return_value.run_config_properties = RunConfigProperties(
+        model_name="provider::test-model",
+        model_provider_name=ModelProviderName.openai_compatible,
+        prompt_id="simple_prompt_builder",
+        structured_output_mode="json_schema",
+    )
 
     adapter = adapter_for_task(
         kiln_task=basic_task,
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="provider::test-model",
+            model_provider_name=ModelProviderName.openai_compatible,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )
 
     assert isinstance(adapter, LiteLlmAdapter)
-    mock_compatible_config.
+    mock_compatible_config.assert_called_once()
     assert adapter.config == mock_compatible_config.return_value
 
 
 def test_custom_openai_compatible_provider(mock_config, basic_task):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="openai::test-model",
+            model_provider_name=ModelProviderName.kiln_custom_registry,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
     )
 
     assert isinstance(adapter, LiteLlmAdapter)
-    assert adapter.config.model_name == "openai::test-model"
+    assert adapter.config.run_config_properties.model_name == "openai::test-model"
     assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
     assert adapter.config.base_url is None  # openai is none
-    assert
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.kiln_custom_registry
+    )
 
 
 async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id):
     adapter = adapter_for_task(
         kiln_task=basic_task,
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="proj::task::tune",
+            model_provider_name=ModelProviderName.kiln_fine_tune,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
    )
 
     mock_finetune_from_id.assert_called_once_with("proj::task::tune")
     assert isinstance(adapter, LiteLlmAdapter)
-    assert
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.kiln_fine_tune
+    )
     # Kiln model name here, but the underlying openai model id below
-    assert adapter.config.model_name == "proj::task::tune"
+    assert adapter.config.run_config_properties.model_name == "proj::task::tune"
 
     provider = kiln_model_provider_from(
         "proj::task::tune", provider_name=ModelProviderName.kiln_fine_tune
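The assertions in this file also show where the settings end up: they are no longer flat attributes on adapter.config but live under config.run_config_properties, and an invalid provider name is now rejected with a Pydantic-style "Input should be" validation message. A short sketch of reading the settings back, assuming the keyword form used by these tests:

from kiln_ai.adapters.adapter_registry import adapter_for_task
from kiln_ai.adapters.ml_model_list import ModelProviderName
from kiln_ai.datamodel.task import RunConfigProperties


def build_and_inspect(task):
    adapter = adapter_for_task(
        kiln_task=task,
        run_config_properties=RunConfigProperties(
            model_name="gpt-4",
            model_provider_name=ModelProviderName.openai,
            prompt_id="simple_prompt_builder",
            structured_output_mode="json_schema",
        ),
    )
    # Settings are read back from the nested run_config_properties object.
    props = adapter.config.run_config_properties
    return props.model_name, props.model_provider_name, props.structured_output_mode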
kiln_ai/adapters/test_ml_model_list.py (+158 -0, new file)

@@ -0,0 +1,158 @@
+import pytest
+
+from kiln_ai.adapters.ml_model_list import (
+    ModelName,
+    default_structured_output_mode_for_model_provider,
+    get_model_by_name,
+)
+from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
+
+
+class TestDefaultStructuredOutputModeForModelProvider:
+    """Test cases for default_structured_output_mode_for_model_provider function"""
+
+    def test_valid_model_and_provider_returns_provider_mode(self):
+        """Test that valid model and provider returns the provider's structured output mode"""
+        # GPT 4.1 has OpenAI provider with json_schema mode
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_valid_model_different_provider_modes(self):
+        """Test that different providers for the same model return different modes"""
+        # Claude 3.5 Sonnet has different modes for different providers
+        # Anthropic provider uses function_calling
+        result_anthropic = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.anthropic,
+        )
+        assert result_anthropic == StructuredOutputMode.function_calling
+
+        # Vertex provider uses function_calling_weak
+        result_vertex = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.vertex,
+        )
+        assert result_vertex == StructuredOutputMode.function_calling_weak
+
+    def test_invalid_model_name_returns_default(self):
+        """Test that invalid model name returns the default value"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_invalid_model_name_returns_custom_default(self):
+        """Test that invalid model name returns custom default when specified"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+        )
+        assert result == custom_default
+
+    def test_valid_model_invalid_provider_returns_default(self):
+        """Test that valid model but invalid provider returns default"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.gemini_api,  # GPT 4.1 doesn't have gemini_api provider
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_returns_default(self):
+        """Test that when provider's mode is in disallowed_modes, returns default"""
+        # GPT 4.1 OpenAI provider uses json_schema, but we disallow it
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_with_custom_default(self):
+        """Test disallowed modes with custom default value"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == custom_default
+
+    def test_empty_disallowed_modes_list(self):
+        """Test that empty disallowed_modes list works correctly"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[],
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_multiple_disallowed_modes(self):
+        """Test with multiple disallowed modes"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[
+                StructuredOutputMode.json_schema,
+                StructuredOutputMode.function_calling,
+            ],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_reasoning_model_with_different_providers(self):
+        """Test reasoning models that have different structured output modes"""
+        # DeepSeek R1 uses json_instructions for reasoning
+        result = default_structured_output_mode_for_model_provider(
+            model_name="deepseek_r1",
+            provider=ModelProviderName.openrouter,
+        )
+        assert result == StructuredOutputMode.json_instructions
+
+    @pytest.mark.parametrize(
+        "model_name,provider,expected_mode",
+        [
+            ("gpt_4o", ModelProviderName.openai, StructuredOutputMode.json_schema),
+            (
+                "claude_3_5_haiku",
+                ModelProviderName.anthropic,
+                StructuredOutputMode.function_calling,
+            ),
+            (
+                "gemini_2_5_pro",
+                ModelProviderName.gemini_api,
+                StructuredOutputMode.json_schema,
+            ),
+            ("llama_3_1_8b", ModelProviderName.groq, StructuredOutputMode.default),
+            (
+                "qwq_32b",
+                ModelProviderName.fireworks_ai,
+                StructuredOutputMode.json_instructions,
+            ),
+        ],
+    )
+    def test_parametrized_valid_combinations(self, model_name, provider, expected_mode):
+        """Test multiple valid model/provider combinations"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name=model_name,
+            provider=provider,
+        )
+        assert result == expected_mode
+
+    def test_model_with_single_provider(self):
+        """Test model that only has one provider"""
+        # Find a model with only one provider for this test
+        model = get_model_by_name(ModelName.gpt_4_1_nano)
+        assert len(model.providers) >= 1  # Verify it has providers
+
+        first_provider = model.providers[0]
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1_nano",
+            provider=first_provider.name,
+        )
+        assert result == first_provider.structured_output_mode
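This new test module pins down the behaviour of default_structured_output_mode_for_model_provider. A condensed usage sketch of the same function, using only the model and provider names exercised above:

from kiln_ai.adapters.ml_model_list import (
    default_structured_output_mode_for_model_provider,
)
from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode

# A known model/provider pair returns that provider's configured mode.
mode = default_structured_output_mode_for_model_provider(
    model_name="gpt_4_1",
    provider=ModelProviderName.openai,
)
assert mode == StructuredOutputMode.json_schema

# Unknown models, missing providers, or disallowed modes fall back to `default`.
fallback = default_structured_output_mode_for_model_provider(
    model_name="gpt_4_1",
    provider=ModelProviderName.openai,
    default=StructuredOutputMode.json_instructions,
    disallowed_modes=[StructuredOutputMode.json_schema],
)
assert fallback == StructuredOutputMode.json_instructions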
kiln_ai/adapters/test_prompt_adaptors.py (+17 -3)

@@ -18,6 +18,7 @@ from kiln_ai.adapters.prompt_builders import (
     SimpleChainOfThoughtPromptBuilder,
 )
 from kiln_ai.datamodel import PromptId
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 def get_all_models_and_providers():

@@ -124,8 +125,12 @@ async def test_mock_returning_run(tmp_path):
 
     adapter = LiteLlmAdapter(
         config=LiteLlmConfig(
-
-
+            run_config_properties=RunConfigProperties(
+                model_name="custom_model",
+                model_provider_name="ollama",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
             base_url="http://localhost:11434",
             additional_body_options={"api_key": "test_key"},
         ),

@@ -145,6 +150,9 @@ async def test_mock_returning_run(tmp_path):
         "model_name": "custom_model",
         "model_provider": "ollama",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "json_schema",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }
 
 

@@ -212,7 +220,13 @@ async def run_simple_task(
     prompt_id: PromptId | None = None,
 ) -> datamodel.TaskRun:
     adapter = adapter_for_task(
-        task,
+        task,
+        RunConfigProperties(
+            structured_output_mode="json_schema",
+            model_name=model_name,
+            model_provider_name=provider,
+            prompt_id=prompt_id or "simple_prompt_builder",
+        ),
    )
 
     run = await adapter.invoke(
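LiteLlmConfig follows the same pattern: the model settings move into a nested RunConfigProperties. A sketch of the updated config construction, mirroring the test above; the LiteLlmConfig import path is assumed from the litellm_config.py module in the file list and is not shown in this hunk:

from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig  # assumed import path
from kiln_ai.datamodel.task import RunConfigProperties

config = LiteLlmConfig(
    run_config_properties=RunConfigProperties(
        model_name="custom_model",
        model_provider_name="ollama",
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",
    ),
    base_url="http://localhost:11434",
    additional_body_options={"api_key": "test_key"},
)
# The test then passes this config to LiteLlmAdapter(config=config, ...).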
kiln_ai/adapters/test_prompt_builders.py (+27 -19)

@@ -3,7 +3,7 @@ import logging
 
 import pytest
 
-from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
 from kiln_ai.adapters.model_adapters.test_structured_output import (
     build_structured_output_test_task,
 )

@@ -15,6 +15,7 @@ from kiln_ai.adapters.prompt_builders import (
     MultiShotPromptBuilder,
     RepairsPromptBuilder,
     SavedPromptBuilder,
+    ShortPromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
     TaskRunConfigPromptBuilder,

@@ -26,14 +27,15 @@ from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
     Finetune,
-    FinetuneDataStrategy,
     Project,
     Prompt,
     Task,
     TaskOutput,
     TaskOutputRating,
     TaskRun,
+    Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig
 
 logger = logging.getLogger(__name__)

@@ -52,15 +54,31 @@ def test_simple_prompt_builder(tmp_path):
     assert "1) " + task.requirements[0].instruction in prompt
     assert "2) " + task.requirements[1].instruction in prompt
     assert "3) " + task.requirements[2].instruction in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt
 
 
+def test_short_prompt_builder(tmp_path):
+    task = build_test_task(tmp_path)
+    builder = ShortPromptBuilder(task=task)
+    prompt = builder.build_prompt(include_json_instructions=False)
+
+    # Should only include the instruction, not requirements
+    assert task.instruction == prompt
+    assert task.requirements[0].instruction not in prompt
+    assert task.requirements[1].instruction not in prompt
+    assert task.requirements[2].instruction not in prompt
+
+    # Should handle JSON instructions correctly
+    prompt_with_json = builder.build_prompt(include_json_instructions=True)
+    assert task.instruction in prompt_with_json
+    if task.output_schema():
+        assert "# Format Instructions" in prompt_with_json
+        assert task.output_schema() in prompt_with_json
+
+
 class MockAdapter(BaseAdapter):
-    def _run(self, input: str) ->
-        return "mock response"
+    async def _run(self, input: str) -> tuple[RunOutput, Usage | None]:
+        return RunOutput(output="mock response", intermediate_outputs=None), None
 
     def adapter_name(self) -> str:
         return "mock_adapter"

@@ -72,20 +90,9 @@ def test_simple_prompt_builder_structured_output(tmp_path):
     input = "Cows"
     prompt = builder.build_prompt(include_json_instructions=False)
     assert "You are an assistant which tells a joke, given a subject." in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt
 
 
-def test_simple_prompt_builder_structured_input_non_ascii(tmp_path):
-    task = build_structured_output_test_task(tmp_path)
-    builder = SimplePromptBuilder(task=task)
-    input = {"key": "你好👋"}
-    user_msg = builder.build_user_message(input)
-    assert "你好👋" in user_msg
-
-
 @pytest.fixture
 def task_with_examples(tmp_path):
     # Create a project and task hierarchy

@@ -383,7 +390,7 @@ def test_prompt_builder_from_id(task_with_examples):
         base_model_id="test_base_model_id",
         dataset_split_id="asdf",
         provider="test_provider",
-        data_strategy=
+        data_strategy=ChatStrategy.two_message_cot,
     )
     finetune.save_to_file()
     nested_fine_tune_id = (

@@ -598,6 +605,7 @@ def test_task_run_config_prompt_builder(tmp_path):
             model_name="gpt-4",
             model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
         prompt=Prompt(
             name="test prompt name",