kiln-ai 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +22 -44
- kiln_ai/adapters/chat/__init__.py +8 -0
- kiln_ai/adapters/chat/chat_formatter.py +233 -0
- kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
- kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
- kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
- kiln_ai/adapters/data_gen/test_data_gen_task.py +330 -40
- kiln_ai/adapters/eval/base_eval.py +7 -6
- kiln_ai/adapters/eval/eval_runner.py +9 -2
- kiln_ai/adapters/eval/g_eval.py +40 -17
- kiln_ai/adapters/eval/test_base_eval.py +174 -17
- kiln_ai/adapters/eval/test_eval_runner.py +3 -0
- kiln_ai/adapters/eval/test_g_eval.py +116 -5
- kiln_ai/adapters/fine_tune/base_finetune.py +3 -8
- kiln_ai/adapters/fine_tune/dataset_formatter.py +135 -273
- kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +6 -11
- kiln_ai/adapters/fine_tune/together_finetune.py +13 -2
- kiln_ai/adapters/ml_model_list.py +370 -84
- kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
- kiln_ai/adapters/model_adapters/litellm_adapter.py +88 -97
- kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
- kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -61
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +104 -21
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
- kiln_ai/adapters/parsers/parser_registry.py +0 -2
- kiln_ai/adapters/parsers/r1_parser.py +0 -1
- kiln_ai/adapters/prompt_builders.py +0 -16
- kiln_ai/adapters/provider_tools.py +27 -9
- kiln_ai/adapters/remote_config.py +66 -0
- kiln_ai/adapters/repair/repair_task.py +1 -6
- kiln_ai/adapters/repair/test_repair_task.py +24 -3
- kiln_ai/adapters/test_adapter_registry.py +88 -28
- kiln_ai/adapters/test_ml_model_list.py +176 -0
- kiln_ai/adapters/test_prompt_adaptors.py +17 -7
- kiln_ai/adapters/test_prompt_builders.py +3 -16
- kiln_ai/adapters/test_provider_tools.py +69 -20
- kiln_ai/adapters/test_remote_config.py +100 -0
- kiln_ai/datamodel/__init__.py +0 -2
- kiln_ai/datamodel/datamodel_enums.py +38 -13
- kiln_ai/datamodel/eval.py +32 -0
- kiln_ai/datamodel/finetune.py +12 -8
- kiln_ai/datamodel/task.py +68 -7
- kiln_ai/datamodel/task_output.py +0 -2
- kiln_ai/datamodel/task_run.py +0 -2
- kiln_ai/datamodel/test_basemodel.py +2 -1
- kiln_ai/datamodel/test_dataset_split.py +0 -8
- kiln_ai/datamodel/test_eval_model.py +146 -4
- kiln_ai/datamodel/test_models.py +33 -10
- kiln_ai/datamodel/test_task.py +168 -2
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/dataset_import.py +1 -1
- kiln_ai/utils/logging.py +166 -0
- kiln_ai/utils/test_config.py +23 -0
- kiln_ai/utils/test_dataset_import.py +30 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
- kiln_ai-0.18.0.dist-info/RECORD +115 -0
- kiln_ai-0.16.0.dist-info/RECORD +0 -108
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -4,9 +4,9 @@ import pytest
 
 from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
 from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
-from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
 from kiln_ai.datamodel import Task
-from kiln_ai.datamodel.
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
+from kiln_ai.datamodel.task import RunConfig, RunConfigProperties
 
 
 class MockAdapter(BaseAdapter):
@@ -37,8 +37,9 @@ def adapter(base_task):
         run_config=RunConfig(
             task=base_task,
             model_name="test_model",
-            model_provider_name="
+            model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
 
@@ -88,7 +89,7 @@ async def test_model_provider_loads_and_caches(adapter, mock_provider):
     # First call should load and cache
     provider1 = adapter.model_provider()
     assert provider1 == mock_provider
-    mock_loader.assert_called_once_with("test_model", "
+    mock_loader.assert_called_once_with("test_model", "openai")
 
     # Second call should use cache
     mock_loader.reset_mock()
@@ -97,29 +98,30 @@ async def test_model_provider_loads_and_caches(adapter, mock_provider):
     mock_loader.assert_not_called()
 
 
-async def
+async def test_model_provider_invalid_provider_model_name(base_task):
+    """Test error when model or provider name is missing"""
+    # Test with missing model name
+    with pytest.raises(ValueError, match="Input should be"):
+        adapter = MockAdapter(
+            run_config=RunConfig(
+                task=base_task,
+                model_name="test_model",
+                model_provider_name="invalid",
+                prompt_id="simple_prompt_builder",
+            ),
+        )
+
+
+async def test_model_provider_missing_model_names(base_task):
     """Test error when model or provider name is missing"""
     # Test with missing model name
     adapter = MockAdapter(
         run_config=RunConfig(
             task=base_task,
             model_name="",
-            model_provider_name="",
-            prompt_id="simple_prompt_builder",
-        ),
-    )
-    with pytest.raises(
-        ValueError, match="model_name and model_provider_name must be provided"
-    ):
-        await adapter.model_provider()
-
-    # Test with missing provider name
-    adapter = MockAdapter(
-        run_config=RunConfig(
-            task=base_task,
-            model_name="test_model",
-            model_provider_name="",
+            model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
     with pytest.raises(
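The fixture and the new tests above pin down two RunConfig changes in this release: structured_output_mode now travels with the run config, and an unrecognized model_provider_name is rejected at construction time with pydantic's "Input should be ..." error. A minimal sketch of both, assuming a `base_task` Task instance like the test fixture's (the MockAdapter in the tests is test-local, so this constructs the RunConfig directly):

```python
import pytest

from kiln_ai.datamodel.task import RunConfig


def demo_run_config(base_task):
    # structured_output_mode is now part of the run configuration.
    RunConfig(
        task=base_task,
        model_name="test_model",
        model_provider_name="openai",
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",
    )

    # An unknown provider name fails enum validation, which is the
    # "Input should be" ValueError the test above expects.
    with pytest.raises(ValueError, match="Input should be"):
        RunConfig(
            task=base_task,
            model_name="test_model",
            model_provider_name="no_such_provider",
            prompt_id="simple_prompt_builder",
            structured_output_mode="json_schema",
        )
```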
@@ -138,7 +140,7 @@ async def test_model_provider_not_found(adapter):
 
     with pytest.raises(
         ValueError,
-        match="
+        match="not found for model test_model",
     ):
         await adapter.model_provider()
 
@@ -168,11 +170,7 @@ async def test_prompt_builder_json_instructions(
     adapter.prompt_builder = mock_prompt_builder
     adapter.model_provider_name = "openai"
     adapter.has_structured_output = MagicMock(return_value=output_schema)
-
-    # provider mock
-    provider = MagicMock()
-    provider.structured_output_mode = structured_output_mode
-    adapter.model_provider = MagicMock(return_value=provider)
+    adapter.run_config.structured_output_mode = structured_output_mode
 
     # Test
     adapter.build_prompt()
@@ -181,41 +179,6 @@ async def test_prompt_builder_json_instructions(
     )
 
 
-@pytest.mark.parametrize(
-    "cot_prompt,has_structured_output,reasoning_capable,expected",
-    [
-        # COT and normal LLM
-        ("think carefully", False, False, ("cot_two_call", "think carefully")),
-        # Structured output with thinking-capable LLM
-        ("think carefully", True, True, ("cot_as_message", "think carefully")),
-        # Structured output with normal LLM
-        ("think carefully", True, False, ("cot_two_call", "think carefully")),
-        # Basic cases - no COT
-        (None, True, True, ("basic", None)),
-        (None, False, False, ("basic", None)),
-        (None, True, False, ("basic", None)),
-        (None, False, True, ("basic", None)),
-        # Edge case - COT prompt exists but structured output is False and reasoning_capable is True
-        ("think carefully", False, True, ("cot_as_message", "think carefully")),
-    ],
-)
-async def test_run_strategy(
-    adapter, cot_prompt, has_structured_output, reasoning_capable, expected
-):
-    """Test that run_strategy returns correct strategy based on conditions"""
-    # Mock dependencies
-    adapter.prompt_builder.chain_of_thought_prompt = MagicMock(return_value=cot_prompt)
-    adapter.has_structured_output = MagicMock(return_value=has_structured_output)
-
-    provider = MagicMock()
-    provider.reasoning_capable = reasoning_capable
-    adapter.model_provider = MagicMock(return_value=provider)
-
-    # Test
-    result = adapter.run_strategy()
-    assert result == expected
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "formatter_id,expected_input,expected_calls",
@@ -269,3 +232,214 @@ async def test_input_formatting(
     # Verify original input was preserved in the run
     if formatter_id:
         mock_formatter.format_input.assert_called_once_with(original_input)
+
+
+async def test_properties_for_task_output_includes_all_run_config_properties(adapter):
+    """Test that all properties from RunConfigProperties are saved in task output properties"""
+    # Get all field names from RunConfigProperties
+    run_config_properties_fields = set(RunConfigProperties.model_fields.keys())
+
+    # Get the properties saved by the adapter
+    saved_properties = adapter._properties_for_task_output()
+    saved_property_keys = set(saved_properties.keys())
+
+    # Check which RunConfigProperties fields are missing from saved properties
+    # Note: model_provider_name becomes model_provider in saved properties
+    expected_mappings = {
+        "model_name": "model_name",
+        "model_provider_name": "model_provider",
+        "prompt_id": "prompt_id",
+        "temperature": "temperature",
+        "top_p": "top_p",
+        "structured_output_mode": "structured_output_mode",
+    }
+
+    missing_properties = []
+    for field_name in run_config_properties_fields:
+        expected_key = expected_mappings.get(field_name, field_name)
+        if expected_key not in saved_property_keys:
+            missing_properties.append(
+                f"RunConfigProperties.{field_name} -> {expected_key}"
+            )
+
+    assert not missing_properties, (
+        f"The following RunConfigProperties fields are not saved by _properties_for_task_output: {missing_properties}. Please update the method to include them."
+    )
+
+
+async def test_properties_for_task_output_catches_missing_new_property(adapter):
+    """Test that demonstrates our test will catch when new properties are added to RunConfigProperties but not to _properties_for_task_output"""
+    # Simulate what happens if a new property was added to RunConfigProperties
+    # We'll mock the model_fields to include a fake new property
+    original_fields = RunConfigProperties.model_fields.copy()
+
+    # Create a mock field to simulate a new property being added
+    from pydantic.fields import FieldInfo
+
+    mock_field = FieldInfo(annotation=str, default="default_value")
+
+    try:
+        # Add a fake new field to simulate someone adding a property
+        RunConfigProperties.model_fields["new_fake_property"] = mock_field
+
+        # Get all field names from RunConfigProperties (now includes our fake property)
+        run_config_properties_fields = set(RunConfigProperties.model_fields.keys())
+
+        # Get the properties saved by the adapter (won't include our fake property)
+        saved_properties = adapter._properties_for_task_output()
+        saved_property_keys = set(saved_properties.keys())
+
+        # The mappings don't include our fake property
+        expected_mappings = {
+            "model_name": "model_name",
+            "model_provider_name": "model_provider",
+            "prompt_id": "prompt_id",
+            "temperature": "temperature",
+            "top_p": "top_p",
+            "structured_output_mode": "structured_output_mode",
+        }
+
+        missing_properties = []
+        for field_name in run_config_properties_fields:
+            expected_key = expected_mappings.get(field_name, field_name)
+            if expected_key not in saved_property_keys:
+                missing_properties.append(
+                    f"RunConfigProperties.{field_name} -> {expected_key}"
+                )
+
+        # This should find our missing fake property
+        assert missing_properties == [
+            "RunConfigProperties.new_fake_property -> new_fake_property"
+        ], f"Expected to find missing fake property, but got: {missing_properties}"
+
+    finally:
+        # Restore the original fields
+        RunConfigProperties.model_fields.clear()
+        RunConfigProperties.model_fields.update(original_fields)
+
+
+@pytest.mark.parametrize(
+    "cot_prompt,tuned_strategy,reasoning_capable,expected_formatter_class",
+    [
+        # No COT prompt -> always single turn
+        (None, None, False, "SingleTurnFormatter"),
+        (None, ChatStrategy.two_message_cot, False, "SingleTurnFormatter"),
+        (None, ChatStrategy.single_turn_r1_thinking, True, "SingleTurnFormatter"),
+        # With COT prompt:
+        # - Tuned strategy takes precedence (except single turn)
+        (
+            "think step by step",
+            ChatStrategy.two_message_cot,
+            False,
+            "TwoMessageCotFormatter",
+        ),
+        (
+            "think step by step",
+            ChatStrategy.single_turn_r1_thinking,
+            False,
+            "SingleTurnR1ThinkingFormatter",
+        ),
+        # - Tuned single turn is ignored when COT exists
+        (
+            "think step by step",
+            ChatStrategy.single_turn,
+            True,
+            "SingleTurnR1ThinkingFormatter",
+        ),
+        # - Reasoning capable -> single turn R1 thinking
+        ("think step by step", None, True, "SingleTurnR1ThinkingFormatter"),
+        # - Not reasoning capable -> two message COT
+        ("think step by step", None, False, "TwoMessageCotFormatter"),
+    ],
+)
+def test_build_chat_formatter(
+    adapter,
+    cot_prompt,
+    tuned_strategy,
+    reasoning_capable,
+    expected_formatter_class,
+):
+    """Test chat formatter strategy selection based on COT prompt, tuned strategy, and model capabilities"""
+    # Mock the prompt builder
+    mock_prompt_builder = MagicMock()
+    mock_prompt_builder.chain_of_thought_prompt.return_value = cot_prompt
+    mock_prompt_builder.build_prompt.return_value = "system message"
+    adapter.prompt_builder = mock_prompt_builder
+
+    # Mock the model provider
+    mock_provider = MagicMock()
+    mock_provider.tuned_chat_strategy = tuned_strategy
+    mock_provider.reasoning_capable = reasoning_capable
+    adapter.model_provider = MagicMock(return_value=mock_provider)
+
+    # Get the formatter
+    formatter = adapter.build_chat_formatter("test input")
+
+    # Verify the formatter type
+    assert formatter.__class__.__name__ == expected_formatter_class
+
+    # Verify the formatter was created with correct parameters
+    assert formatter.system_message == "system message"
+    assert formatter.user_input == "test input"
+    # Only check thinking_instructions for formatters that use it
+    if expected_formatter_class == "TwoMessageCotFormatter":
+        if cot_prompt:
+            assert formatter.thinking_instructions == cot_prompt
+        else:
+            assert formatter.thinking_instructions is None
+    # For other formatters, don't assert thinking_instructions
+
+    # Verify prompt builder was called correctly
+    mock_prompt_builder.build_prompt.assert_called_once()
+    mock_prompt_builder.chain_of_thought_prompt.assert_called_once()
+
+
+@pytest.mark.parametrize(
+    "initial_mode,expected_mode",
+    [
+        (
+            StructuredOutputMode.json_schema,
+            StructuredOutputMode.json_schema,
+        ),  # Should not change
+        (
+            StructuredOutputMode.unknown,
+            StructuredOutputMode.json_mode,
+        ),  # Should update to default
+    ],
+)
+async def test_update_run_config_unknown_structured_output_mode(
    base_task, initial_mode, expected_mode
+):
+    """Test that unknown structured output mode is updated to the default for the model provider"""
+    # Create a run config with the initial mode
+    run_config = RunConfig(
+        task=base_task,
+        model_name="test_model",
+        model_provider_name="openai",
+        prompt_id="simple_prompt_builder",
+        structured_output_mode=initial_mode,
+        temperature=0.7,  # Add some other properties to verify they're preserved
+        top_p=0.9,
+    )
+
+    # Mock the default mode lookup
+    with patch(
+        "kiln_ai.adapters.model_adapters.base_adapter.default_structured_output_mode_for_model_provider"
+    ) as mock_default:
+        mock_default.return_value = StructuredOutputMode.json_mode
+
+        # Create the adapter
+        adapter = MockAdapter(run_config=run_config)
+
+        # Verify the mode was updated correctly
+        assert adapter.run_config.structured_output_mode == expected_mode
+
+        # Verify other properties were preserved
+        assert adapter.run_config.temperature == 0.7
+        assert adapter.run_config.top_p == 0.9
+
+        # Verify the default mode lookup was only called when needed
+        if initial_mode == StructuredOutputMode.unknown:
+            mock_default.assert_called_once_with("test_model", "openai")
+        else:
+            mock_default.assert_not_called()
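The test_build_chat_formatter table above is the most compact description of the new chat-strategy selection. A self-contained sketch of that decision logic; the ChatStrategy member names and formatter class names are taken from the test, while the enum string values and the function itself are illustrative, not the library implementation:

```python
from enum import Enum
from typing import Optional


class ChatStrategy(str, Enum):
    # Member names mirror the test; the string values here are placeholders.
    single_turn = "single_turn"
    two_message_cot = "two_message_cot"
    single_turn_r1_thinking = "single_turn_r1_thinking"


def pick_formatter(
    cot_prompt: Optional[str],
    tuned_strategy: Optional[ChatStrategy],
    reasoning_capable: bool,
) -> str:
    # No chain-of-thought prompt: always a plain single-turn chat.
    if cot_prompt is None:
        return "SingleTurnFormatter"
    # A tuned strategy takes precedence, unless it is plain single turn.
    if tuned_strategy == ChatStrategy.two_message_cot:
        return "TwoMessageCotFormatter"
    if tuned_strategy == ChatStrategy.single_turn_r1_thinking:
        return "SingleTurnR1ThinkingFormatter"
    # Otherwise fall back to model capability: reasoning-capable models get the
    # R1-style single turn, everything else gets the two-message COT flow.
    if reasoning_capable:
        return "SingleTurnR1ThinkingFormatter"
    return "TwoMessageCotFormatter"


# Spot checks against three rows of the table above:
assert pick_formatter(None, ChatStrategy.two_message_cot, False) == "SingleTurnFormatter"
assert pick_formatter("think step by step", ChatStrategy.single_turn, True) == (
    "SingleTurnR1ThinkingFormatter"
)
assert pick_formatter("think step by step", None, False) == "TwoMessageCotFormatter"
```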
@@ -11,6 +11,7 @@ from kiln_ai.adapters.model_adapters.litellm_config import (
     LiteLlmConfig,
 )
 from kiln_ai.datamodel import Project, Task, Usage
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 @pytest.fixture
@@ -41,8 +42,12 @@ def mock_task(tmp_path):
 def config():
     return LiteLlmConfig(
         base_url="https://api.test.com",
-
-
+        run_config_properties=RunConfigProperties(
+            model_name="test-model",
+            model_provider_name="openrouter",
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
         default_headers={"X-Test": "test"},
         additional_body_options={"api_key": "test_key"},
     )
@@ -52,7 +57,6 @@ def test_initialization(config, mock_task):
     adapter = LiteLlmAdapter(
         config=config,
         kiln_task=mock_task,
-        prompt_id="simple_prompt_builder",
         base_adapter_config=AdapterConfig(default_tags=["test-tag"]),
     )
 
@@ -60,8 +64,11 @@ def test_initialization(config, mock_task):
     assert adapter.run_config.task == mock_task
     assert adapter.run_config.prompt_id == "simple_prompt_builder"
     assert adapter.base_adapter_config.default_tags == ["test-tag"]
-    assert adapter.run_config.model_name == config.model_name
-    assert
+    assert adapter.run_config.model_name == config.run_config_properties.model_name
+    assert (
+        adapter.run_config.model_provider_name
+        == config.run_config_properties.model_provider_name
+    )
     assert adapter.config.additional_body_options["api_key"] == "test_key"
     assert adapter._api_base == config.base_url
     assert adapter._headers == config.default_headers
@@ -72,8 +79,11 @@ def test_adapter_info(config, mock_task):
 
     assert adapter.adapter_name() == "kiln_openai_compatible_adapter"
 
-    assert adapter.run_config.model_name == config.model_name
-    assert
+    assert adapter.run_config.model_name == config.run_config_properties.model_name
+    assert (
+        adapter.run_config.model_provider_name
+        == config.run_config_properties.model_provider_name
+    )
     assert adapter.run_config.prompt_id == "simple_prompt_builder"
 
 
@@ -96,14 +106,12 @@ async def test_response_format_options_unstructured(config, mock_task):
 )
 @pytest.mark.asyncio
 async def test_response_format_options_json_mode(config, mock_task, mode):
+    config.run_config_properties.structured_output_mode = mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = mode
-
         options = await adapter.response_format_options()
         assert options == {"response_format": {"type": "json_object"}}
 
@@ -117,14 +125,12 @@ async def test_response_format_options_json_mode(config, mock_task, mode):
 )
 @pytest.mark.asyncio
 async def test_response_format_options_function_calling(config, mock_task, mode):
+    config.run_config_properties.structured_output_mode = mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = mode
-
         options = await adapter.response_format_options()
         assert "tools" in options
         # full tool structure validated below
@@ -139,30 +145,26 @@ async def test_response_format_options_function_calling(config, mock_task, mode)
 )
 @pytest.mark.asyncio
 async def test_response_format_options_json_instructions(config, mock_task, mode):
+    config.run_config_properties.structured_output_mode = mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = (
-            StructuredOutputMode.json_instructions
-        )
         options = await adapter.response_format_options()
         assert options == {}
 
 
 @pytest.mark.asyncio
 async def test_response_format_options_json_schema(config, mock_task):
+    config.run_config_properties.structured_output_mode = (
+        StructuredOutputMode.json_schema
+    )
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = (
-            StructuredOutputMode.json_schema
-        )
         options = await adapter.response_format_options()
         assert options == {
             "response_format": {
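The response_format_options tests above now drive the adapter from config.run_config_properties.structured_output_mode instead of a model-provider lookup. A sketch of the mode-to-request-options mapping the assertions pin down; the enum is redefined locally, the json_schema payload is abbreviated, and the function-calling modes (which add a "tools" entry whose full structure is validated elsewhere in the test file) are left as a comment:

```python
from enum import Enum
from typing import Any, Dict


class StructuredOutputMode(str, Enum):
    json_schema = "json_schema"
    json_mode = "json_mode"
    json_instructions = "json_instructions"


def response_format_options(
    mode: StructuredOutputMode, output_schema: Dict[str, Any]
) -> Dict[str, Any]:
    if mode == StructuredOutputMode.json_mode:
        # JSON mode: ask the API for any syntactically valid JSON object.
        return {"response_format": {"type": "json_object"}}
    if mode == StructuredOutputMode.json_instructions:
        # The schema is injected into the prompt instead, so no request options.
        return {}
    if mode == StructuredOutputMode.json_schema:
        # Strict schema mode; the adapter's real payload nests more metadata.
        return {
            "response_format": {
                "type": "json_schema",
                "json_schema": {"schema": output_schema},
            }
        }
    # Function-calling modes return a {"tools": [...]} entry instead.
    return {}
```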
@@ -350,6 +352,69 @@ def test_litellm_model_id_unknown_provider(config, mock_task):
         adapter.litellm_model_id()
 
 
+@pytest.mark.parametrize(
+    "provider_name,expected_usage_param",
+    [
+        (ModelProviderName.openrouter, {"usage": {"include": True}}),
+        (ModelProviderName.openai, {}),
+        (ModelProviderName.anthropic, {}),
+        (ModelProviderName.groq, {}),
+    ],
+)
+def test_build_extra_body_openrouter_usage(
+    config, mock_task, provider_name, expected_usage_param
+):
+    """Test build_extra_body includes usage parameter for OpenRouter providers"""
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+
+    # Create a mock provider with the specified name and minimal required attributes
+    mock_provider = Mock()
+    mock_provider.name = provider_name
+    mock_provider.thinking_level = None
+    mock_provider.require_openrouter_reasoning = False
+    mock_provider.anthropic_extended_thinking = False
+    mock_provider.r1_openrouter_options = False
+    mock_provider.logprobs_openrouter_options = False
+    mock_provider.openrouter_skip_required_parameters = False
+
+    # Call build_extra_body
+    extra_body = adapter.build_extra_body(mock_provider)
+
+    # Verify the usage parameter is included only for OpenRouter
+    for key, value in expected_usage_param.items():
+        assert extra_body.get(key) == value
+
+    # Verify non-OpenRouter providers don't have the usage parameter
+    if provider_name != ModelProviderName.openrouter:
+        assert "usage" not in extra_body
+
+
+@pytest.mark.asyncio
+async def test_build_completion_kwargs_custom_temperature_top_p(config, mock_task):
+    """Test build_completion_kwargs with custom temperature and top_p values"""
+    # Create config with custom temperature and top_p
+    config.run_config_properties.temperature = 0.7
+    config.run_config_properties.top_p = 0.9
+
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+    mock_provider = Mock()
+    messages = [{"role": "user", "content": "Hello"}]
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch.object(adapter, "litellm_model_id", return_value="openai/test-model"),
+        patch.object(adapter, "build_extra_body", return_value={}),
+        patch.object(adapter, "response_format_options", return_value={}),
+    ):
+        kwargs = await adapter.build_completion_kwargs(mock_provider, messages, None)
+
+    # Verify custom temperature and top_p are passed through
+    assert kwargs["temperature"] == 0.7
+    assert kwargs["top_p"] == 0.9
+    # Verify drop_params is set correctly
+    assert kwargs["drop_params"] is True
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "top_logprobs,response_format,extra_body",
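Two behaviours the new tests above lock in: OpenRouter requests opt into usage/cost reporting through the extra body, and the completion kwargs now carry the run config's temperature and top_p (1.0 defaults from RunConfigProperties) plus drop_params=True. A reduced sketch of just those shapes; the function names are illustrative and the provider check is simplified to a string compare:

```python
from typing import Any, Dict, List


def extra_body_for(provider_name: str) -> Dict[str, Any]:
    extra: Dict[str, Any] = {}
    if provider_name == "openrouter":
        # Ask OpenRouter to report token usage and cost on the response.
        extra["usage"] = {"include": True}
    return extra


def completion_kwargs(
    model_id: str,
    messages: List[Dict[str, str]],
    temperature: float = 1.0,
    top_p: float = 1.0,
) -> Dict[str, Any]:
    return {
        "model": model_id,
        "messages": messages,
        "temperature": temperature,  # default 1.0 via RunConfigProperties
        "top_p": top_p,  # default 1.0 via RunConfigProperties
        "drop_params": True,  # let litellm drop params a provider does not support
    }
```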
@@ -391,6 +456,13 @@ async def test_build_completion_kwargs(
     assert kwargs["messages"] == messages
     assert kwargs["api_base"] == config.base_url
 
+    # Verify temperature and top_p are included with default values
+    assert kwargs["temperature"] == 1.0  # Default from RunConfigProperties
+    assert kwargs["top_p"] == 1.0  # Default from RunConfigProperties
+
+    # Verify drop_params is set correctly
+    assert kwargs["drop_params"] is True
+
     # Verify optional parameters
     if top_logprobs is not None:
         assert kwargs["logprobs"] is True
@@ -439,6 +511,17 @@ async def test_build_completion_kwargs(
         ({"prompt_tokens": 10}, None, None),
         # Invalid cost type (should be ignored)
         (None, "0.5", None),
+        # Cost in OpenRouter format
+        (
+            litellm.types.utils.Usage(
+                prompt_tokens=10,
+                completion_tokens=20,
+                total_tokens=30,
+                cost=0.5,
+            ),
+            None,
+            Usage(input_tokens=10, output_tokens=20, total_tokens=30, cost=0.5),
+        ),
     ],
 )
 def test_usage_from_response(config, mock_task, litellm_usage, cost, expected_usage):
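The new parametrize case above pins down how an OpenRouter-style litellm usage object (token counts plus a cost field) maps onto Kiln's Usage model. A sketch of that conversion using a stand-in dataclass; the real adapter returns kiln_ai.datamodel.Usage:

```python
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class KilnUsage:  # stand-in for kiln_ai.datamodel.Usage
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    cost: Optional[float] = None


def usage_from_litellm(litellm_usage: Any) -> KilnUsage:
    return KilnUsage(
        input_tokens=getattr(litellm_usage, "prompt_tokens", None),
        output_tokens=getattr(litellm_usage, "completion_tokens", None),
        total_tokens=getattr(litellm_usage, "total_tokens", None),
        # OpenRouter (via litellm) can attach the response cost directly.
        cost=getattr(litellm_usage, "cost", None),
    )
```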
@@ -46,6 +46,7 @@ def adapter(test_task):
             model_name="phi_3_5",
             model_provider_name="ollama",
             prompt_id="simple_chain_of_thought_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
 
@@ -102,6 +103,9 @@ def test_save_run_isolation(test_task, adapter):
         reloaded_output.source.properties["prompt_id"]
         == "simple_chain_of_thought_prompt_builder"
     )
+    assert reloaded_output.source.properties["structured_output_mode"] == "json_schema"
+    assert reloaded_output.source.properties["temperature"] == 1.0
+    assert reloaded_output.source.properties["top_p"] == 1.0
     # Run again, with same input and different output. Should create a new TaskRun.
     different_run_output = RunOutput(
         output="Different output", intermediate_outputs=None
@@ -228,3 +232,40 @@ async def test_autosave_true(test_task, adapter):
         output.source.properties["prompt_id"]
         == "simple_chain_of_thought_prompt_builder"
     )
+    assert output.source.properties["structured_output_mode"] == "json_schema"
+    assert output.source.properties["temperature"] == 1.0
+    assert output.source.properties["top_p"] == 1.0
+
+
+def test_properties_for_task_output_custom_values(test_task):
+    """Test that _properties_for_task_output includes custom temperature, top_p, and structured_output_mode"""
+    adapter = MockAdapter(
+        run_config=RunConfig(
+            task=test_task,
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id="simple_prompt_builder",
+            temperature=0.7,
+            top_p=0.9,
+            structured_output_mode="json_schema",
+        ),
+    )
+
+    input_data = "Test input"
+    output_data = "Test output"
+    run_output = RunOutput(output=output_data, intermediate_outputs=None)
+
+    task_run = adapter.generate_run(
+        input=input_data, input_source=None, run_output=run_output
+    )
+    task_run.save_to_file()
+
+    # Verify custom values are preserved in properties
+    output = task_run.output
+    assert output.source.properties["adapter_name"] == "mock_adapter"
+    assert output.source.properties["model_name"] == "gpt-4"
+    assert output.source.properties["model_provider"] == "openai"
+    assert output.source.properties["prompt_id"] == "simple_prompt_builder"
+    assert output.source.properties["structured_output_mode"] == "json_schema"
+    assert output.source.properties["temperature"] == 0.7
+    assert output.source.properties["top_p"] == 0.9
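Taken together, the assertions in these saving tests describe the property set every saved TaskOutput source now carries, with model_provider_name renamed to model_provider. A sketch of that mapping; the dict literal is illustrative, not the adapter's actual _properties_for_task_output implementation:

```python
def properties_for_task_output(run_config, adapter_name: str) -> dict:
    # One entry per RunConfigProperties field, as enforced by the
    # test_properties_for_task_output_* tests earlier in this diff.
    return {
        "adapter_name": adapter_name,
        "model_name": run_config.model_name,
        "model_provider": run_config.model_provider_name,  # note the rename
        "prompt_id": run_config.prompt_id,
        "structured_output_mode": run_config.structured_output_mode,
        "temperature": run_config.temperature,
        "top_p": run_config.top_p,
    }
```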