kiln-ai 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (66)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +233 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
  7. kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
  8. kiln_ai/adapters/data_gen/test_data_gen_task.py +330 -40
  9. kiln_ai/adapters/eval/base_eval.py +7 -6
  10. kiln_ai/adapters/eval/eval_runner.py +9 -2
  11. kiln_ai/adapters/eval/g_eval.py +40 -17
  12. kiln_ai/adapters/eval/test_base_eval.py +174 -17
  13. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  14. kiln_ai/adapters/eval/test_g_eval.py +116 -5
  15. kiln_ai/adapters/fine_tune/base_finetune.py +3 -8
  16. kiln_ai/adapters/fine_tune/dataset_formatter.py +135 -273
  17. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  18. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
  19. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  20. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  21. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  22. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +6 -11
  23. kiln_ai/adapters/fine_tune/together_finetune.py +13 -2
  24. kiln_ai/adapters/ml_model_list.py +370 -84
  25. kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
  26. kiln_ai/adapters/model_adapters/litellm_adapter.py +88 -97
  27. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  28. kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -61
  29. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +104 -21
  30. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
  31. kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
  32. kiln_ai/adapters/parsers/parser_registry.py +0 -2
  33. kiln_ai/adapters/parsers/r1_parser.py +0 -1
  34. kiln_ai/adapters/prompt_builders.py +0 -16
  35. kiln_ai/adapters/provider_tools.py +27 -9
  36. kiln_ai/adapters/remote_config.py +66 -0
  37. kiln_ai/adapters/repair/repair_task.py +1 -6
  38. kiln_ai/adapters/repair/test_repair_task.py +24 -3
  39. kiln_ai/adapters/test_adapter_registry.py +88 -28
  40. kiln_ai/adapters/test_ml_model_list.py +176 -0
  41. kiln_ai/adapters/test_prompt_adaptors.py +17 -7
  42. kiln_ai/adapters/test_prompt_builders.py +3 -16
  43. kiln_ai/adapters/test_provider_tools.py +69 -20
  44. kiln_ai/adapters/test_remote_config.py +100 -0
  45. kiln_ai/datamodel/__init__.py +0 -2
  46. kiln_ai/datamodel/datamodel_enums.py +38 -13
  47. kiln_ai/datamodel/eval.py +32 -0
  48. kiln_ai/datamodel/finetune.py +12 -8
  49. kiln_ai/datamodel/task.py +68 -7
  50. kiln_ai/datamodel/task_output.py +0 -2
  51. kiln_ai/datamodel/task_run.py +0 -2
  52. kiln_ai/datamodel/test_basemodel.py +2 -1
  53. kiln_ai/datamodel/test_dataset_split.py +0 -8
  54. kiln_ai/datamodel/test_eval_model.py +146 -4
  55. kiln_ai/datamodel/test_models.py +33 -10
  56. kiln_ai/datamodel/test_task.py +168 -2
  57. kiln_ai/utils/config.py +3 -2
  58. kiln_ai/utils/dataset_import.py +1 -1
  59. kiln_ai/utils/logging.py +166 -0
  60. kiln_ai/utils/test_config.py +23 -0
  61. kiln_ai/utils/test_dataset_import.py +30 -0
  62. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
  63. kiln_ai-0.18.0.dist-info/RECORD +115 -0
  64. kiln_ai-0.16.0.dist-info/RECORD +0 -108
  65. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
  66. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/adapters/model_adapters/test_base_adapter.py
@@ -4,9 +4,9 @@ import pytest
 
 from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
 from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
-from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
 from kiln_ai.datamodel import Task
-from kiln_ai.datamodel.task import RunConfig
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
+from kiln_ai.datamodel.task import RunConfig, RunConfigProperties
 
 
 class MockAdapter(BaseAdapter):
@@ -37,8 +37,9 @@ def adapter(base_task):
         run_config=RunConfig(
             task=base_task,
             model_name="test_model",
-            model_provider_name="test_provider",
+            model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
 
@@ -88,7 +89,7 @@ async def test_model_provider_loads_and_caches(adapter, mock_provider):
     # First call should load and cache
     provider1 = adapter.model_provider()
     assert provider1 == mock_provider
-    mock_loader.assert_called_once_with("test_model", "test_provider")
+    mock_loader.assert_called_once_with("test_model", "openai")
 
     # Second call should use cache
     mock_loader.reset_mock()
@@ -97,29 +98,30 @@ async def test_model_provider_loads_and_caches(adapter, mock_provider):
     mock_loader.assert_not_called()
 
 
-async def test_model_provider_missing_names(base_task):
+async def test_model_provider_invalid_provider_model_name(base_task):
+    """Test error when model or provider name is missing"""
+    # Test with missing model name
+    with pytest.raises(ValueError, match="Input should be"):
+        adapter = MockAdapter(
+            run_config=RunConfig(
+                task=base_task,
+                model_name="test_model",
+                model_provider_name="invalid",
+                prompt_id="simple_prompt_builder",
+            ),
+        )
+
+
+async def test_model_provider_missing_model_names(base_task):
     """Test error when model or provider name is missing"""
     # Test with missing model name
     adapter = MockAdapter(
         run_config=RunConfig(
             task=base_task,
             model_name="",
-            model_provider_name="",
-            prompt_id="simple_prompt_builder",
-        ),
-    )
-    with pytest.raises(
-        ValueError, match="model_name and model_provider_name must be provided"
-    ):
-        await adapter.model_provider()
-
-    # Test with missing provider name
-    adapter = MockAdapter(
-        run_config=RunConfig(
-            task=base_task,
-            model_name="test_model",
-            model_provider_name="",
+            model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
     with pytest.raises(
@@ -138,7 +140,7 @@ async def test_model_provider_not_found(adapter):
 
     with pytest.raises(
         ValueError,
-        match="model_provider_name test_provider not found for model test_model",
+        match="not found for model test_model",
     ):
         await adapter.model_provider()
 
@@ -168,11 +170,7 @@ async def test_prompt_builder_json_instructions(
     adapter.prompt_builder = mock_prompt_builder
     adapter.model_provider_name = "openai"
     adapter.has_structured_output = MagicMock(return_value=output_schema)
-
-    # provider mock
-    provider = MagicMock()
-    provider.structured_output_mode = structured_output_mode
-    adapter.model_provider = MagicMock(return_value=provider)
+    adapter.run_config.structured_output_mode = structured_output_mode
 
     # Test
     adapter.build_prompt()
@@ -181,41 +179,6 @@ async def test_prompt_builder_json_instructions(
     )
 
 
-@pytest.mark.parametrize(
-    "cot_prompt,has_structured_output,reasoning_capable,expected",
-    [
-        # COT and normal LLM
-        ("think carefully", False, False, ("cot_two_call", "think carefully")),
-        # Structured output with thinking-capable LLM
-        ("think carefully", True, True, ("cot_as_message", "think carefully")),
-        # Structured output with normal LLM
-        ("think carefully", True, False, ("cot_two_call", "think carefully")),
-        # Basic cases - no COT
-        (None, True, True, ("basic", None)),
-        (None, False, False, ("basic", None)),
-        (None, True, False, ("basic", None)),
-        (None, False, True, ("basic", None)),
-        # Edge case - COT prompt exists but structured output is False and reasoning_capable is True
-        ("think carefully", False, True, ("cot_as_message", "think carefully")),
-    ],
-)
-async def test_run_strategy(
-    adapter, cot_prompt, has_structured_output, reasoning_capable, expected
-):
-    """Test that run_strategy returns correct strategy based on conditions"""
-    # Mock dependencies
-    adapter.prompt_builder.chain_of_thought_prompt = MagicMock(return_value=cot_prompt)
-    adapter.has_structured_output = MagicMock(return_value=has_structured_output)
-
-    provider = MagicMock()
-    provider.reasoning_capable = reasoning_capable
-    adapter.model_provider = MagicMock(return_value=provider)
-
-    # Test
-    result = adapter.run_strategy()
-    assert result == expected
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "formatter_id,expected_input,expected_calls",
@@ -269,3 +232,214 @@ async def test_input_formatting(
     # Verify original input was preserved in the run
     if formatter_id:
         mock_formatter.format_input.assert_called_once_with(original_input)
+
+
+async def test_properties_for_task_output_includes_all_run_config_properties(adapter):
+    """Test that all properties from RunConfigProperties are saved in task output properties"""
+    # Get all field names from RunConfigProperties
+    run_config_properties_fields = set(RunConfigProperties.model_fields.keys())
+
+    # Get the properties saved by the adapter
+    saved_properties = adapter._properties_for_task_output()
+    saved_property_keys = set(saved_properties.keys())
+
+    # Check which RunConfigProperties fields are missing from saved properties
+    # Note: model_provider_name becomes model_provider in saved properties
+    expected_mappings = {
+        "model_name": "model_name",
+        "model_provider_name": "model_provider",
+        "prompt_id": "prompt_id",
+        "temperature": "temperature",
+        "top_p": "top_p",
+        "structured_output_mode": "structured_output_mode",
+    }
+
+    missing_properties = []
+    for field_name in run_config_properties_fields:
+        expected_key = expected_mappings.get(field_name, field_name)
+        if expected_key not in saved_property_keys:
+            missing_properties.append(
+                f"RunConfigProperties.{field_name} -> {expected_key}"
+            )
+
+    assert not missing_properties, (
+        f"The following RunConfigProperties fields are not saved by _properties_for_task_output: {missing_properties}. Please update the method to include them."
+    )
+
+
+async def test_properties_for_task_output_catches_missing_new_property(adapter):
+    """Test that demonstrates our test will catch when new properties are added to RunConfigProperties but not to _properties_for_task_output"""
+    # Simulate what happens if a new property was added to RunConfigProperties
+    # We'll mock the model_fields to include a fake new property
+    original_fields = RunConfigProperties.model_fields.copy()
+
+    # Create a mock field to simulate a new property being added
+    from pydantic.fields import FieldInfo
+
+    mock_field = FieldInfo(annotation=str, default="default_value")
+
+    try:
+        # Add a fake new field to simulate someone adding a property
+        RunConfigProperties.model_fields["new_fake_property"] = mock_field
+
+        # Get all field names from RunConfigProperties (now includes our fake property)
+        run_config_properties_fields = set(RunConfigProperties.model_fields.keys())
+
+        # Get the properties saved by the adapter (won't include our fake property)
+        saved_properties = adapter._properties_for_task_output()
+        saved_property_keys = set(saved_properties.keys())
+
+        # The mappings don't include our fake property
+        expected_mappings = {
+            "model_name": "model_name",
+            "model_provider_name": "model_provider",
+            "prompt_id": "prompt_id",
+            "temperature": "temperature",
+            "top_p": "top_p",
+            "structured_output_mode": "structured_output_mode",
+        }
+
+        missing_properties = []
+        for field_name in run_config_properties_fields:
+            expected_key = expected_mappings.get(field_name, field_name)
+            if expected_key not in saved_property_keys:
+                missing_properties.append(
+                    f"RunConfigProperties.{field_name} -> {expected_key}"
+                )
+
+        # This should find our missing fake property
+        assert missing_properties == [
+            "RunConfigProperties.new_fake_property -> new_fake_property"
+        ], f"Expected to find missing fake property, but got: {missing_properties}"
+
+    finally:
+        # Restore the original fields
+        RunConfigProperties.model_fields.clear()
+        RunConfigProperties.model_fields.update(original_fields)
+
+
+@pytest.mark.parametrize(
+    "cot_prompt,tuned_strategy,reasoning_capable,expected_formatter_class",
+    [
+        # No COT prompt -> always single turn
+        (None, None, False, "SingleTurnFormatter"),
+        (None, ChatStrategy.two_message_cot, False, "SingleTurnFormatter"),
+        (None, ChatStrategy.single_turn_r1_thinking, True, "SingleTurnFormatter"),
+        # With COT prompt:
+        # - Tuned strategy takes precedence (except single turn)
+        (
+            "think step by step",
+            ChatStrategy.two_message_cot,
+            False,
+            "TwoMessageCotFormatter",
+        ),
+        (
+            "think step by step",
+            ChatStrategy.single_turn_r1_thinking,
+            False,
+            "SingleTurnR1ThinkingFormatter",
+        ),
+        # - Tuned single turn is ignored when COT exists
+        (
+            "think step by step",
+            ChatStrategy.single_turn,
+            True,
+            "SingleTurnR1ThinkingFormatter",
+        ),
+        # - Reasoning capable -> single turn R1 thinking
+        ("think step by step", None, True, "SingleTurnR1ThinkingFormatter"),
+        # - Not reasoning capable -> two message COT
+        ("think step by step", None, False, "TwoMessageCotFormatter"),
+    ],
+)
+def test_build_chat_formatter(
+    adapter,
+    cot_prompt,
+    tuned_strategy,
+    reasoning_capable,
+    expected_formatter_class,
+):
+    """Test chat formatter strategy selection based on COT prompt, tuned strategy, and model capabilities"""
+    # Mock the prompt builder
+    mock_prompt_builder = MagicMock()
+    mock_prompt_builder.chain_of_thought_prompt.return_value = cot_prompt
+    mock_prompt_builder.build_prompt.return_value = "system message"
+    adapter.prompt_builder = mock_prompt_builder
+
+    # Mock the model provider
+    mock_provider = MagicMock()
+    mock_provider.tuned_chat_strategy = tuned_strategy
+    mock_provider.reasoning_capable = reasoning_capable
+    adapter.model_provider = MagicMock(return_value=mock_provider)
+
+    # Get the formatter
+    formatter = adapter.build_chat_formatter("test input")
+
+    # Verify the formatter type
+    assert formatter.__class__.__name__ == expected_formatter_class
+
+    # Verify the formatter was created with correct parameters
+    assert formatter.system_message == "system message"
+    assert formatter.user_input == "test input"
+    # Only check thinking_instructions for formatters that use it
+    if expected_formatter_class == "TwoMessageCotFormatter":
+        if cot_prompt:
+            assert formatter.thinking_instructions == cot_prompt
+        else:
+            assert formatter.thinking_instructions is None
+    # For other formatters, don't assert thinking_instructions
+
+    # Verify prompt builder was called correctly
+    mock_prompt_builder.build_prompt.assert_called_once()
+    mock_prompt_builder.chain_of_thought_prompt.assert_called_once()
+
+
+@pytest.mark.parametrize(
+    "initial_mode,expected_mode",
+    [
+        (
+            StructuredOutputMode.json_schema,
+            StructuredOutputMode.json_schema,
+        ),  # Should not change
+        (
+            StructuredOutputMode.unknown,
+            StructuredOutputMode.json_mode,
+        ),  # Should update to default
+    ],
+)
+async def test_update_run_config_unknown_structured_output_mode(
+    base_task, initial_mode, expected_mode
+):
+    """Test that unknown structured output mode is updated to the default for the model provider"""
+    # Create a run config with the initial mode
+    run_config = RunConfig(
+        task=base_task,
+        model_name="test_model",
+        model_provider_name="openai",
+        prompt_id="simple_prompt_builder",
+        structured_output_mode=initial_mode,
+        temperature=0.7,  # Add some other properties to verify they're preserved
+        top_p=0.9,
+    )
+
+    # Mock the default mode lookup
+    with patch(
+        "kiln_ai.adapters.model_adapters.base_adapter.default_structured_output_mode_for_model_provider"
+    ) as mock_default:
+        mock_default.return_value = StructuredOutputMode.json_mode
+
+        # Create the adapter
+        adapter = MockAdapter(run_config=run_config)
+
+        # Verify the mode was updated correctly
+        assert adapter.run_config.structured_output_mode == expected_mode
+
+        # Verify other properties were preserved
+        assert adapter.run_config.temperature == 0.7
+        assert adapter.run_config.top_p == 0.9
+
+        # Verify the default mode lookup was only called when needed
+        if initial_mode == StructuredOutputMode.unknown:
+            mock_default.assert_called_once_with("test_model", "openai")
+        else:
+            mock_default.assert_not_called()
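
Note on the test_base_adapter.py hunks above: structured_output_mode (along with temperature and top_p) now lives on the run configuration rather than on the model provider entry, and model_provider_name is validated against the known provider list. A minimal sketch of the new construction pattern, using only imports that appear in the diff; the task fields and model name below are illustrative assumptions, not values taken from this diff:

    from kiln_ai.datamodel import Task
    from kiln_ai.datamodel.task import RunConfig

    # Illustrative only: Task fields and model_name are assumed, not confirmed by this diff.
    task = Task(name="Example task", instruction="Answer the question in one sentence.")
    run_config = RunConfig(
        task=task,
        model_name="gpt_4o",  # assumed model id
        model_provider_name="openai",  # must be a known provider; "test_provider" now fails validation
        prompt_id="simple_prompt_builder",
        structured_output_mode="json_schema",  # set on the run config, not the provider
    )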

kiln_ai/adapters/model_adapters/test_litellm_adapter.py
@@ -11,6 +11,7 @@ from kiln_ai.adapters.model_adapters.litellm_config import (
     LiteLlmConfig,
 )
 from kiln_ai.datamodel import Project, Task, Usage
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 @pytest.fixture
@@ -41,8 +42,12 @@ def mock_task(tmp_path):
 def config():
     return LiteLlmConfig(
         base_url="https://api.test.com",
-        model_name="test-model",
-        provider_name="openrouter",
+        run_config_properties=RunConfigProperties(
+            model_name="test-model",
+            model_provider_name="openrouter",
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        ),
         default_headers={"X-Test": "test"},
         additional_body_options={"api_key": "test_key"},
     )
@@ -52,7 +57,6 @@ def test_initialization(config, mock_task):
     adapter = LiteLlmAdapter(
         config=config,
         kiln_task=mock_task,
-        prompt_id="simple_prompt_builder",
         base_adapter_config=AdapterConfig(default_tags=["test-tag"]),
     )
 
@@ -60,8 +64,11 @@
     assert adapter.run_config.task == mock_task
     assert adapter.run_config.prompt_id == "simple_prompt_builder"
    assert adapter.base_adapter_config.default_tags == ["test-tag"]
-    assert adapter.run_config.model_name == config.model_name
-    assert adapter.run_config.model_provider_name == config.provider_name
+    assert adapter.run_config.model_name == config.run_config_properties.model_name
+    assert (
+        adapter.run_config.model_provider_name
+        == config.run_config_properties.model_provider_name
+    )
     assert adapter.config.additional_body_options["api_key"] == "test_key"
     assert adapter._api_base == config.base_url
     assert adapter._headers == config.default_headers
@@ -72,8 +79,11 @@ def test_adapter_info(config, mock_task):
 
     assert adapter.adapter_name() == "kiln_openai_compatible_adapter"
 
-    assert adapter.run_config.model_name == config.model_name
-    assert adapter.run_config.model_provider_name == config.provider_name
+    assert adapter.run_config.model_name == config.run_config_properties.model_name
+    assert (
+        adapter.run_config.model_provider_name
+        == config.run_config_properties.model_provider_name
+    )
     assert adapter.run_config.prompt_id == "simple_prompt_builder"
 
 
@@ -96,14 +106,12 @@ async def test_response_format_options_unstructured(config, mock_task):
 )
 @pytest.mark.asyncio
 async def test_response_format_options_json_mode(config, mock_task, mode):
+    config.run_config_properties.structured_output_mode = mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = mode
-
         options = await adapter.response_format_options()
         assert options == {"response_format": {"type": "json_object"}}
 
@@ -117,14 +125,12 @@ async def test_response_format_options_json_mode(config, mock_task, mode):
 )
 @pytest.mark.asyncio
 async def test_response_format_options_function_calling(config, mock_task, mode):
+    config.run_config_properties.structured_output_mode = mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = mode
-
         options = await adapter.response_format_options()
         assert "tools" in options
         # full tool structure validated below
@@ -139,30 +145,26 @@ async def test_response_format_options_function_calling(config, mock_task, mode)
 )
 @pytest.mark.asyncio
 async def test_response_format_options_json_instructions(config, mock_task, mode):
+    config.run_config_properties.structured_output_mode = mode
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = (
-            StructuredOutputMode.json_instructions
-        )
         options = await adapter.response_format_options()
         assert options == {}
 
 
 @pytest.mark.asyncio
 async def test_response_format_options_json_schema(config, mock_task):
+    config.run_config_properties.structured_output_mode = (
+        StructuredOutputMode.json_schema
+    )
     adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
 
     with (
         patch.object(adapter, "has_structured_output", return_value=True),
-        patch.object(adapter, "model_provider") as mock_provider,
     ):
-        mock_provider.return_value.structured_output_mode = (
-            StructuredOutputMode.json_schema
-        )
         options = await adapter.response_format_options()
         assert options == {
             "response_format": {
@@ -350,6 +352,69 @@ def test_litellm_model_id_unknown_provider(config, mock_task):
         adapter.litellm_model_id()
 
 
+@pytest.mark.parametrize(
+    "provider_name,expected_usage_param",
+    [
+        (ModelProviderName.openrouter, {"usage": {"include": True}}),
+        (ModelProviderName.openai, {}),
+        (ModelProviderName.anthropic, {}),
+        (ModelProviderName.groq, {}),
+    ],
+)
+def test_build_extra_body_openrouter_usage(
+    config, mock_task, provider_name, expected_usage_param
+):
+    """Test build_extra_body includes usage parameter for OpenRouter providers"""
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+
+    # Create a mock provider with the specified name and minimal required attributes
+    mock_provider = Mock()
+    mock_provider.name = provider_name
+    mock_provider.thinking_level = None
+    mock_provider.require_openrouter_reasoning = False
+    mock_provider.anthropic_extended_thinking = False
+    mock_provider.r1_openrouter_options = False
+    mock_provider.logprobs_openrouter_options = False
+    mock_provider.openrouter_skip_required_parameters = False
+
+    # Call build_extra_body
+    extra_body = adapter.build_extra_body(mock_provider)
+
+    # Verify the usage parameter is included only for OpenRouter
+    for key, value in expected_usage_param.items():
+        assert extra_body.get(key) == value
+
+    # Verify non-OpenRouter providers don't have the usage parameter
+    if provider_name != ModelProviderName.openrouter:
+        assert "usage" not in extra_body
+
+
+@pytest.mark.asyncio
+async def test_build_completion_kwargs_custom_temperature_top_p(config, mock_task):
+    """Test build_completion_kwargs with custom temperature and top_p values"""
+    # Create config with custom temperature and top_p
+    config.run_config_properties.temperature = 0.7
+    config.run_config_properties.top_p = 0.9
+
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+    mock_provider = Mock()
+    messages = [{"role": "user", "content": "Hello"}]
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch.object(adapter, "litellm_model_id", return_value="openai/test-model"),
+        patch.object(adapter, "build_extra_body", return_value={}),
+        patch.object(adapter, "response_format_options", return_value={}),
+    ):
+        kwargs = await adapter.build_completion_kwargs(mock_provider, messages, None)
+
+    # Verify custom temperature and top_p are passed through
+    assert kwargs["temperature"] == 0.7
+    assert kwargs["top_p"] == 0.9
+    # Verify drop_params is set correctly
+    assert kwargs["drop_params"] is True
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "top_logprobs,response_format,extra_body",
@@ -391,6 +456,13 @@ async def test_build_completion_kwargs(
     assert kwargs["messages"] == messages
     assert kwargs["api_base"] == config.base_url
 
+    # Verify temperature and top_p are included with default values
+    assert kwargs["temperature"] == 1.0  # Default from RunConfigProperties
+    assert kwargs["top_p"] == 1.0  # Default from RunConfigProperties
+
+    # Verify drop_params is set correctly
+    assert kwargs["drop_params"] is True
+
     # Verify optional parameters
     if top_logprobs is not None:
         assert kwargs["logprobs"] is True
@@ -439,6 +511,17 @@ async def test_build_completion_kwargs(
         ({"prompt_tokens": 10}, None, None),
         # Invalid cost type (should be ignored)
         (None, "0.5", None),
+        # Cost in OpenRouter format
+        (
+            litellm.types.utils.Usage(
+                prompt_tokens=10,
+                completion_tokens=20,
+                total_tokens=30,
+                cost=0.5,
+            ),
+            None,
+            Usage(input_tokens=10, output_tokens=20, total_tokens=30, cost=0.5),
+        ),
     ],
 )
 def test_usage_from_response(config, mock_task, litellm_usage, cost, expected_usage):
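
Similarly, the test_litellm_adapter.py hunks show LiteLlmConfig replacing its bare model_name/provider_name fields with a RunConfigProperties object, which now also carries the structured output mode, temperature, and top_p that build_completion_kwargs passes through to LiteLLM. A minimal sketch of the updated config, assuming the import paths from the fixture above; the base URL and API key are placeholders:

    from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
    from kiln_ai.datamodel.task import RunConfigProperties

    config = LiteLlmConfig(
        base_url="https://openrouter.ai/api/v1",  # placeholder; any OpenAI-compatible endpoint
        run_config_properties=RunConfigProperties(
            model_name="test-model",
            model_provider_name="openrouter",
            prompt_id="simple_prompt_builder",
            structured_output_mode="json_schema",
            temperature=0.7,  # optional; the tests above show 1.0 as the default
            top_p=0.9,
        ),
        additional_body_options={"api_key": "YOUR_KEY"},
    )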

kiln_ai/adapters/model_adapters/test_saving_adapter_results.py
@@ -46,6 +46,7 @@ def adapter(test_task):
             model_name="phi_3_5",
             model_provider_name="ollama",
             prompt_id="simple_chain_of_thought_prompt_builder",
+            structured_output_mode="json_schema",
         ),
     )
 
@@ -102,6 +103,9 @@ def test_save_run_isolation(test_task, adapter):
         reloaded_output.source.properties["prompt_id"]
         == "simple_chain_of_thought_prompt_builder"
     )
+    assert reloaded_output.source.properties["structured_output_mode"] == "json_schema"
+    assert reloaded_output.source.properties["temperature"] == 1.0
+    assert reloaded_output.source.properties["top_p"] == 1.0
     # Run again, with same input and different output. Should create a new TaskRun.
     different_run_output = RunOutput(
         output="Different output", intermediate_outputs=None
@@ -228,3 +232,40 @@ async def test_autosave_true(test_task, adapter):
         output.source.properties["prompt_id"]
         == "simple_chain_of_thought_prompt_builder"
     )
+    assert output.source.properties["structured_output_mode"] == "json_schema"
+    assert output.source.properties["temperature"] == 1.0
+    assert output.source.properties["top_p"] == 1.0
+
+
+def test_properties_for_task_output_custom_values(test_task):
+    """Test that _properties_for_task_output includes custom temperature, top_p, and structured_output_mode"""
+    adapter = MockAdapter(
+        run_config=RunConfig(
+            task=test_task,
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id="simple_prompt_builder",
+            temperature=0.7,
+            top_p=0.9,
+            structured_output_mode="json_schema",
+        ),
+    )
+
+    input_data = "Test input"
+    output_data = "Test output"
+    run_output = RunOutput(output=output_data, intermediate_outputs=None)
+
+    task_run = adapter.generate_run(
+        input=input_data, input_source=None, run_output=run_output
+    )
+    task_run.save_to_file()
+
+    # Verify custom values are preserved in properties
+    output = task_run.output
+    assert output.source.properties["adapter_name"] == "mock_adapter"
+    assert output.source.properties["model_name"] == "gpt-4"
+    assert output.source.properties["model_provider"] == "openai"
+    assert output.source.properties["prompt_id"] == "simple_prompt_builder"
+    assert output.source.properties["structured_output_mode"] == "json_schema"
+    assert output.source.properties["temperature"] == 0.7
+    assert output.source.properties["top_p"] == 0.9
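
Finally, the test_saving_adapter_results.py additions show that these run configuration fields are persisted as provenance on saved task output. A hedged sketch of reading them back, assuming a task_run produced and saved as in the test above:

    # Assumes `task_run` came from adapter.generate_run(...) and was saved, as in the test above.
    props = task_run.output.source.properties
    assert props["model_provider"] == "openai"  # model_provider_name is stored as "model_provider"
    assert props["structured_output_mode"] == "json_schema"
    assert props["temperature"] == 0.7
    assert props["top_p"] == 0.9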