kiln-ai 0.18.0__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (89)
  1. kiln_ai/adapters/__init__.py +2 -2
  2. kiln_ai/adapters/adapter_registry.py +46 -0
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/data_gen/data_gen_task.py +2 -2
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +7 -3
  7. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  8. kiln_ai/adapters/eval/base_eval.py +2 -2
  9. kiln_ai/adapters/eval/eval_runner.py +3 -1
  10. kiln_ai/adapters/eval/g_eval.py +2 -2
  11. kiln_ai/adapters/eval/test_base_eval.py +1 -1
  12. kiln_ai/adapters/eval/test_eval_runner.py +6 -12
  13. kiln_ai/adapters/eval/test_g_eval.py +3 -4
  14. kiln_ai/adapters/eval/test_g_eval_data.py +1 -1
  15. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  16. kiln_ai/adapters/fine_tune/base_finetune.py +1 -0
  17. kiln_ai/adapters/fine_tune/fireworks_finetune.py +32 -20
  18. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  19. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +30 -21
  20. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  21. kiln_ai/adapters/ml_model_list.py +1009 -111
  22. kiln_ai/adapters/model_adapters/base_adapter.py +62 -28
  23. kiln_ai/adapters/model_adapters/litellm_adapter.py +397 -80
  24. kiln_ai/adapters/model_adapters/test_base_adapter.py +194 -18
  25. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +428 -4
  26. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  27. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  28. kiln_ai/adapters/model_adapters/test_structured_output.py +120 -14
  29. kiln_ai/adapters/parsers/__init__.py +1 -1
  30. kiln_ai/adapters/parsers/test_r1_parser.py +1 -1
  31. kiln_ai/adapters/provider_tools.py +35 -20
  32. kiln_ai/adapters/remote_config.py +57 -10
  33. kiln_ai/adapters/repair/repair_task.py +1 -1
  34. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  35. kiln_ai/adapters/run_output.py +3 -0
  36. kiln_ai/adapters/test_adapter_registry.py +109 -2
  37. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  38. kiln_ai/adapters/test_ml_model_list.py +51 -1
  39. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  40. kiln_ai/adapters/test_provider_tools.py +73 -12
  41. kiln_ai/adapters/test_remote_config.py +470 -16
  42. kiln_ai/datamodel/__init__.py +23 -21
  43. kiln_ai/datamodel/basemodel.py +54 -28
  44. kiln_ai/datamodel/datamodel_enums.py +3 -0
  45. kiln_ai/datamodel/dataset_split.py +5 -3
  46. kiln_ai/datamodel/eval.py +4 -4
  47. kiln_ai/datamodel/external_tool_server.py +298 -0
  48. kiln_ai/datamodel/finetune.py +2 -2
  49. kiln_ai/datamodel/json_schema.py +25 -10
  50. kiln_ai/datamodel/project.py +11 -4
  51. kiln_ai/datamodel/prompt.py +2 -2
  52. kiln_ai/datamodel/prompt_id.py +4 -4
  53. kiln_ai/datamodel/registry.py +0 -15
  54. kiln_ai/datamodel/run_config.py +62 -0
  55. kiln_ai/datamodel/task.py +8 -83
  56. kiln_ai/datamodel/task_output.py +7 -2
  57. kiln_ai/datamodel/task_run.py +41 -0
  58. kiln_ai/datamodel/test_basemodel.py +213 -21
  59. kiln_ai/datamodel/test_eval_model.py +6 -6
  60. kiln_ai/datamodel/test_example_models.py +175 -0
  61. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  62. kiln_ai/datamodel/test_model_perf.py +1 -1
  63. kiln_ai/datamodel/test_prompt_id.py +5 -1
  64. kiln_ai/datamodel/test_registry.py +8 -3
  65. kiln_ai/datamodel/test_task.py +20 -47
  66. kiln_ai/datamodel/test_tool_id.py +239 -0
  67. kiln_ai/datamodel/tool_id.py +83 -0
  68. kiln_ai/tools/__init__.py +8 -0
  69. kiln_ai/tools/base_tool.py +82 -0
  70. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  71. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  72. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  73. kiln_ai/tools/mcp_server_tool.py +95 -0
  74. kiln_ai/tools/mcp_session_manager.py +243 -0
  75. kiln_ai/tools/test_base_tools.py +199 -0
  76. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  77. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  78. kiln_ai/tools/test_tool_registry.py +473 -0
  79. kiln_ai/tools/tool_registry.py +64 -0
  80. kiln_ai/utils/config.py +32 -0
  81. kiln_ai/utils/open_ai_types.py +94 -0
  82. kiln_ai/utils/project_utils.py +17 -0
  83. kiln_ai/utils/test_config.py +138 -1
  84. kiln_ai/utils/test_open_ai_types.py +131 -0
  85. {kiln_ai-0.18.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +37 -6
  86. kiln_ai-0.20.1.dist-info/RECORD +138 -0
  87. kiln_ai-0.18.0.dist-info/RECORD +0 -115
  88. {kiln_ai-0.18.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
  89. {kiln_ai-0.18.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
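Among the changes below, the most visible public-API change is in kiln_ai/adapters/remote_config.py: deserialize_config is renamed to deserialize_config_at_path, and remote model lists are now parsed leniently (invalid entries are skipped with warnings rather than failing the whole load). A minimal round-trip sketch, with usage inferred from the tests that follow rather than from documented API:

    from pathlib import Path
    from tempfile import TemporaryDirectory

    from kiln_ai.adapters.ml_model_list import built_in_models
    from kiln_ai.adapters.remote_config import (
        deserialize_config_at_path,
        serialize_config,
    )

    # Round-trip the built-in model list through JSON, mirroring test_round_trip below.
    # serialize_config/deserialize_config_at_path signatures are inferred from the tests.
    with TemporaryDirectory() as tmp:
        path = Path(tmp) / "models.json"
        serialize_config(built_in_models, path)
        loaded = deserialize_config_at_path(path)
        assert [m.model_dump(mode="json") for m in loaded] == [
            m.model_dump(mode="json") for m in built_in_models
        ]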
kiln_ai/adapters/test_remote_config.py

@@ -1,12 +1,22 @@
 import asyncio
+import json
+import logging
 import os
 from unittest.mock import patch

 import pytest

-from kiln_ai.adapters.ml_model_list import built_in_models
+from kiln_ai.adapters.ml_model_list import (
+    KilnModel,
+    KilnModelProvider,
+    ModelFamily,
+    ModelName,
+    ModelProviderName,
+    StructuredOutputMode,
+    built_in_models,
+)
 from kiln_ai.adapters.remote_config import (
-    deserialize_config,
+    deserialize_config_at_path,
     dump_builtin_config,
     load_from_url,
     load_remote_models,
@@ -14,17 +24,49 @@ from kiln_ai.adapters.remote_config import (
 )


+@pytest.fixture
+def mock_model() -> KilnModel:
+    return KilnModel(
+        family=ModelFamily.gpt,
+        name=ModelName.gpt_4_1,
+        friendly_name="GPT 4.1",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openai,
+                model_id="gpt-4.1",
+                provider_finetune_id="gpt-4.1-2025-04-14",
+                structured_output_mode=StructuredOutputMode.json_schema,
+                supports_logprobs=True,
+                suggested_for_evals=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="openai/gpt-4.1",
+                structured_output_mode=StructuredOutputMode.json_schema,
+                supports_logprobs=True,
+                suggested_for_evals=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.azure_openai,
+                model_id="gpt-4.1",
+                suggested_for_evals=True,
+            ),
+        ],
+    )
+
+
 def test_round_trip(tmp_path):
     path = tmp_path / "models.json"
     serialize_config(built_in_models, path)
-    loaded = deserialize_config(path)
+    loaded = deserialize_config_at_path(path)
     assert [m.model_dump(mode="json") for m in loaded] == [
         m.model_dump(mode="json") for m in built_in_models
     ]


-def test_load_from_url():
-    sample = [built_in_models[0].model_dump(mode="json")]
+def test_load_from_url(mock_model):
+    sample_model = mock_model
+    sample = [sample_model.model_dump(mode="json")]

     class FakeResponse:
         def raise_for_status(self):
@@ -37,23 +79,59 @@ def test_load_from_url():
         "kiln_ai.adapters.remote_config.requests.get", return_value=FakeResponse()
     ):
         models = load_from_url("http://example.com/models.json")
-        assert [m.model_dump(mode="json") for m in models] == sample
+
+        assert len(models) == 1
+        assert sample_model == models[0]
+
+
+def test_load_from_url_calls_deserialize_config_data(mock_model):
+    """Test that load_from_url calls deserialize_config_data with the model_list from the response."""
+    sample_model_data = [mock_model.model_dump(mode="json")]
+    response_data = {"model_list": sample_model_data}
+
+    class FakeResponse:
+        def raise_for_status(self):
+            pass
+
+        def json(self):
+            return response_data
+
+    with (
+        patch(
+            "kiln_ai.adapters.remote_config.requests.get", return_value=FakeResponse()
+        ) as mock_get,
+        patch(
+            "kiln_ai.adapters.remote_config.deserialize_config_data"
+        ) as mock_deserialize,
+    ):
+        mock_deserialize.return_value = [mock_model]
+
+        result = load_from_url("http://example.com/models.json")
+
+        # Verify requests.get was called with correct URL
+        mock_get.assert_called_once_with("http://example.com/models.json", timeout=10)
+
+        # Verify deserialize_config_data was called with the model_list data
+        mock_deserialize.assert_called_once_with(response_data)
+
+        # Verify the result is what deserialize_config_data returned
+        assert result == [mock_model]


 def test_dump_builtin_config(tmp_path):
     path = tmp_path / "out.json"
     dump_builtin_config(path)
-    loaded = deserialize_config(path)
+    loaded = deserialize_config_at_path(path)
     assert [m.model_dump(mode="json") for m in loaded] == [
         m.model_dump(mode="json") for m in built_in_models
     ]


 @pytest.mark.asyncio
-async def test_load_remote_models_success(monkeypatch):
+async def test_load_remote_models_success(monkeypatch, mock_model):
     del os.environ["KILN_SKIP_REMOTE_MODEL_LIST"]
     original = built_in_models.copy()
-    sample_models = [built_in_models[0]]
+    sample_models = [mock_model]

     def fake_fetch(url):
         return sample_models
@@ -80,21 +158,397 @@ async def test_load_remote_models_failure(monkeypatch):
     assert built_in_models == original


-def test_deserialize_config_with_extra_keys(tmp_path):
+def test_deserialize_config_with_extra_keys(tmp_path, mock_model):
     # Take a valid model and add an extra key, ensure it is ignored and still loads
-    import json
-
-    from kiln_ai.adapters.ml_model_list import built_in_models
-
-    model_dict = built_in_models[0].model_dump(mode="json")
+    model_dict = mock_model.model_dump(mode="json")
     model_dict["extra_key"] = "should be ignored or error"
     model_dict["providers"][0]["extra_key"] = "should be ignored or error"
     data = {"model_list": [model_dict]}
     path = tmp_path / "extra.json"
     path.write_text(json.dumps(data))
     # Should NOT raise, and extra key should be ignored
-    models = deserialize_config(path)
+    models = deserialize_config_at_path(path)
     assert hasattr(models[0], "family")
     assert not hasattr(models[0], "extra_key")
     assert hasattr(models[0], "providers")
     assert not hasattr(models[0].providers[0], "extra_key")
+
+
+def test_deserialize_config_with_invalid_models(tmp_path, caplog, mock_model):
+    """Test comprehensive handling of invalid models and providers during deserialization."""
+
+    # Create a fully valid model as baseline
+    valid_model = mock_model.model_dump(mode="json")
+
+    # Case 1: Invalid model - missing required field 'family'
+    invalid_model_missing_family = mock_model.model_dump(mode="json")
+    del invalid_model_missing_family["family"]
+
+    # Case 2: Invalid model - invalid data type for required field
+    invalid_model_wrong_type = mock_model.model_dump(mode="json")
+    invalid_model_wrong_type["name"] = None  # name should be a string, not None
+
+    # Case 3: Invalid model - completely malformed
+    invalid_model_malformed = {"not_a_valid_model": "at_all"}
+
+    # Case 4: Valid model with one invalid provider (should keep model, skip invalid provider)
+    valid_model_invalid_provider = mock_model.model_dump(mode="json")
+    valid_model_invalid_provider["name"] = "test_model_invalid_provider"  # Unique name
+    valid_model_invalid_provider["providers"][0]["name"] = "unknown-provider-123"
+
+    # Case 5: Valid model with mixed valid/invalid providers (should keep model and valid providers)
+    valid_model_mixed_providers = mock_model.model_dump(mode="json")
+    valid_model_mixed_providers["name"] = "test_model_mixed_providers"  # Unique name
+    # Add a second provider that's valid
+    valid_provider = valid_model_mixed_providers["providers"][0].copy()
+    valid_provider["name"] = "azure_openai"
+    # Make first provider invalid
+    valid_model_mixed_providers["providers"][0]["name"] = "invalid-provider-1"
+    # Add invalid provider with missing required field
+    invalid_provider = valid_model_mixed_providers["providers"][0].copy()
+    del invalid_provider["name"]
+    # Add another invalid provider with wrong type
+    invalid_provider_2 = valid_model_mixed_providers["providers"][0].copy()
+    invalid_provider_2["supports_structured_output"] = "not_a_boolean"
+
+    valid_model_mixed_providers["providers"] = [
+        valid_model_mixed_providers["providers"][0],  # invalid name
+        valid_provider,  # valid
+        invalid_provider,  # missing name
+        invalid_provider_2,  # wrong type
+    ]
+
+    # Case 6: Valid model with all invalid providers (should keep model with empty providers)
+    valid_model_all_invalid_providers = mock_model.model_dump(mode="json")
+    valid_model_all_invalid_providers["name"] = (
+        "test_model_all_invalid_providers"  # Unique name
+    )
+    valid_model_all_invalid_providers["providers"][0]["name"] = "unknown-provider-456"
+    if len(valid_model_all_invalid_providers["providers"]) > 1:
+        valid_model_all_invalid_providers["providers"][1]["name"] = (
+            "another-unknown-provider"
+        )
+    if len(valid_model_all_invalid_providers["providers"]) > 2:
+        valid_model_all_invalid_providers["providers"][2]["name"] = (
+            "yet-another-unknown-provider"
+        )
+
+    data = {
+        "model_list": [
+            valid_model,  # Should be kept
+            invalid_model_missing_family,  # Should be skipped
+            invalid_model_wrong_type,  # Should be skipped
+            invalid_model_malformed,  # Should be skipped
+            valid_model_invalid_provider,  # Should be kept with empty providers
+            valid_model_mixed_providers,  # Should be kept with 1 valid provider
+            valid_model_all_invalid_providers,  # Should be kept with empty providers
+        ]
+    }
+    path = tmp_path / "mixed_models.json"
+    path.write_text(json.dumps(data))
+
+    # Enable logging to capture warnings
+    with caplog.at_level(logging.WARNING):
+        models = deserialize_config_at_path(path)
+
+    # Should have 4 valid models (original + 3 with provider issues but valid model structure)
+    assert len(models) == 4
+
+    # Check the first model is fully intact
+    assert models[0].name == mock_model.name
+    assert models[0].family == mock_model.family
+    assert len(models[0].providers) == 3  # mock_model has 3 providers
+
+    # Check model with invalid provider has remaining valid providers
+    model_with_invalid_provider = next(
+        m for m in models if m.name == valid_model_invalid_provider["name"]
+    )
+    # Should keep the valid providers from the original model (openrouter, azure_openai)
+    assert len(model_with_invalid_provider.providers) == 2
+    provider_names = {p.name.value for p in model_with_invalid_provider.providers}
+    assert provider_names == {"openrouter", "azure_openai"}
+
+    # Check model with mixed providers has only the valid one
+    model_with_mixed_providers = next(
+        m for m in models if m.name == valid_model_mixed_providers["name"]
+    )
+    assert len(model_with_mixed_providers.providers) == 1
+    assert model_with_mixed_providers.providers[0].name.value == "azure_openai"
+
+    # Check model with all invalid providers has empty providers
+    model_with_all_invalid_providers = next(
+        m for m in models if m.name == valid_model_all_invalid_providers["name"]
+    )
+    assert len(model_with_all_invalid_providers.providers) == 0
+
+    # Check warning logs
+    warning_logs = [
+        record for record in caplog.records if record.levelno == logging.WARNING
+    ]
+
+    # Should have warnings for:
+    # - 3 invalid models (missing family, wrong type, malformed)
+    # - 1 invalid provider in case 4 (unknown-provider-123)
+    # - 3 invalid providers in case 5 (invalid-provider-1, missing name, wrong type boolean)
+    # - 3 invalid providers in case 6 (unknown-provider-456, another-unknown-provider, yet-another-unknown-provider)
+    assert len(warning_logs) >= 10
+
+    # Check that warning messages contain expected content
+    model_warnings = [
+        log for log in warning_logs if "Failed to validate a model from" in log.message
+    ]
+    provider_warnings = [
+        log
+        for log in warning_logs
+        if "Failed to validate a model provider" in log.message
+    ]
+
+    assert len(model_warnings) == 3  # 3 completely invalid models
+    assert (
+        len(provider_warnings) == 7
+    )  # Exactly 7 invalid providers across different models
+
+
+def test_deserialize_config_empty_provider_list(tmp_path, mock_model):
+    """Test that models with empty provider lists are handled correctly."""
+    model_with_empty_providers = mock_model.model_dump(mode="json")
+    model_with_empty_providers["providers"] = []
+
+    data = {"model_list": [model_with_empty_providers]}
+    path = tmp_path / "empty_providers.json"
+    path.write_text(json.dumps(data))
+
+    models = deserialize_config_at_path(path)
+    assert len(models) == 1
+    assert len(models[0].providers) == 0
+
+
+def test_deserialize_config_missing_provider_field(tmp_path, caplog, mock_model):
+    """Test that models missing the providers field are handled correctly."""
+    model_without_providers = mock_model.model_dump(mode="json")
+    del model_without_providers["providers"]
+
+    data = {"model_list": [model_without_providers]}
+    path = tmp_path / "no_providers.json"
+    path.write_text(json.dumps(data))
+
+    with caplog.at_level(logging.WARNING):
+        models = deserialize_config_at_path(path)
+
+    # Model should be kept with empty providers (deserialize_config handles missing providers gracefully)
+    assert len(models) == 1
+    assert len(models[0].providers) == 0
+    assert models[0].name == mock_model.name
+
+    # Should not have any warnings since the function handles missing providers gracefully
+    warning_logs = [
+        record for record in caplog.records if record.levelno == logging.WARNING
+    ]
+    assert len(warning_logs) == 0
+
+
+def test_deserialize_config_provider_with_extra_fields(tmp_path, mock_model):
+    """Test that providers with extra unknown fields are handled gracefully."""
+    model_with_extra_provider_fields = mock_model.model_dump(mode="json")
+    model_with_extra_provider_fields["providers"][0]["unknown_field"] = (
+        "should_be_ignored"
+    )
+    model_with_extra_provider_fields["providers"][0]["another_extra"] = {
+        "nested": "data"
+    }
+
+    data = {"model_list": [model_with_extra_provider_fields]}
+    path = tmp_path / "extra_provider_fields.json"
+    path.write_text(json.dumps(data))
+
+    models = deserialize_config_at_path(path)
+    assert len(models) == 1
+    assert len(models[0].providers) == 3  # mock_model has 3 providers
+    # Extra fields should be ignored, not present in the final object
+    assert not hasattr(models[0].providers[0], "unknown_field")
+    assert not hasattr(models[0].providers[0], "another_extra")
+
+
+def test_deserialize_config_model_with_extra_fields(tmp_path, mock_model):
+    """Test that models with extra unknown fields are handled gracefully."""
+    model_with_extra_fields = mock_model.model_dump(mode="json")
+    model_with_extra_fields["future_field"] = "should_be_ignored"
+    model_with_extra_fields["complex_extra"] = {"nested": {"data": [1, 2, 3]}}
+
+    data = {"model_list": [model_with_extra_fields]}
+    path = tmp_path / "extra_model_fields.json"
+    path.write_text(json.dumps(data))
+
+    models = deserialize_config_at_path(path)
+    assert len(models) == 1
+    assert models[0].name == mock_model.name
+    # Extra fields should be ignored, not present in the final object
+    assert not hasattr(models[0], "future_field")
+    assert not hasattr(models[0], "complex_extra")
+
+
+def test_deserialize_config_mixed_valid_invalid_providers_single_model(
+    tmp_path, caplog, mock_model
+):
+    """Test a single model with a mix of valid and invalid providers in detail."""
+    model = mock_model.model_dump(mode="json")
+
+    # Create a mix of provider scenarios
+    valid_provider_1 = model["providers"][0].copy()
+    valid_provider_1["name"] = "openai"
+
+    valid_provider_2 = model["providers"][0].copy()
+    valid_provider_2["name"] = "azure_openai"
+
+    invalid_provider_unknown_name = model["providers"][0].copy()
+    invalid_provider_unknown_name["name"] = "nonexistent_provider"
+
+    invalid_provider_missing_name = model["providers"][0].copy()
+    del invalid_provider_missing_name["name"]
+
+    invalid_provider_wrong_type = model["providers"][0].copy()
+    invalid_provider_wrong_type["supports_structured_output"] = "not_a_boolean"
+
+    model["providers"] = [
+        valid_provider_1,
+        invalid_provider_unknown_name,
+        valid_provider_2,
+        invalid_provider_missing_name,
+        invalid_provider_wrong_type,
+    ]
+
+    data = {"model_list": [model]}
+    path = tmp_path / "mixed_providers_single.json"
+    path.write_text(json.dumps(data))
+
+    with caplog.at_level(logging.WARNING):
+        models = deserialize_config_at_path(path)
+
+    # Should have 1 model with 2 valid providers
+    assert len(models) == 1
+    assert len(models[0].providers) == 2
+    assert models[0].providers[0].name.value == "openai"
+    assert models[0].providers[1].name.value == "azure_openai"
+
+    # Should have logged 3 provider validation warnings
+    provider_warnings = [
+        log
+        for log in caplog.records
+        if log.levelno == logging.WARNING
+        and "Failed to validate a model provider" in log.message
+    ]
+    assert len(provider_warnings) == 3
+
+
+def test_deserialize_config_empty_json_structures(tmp_path):
+    """Test various empty JSON structures."""
+    # Test empty model_list
+    data = {"model_list": []}
+    path = tmp_path / "empty_model_list.json"
+    path.write_text(json.dumps(data))
+    models = deserialize_config_at_path(path)
+    assert len(models) == 0
+
+    # Test empty object with no model_list key
+    path = tmp_path / "empty_object.json"
+    path.write_text(json.dumps({}))
+    with pytest.raises(ValueError):
+        deserialize_config_at_path(path)
+
+
+def test_backwards_compatibility_with_v0_19(tmp_path):
+    """Test that kiln-ai v0.19 (first version with remote config) can parse JSON from current version.
+
+    This ensures our serialization format remains backwards compatible using uv scripts.
+
+    Skipped in CI/CD/VScode (needs UV), so you have to run it from the CLI (fine since it's slow):
+    Run from CLI: KILN_TEST_COMPATIBILITY=1 uv run python3 -m pytest libs/core/kiln_ai/adapters/test_remote_config.py::test_backwards_compatibility_with_v0_19 -s -v
+    """
+
+    # Skip unless explicitly requested via environment variable
+    if not os.environ.get("KILN_TEST_COMPATIBILITY"):
+        pytest.skip(
+            "Compatibility test skipped. Set KILN_TEST_COMPATIBILITY=1 to run this test."
+        )
+
+    import shutil
+    import subprocess
+
+    # Check if uv is available
+    if not shutil.which("uv"):
+        pytest.skip("uv is not available for compatibility test")
+
+    # Create JSON with current version
+    current_json_path = tmp_path / "current_models.json"
+    serialize_config(built_in_models, current_json_path)
+
+    # Test script using uv inline script metadata to install v0.19
+    test_script = f'''# /// script
+# dependencies = [
+# "kiln-ai==0.19.0",
+# "pandas",
+# ]
+# ///
+import sys
+import json
+from pathlib import Path
+
+# Import from v0.19
+try:
+    from kiln_ai.adapters.remote_config import deserialize_config_at_path
+    from kiln_ai.adapters.ml_model_list import KilnModel
+
+    # Try to deserialize current JSON with v0.19 code
+    models = deserialize_config_at_path("{current_json_path}")
+
+    # Basic validation - should have parsed successfully
+    assert len(models) > 0
+    assert all(isinstance(m, KilnModel) for m in models)
+
+    # Check basic fields exist and have expected types
+    for model in models:
+        assert hasattr(model, 'family') and isinstance(model.family, str)
+        assert hasattr(model, 'name') and isinstance(model.name, str)
+        assert hasattr(model, 'friendly_name') and isinstance(model.friendly_name, str)
+        assert hasattr(model, 'providers') and isinstance(model.providers, list)
+
+        # Check providers have basic fields
+        for provider in model.providers:
+            assert hasattr(provider, 'name')
+
+    sys.stdout.write("SUCCESS: v0.19 successfully parsed JSON from current version")
+    sys.stdout.write(f"Parsed {{len(models)}} models")
+
+except Exception as e:
+    sys.stdout.write(f"ERROR: {{e}}")
+    sys.exit(1)
+'''
+
+    try:
+        # Write the uv script
+        script_path = tmp_path / "test_v0_19.py"
+        script_path.write_text(test_script)
+
+        # Run the script using uv
+        result = subprocess.run(
+            ["uv", "run", str(script_path)], capture_output=True, text=True
+        )
+
+        # Check if the test passed
+        if result.returncode != 0:
+            pytest.fail(
+                f"v0.19 compatibility test failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}"
+            )
+
+        # Verify success message was printed
+        assert (
+            "SUCCESS: v0.19 successfully parsed JSON from current version"
+            in result.stdout
+        )
+
+    except subprocess.CalledProcessError as e:
+        # If we can't run uv, skip the test (might be network issues, etc.)
+        pytest.skip(f"Could not run uv script for compatibility test: {e}")
+    except FileNotFoundError:
+        # If uv command not found
+        pytest.skip("uv command not found for compatibility test")
kiln_ai/datamodel/__init__.py

@@ -3,7 +3,7 @@ See our docs for details about our datamodel classes and hierarchy:

 Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html

-User docs: https://docs.getkiln.ai/developers/kiln-datamodel
+User docs: https://docs.kiln.tech/developers/kiln-datamodel
 """

 # This component uses "flat" imports so we don't have too much internal structure exposed in the API.
@@ -22,6 +22,7 @@ from kiln_ai.datamodel.dataset_split import (
     DatasetSplit,
     DatasetSplitDefinition,
 )
+from kiln_ai.datamodel.external_tool_server import ExternalToolServer
 from kiln_ai.datamodel.finetune import (
     Finetune,
 )
@@ -47,31 +48,32 @@ from kiln_ai.datamodel.task_run import (
 )

 __all__ = [
-    "strict_mode",
-    "dataset_split",
-    "eval",
-    "Task",
-    "Project",
-    "TaskRun",
-    "TaskOutput",
-    "Priority",
+    "BasePrompt",
     "DataSource",
-    "DataSourceType",
     "DataSourceProperty",
-    "Finetune",
+    "DataSourceType",
+    "DatasetSplit",
+    "DatasetSplitDefinition",
+    "ExternalToolServer",
     "FineTuneStatusType",
+    "Finetune",
+    "Priority",
+    "Project",
+    "Prompt",
+    "PromptGenerators",
+    "PromptId",
+    "RequirementRating",
+    "StructuredOutputMode",
+    "Task",
+    "TaskOutput",
+    "TaskOutputRating",
     "TaskOutputRatingType",
     "TaskRequirement",
-    "DatasetSplitDefinition",
-    "DatasetSplit",
-    "RequirementRating",
     "TaskRequirement",
-    "BasePrompt",
-    "Prompt",
-    "TaskOutputRating",
-    "StructuredOutputMode",
-    "PromptId",
-    "PromptGenerators",
-    "prompt_generator_values",
+    "TaskRun",
     "Usage",
+    "dataset_split",
+    "eval",
+    "prompt_generator_values",
+    "strict_mode",
 ]
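The kiln_ai/datamodel/__init__.py change above adds the new ExternalToolServer class to the package's flat exports and re-sorts __all__. If the released wheel matches this diff, the class should be importable from the package root; a brief hedged sketch (the class's fields are not shown in this diff, so none are assumed):

    # Both imports should resolve in 0.20.1, per the __init__.py diff above.
    from kiln_ai.datamodel import ExternalToolServer

    print(ExternalToolServer)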