kiln-ai 0.19.0__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kiln-ai might be problematic (see the registry listing for details).
- kiln_ai/adapters/__init__.py +2 -2
- kiln_ai/adapters/adapter_registry.py +19 -1
- kiln_ai/adapters/chat/chat_formatter.py +8 -12
- kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
- kiln_ai/adapters/docker_model_runner_tools.py +119 -0
- kiln_ai/adapters/eval/base_eval.py +2 -2
- kiln_ai/adapters/eval/eval_runner.py +3 -1
- kiln_ai/adapters/eval/g_eval.py +2 -2
- kiln_ai/adapters/eval/test_base_eval.py +1 -1
- kiln_ai/adapters/eval/test_g_eval.py +3 -4
- kiln_ai/adapters/fine_tune/__init__.py +1 -1
- kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
- kiln_ai/adapters/ml_model_list.py +380 -34
- kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
- kiln_ai/adapters/model_adapters/litellm_adapter.py +383 -79
- kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +406 -1
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
- kiln_ai/adapters/model_adapters/test_structured_output.py +110 -4
- kiln_ai/adapters/parsers/__init__.py +1 -1
- kiln_ai/adapters/provider_tools.py +15 -1
- kiln_ai/adapters/repair/test_repair_task.py +12 -9
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +80 -1
- kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
- kiln_ai/adapters/test_ml_model_list.py +39 -1
- kiln_ai/adapters/test_prompt_adaptors.py +13 -6
- kiln_ai/adapters/test_provider_tools.py +55 -0
- kiln_ai/adapters/test_remote_config.py +98 -0
- kiln_ai/datamodel/__init__.py +23 -21
- kiln_ai/datamodel/datamodel_enums.py +1 -0
- kiln_ai/datamodel/eval.py +1 -1
- kiln_ai/datamodel/external_tool_server.py +298 -0
- kiln_ai/datamodel/json_schema.py +25 -10
- kiln_ai/datamodel/project.py +8 -1
- kiln_ai/datamodel/registry.py +0 -15
- kiln_ai/datamodel/run_config.py +62 -0
- kiln_ai/datamodel/task.py +2 -77
- kiln_ai/datamodel/task_output.py +6 -1
- kiln_ai/datamodel/task_run.py +41 -0
- kiln_ai/datamodel/test_basemodel.py +3 -3
- kiln_ai/datamodel/test_example_models.py +175 -0
- kiln_ai/datamodel/test_external_tool_server.py +691 -0
- kiln_ai/datamodel/test_registry.py +8 -3
- kiln_ai/datamodel/test_task.py +15 -47
- kiln_ai/datamodel/test_tool_id.py +239 -0
- kiln_ai/datamodel/tool_id.py +83 -0
- kiln_ai/tools/__init__.py +8 -0
- kiln_ai/tools/base_tool.py +82 -0
- kiln_ai/tools/built_in_tools/__init__.py +13 -0
- kiln_ai/tools/built_in_tools/math_tools.py +124 -0
- kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
- kiln_ai/tools/mcp_server_tool.py +95 -0
- kiln_ai/tools/mcp_session_manager.py +243 -0
- kiln_ai/tools/test_base_tools.py +199 -0
- kiln_ai/tools/test_mcp_server_tool.py +457 -0
- kiln_ai/tools/test_mcp_session_manager.py +1585 -0
- kiln_ai/tools/test_tool_registry.py +473 -0
- kiln_ai/tools/tool_registry.py +64 -0
- kiln_ai/utils/config.py +22 -0
- kiln_ai/utils/open_ai_types.py +94 -0
- kiln_ai/utils/project_utils.py +17 -0
- kiln_ai/utils/test_config.py +138 -1
- kiln_ai/utils/test_open_ai_types.py +131 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +6 -5
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/RECORD +70 -47
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/test_docker_model_runner_tools.py
@@ -0,0 +1,305 @@
+from unittest.mock import AsyncMock, Mock, patch
+
+import httpx
+import openai
+import pytest
+
+from kiln_ai.adapters.docker_model_runner_tools import (
+    DockerModelRunnerConnection,
+    docker_model_runner_base_url,
+    parse_docker_model_runner_models,
+)
+from kiln_ai.datamodel.datamodel_enums import ModelProviderName
+
+
+def test_docker_model_runner_base_url_default():
+    """Test that the default base URL is returned when no config is set."""
+    with patch("kiln_ai.adapters.docker_model_runner_tools.Config") as mock_config:
+        mock_config.shared().docker_model_runner_base_url = None
+        result = docker_model_runner_base_url()
+        assert result == "http://localhost:12434/engines/llama.cpp"
+
+
+def test_docker_model_runner_base_url_from_config():
+    """Test that the configured base URL is returned when set."""
+    with patch("kiln_ai.adapters.docker_model_runner_tools.Config") as mock_config:
+        mock_config.shared().docker_model_runner_base_url = (
+            "http://custom:8080/engines/llama.cpp"
+        )
+        result = docker_model_runner_base_url()
+        assert result == "http://custom:8080/engines/llama.cpp"
+
+
+def test_parse_docker_model_runner_models_with_supported_models():
+    """Test parsing Docker Model Runner models response with supported models."""
+    # Create mock OpenAI Model objects
+    mock_models = [
+        Mock(id="ai/llama3.2:3B-Q4_K_M"),
+        Mock(id="ai/qwen3:8B-Q4_K_M"),
+        Mock(id="ai/gemma3n:4B-Q4_K_M"),
+        Mock(id="unsupported-model"),
+    ]
+
+    with patch(
+        "kiln_ai.adapters.docker_model_runner_tools.built_in_models"
+    ) as mock_built_in_models:
+        # Mock built-in models with Docker Model Runner providers
+        mock_model = Mock()
+        mock_provider = Mock()
+        mock_provider.name = ModelProviderName.docker_model_runner
+        mock_provider.model_id = "ai/llama3.2:3B-Q4_K_M"
+        mock_model.providers = [mock_provider]
+        mock_built_in_models.__iter__ = Mock(return_value=iter([mock_model]))
+
+        result = parse_docker_model_runner_models(mock_models)  # type: ignore
+
+        assert result is not None
+        assert result.message == "Docker Model Runner connected"
+        assert "ai/llama3.2:3B-Q4_K_M" in result.supported_models
+        assert "unsupported-model" in result.untested_models
+
+
+def test_parse_docker_model_runner_models_no_models():
+    """Test parsing Docker Model Runner models response with no models."""
+    mock_models = []
+
+    result = parse_docker_model_runner_models(mock_models)
+
+    assert result is not None
+    assert "no supported models are available" in result.message
+    assert len(result.supported_models) == 0
+    assert len(result.untested_models) == 0
+
+
+def test_docker_model_runner_connection_all_models():
+    """Test that DockerModelRunnerConnection.all_models() returns both supported and untested models."""
+    connection = DockerModelRunnerConnection(
+        message="Test",
+        supported_models=["model1", "model2"],
+        untested_models=["model3", "model4"],
+    )
+
+    all_models = connection.all_models()
+    assert all_models == ["model1", "model2", "model3", "model4"]
+
+
+@pytest.mark.asyncio
+async def test_docker_model_runner_online_success():
+    """Test that docker_model_runner_online returns True when service is available."""
+    with patch(
+        "kiln_ai.adapters.docker_model_runner_tools.httpx.AsyncClient"
+    ) as mock_client_class:
+        mock_client = Mock()
+        mock_response = Mock()
+        mock_response.raise_for_status.return_value = None
+        mock_client.get = AsyncMock(return_value=mock_response)
+        mock_client_class.return_value.__aenter__.return_value = mock_client
+
+        from kiln_ai.adapters.docker_model_runner_tools import (
+            docker_model_runner_online,
+        )
+
+        result = await docker_model_runner_online()
+
+        assert result is True
+        mock_client.get.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_docker_model_runner_online_failure():
+    """Test that docker_model_runner_online returns False when service is unavailable."""
+    with patch(
+        "kiln_ai.adapters.docker_model_runner_tools.httpx.AsyncClient"
+    ) as mock_client_class:
+        mock_client = Mock()
+        mock_client.get = AsyncMock(side_effect=httpx.RequestError("Connection error"))
+        mock_client_class.return_value.__aenter__.return_value = mock_client
+
+        from kiln_ai.adapters.docker_model_runner_tools import (
+            docker_model_runner_online,
+        )
+
+        result = await docker_model_runner_online()
+
+        assert result is False
+
+
+@pytest.mark.asyncio
+async def test_get_docker_model_runner_connection_success():
+    """Test get_docker_model_runner_connection with successful connection."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        get_docker_model_runner_connection,
+    )
+
+    # Mock OpenAI client and models response
+    mock_model = Mock()
+    mock_model.id = "ai/llama3.2:3B-Q4_K_M"
+    mock_models_response = [mock_model]
+
+    with (
+        patch(
+            "kiln_ai.adapters.docker_model_runner_tools.openai.OpenAI"
+        ) as mock_openai,
+        patch(
+            "kiln_ai.adapters.docker_model_runner_tools.parse_docker_model_runner_models"
+        ) as mock_parse,
+        patch(
+            "kiln_ai.adapters.docker_model_runner_tools.docker_model_runner_base_url"
+        ) as mock_base_url,
+    ):
+        mock_base_url.return_value = "http://localhost:12434/engines"
+        mock_client = Mock()
+        mock_client.models.list.return_value = mock_models_response
+        mock_openai.return_value = mock_client
+
+        expected_connection = DockerModelRunnerConnection(
+            message="Connected",
+            supported_models=["ai/llama3.2:3B-Q4_K_M"],
+            untested_models=[],
+        )
+        mock_parse.return_value = expected_connection
+
+        result = await get_docker_model_runner_connection()
+
+        assert result == expected_connection
+        mock_openai.assert_called_once_with(
+            api_key="dummy",
+            base_url="http://localhost:12434/engines/v1",
+            max_retries=0,
+        )
+        mock_parse.assert_called_once_with(mock_models_response)
+
+
+@pytest.mark.asyncio
+async def test_get_docker_model_runner_connection_with_custom_url():
+    """Test get_docker_model_runner_connection with custom URL."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        get_docker_model_runner_connection,
+    )
+
+    # Mock OpenAI client and models response
+    mock_model = Mock()
+    mock_model.id = "ai/llama3.2:3B-Q4_K_M"
+    mock_models_response = [mock_model]
+
+    with (
+        patch(
+            "kiln_ai.adapters.docker_model_runner_tools.openai.OpenAI"
+        ) as mock_openai,
+        patch(
+            "kiln_ai.adapters.docker_model_runner_tools.parse_docker_model_runner_models"
+        ) as mock_parse,
+    ):
+        mock_client = Mock()
+        mock_client.models.list.return_value = mock_models_response
+        mock_openai.return_value = mock_client
+
+        expected_connection = DockerModelRunnerConnection(
+            message="Connected",
+            supported_models=["ai/llama3.2:3B-Q4_K_M"],
+            untested_models=[],
+        )
+        mock_parse.return_value = expected_connection
+
+        custom_url = "http://custom:8080/engines/llama.cpp"
+        result = await get_docker_model_runner_connection(custom_url)
+
+        assert result == expected_connection
+        mock_openai.assert_called_once_with(
+            api_key="dummy",
+            base_url=f"{custom_url}/v1",
+            max_retries=0,
+        )
+        mock_parse.assert_called_once_with(mock_models_response)
+
+
+@pytest.mark.asyncio
+async def test_get_docker_model_runner_connection_api_error():
+    """Test get_docker_model_runner_connection with API error."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        get_docker_model_runner_connection,
+    )
+
+    with patch(
+        "kiln_ai.adapters.docker_model_runner_tools.openai.OpenAI"
+    ) as mock_openai:
+        mock_client = Mock()
+        mock_client.models.list.side_effect = openai.APIConnectionError(request=Mock())
+        mock_openai.return_value = mock_client
+
+        result = await get_docker_model_runner_connection()
+
+        assert result is None
+
+
+@pytest.mark.asyncio
+async def test_get_docker_model_runner_connection_connection_error():
+    """Test get_docker_model_runner_connection with connection error."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        get_docker_model_runner_connection,
+    )
+
+    with patch(
+        "kiln_ai.adapters.docker_model_runner_tools.openai.OpenAI"
+    ) as mock_openai:
+        mock_client = Mock()
+        mock_client.models.list.side_effect = httpx.RequestError("Connection error")
+        mock_openai.return_value = mock_client
+
+        result = await get_docker_model_runner_connection()
+
+        assert result is None
+
+
+@pytest.mark.asyncio
+async def test_get_docker_model_runner_connection_http_error():
+    """Test get_docker_model_runner_connection with HTTP error."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        get_docker_model_runner_connection,
+    )
+
+    with patch(
+        "kiln_ai.adapters.docker_model_runner_tools.openai.OpenAI"
+    ) as mock_openai:
+        mock_client = Mock()
+        mock_client.models.list.side_effect = httpx.RequestError("HTTP error")
+        mock_openai.return_value = mock_client
+
+        result = await get_docker_model_runner_connection()
+
+        assert result is None
+
+
+def test_docker_model_runner_model_installed_true():
+    """Test docker_model_runner_model_installed returns True when model is installed."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        docker_model_runner_model_installed,
+    )
+
+    connection = DockerModelRunnerConnection(
+        message="Test",
+        supported_models=["model1", "model2"],
+        untested_models=["model3", "model4"],
+    )
+
+    # Test model in supported_models
+    assert docker_model_runner_model_installed(connection, "model1") is True
+
+    # Test model in untested_models
+    assert docker_model_runner_model_installed(connection, "model3") is True
+
+
+def test_docker_model_runner_model_installed_false():
+    """Test docker_model_runner_model_installed returns False when model is not installed."""
+    from kiln_ai.adapters.docker_model_runner_tools import (
+        docker_model_runner_model_installed,
+    )
+
+    connection = DockerModelRunnerConnection(
+        message="Test",
+        supported_models=["model1", "model2"],
+        untested_models=["model3", "model4"],
+    )
+
+    # Test model not in any list
+    assert docker_model_runner_model_installed(connection, "nonexistent_model") is False
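Taken together, the tests above outline the public surface of the new docker_model_runner_tools module: a configurable base URL, an async availability probe, an async connection helper that returns a DockerModelRunnerConnection (or None on errors), and a check for whether a given model is installed. The following is a minimal usage sketch inferred from those tests alone; the call signatures and return types are assumptions drawn from the test code, not taken from the module itself.

import asyncio

from kiln_ai.adapters.docker_model_runner_tools import (
    docker_model_runner_base_url,
    docker_model_runner_model_installed,
    docker_model_runner_online,
    get_docker_model_runner_connection,
)


async def describe_docker_model_runner() -> None:
    # Defaults to http://localhost:12434/engines/llama.cpp unless overridden in config.
    print("Base URL:", docker_model_runner_base_url())

    # Async probe; the tests show it returning True/False rather than raising.
    if not await docker_model_runner_online():
        print("Docker Model Runner is not reachable")
        return

    # Returns a DockerModelRunnerConnection, or None on API/connection errors.
    connection = await get_docker_model_runner_connection()
    if connection is None:
        print("Could not list models")
        return

    print(connection.message)
    print("Supported models:", connection.supported_models)
    print("Untested models:", connection.untested_models)
    print(
        "llama3.2 installed:",
        docker_model_runner_model_installed(connection, "ai/llama3.2:3B-Q4_K_M"),
    )


if __name__ == "__main__":
    asyncio.run(describe_docker_model_runner())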
kiln_ai/adapters/test_ml_model_list.py
@@ -1,3 +1,5 @@
+from collections import Counter
+
 import pytest
 
 from kiln_ai.adapters.ml_model_list import (
@@ -132,7 +134,7 @@ class TestDefaultStructuredOutputModeForModelProvider:
             ("llama_3_1_8b", ModelProviderName.groq, StructuredOutputMode.default),
             (
                 "qwq_32b",
-                ModelProviderName.
+                ModelProviderName.together_ai,
                 StructuredOutputMode.json_instructions,
             ),
         ],
@@ -186,3 +188,39 @@ def test_no_reasoning_for_structured_output():
         assert provider.reasoning_capable, (
             f"{model.name} {provider.name} has reasoning_optional_for_structured_output but is not reasoning capable. This field should only be defined for models that are reasoning capable."
         )
+
+
+def test_unique_providers_per_model():
+    """Test that each model can only have one entry per provider"""
+    for model in built_in_models:
+        provider_names = [provider.name for provider in model.providers]
+        unique_provider_names = set(provider_names)
+
+        if len(provider_names) != len(unique_provider_names):
+            # Find which providers have duplicates
+            provider_counts = Counter(provider_names)
+            duplicates = {
+                name: count for name, count in provider_counts.items() if count > 1
+            }
+
+            # Show details about duplicates
+            duplicate_details = []
+            for provider_name, count in duplicates.items():
+                duplicate_providers = [
+                    p for p in model.providers if p.name == provider_name
+                ]
+                model_ids = [p.model_id for p in duplicate_providers]
+                duplicate_details.append(
+                    f"{provider_name} (appears {count} times with model_ids: {model_ids})"
+                )
+
+            assert False, (
+                f"Model {model.name} has duplicate providers:\n"
+                f"Expected: 1 entry per provider\n"
+                f"Found: {len(provider_names)} total entries, {len(unique_provider_names)} unique providers\n"
+                f"Duplicates: {', '.join(duplicate_details)}\n"
+                f"This suggests either:\n"
+                f"1. A bug where the same provider is accidentally duplicated, or\n"
+                f"2. Intentional design where the same provider offers different model variants\n"
+                f"If this is intentional, the test should be updated to allow multiple entries per provider."
+            )
@@ -119,14 +119,16 @@ async def test_mock_returning_run(tmp_path):
         choices=[{"message": {"content": "mock response"}}],
     )
 
+    run_config = RunConfigProperties(
+        model_name="custom_model",
+        model_provider_name="ollama",
+        prompt_id="simple_prompt_builder",
+        structured_output_mode="json_schema",
+    )
+
     adapter = LiteLlmAdapter(
         config=LiteLlmConfig(
-            run_config_properties=
-            model_name="custom_model",
-            model_provider_name="ollama",
-            prompt_id="simple_prompt_builder",
-            structured_output_mode="json_schema",
-            ),
+            run_config_properties=run_config,
             base_url="http://localhost:11434",
             additional_body_options={"api_key": "test_key"},
         ),
@@ -140,7 +142,9 @@ async def test_mock_returning_run(tmp_path):
     assert run.id is not None
     assert run.input == "You are a mock, send me the response!"
     assert run.output.output == "mock response"
+    assert run.input_source is not None
     assert "created_by" in run.input_source.properties
+    assert run.output.source is not None
     assert run.output.source.properties == {
         "adapter_name": "kiln_openai_compatible_adapter",
         "model_name": "custom_model",
@@ -150,6 +154,9 @@ async def test_mock_returning_run(tmp_path):
         "temperature": 1.0,
         "top_p": 1.0,
     }
+    assert run.output.source.run_config is not None
+    saved_run_config = run.output.source.run_config.model_dump()
+    assert saved_run_config == run_config.model_dump()
 
 
 @pytest.mark.paid
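The refactor above points at an adapter API where run settings are built as a RunConfigProperties object, passed to LiteLlmConfig, and then persisted on the run's output source (run.output.source.run_config). A minimal sketch of that construction follows; the import paths are assumptions based on the file list (kiln_ai/datamodel/run_config.py and kiln_ai/adapters/model_adapters/litellm_adapter.py), since this hunk does not show the test module's imports.

# Assumed imports; the hunk above does not include the import block.
from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter, LiteLlmConfig
from kiln_ai.datamodel.run_config import RunConfigProperties

# Build the run settings once, so the same object can later be compared against
# run.output.source.run_config.model_dump() after the run completes.
run_config = RunConfigProperties(
    model_name="custom_model",
    model_provider_name="ollama",
    prompt_id="simple_prompt_builder",
    structured_output_mode="json_schema",
)

config = LiteLlmConfig(
    run_config_properties=run_config,
    base_url="http://localhost:11434",
    additional_body_options={"api_key": "test_key"},
)
# adapter = LiteLlmAdapter(config=config, ...)  # remaining constructor arguments are not shown in this hunk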
kiln_ai/adapters/test_provider_tools.py
@@ -2,6 +2,7 @@ from unittest.mock import AsyncMock, Mock, patch
 
 import pytest
 
+from kiln_ai.adapters.docker_model_runner_tools import DockerModelRunnerConnection
 from kiln_ai.adapters.ml_model_list import (
     KilnModel,
     ModelName,
@@ -931,3 +932,57 @@ def test_finetune_provider_model_vertex_ai(mock_project, mock_task, mock_finetun
     # Verify the model_id is transformed into openai/endpoint_id format
     assert provider.model_id == "openai/456"
     assert provider.structured_output_mode == StructuredOutputMode.json_mode
+
+
+@pytest.mark.asyncio
+async def test_provider_enabled_docker_model_runner_success():
+    """Test provider_enabled for Docker Model Runner with successful connection"""
+    with patch(
+        "kiln_ai.adapters.provider_tools.get_docker_model_runner_connection",
+        new_callable=AsyncMock,
+    ) as mock_get_docker:
+        # Mock successful Docker Model Runner connection with models
+        mock_get_docker.return_value = DockerModelRunnerConnection(
+            message="Connected",
+            supported_models=["llama-3.2-3b-instruct"],
+            untested_models=[],
+        )
+
+        result = await provider_enabled(ModelProviderName.docker_model_runner)
+        assert result is True
+
+
+@pytest.mark.asyncio
+async def test_provider_enabled_docker_model_runner_no_models():
+    """Test provider_enabled for Docker Model Runner with no models"""
+    with patch(
+        "kiln_ai.adapters.provider_tools.get_docker_model_runner_connection",
+        new_callable=AsyncMock,
+    ) as mock_get_docker:
+        # Mock Docker Model Runner connection but with no models
+        mock_get_docker.return_value = DockerModelRunnerConnection(
+            message="Connected but no models", supported_models=[], untested_models=[]
+        )
+
+        result = await provider_enabled(ModelProviderName.docker_model_runner)
+        assert result is False
+
+
+@pytest.mark.asyncio
+async def test_provider_enabled_docker_model_runner_connection_error():
+    """Test provider_enabled for Docker Model Runner with connection error"""
+    with patch(
+        "kiln_ai.adapters.provider_tools.get_docker_model_runner_connection",
+        new_callable=AsyncMock,
+    ) as mock_get_docker:
+        # Mock Docker Model Runner connection failure
+        mock_get_docker.side_effect = Exception("Connection failed")
+
+        result = await provider_enabled(ModelProviderName.docker_model_runner)
+        assert result is False
+
+
+def test_provider_name_from_id_docker_model_runner():
+    """Test provider_name_from_id for Docker Model Runner"""
+    result = provider_name_from_id(ModelProviderName.docker_model_runner)
+    assert result == "Docker Model Runner"
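For orientation, the provider helpers exercised above read as a small capability check: provider_enabled reports whether Docker Model Runner is reachable and has at least one model, and provider_name_from_id maps the enum value to its display name. A short sketch follows, assuming both helpers are importable from kiln_ai.adapters.provider_tools (the patch targets above suggest that module, but the test's own import lines are not shown in this hunk).

import asyncio

# Assumed import location, based on the patch target used in the tests above.
from kiln_ai.adapters.provider_tools import provider_enabled, provider_name_from_id
from kiln_ai.datamodel.datamodel_enums import ModelProviderName


async def check_docker_model_runner() -> None:
    display_name = provider_name_from_id(ModelProviderName.docker_model_runner)
    enabled = await provider_enabled(ModelProviderName.docker_model_runner)
    print(f"{display_name} enabled: {enabled}")


if __name__ == "__main__":
    asyncio.run(check_docker_model_runner())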
kiln_ai/adapters/test_remote_config.py
@@ -454,3 +454,101 @@ def test_deserialize_config_empty_json_structures(tmp_path):
     path.write_text(json.dumps({}))
     with pytest.raises(ValueError):
         deserialize_config_at_path(path)
+
+
+def test_backwards_compatibility_with_v0_19(tmp_path):
+    """Test that kiln-ai v0.19 (first version with remote config) can parse JSON from current version.
+
+    This ensures our serialization format remains backwards compatible using uv scripts.
+
+    Skipped in CI/CD/VScode (needs UV), so you have to run it from the CLI (fine since it's slow):
+    Run from CLI: KILN_TEST_COMPATIBILITY=1 uv run python3 -m pytest libs/core/kiln_ai/adapters/test_remote_config.py::test_backwards_compatibility_with_v0_19 -s -v
+    """
+
+    # Skip unless explicitly requested via environment variable
+    if not os.environ.get("KILN_TEST_COMPATIBILITY"):
+        pytest.skip(
+            "Compatibility test skipped. Set KILN_TEST_COMPATIBILITY=1 to run this test."
+        )
+
+    import shutil
+    import subprocess
+
+    # Check if uv is available
+    if not shutil.which("uv"):
+        pytest.skip("uv is not available for compatibility test")
+
+    # Create JSON with current version
+    current_json_path = tmp_path / "current_models.json"
+    serialize_config(built_in_models, current_json_path)
+
+    # Test script using uv inline script metadata to install v0.19
+    test_script = f'''# /// script
+# dependencies = [
+#   "kiln-ai==0.19.0",
+#   "pandas",
+# ]
+# ///
+import sys
+import json
+from pathlib import Path
+
+# Import from v0.19
+try:
+    from kiln_ai.adapters.remote_config import deserialize_config_at_path
+    from kiln_ai.adapters.ml_model_list import KilnModel
+
+    # Try to deserialize current JSON with v0.19 code
+    models = deserialize_config_at_path("{current_json_path}")
+
+    # Basic validation - should have parsed successfully
+    assert len(models) > 0
+    assert all(isinstance(m, KilnModel) for m in models)
+
+    # Check basic fields exist and have expected types
+    for model in models:
+        assert hasattr(model, 'family') and isinstance(model.family, str)
+        assert hasattr(model, 'name') and isinstance(model.name, str)
+        assert hasattr(model, 'friendly_name') and isinstance(model.friendly_name, str)
+        assert hasattr(model, 'providers') and isinstance(model.providers, list)
+
+        # Check providers have basic fields
+        for provider in model.providers:
+            assert hasattr(provider, 'name')
+
+    sys.stdout.write("SUCCESS: v0.19 successfully parsed JSON from current version")
+    sys.stdout.write(f"Parsed {{len(models)}} models")
+
+except Exception as e:
+    sys.stdout.write(f"ERROR: {{e}}")
+    sys.exit(1)
+'''
+
+    try:
+        # Write the uv script
+        script_path = tmp_path / "test_v0_19.py"
+        script_path.write_text(test_script)
+
+        # Run the script using uv
+        result = subprocess.run(
+            ["uv", "run", str(script_path)], capture_output=True, text=True
+        )
+
+        # Check if the test passed
+        if result.returncode != 0:
+            pytest.fail(
+                f"v0.19 compatibility test failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}"
+            )
+
+        # Verify success message was printed
+        assert (
+            "SUCCESS: v0.19 successfully parsed JSON from current version"
+            in result.stdout
+        )
+
+    except subprocess.CalledProcessError as e:
+        # If we can't run uv, skip the test (might be network issues, etc.)
+        pytest.skip(f"Could not run uv script for compatibility test: {e}")
+    except FileNotFoundError:
+        # If uv command not found
+        pytest.skip("uv command not found for compatibility test")
kiln_ai/datamodel/__init__.py CHANGED
@@ -3,7 +3,7 @@ See our docs for details about our datamodel classes and hierarchy:
 
 Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
 
-User docs: https://docs.
+User docs: https://docs.kiln.tech/developers/kiln-datamodel
 """
 
 # This component uses "flat" imports so we don't have too much internal structure exposed in the API.
@@ -22,6 +22,7 @@ from kiln_ai.datamodel.dataset_split import (
     DatasetSplit,
     DatasetSplitDefinition,
 )
+from kiln_ai.datamodel.external_tool_server import ExternalToolServer
 from kiln_ai.datamodel.finetune import (
     Finetune,
 )
@@ -47,31 +48,32 @@ from kiln_ai.datamodel.task_run import (
 )
 
 __all__ = [
-    "
-    "dataset_split",
-    "eval",
-    "Task",
-    "Project",
-    "TaskRun",
-    "TaskOutput",
-    "Priority",
+    "BasePrompt",
     "DataSource",
-    "DataSourceType",
     "DataSourceProperty",
-    "
+    "DataSourceType",
+    "DatasetSplit",
+    "DatasetSplitDefinition",
+    "ExternalToolServer",
     "FineTuneStatusType",
+    "Finetune",
+    "Priority",
+    "Project",
+    "Prompt",
+    "PromptGenerators",
+    "PromptId",
+    "RequirementRating",
+    "StructuredOutputMode",
+    "Task",
+    "TaskOutput",
+    "TaskOutputRating",
     "TaskOutputRatingType",
     "TaskRequirement",
-    "DatasetSplitDefinition",
-    "DatasetSplit",
-    "RequirementRating",
     "TaskRequirement",
-    "
-    "Prompt",
-    "TaskOutputRating",
-    "StructuredOutputMode",
-    "PromptId",
-    "PromptGenerators",
-    "prompt_generator_values",
+    "TaskRun",
     "Usage",
+    "dataset_split",
+    "eval",
+    "prompt_generator_values",
+    "strict_mode",
 ]
kiln_ai/datamodel/eval.py CHANGED
@@ -252,7 +252,7 @@ class EvalConfig(KilnParentedModel, KilnParentModel, parent_of={"runs": EvalRun}
             # This will raise a TypeError if the dict contains non-JSON-serializable objects
            json.dumps(self.properties)
        except TypeError as e:
-            raise ValueError(f"Properties must be JSON serializable: {
+            raise ValueError(f"Properties must be JSON serializable: {e!s}")
         return self
 
 
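For context on the one-line fix above: "!s" is an f-string conversion that applies str() to the value, so the exception's message is embedded in the error text rather than the truncated expression the old line left behind. A standalone illustration, not taken from the package:

import json

try:
    json.dumps({"bad": object()})  # object() is not JSON serializable
except TypeError as e:
    # {e!s} is equivalent to str(e); it embeds the exception message.
    print(f"Properties must be JSON serializable: {e!s}")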