kiln-ai 0.19.0__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic.

Files changed (70)
  1. kiln_ai/adapters/__init__.py +2 -2
  2. kiln_ai/adapters/adapter_registry.py +19 -1
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  6. kiln_ai/adapters/eval/base_eval.py +2 -2
  7. kiln_ai/adapters/eval/eval_runner.py +3 -1
  8. kiln_ai/adapters/eval/g_eval.py +2 -2
  9. kiln_ai/adapters/eval/test_base_eval.py +1 -1
  10. kiln_ai/adapters/eval/test_g_eval.py +3 -4
  11. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  12. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  13. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  14. kiln_ai/adapters/ml_model_list.py +380 -34
  15. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  16. kiln_ai/adapters/model_adapters/litellm_adapter.py +383 -79
  17. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  18. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +406 -1
  19. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  20. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  21. kiln_ai/adapters/model_adapters/test_structured_output.py +110 -4
  22. kiln_ai/adapters/parsers/__init__.py +1 -1
  23. kiln_ai/adapters/provider_tools.py +15 -1
  24. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  25. kiln_ai/adapters/run_output.py +3 -0
  26. kiln_ai/adapters/test_adapter_registry.py +80 -1
  27. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  28. kiln_ai/adapters/test_ml_model_list.py +39 -1
  29. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  30. kiln_ai/adapters/test_provider_tools.py +55 -0
  31. kiln_ai/adapters/test_remote_config.py +98 -0
  32. kiln_ai/datamodel/__init__.py +23 -21
  33. kiln_ai/datamodel/datamodel_enums.py +1 -0
  34. kiln_ai/datamodel/eval.py +1 -1
  35. kiln_ai/datamodel/external_tool_server.py +298 -0
  36. kiln_ai/datamodel/json_schema.py +25 -10
  37. kiln_ai/datamodel/project.py +8 -1
  38. kiln_ai/datamodel/registry.py +0 -15
  39. kiln_ai/datamodel/run_config.py +62 -0
  40. kiln_ai/datamodel/task.py +2 -77
  41. kiln_ai/datamodel/task_output.py +6 -1
  42. kiln_ai/datamodel/task_run.py +41 -0
  43. kiln_ai/datamodel/test_basemodel.py +3 -3
  44. kiln_ai/datamodel/test_example_models.py +175 -0
  45. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  46. kiln_ai/datamodel/test_registry.py +8 -3
  47. kiln_ai/datamodel/test_task.py +15 -47
  48. kiln_ai/datamodel/test_tool_id.py +239 -0
  49. kiln_ai/datamodel/tool_id.py +83 -0
  50. kiln_ai/tools/__init__.py +8 -0
  51. kiln_ai/tools/base_tool.py +82 -0
  52. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  53. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  54. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  55. kiln_ai/tools/mcp_server_tool.py +95 -0
  56. kiln_ai/tools/mcp_session_manager.py +243 -0
  57. kiln_ai/tools/test_base_tools.py +199 -0
  58. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  59. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  60. kiln_ai/tools/test_tool_registry.py +473 -0
  61. kiln_ai/tools/tool_registry.py +64 -0
  62. kiln_ai/utils/config.py +22 -0
  63. kiln_ai/utils/open_ai_types.py +94 -0
  64. kiln_ai/utils/project_utils.py +17 -0
  65. kiln_ai/utils/test_config.py +138 -1
  66. kiln_ai/utils/test_open_ai_types.py +131 -0
  67. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +6 -5
  68. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/RECORD +70 -47
  69. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
  70. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -13,7 +13,7 @@ from kiln_ai.datamodel import (
     Task,
     Usage,
 )
-from kiln_ai.datamodel.task import RunConfig
+from kiln_ai.datamodel.task import RunConfigProperties
 from kiln_ai.utils.config import Config


@@ -41,8 +41,8 @@ def test_task(tmp_path):
 @pytest.fixture
 def adapter(test_task):
     return MockAdapter(
-        run_config=RunConfig(
-            task=test_task,
+        task=test_task,
+        run_config=RunConfigProperties(
             model_name="phi_3_5",
             model_provider_name="ollama",
             prompt_id="simple_chain_of_thought_prompt_builder",
@@ -240,8 +240,8 @@ async def test_autosave_true(test_task, adapter):
 def test_properties_for_task_output_custom_values(test_task):
     """Test that _properties_for_task_output includes custom temperature, top_p, and structured_output_mode"""
     adapter = MockAdapter(
-        run_config=RunConfig(
-            task=test_task,
+        task=test_task,
+        run_config=RunConfigProperties(
             model_name="gpt-4",
             model_provider_name="openai",
             prompt_id="simple_prompt_builder",
@@ -1,8 +1,10 @@
 import json
 from pathlib import Path
 from typing import Dict
+from unittest.mock import Mock, patch

 import pytest
+from litellm.types.utils import ModelResponse

 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.adapter_registry import adapter_for_task
@@ -11,7 +13,7 @@ from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput,
 from kiln_ai.adapters.ollama_tools import ollama_online
 from kiln_ai.adapters.test_prompt_adaptors import get_all_models_and_providers
 from kiln_ai.datamodel import PromptId
-from kiln_ai.datamodel.task import RunConfig, RunConfigProperties
+from kiln_ai.datamodel.task import RunConfigProperties
 from kiln_ai.datamodel.test_json_schema import json_joke_schema, json_triangle_schema


@@ -40,8 +42,8 @@ async def test_structured_output_ollama(tmp_path, model_name):
 class MockAdapter(BaseAdapter):
     def __init__(self, kiln_task: datamodel.Task, response: Dict | str | None):
         super().__init__(
-            run_config=RunConfig(
-                task=kiln_task,
+            task=kiln_task,
+            run_config=RunConfigProperties(
                 model_name="phi_3_5",
                 model_provider_name="ollama",
                 prompt_id="simple_chain_of_thought_prompt_builder",
@@ -259,6 +261,7 @@ async def run_structured_input_task(
     model_name: str,
     provider: str,
     prompt_id: PromptId,
+    verify_trace_cot: bool = False,
 ):
     response, a, run = await run_structured_input_task_no_validation(
         task, model_name, provider, prompt_id
@@ -282,6 +285,32 @@ async def run_structured_input_task(
         assert "reasoning" in run.intermediate_outputs
         assert isinstance(run.intermediate_outputs["reasoning"], str)

+    # Check the trace
+    trace = run.trace
+    assert trace is not None
+    if verify_trace_cot:
+        assert len(trace) == 5
+        assert trace[0]["role"] == "system"
+        assert "You are an assistant which classifies a triangle" in trace[0]["content"]
+        assert trace[1]["role"] == "user"
+        assert trace[2]["role"] == "assistant"
+        assert trace[2].get("tool_calls") is None
+        assert trace[3]["role"] == "user"
+        assert trace[4]["role"] == "assistant"
+        assert trace[4].get("tool_calls") is None
+    else:
+        assert len(trace) == 3
+        assert trace[0]["role"] == "system"
+        assert "You are an assistant which classifies a triangle" in trace[0]["content"]
+        assert trace[1]["role"] == "user"
+        json_content = json.loads(trace[1]["content"])
+        assert json_content["a"] == 2
+        assert json_content["b"] == 2
+        assert json_content["c"] == 2
+        assert trace[2]["role"] == "assistant"
+        assert trace[2].get("tool_calls") is None
+        assert "[[equilateral]]" in trace[2]["content"]
+

 @pytest.mark.paid
 async def test_structured_input_gpt_4o_mini(tmp_path):
@@ -299,15 +328,92 @@ async def test_all_built_in_models_structured_input(
     )


+async def test_all_built_in_models_structured_input_mocked(tmp_path):
+    mock_response = ModelResponse(
+        model="gpt-4o-mini",
+        choices=[
+            {
+                "message": {
+                    "content": "The answer is [[equilateral]]",
+                }
+            }
+        ],
+    )
+
+    # Mock the Config.shared() method to return a mock config with required attributes
+    mock_config = Mock()
+    mock_config.open_ai_api_key = "mock_api_key"
+    mock_config.user_id = "test_user"
+
+    with (
+        patch(
+            "litellm.acompletion",
+            side_effect=[mock_response],
+        ),
+        patch("kiln_ai.utils.config.Config.shared", return_value=mock_config),
+    ):
+        await run_structured_input_test(
+            tmp_path, "llama_3_1_8b", "groq", "simple_prompt_builder"
+        )
+
+
 @pytest.mark.paid
 @pytest.mark.ollama
 @pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
 async def test_structured_input_cot_prompt_builder(tmp_path, model_name, provider_name):
     task = build_structured_input_test_task(tmp_path)
     await run_structured_input_task(
-        task, model_name, provider_name, "simple_chain_of_thought_prompt_builder"
+        task,
+        model_name,
+        provider_name,
+        "simple_chain_of_thought_prompt_builder",
+        verify_trace_cot=True,
+    )
+
+
+async def test_structured_input_cot_prompt_builder_mocked(tmp_path):
+    task = build_structured_input_test_task(tmp_path)
+    mock_response_1 = ModelResponse(
+        model="gpt-4o-mini",
+        choices=[
+            {
+                "message": {
+                    "content": "I'm thinking real hard... oh!",
+                }
+            }
+        ],
+    )
+    mock_response_2 = ModelResponse(
+        model="gpt-4o-mini",
+        choices=[
+            {
+                "message": {
+                    "content": "After thinking, I've decided the answer is [[equilateral]]",
+                }
+            }
+        ],
     )

+    # Mock the Config.shared() method to return a mock config with required attributes
+    mock_config = Mock()
+    mock_config.open_ai_api_key = "mock_api_key"
+    mock_config.user_id = "test_user"
+
+    with (
+        patch(
+            "litellm.acompletion",
+            side_effect=[mock_response_1, mock_response_2],
+        ),
+        patch("kiln_ai.utils.config.Config.shared", return_value=mock_config),
+    ):
+        await run_structured_input_task(
+            task,
+            "llama_3_1_8b",
+            "groq",
+            "simple_chain_of_thought_prompt_builder",
+            verify_trace_cot=True,
+        )
+

 @pytest.mark.paid
 @pytest.mark.ollama
@@ -7,4 +7,4 @@ Parsing utilities for JSON and models with custom output formats (R1, etc.)

 from . import base_parser, json_parser, r1_parser

-__all__ = ["r1_parser", "base_parser", "json_parser"]
+__all__ = ["base_parser", "json_parser", "r1_parser"]
@@ -2,6 +2,9 @@ import logging
 from dataclasses import dataclass
 from typing import Dict, List

+from kiln_ai.adapters.docker_model_runner_tools import (
+    get_docker_model_runner_connection,
+)
 from kiln_ai.adapters.ml_model_list import (
     KilnModel,
     KilnModelProvider,
@@ -14,10 +17,10 @@ from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
 from kiln_ai.adapters.ollama_tools import get_ollama_connection
 from kiln_ai.datamodel import Finetune, Task
 from kiln_ai.datamodel.datamodel_enums import ChatStrategy
-from kiln_ai.datamodel.registry import project_from_id
 from kiln_ai.datamodel.task import RunConfigProperties
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+from kiln_ai.utils.project_utils import project_from_id

 logger = logging.getLogger(__name__)

@@ -32,6 +35,15 @@ async def provider_enabled(provider_name: ModelProviderName) -> bool:
         except Exception:
             return False

+    if provider_name == ModelProviderName.docker_model_runner:
+        try:
+            conn = await get_docker_model_runner_connection()
+            return conn is not None and (
+                len(conn.supported_models) > 0 or len(conn.untested_models) > 0
+            )
+        except Exception:
+            return False
+
     provider_warning = provider_warnings.get(provider_name)
     if provider_warning is None:
         return False
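For orientation only, not part of the diff: a minimal usage sketch of the new Docker Model Runner branch above. It assumes these hunks come from kiln_ai/adapters/provider_tools.py and that provider_enabled is importable from that module (ModelProviderName's import path is shown elsewhere in this diff); per the added code, the call returns True only when a connection exists and reports at least one supported or untested model.

import asyncio

from kiln_ai.adapters.ml_model_list import ModelProviderName
from kiln_ai.adapters.provider_tools import provider_enabled  # assumed module path


async def check_docker_model_runner() -> bool:
    # True only if a Docker Model Runner connection is reachable and exposes
    # at least one supported or untested model, per the new branch above.
    return await provider_enabled(ModelProviderName.docker_model_runner)


if __name__ == "__main__":
    print("Docker Model Runner enabled:", asyncio.run(check_docker_model_runner()))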
@@ -377,6 +389,8 @@ def provider_name_from_id(id: str) -> str:
             return "SiliconFlow"
         case ModelProviderName.cerebras:
             return "Cerebras"
+        case ModelProviderName.docker_model_runner:
+            return "Docker Model Runner"
         case _:
             # triggers pyright warning if I miss a case
             raise_exhaustive_enum_error(enum_id)
@@ -229,21 +229,20 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
         "rating": 8,
     }

+    run_config = RunConfigProperties(
+        model_name="llama_3_1_8b",
+        model_provider_name="ollama",
+        prompt_id="simple_prompt_builder",
+        structured_output_mode="json_schema",
+    )
+
     with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
         mock_run.return_value = (
             RunOutput(output=mocked_output, intermediate_outputs=None),
             None,
         )

-        adapter = adapter_for_task(
-            repair_task,
-            RunConfigProperties(
-                model_name="llama_3_1_8b",
-                model_provider_name="ollama",
-                prompt_id="simple_prompt_builder",
-                structured_output_mode="json_schema",
-            ),
-        )
+        adapter = adapter_for_task(repair_task, run_config)

         run = await adapter.invoke(repair_task_input.model_dump())

@@ -264,6 +263,10 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
     }
     assert run.input_source.type == DataSourceType.human
     assert "created_by" in run.input_source.properties
+    assert run.output.source is not None
+    assert run.output.source.run_config is not None
+    saved_run_config = run.output.source.run_config.model_dump()
+    assert saved_run_config == run_config.model_dump()

     # Verify that the mock was called
     mock_run.assert_called_once()
@@ -3,9 +3,12 @@ from typing import Dict

 from litellm.types.utils import ChoiceLogprobs

+from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
+

 @dataclass
 class RunOutput:
     output: Dict | str
     intermediate_outputs: Dict[str, str] | None
     output_logprobs: ChoiceLogprobs | None = None
+    trace: list[ChatCompletionMessageParam] | None = None
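A brief sketch, not part of the diff, of what the new trace field on RunOutput holds, mirroring the trace assertions in the structured-output test hunk above. The import path (kiln_ai.adapters.run_output), the intermediate_outputs value, and the exact message dicts are assumptions; messages are OpenAI-style chat dicts per ChatCompletionMessageParam.

from kiln_ai.adapters.run_output import RunOutput  # assumed module path for this hunk

run_output = RunOutput(
    output="The answer is [[equilateral]]",
    intermediate_outputs={"reasoning": "All three sides are equal."},  # illustrative value
    # The new trace field records the full chat exchange as OpenAI-style message dicts.
    trace=[
        {"role": "system", "content": "You are an assistant which classifies a triangle."},
        {"role": "user", "content": '{"a": 2, "b": 2, "c": 2}'},
        {"role": "assistant", "content": "The answer is [[equilateral]]"},
    ],
)
assert run_output.trace is not None
assert run_output.trace[0]["role"] == "system"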
@@ -8,6 +8,7 @@ from kiln_ai.adapters.ml_model_list import ModelProviderName
 from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
 from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
 from kiln_ai.adapters.provider_tools import kiln_model_provider_from
+from kiln_ai.datamodel.datamodel_enums import StructuredOutputMode
 from kiln_ai.datamodel.task import RunConfigProperties


@@ -17,6 +18,9 @@ def mock_config():
     mock.shared.return_value.open_ai_api_key = "test-openai-key"
     mock.shared.return_value.open_router_api_key = "test-openrouter-key"
     mock.shared.return_value.siliconflow_cn_api_key = "test-siliconflow-key"
+    mock.shared.return_value.docker_model_runner_base_url = (
+        "http://localhost:12434/engines/llama.cpp"
+    )
     yield mock


@@ -108,7 +112,7 @@ def test_siliconflow_adapter_creation(mock_config, basic_task):
         == ModelProviderName.siliconflow_cn
     )
     assert adapter.config.default_headers == {
-        "HTTP-Referer": "https://getkiln.ai/siliconflow",
+        "HTTP-Referer": "https://kiln.tech/siliconflow",
         "X-Title": "KilnAI",
     }

@@ -260,3 +264,78 @@ async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id
     )
     # The actual model name from the fine tune object
     assert provider.model_id == "test-model"
+
+
+def test_docker_model_runner_adapter_creation(mock_config, basic_task):
+    """Test Docker Model Runner adapter creation with default and custom base URL."""
+    adapter = adapter_for_task(
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="llama_3_2_3b",
+            model_provider_name=ModelProviderName.docker_model_runner,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode=StructuredOutputMode.json_schema,
+        ),
+    )
+
+    assert isinstance(adapter, LiteLlmAdapter)
+    assert adapter.config.run_config_properties.model_name == "llama_3_2_3b"
+    assert adapter.config.additional_body_options == {"api_key": "DMR"}
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.docker_model_runner
+    )
+    assert adapter.config.base_url == "http://localhost:12434/engines/llama.cpp/v1"
+    assert adapter.config.default_headers is None
+
+
+def test_docker_model_runner_adapter_creation_with_custom_url(mock_config, basic_task):
+    """Test Docker Model Runner adapter creation with custom base URL."""
+    mock_config.shared.return_value.docker_model_runner_base_url = (
+        "http://custom:8080/engines/llama.cpp"
+    )
+
+    adapter = adapter_for_task(
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="llama_3_2_3b",
+            model_provider_name=ModelProviderName.docker_model_runner,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode=StructuredOutputMode.json_schema,
+        ),
+    )
+
+    assert isinstance(adapter, LiteLlmAdapter)
+    assert adapter.config.run_config_properties.model_name == "llama_3_2_3b"
+    assert adapter.config.additional_body_options == {"api_key": "DMR"}
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.docker_model_runner
+    )
+    assert adapter.config.base_url == "http://custom:8080/engines/llama.cpp/v1"
+    assert adapter.config.default_headers is None
+
+
+def test_docker_model_runner_adapter_creation_with_none_url(mock_config, basic_task):
+    """Test Docker Model Runner adapter creation when config URL is None."""
+    mock_config.shared.return_value.docker_model_runner_base_url = None
+
+    adapter = adapter_for_task(
+        kiln_task=basic_task,
+        run_config_properties=RunConfigProperties(
+            model_name="llama_3_2_3b",
+            model_provider_name=ModelProviderName.docker_model_runner,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode=StructuredOutputMode.json_schema,
+        ),
+    )
+
+    assert isinstance(adapter, LiteLlmAdapter)
+    assert adapter.config.run_config_properties.model_name == "llama_3_2_3b"
+    assert adapter.config.additional_body_options == {"api_key": "DMR"}
+    assert (
+        adapter.config.run_config_properties.model_provider_name
+        == ModelProviderName.docker_model_runner
+    )
+    assert adapter.config.base_url == "http://localhost:12434/engines/llama.cpp/v1"
+    assert adapter.config.default_headers is None