kiln-ai 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (80)
  1. kiln_ai/adapters/__init__.py +4 -0
  2. kiln_ai/adapters/adapter_registry.py +163 -39
  3. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  4. kiln_ai/adapters/eval/__init__.py +28 -0
  5. kiln_ai/adapters/eval/base_eval.py +164 -0
  6. kiln_ai/adapters/eval/eval_runner.py +270 -0
  7. kiln_ai/adapters/eval/g_eval.py +368 -0
  8. kiln_ai/adapters/eval/registry.py +16 -0
  9. kiln_ai/adapters/eval/test_base_eval.py +325 -0
  10. kiln_ai/adapters/eval/test_eval_runner.py +641 -0
  11. kiln_ai/adapters/eval/test_g_eval.py +498 -0
  12. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
  14. kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
  15. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  16. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
  17. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  18. kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
  19. kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
  20. kiln_ai/adapters/ml_model_list.py +758 -163
  21. kiln_ai/adapters/model_adapters/__init__.py +2 -4
  22. kiln_ai/adapters/model_adapters/base_adapter.py +61 -43
  23. kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
  24. kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
  25. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  26. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
  27. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  28. kiln_ai/adapters/model_adapters/test_structured_output.py +59 -35
  29. kiln_ai/adapters/ollama_tools.py +3 -3
  30. kiln_ai/adapters/parsers/r1_parser.py +19 -14
  31. kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
  32. kiln_ai/adapters/prompt_builders.py +80 -42
  33. kiln_ai/adapters/provider_tools.py +50 -58
  34. kiln_ai/adapters/repair/repair_task.py +9 -21
  35. kiln_ai/adapters/repair/test_repair_task.py +6 -6
  36. kiln_ai/adapters/run_output.py +3 -0
  37. kiln_ai/adapters/test_adapter_registry.py +26 -29
  38. kiln_ai/adapters/test_generate_docs.py +4 -4
  39. kiln_ai/adapters/test_ollama_tools.py +0 -1
  40. kiln_ai/adapters/test_prompt_adaptors.py +47 -33
  41. kiln_ai/adapters/test_prompt_builders.py +91 -31
  42. kiln_ai/adapters/test_provider_tools.py +26 -81
  43. kiln_ai/datamodel/__init__.py +50 -952
  44. kiln_ai/datamodel/basemodel.py +2 -0
  45. kiln_ai/datamodel/datamodel_enums.py +60 -0
  46. kiln_ai/datamodel/dataset_filters.py +114 -0
  47. kiln_ai/datamodel/dataset_split.py +170 -0
  48. kiln_ai/datamodel/eval.py +298 -0
  49. kiln_ai/datamodel/finetune.py +105 -0
  50. kiln_ai/datamodel/json_schema.py +7 -1
  51. kiln_ai/datamodel/project.py +23 -0
  52. kiln_ai/datamodel/prompt.py +37 -0
  53. kiln_ai/datamodel/prompt_id.py +83 -0
  54. kiln_ai/datamodel/strict_mode.py +24 -0
  55. kiln_ai/datamodel/task.py +181 -0
  56. kiln_ai/datamodel/task_output.py +328 -0
  57. kiln_ai/datamodel/task_run.py +164 -0
  58. kiln_ai/datamodel/test_basemodel.py +19 -11
  59. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  60. kiln_ai/datamodel/test_dataset_split.py +32 -8
  61. kiln_ai/datamodel/test_datasource.py +22 -2
  62. kiln_ai/datamodel/test_eval_model.py +635 -0
  63. kiln_ai/datamodel/test_example_models.py +9 -13
  64. kiln_ai/datamodel/test_json_schema.py +23 -0
  65. kiln_ai/datamodel/test_models.py +2 -2
  66. kiln_ai/datamodel/test_prompt_id.py +129 -0
  67. kiln_ai/datamodel/test_task.py +159 -0
  68. kiln_ai/utils/config.py +43 -1
  69. kiln_ai/utils/dataset_import.py +232 -0
  70. kiln_ai/utils/test_dataset_import.py +596 -0
  71. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +86 -6
  72. kiln_ai-0.13.0.dist-info/RECORD +103 -0
  73. kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -302
  74. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -11
  75. kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -246
  76. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -350
  77. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -225
  78. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  79. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
  80. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/adapters/repair/repair_task.py

@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
 from kiln_ai.adapters.prompt_builders import (
     BasePromptBuilder,
     SavedPromptBuilder,
-    prompt_builder_registry,
+    prompt_builder_from_id,
 )
 from kiln_ai.datamodel import Priority, Project, Task, TaskRequirement, TaskRun
 
@@ -49,28 +49,16 @@ feedback describing what should be improved. Your job is to understand the evalu
         if run.output.source is None or run.output.source.properties is None:
             raise ValueError("No source properties found")
 
-        # Try ID first, then builder name
-        prompt_id = run.output.source.properties.get("prompt_id", None)
+        # Get the prompt builder id. Need the second check because we used to store this in a prompt_builder_name field, so loading legacy runs will need this.
+        prompt_id = run.output.source.properties.get(
+            "prompt_id"
+        ) or run.output.source.properties.get("prompt_builder_name", None)
         if prompt_id is not None and isinstance(prompt_id, str):
-            static_prompt_builder = SavedPromptBuilder(task, prompt_id)
-            return static_prompt_builder.build_prompt(include_json_instructions=False)
+            prompt_builder = prompt_builder_from_id(prompt_id, task)
+            if isinstance(prompt_builder, BasePromptBuilder):
+                return prompt_builder.build_prompt(include_json_instructions=False)
 
-        prompt_builder_class: Type[BasePromptBuilder] | None = None
-        prompt_builder_name = run.output.source.properties.get(
-            "prompt_builder_name", None
-        )
-        if prompt_builder_name is not None and isinstance(prompt_builder_name, str):
-            prompt_builder_class = prompt_builder_registry.get(
-                prompt_builder_name, None
-            )
-        if prompt_builder_class is None:
-            raise ValueError(f"No prompt builder found for name: {prompt_builder_name}")
-        prompt_builder = prompt_builder_class(task=task)
-        if not isinstance(prompt_builder, BasePromptBuilder):
-            raise ValueError(
-                f"Prompt builder {prompt_builder_name} is not a valid prompt builder"
-            )
-        return prompt_builder.build_prompt(include_json_instructions=False)
+        raise ValueError(f"Prompt builder '{prompt_id}' is not a valid prompt builder")
 
     @classmethod
     def build_repair_task_input(
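
The rewrite above collapses the old registry lookup into a single prompt_builder_from_id call. A minimal sketch of the consuming pattern, assuming a Task loaded elsewhere and a run's source-properties dict (the helper name render_prompt_for_run is hypothetical):

from kiln_ai.adapters.prompt_builders import prompt_builder_from_id

def render_prompt_for_run(task, source_properties: dict) -> str:
    # Prefer the new "prompt_id" key, falling back to the legacy "prompt_builder_name" key.
    prompt_id = source_properties.get("prompt_id") or source_properties.get(
        "prompt_builder_name"
    )
    if not isinstance(prompt_id, str):
        raise ValueError("No prompt id found in source properties")
    # One entry point resolves built-in generators ("simple_prompt_builder"),
    # saved prompts ("id::..."), and fine-tune prompts ("fine_tune_prompt::...").
    builder = prompt_builder_from_id(prompt_id, task)
    return builder.build_prompt(include_json_instructions=False)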

kiln_ai/adapters/repair/test_repair_task.py

@@ -7,7 +7,7 @@ from pydantic import ValidationError
 
 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.model_adapters.base_adapter import RunOutput
-from kiln_ai.adapters.model_adapters.langchain_adapters import LangchainAdapter
+from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
 from kiln_ai.adapters.repair.repair_task import (
     RepairTaskInput,
     RepairTaskRun,
@@ -95,7 +95,7 @@ def sample_task_run(sample_task):
                 "model_name": "gpt_4o",
                 "model_provider": "openai",
                 "adapter_name": "langchain_adapter",
-                "prompt_builder_name": "simple_prompt_builder",
+                "prompt_id": "simple_prompt_builder",
             },
         ),
     ),
@@ -201,7 +201,7 @@ async def test_live_run(sample_task, sample_task_run, sample_repair_data):
         "adapter_name": "kiln_langchain_adapter",
         "model_name": "llama_3_1_8b",
         "model_provider": "groq",
-        "prompt_builder_name": "simple_prompt_builder",
+        "prompt_id": "simple_prompt_builder",
     }
 
 
@@ -217,7 +217,7 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
         "rating": 8,
     }
 
-    with patch.object(LangchainAdapter, "_run", new_callable=AsyncMock) as mock_run:
+    with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
         mock_run.return_value = RunOutput(
             output=mocked_output, intermediate_outputs=None
         )
@@ -235,10 +235,10 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
     parsed_output = json.loads(run.output.output)
     assert parsed_output == mocked_output
    assert run.output.source.properties == {
-        "adapter_name": "kiln_langchain_adapter",
+        "adapter_name": "kiln_openai_compatible_adapter",
         "model_name": "llama_3_1_8b",
         "model_provider": "ollama",
-        "prompt_builder_name": "simple_prompt_builder",
+        "prompt_id": "simple_prompt_builder",
    }
     assert run.input_source.type == DataSourceType.human
     assert "created_by" in run.input_source.properties

kiln_ai/adapters/run_output.py

@@ -1,8 +1,11 @@
 from dataclasses import dataclass
 from typing import Dict
 
+from litellm.types.utils import ChoiceLogprobs
+
 
 @dataclass
 class RunOutput:
     output: Dict | str
     intermediate_outputs: Dict[str, str] | None
+    output_logprobs: ChoiceLogprobs | None = None
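
RunOutput now carries optional logprobs from LiteLLM. A minimal sketch of building one by hand and stubbing an adapter's _run with it, following the patch.object pattern from test_repair_task.py above (the output dict is illustrative):

from unittest.mock import AsyncMock, patch

from kiln_ai.adapters.model_adapters.base_adapter import RunOutput
from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter

# A hand-built RunOutput; output_logprobs stays None unless logprobs were requested.
mocked = RunOutput(
    output={"answer": "64"},  # illustrative structured output
    intermediate_outputs=None,
    output_logprobs=None,
)

# Stub the adapter's _run so tests exercise parsing and saving without a model call.
with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
    mock_run.return_value = mocked
    # ... invoke the adapter under test here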

kiln_ai/adapters/test_adapter_registry.py

@@ -5,8 +5,8 @@ import pytest
 from kiln_ai import datamodel
 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.ml_model_list import ModelProviderName
-from kiln_ai.adapters.model_adapters.langchain_adapters import LangchainAdapter
-from kiln_ai.adapters.model_adapters.openai_model_adapter import OpenAICompatibleAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
+from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
 from kiln_ai.adapters.prompt_builders import BasePromptBuilder
 from kiln_ai.adapters.provider_tools import kiln_model_provider_from
 
@@ -43,9 +43,9 @@ def test_openai_adapter_creation(mock_config, basic_task):
         kiln_task=basic_task, model_name="gpt-4", provider=ModelProviderName.openai
     )
 
-    assert isinstance(adapter, OpenAICompatibleAdapter)
+    assert isinstance(adapter, LiteLlmAdapter)
     assert adapter.config.model_name == "gpt-4"
-    assert adapter.config.api_key == "test-openai-key"
+    assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
     assert adapter.config.provider_name == ModelProviderName.openai
     assert adapter.config.base_url is None  # OpenAI url is default
     assert adapter.config.default_headers is None
@@ -58,11 +58,10 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
         provider=ModelProviderName.openrouter,
     )
 
-    assert isinstance(adapter, OpenAICompatibleAdapter)
+    assert isinstance(adapter, LiteLlmAdapter)
     assert adapter.config.model_name == "anthropic/claude-3-opus"
-    assert adapter.config.api_key == "test-openrouter-key"
+    assert adapter.config.additional_body_options == {"api_key": "test-openrouter-key"}
     assert adapter.config.provider_name == ModelProviderName.openrouter
-    assert adapter.config.base_url == "https://openrouter.ai/api/v1"
     assert adapter.config.default_headers == {
         "HTTP-Referer": "https://getkiln.ai/openrouter",
         "X-Title": "KilnAI",
@@ -78,30 +77,25 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
         ModelProviderName.fireworks_ai,
     ],
 )
-def test_langchain_adapter_creation(mock_config, basic_task, provider):
+def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
     adapter = adapter_for_task(
         kiln_task=basic_task, model_name="test-model", provider=provider
     )
 
-    assert isinstance(adapter, LangchainAdapter)
-    assert adapter.model_name == "test-model"
+    assert isinstance(adapter, LiteLlmAdapter)
+    assert adapter.run_config.model_name == "test-model"
 
 
 # TODO should run for all cases
 def test_custom_prompt_builder(mock_config, basic_task):
-    class TestPromptBuilder(BasePromptBuilder):
-        def build_base_prompt(self, kiln_task) -> str:
-            return "test-prompt"
-
-    prompt_builder = TestPromptBuilder(basic_task)
     adapter = adapter_for_task(
         kiln_task=basic_task,
         model_name="gpt-4",
         provider=ModelProviderName.openai,
-        prompt_builder=prompt_builder,
+        prompt_id="simple_chain_of_thought_prompt_builder",
     )
 
-    assert adapter.prompt_builder == prompt_builder
+    assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"
 
 
 # TODO should run for all cases
@@ -111,10 +105,12 @@ def test_tags_passed_through(mock_config, basic_task):
         kiln_task=basic_task,
         model_name="gpt-4",
         provider=ModelProviderName.openai,
-        tags=tags,
+        base_adapter_config=AdapterConfig(
+            default_tags=tags,
+        ),
     )
 
-    assert adapter.default_tags == tags
+    assert adapter.base_adapter_config.default_tags == tags
 
 
 def test_invalid_provider(mock_config, basic_task):
@@ -124,11 +120,14 @@ def test_invalid_provider(mock_config, basic_task):
     )
 
 
-@patch("kiln_ai.adapters.adapter_registry.openai_compatible_config")
+@patch("kiln_ai.adapters.adapter_registry.lite_llm_config")
 def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
     mock_compatible_config.return_value.model_name = "test-model"
-    mock_compatible_config.return_value.api_key = "test-key"
+    mock_compatible_config.return_value.additional_body_options = {
+        "api_key": "test-key"
+    }
     mock_compatible_config.return_value.base_url = "https://test.com/v1"
+    mock_compatible_config.return_value.provider_name = "CustomProvider99"
 
     adapter = adapter_for_task(
         kiln_task=basic_task,
@@ -136,11 +135,9 @@ def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_ta
         provider=ModelProviderName.openai_compatible,
     )
 
-    assert isinstance(adapter, OpenAICompatibleAdapter)
+    assert isinstance(adapter, LiteLlmAdapter)
     mock_compatible_config.assert_called_once_with("provider::test-model")
-    assert adapter.config.model_name == "test-model"
-    assert adapter.config.api_key == "test-key"
-    assert adapter.config.base_url == "https://test.com/v1"
+    assert adapter.config == mock_compatible_config.return_value
 
 
 def test_custom_openai_compatible_provider(mock_config, basic_task):
@@ -150,9 +147,9 @@ def test_custom_openai_compatible_provider(mock_config, basic_task):
         provider=ModelProviderName.kiln_custom_registry,
     )
 
-    assert isinstance(adapter, OpenAICompatibleAdapter)
+    assert isinstance(adapter, LiteLlmAdapter)
     assert adapter.config.model_name == "openai::test-model"
-    assert adapter.config.api_key == "test-openai-key"
+    assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
     assert adapter.config.base_url is None  # openai is none
     assert adapter.config.provider_name == ModelProviderName.kiln_custom_registry
 
@@ -165,7 +162,7 @@ async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id
     )
 
     mock_finetune_from_id.assert_called_once_with("proj::task::tune")
-    assert isinstance(adapter, OpenAICompatibleAdapter)
+    assert isinstance(adapter, LiteLlmAdapter)
     assert adapter.config.provider_name == ModelProviderName.kiln_fine_tune
     # Kiln model name here, but the underlying openai model id below
     assert adapter.config.model_name == "proj::task::tune"
@@ -174,4 +171,4 @@ async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id
         "proj::task::tune", provider_name=ModelProviderName.kiln_fine_tune
     )
     # The actual model name from the fine tune object
-    assert provider.provider_options["model"] == "test-model"
+    assert provider.model_id == "test-model"
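
The tests above capture the new adapter_for_task surface: prompt selection moves to a prompt_id string and default tags move into AdapterConfig. A minimal sketch, assuming a Task named task and that the two keyword arguments can be combined in one call (values are illustrative):

from kiln_ai.adapters.adapter_registry import adapter_for_task
from kiln_ai.adapters.ml_model_list import ModelProviderName
from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig

# `task` is assumed to be a kiln_ai.datamodel.Task; model and tags are illustrative.
adapter = adapter_for_task(
    kiln_task=task,
    model_name="gpt-4",
    provider=ModelProviderName.openai,
    prompt_id="simple_chain_of_thought_prompt_builder",
    base_adapter_config=AdapterConfig(default_tags=["eval"]),
)

# The registry now returns a LiteLlmAdapter; run settings live on run_config,
# and provider credentials land in config.additional_body_options.
assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"
assert adapter.base_adapter_config.default_tags == ["eval"]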

kiln_ai/adapters/test_generate_docs.py

@@ -1,13 +1,13 @@
+import logging
 from typing import List
 
 import pytest
 
-from libs.core.kiln_ai.adapters.ml_model_list import (
-    KilnModelProvider,
-    built_in_models,
-)
+from libs.core.kiln_ai.adapters.ml_model_list import KilnModelProvider, built_in_models
 from libs.core.kiln_ai.adapters.provider_tools import provider_name_from_id
 
+logger = logging.getLogger(__name__)
+
 
 def _all_providers_support(providers: List[KilnModelProvider], attribute: str) -> bool:
     """Check if all providers support a given feature"""

kiln_ai/adapters/test_ollama_tools.py

@@ -10,7 +10,6 @@ from kiln_ai.adapters.ollama_tools import (
 def test_parse_ollama_tags_no_models():
     json_response = '{"models":[{"name":"scosman_net","model":"scosman_net:latest"},{"name":"phi3.5:latest","model":"phi3.5:latest","modified_at":"2024-10-02T12:04:35.191519822-04:00","size":2176178843,"digest":"61819fb370a3c1a9be6694869331e5f85f867a079e9271d66cb223acb81d04ba","details":{"parent_model":"","format":"gguf","family":"phi3","families":["phi3"],"parameter_size":"3.8B","quantization_level":"Q4_0"}},{"name":"gemma2:2b","model":"gemma2:2b","modified_at":"2024-09-09T16:46:38.64348929-04:00","size":1629518495,"digest":"8ccf136fdd5298f3ffe2d69862750ea7fb56555fa4d5b18c04e3fa4d82ee09d7","details":{"parent_model":"","format":"gguf","family":"gemma2","families":["gemma2"],"parameter_size":"2.6B","quantization_level":"Q4_0"}},{"name":"llama3.1:latest","model":"llama3.1:latest","modified_at":"2024-09-01T17:19:43.481523695-04:00","size":4661230720,"digest":"f66fc8dc39ea206e03ff6764fcc696b1b4dfb693f0b6ef751731dd4e6269046e","details":{"parent_model":"","format":"gguf","family":"llama","families":["llama"],"parameter_size":"8.0B","quantization_level":"Q4_0"}}]}'
     tags = json.loads(json_response)
-    print(json.dumps(tags, indent=2))
     conn = parse_ollama_tags(tags)
     assert "phi3.5:latest" in conn.supported_models
     assert "gemma2:2b" in conn.supported_models

kiln_ai/adapters/test_prompt_adaptors.py

@@ -1,24 +1,32 @@
 import os
 from pathlib import Path
+from unittest.mock import patch
 
 import pytest
-from langchain_core.language_models.fake_chat_models import FakeListChatModel
+from litellm.utils import ModelResponse
 
 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.ml_model_list import built_in_models
-from kiln_ai.adapters.model_adapters.langchain_adapters import LangchainAdapter
+from kiln_ai.adapters.model_adapters.litellm_adapter import (
+    LiteLlmAdapter,
+    LiteLlmConfig,
+)
 from kiln_ai.adapters.ollama_tools import ollama_online
 from kiln_ai.adapters.prompt_builders import (
     BasePromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
 )
+from kiln_ai.datamodel import PromptId
 
 
 def get_all_models_and_providers():
     model_provider_pairs = []
     for model in built_in_models:
         for provider in model.providers:
+            if not provider.model_id:
+                # it's possible for models to not have an ID (fine-tune only model)
+                continue
             model_provider_pairs.append((model.name, provider.name))
     return model_provider_pairs
 
@@ -105,23 +113,27 @@ async def test_amazon_bedrock(tmp_path):
     await run_simple_test(tmp_path, "llama_3_1_8b", "amazon_bedrock")
 
 
-async def test_mock(tmp_path):
-    task = build_test_task(tmp_path)
-    mockChatModel = FakeListChatModel(responses=["mock response"])
-    adapter = LangchainAdapter(
-        task,
-        custom_model=mockChatModel,
-        provider="ollama",
-    )
-    run = await adapter.invoke("You are a mock, send me the response!")
-    assert "mock response" in run.output.output
-
-
 async def test_mock_returning_run(tmp_path):
     task = build_test_task(tmp_path)
-    mockChatModel = FakeListChatModel(responses=["mock response"])
-    adapter = LangchainAdapter(task, custom_model=mockChatModel, provider="ollama")
-    run = await adapter.invoke("You are a mock, send me the response!")
+    with patch("litellm.acompletion") as mock_acompletion:
+        # Configure the mock to return a properly structured response
+        mock_acompletion.return_value = ModelResponse(
+            model="custom_model",
+            choices=[{"message": {"content": "mock response"}}],
+        )
+
+        adapter = LiteLlmAdapter(
+            config=LiteLlmConfig(
+                model_name="custom_model",
+                provider_name="ollama",
+                base_url="http://localhost:11434",
+                additional_body_options={"api_key": "test_key"},
+            ),
+            kiln_task=task,
+        )
+
+        run = await adapter.invoke("You are a mock, send me the response!")
+
     assert run.output.output == "mock response"
     assert run is not None
     assert run.id is not None
@@ -129,10 +141,10 @@ async def test_mock_returning_run(tmp_path):
     assert run.output.output == "mock response"
     assert "created_by" in run.input_source.properties
     assert run.output.source.properties == {
-        "adapter_name": "kiln_langchain_adapter",
-        "model_name": "custom.langchain:unknown_model",
+        "adapter_name": "kiln_openai_compatible_adapter",
+        "model_name": "custom_model",
         "model_provider": "ollama",
-        "prompt_builder_name": "simple_prompt_builder",
+        "prompt_id": "simple_prompt_builder",
     }
 
 
@@ -149,8 +161,9 @@ async def test_all_models_providers_plaintext(tmp_path, model_name, provider_nam
 @pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
 async def test_cot_prompt_builder(tmp_path, model_name, provider_name):
     task = build_test_task(tmp_path)
-    pb = SimpleChainOfThoughtPromptBuilder(task)
-    await run_simple_task(task, model_name, provider_name, pb)
+    await run_simple_task(
+        task, model_name, provider_name, "simple_chain_of_thought_prompt_builder"
+    )
 
 
 def build_test_task(tmp_path: Path):
@@ -186,20 +199,20 @@ async def run_simple_test(
     tmp_path: Path,
     model_name: str,
     provider: str | None = None,
-    prompt_builder: BasePromptBuilder | None = None,
+    prompt_id: PromptId | None = None,
 ):
     task = build_test_task(tmp_path)
-    return await run_simple_task(task, model_name, provider, prompt_builder)
+    return await run_simple_task(task, model_name, provider, prompt_id)
 
 
 async def run_simple_task(
     task: datamodel.Task,
     model_name: str,
     provider: str,
-    prompt_builder: BasePromptBuilder | None = None,
+    prompt_id: PromptId | None = None,
 ) -> datamodel.TaskRun:
     adapter = adapter_for_task(
-        task, model_name=model_name, provider=provider, prompt_builder=prompt_builder
+        task, model_name=model_name, provider=provider, prompt_id=prompt_id
     )
 
     run = await adapter.invoke(
@@ -212,13 +225,14 @@ async def run_simple_task(
     )
     assert "64" in run.output.output
     source_props = run.output.source.properties
-    assert source_props["adapter_name"] == "kiln_langchain_adapter"
+    assert source_props["adapter_name"] in [
+        "kiln_langchain_adapter",
+        "kiln_openai_compatible_adapter",
+    ]
     assert source_props["model_name"] == model_name
     assert source_props["model_provider"] == provider
-    expected_prompt_builder_name = (
-        prompt_builder.__class__.prompt_builder_name()
-        if prompt_builder
-        else "simple_prompt_builder"
-    )
-    assert source_props["prompt_builder_name"] == expected_prompt_builder_name
+    if prompt_id is None:
+        assert source_props["prompt_id"] == "simple_prompt_builder"
+    else:
+        assert source_props["prompt_id"] == prompt_id
     return run
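
Pulling the pieces of test_mock_returning_run together, a standalone sketch of driving LiteLlmAdapter directly with litellm.acompletion stubbed out; it assumes a task built the same way as in these tests, and the config values are illustrative:

import asyncio
from unittest.mock import patch

from litellm.utils import ModelResponse

from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter, LiteLlmConfig

# `task` is assumed to be a saved kiln_ai.datamodel.Task (e.g. from build_test_task).
adapter = LiteLlmAdapter(
    config=LiteLlmConfig(
        model_name="custom_model",
        provider_name="ollama",
        base_url="http://localhost:11434",
        additional_body_options={"api_key": "test_key"},
    ),
    kiln_task=task,
)

# Stub the underlying litellm call so no server is needed (patch returns an AsyncMock
# automatically because litellm.acompletion is async).
with patch("litellm.acompletion") as mock_acompletion:
    mock_acompletion.return_value = ModelResponse(
        model="custom_model",
        choices=[{"message": {"content": "mock response"}}],
    )
    run = asyncio.run(adapter.invoke("You are a mock, send me the response!"))

assert run.output.output == "mock response"
assert run.output.source.properties["prompt_id"] == "simple_prompt_builder"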

kiln_ai/adapters/test_prompt_builders.py

@@ -1,8 +1,9 @@
 import json
+import logging
 
 import pytest
 
-from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo, BaseAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
 from kiln_ai.adapters.model_adapters.test_structured_output import (
     build_structured_output_test_task,
 )
@@ -16,8 +17,9 @@ from kiln_ai.adapters.prompt_builders import (
     SavedPromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
+    TaskRunConfigPromptBuilder,
     chain_of_thought_prompt,
-    prompt_builder_from_ui_name,
+    prompt_builder_from_id,
 )
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
 from kiln_ai.datamodel import (
@@ -32,6 +34,9 @@ from kiln_ai.datamodel import (
     TaskOutputRating,
     TaskRun,
 )
+from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig
+
+logger = logging.getLogger(__name__)
 
 
 def test_simple_prompt_builder(tmp_path):
@@ -57,12 +62,8 @@ class MockAdapter(BaseAdapter):
     def _run(self, input: str) -> str:
         return "mock response"
 
-    def adapter_info(self) -> AdapterInfo:
-        return AdapterInfo(
-            adapter_name="mock_adapter",
-            model_name="mock_model",
-            model_provider="mock_provider",
-        )
+    def adapter_name(self) -> str:
+        return "mock_adapter"
 
 
 def test_simple_prompt_builder_structured_output(tmp_path):
@@ -269,7 +270,6 @@ def test_few_shot_prompt_builder(tmp_path):
                 rating=TaskOutputRating(value=4 + (i % 2), reason="Good joke"),
             ),
         )
-        print("RATING", "Joke Initial Output ", i + 1, " - RATED:", 4 + (i % 2), "\n")
         if i < 2:
             run = run.model_copy(
                 update={
@@ -290,7 +290,7 @@ def test_few_shot_prompt_builder(tmp_path):
     prompt = prompt_builder.build_prompt(include_json_instructions=False)
     assert prompt.count("## Example") == 4
 
-    print("PROMPT", prompt)
+    logger.info("PROMPT: %s", prompt)
     # Verify the order of examples (2 repaired, then 2 highest-rated)
     assert "Repaired Joke 1" in prompt
     assert "Repaired Joke 2" in prompt
@@ -314,54 +314,53 @@ def check_example_outputs(task: Task, count: int):
     assert f"## Example {count}" in prompt
 
 
-def test_prompt_builder_name():
-    assert SimplePromptBuilder.prompt_builder_name() == "simple_prompt_builder"
-    assert MultiShotPromptBuilder.prompt_builder_name() == "multi_shot_prompt_builder"
-    assert RepairsPromptBuilder.prompt_builder_name() == "repairs_prompt_builder"
-
-
-def test_prompt_builder_from_ui_name(task_with_examples):
+def test_prompt_builder_from_id(task_with_examples):
     task = task_with_examples
-    assert isinstance(prompt_builder_from_ui_name("basic", task), SimplePromptBuilder)
     assert isinstance(
-        prompt_builder_from_ui_name("few_shot", task), FewShotPromptBuilder
+        prompt_builder_from_id("simple_prompt_builder", task), SimplePromptBuilder
     )
     assert isinstance(
-        prompt_builder_from_ui_name("many_shot", task), MultiShotPromptBuilder
+        prompt_builder_from_id("few_shot_prompt_builder", task),
+        FewShotPromptBuilder,
    )
     assert isinstance(
-        prompt_builder_from_ui_name("repairs", task), RepairsPromptBuilder
+        prompt_builder_from_id("multi_shot_prompt_builder", task),
+        MultiShotPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("simple_chain_of_thought", task),
+        prompt_builder_from_id("repairs_prompt_builder", task),
+        RepairsPromptBuilder,
+    )
+    assert isinstance(
+        prompt_builder_from_id("simple_chain_of_thought_prompt_builder", task),
         SimpleChainOfThoughtPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("few_shot_chain_of_thought", task),
+        prompt_builder_from_id("few_shot_chain_of_thought_prompt_builder", task),
         FewShotChainOfThoughtPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("multi_shot_chain_of_thought", task),
+        prompt_builder_from_id("multi_shot_chain_of_thought_prompt_builder", task),
         MultiShotChainOfThoughtPromptBuilder,
     )
 
-    with pytest.raises(ValueError, match="Unknown prompt builder: invalid_name"):
-        prompt_builder_from_ui_name("invalid_name", task)
+    with pytest.raises(ValueError, match="Unknown prompt generator: invalid_name"):
+        prompt_builder_from_id("invalid_name", task)
 
     with pytest.raises(ValueError, match="Prompt ID not found: 123"):
-        prompt_builder_from_ui_name("id::123", task)
+        prompt_builder_from_id("id::123", task)
 
     with pytest.raises(
         ValueError,
         match="Invalid fine-tune ID format. Expected 'project_id::task_id::fine_tune_id'",
     ):
-        prompt_builder_from_ui_name("fine_tune_prompt::123", task)
+        prompt_builder_from_id("fine_tune_prompt::123", task)
 
     with pytest.raises(
         ValueError,
         match="Fine-tune ID not found",
     ):
-        prompt_builder_from_ui_name("fine_tune_prompt::123::456::789", task)
+        prompt_builder_from_id("fine_tune_prompt::123::456::789", task)
 
     prompt = Prompt(
         name="test_prompt_name",
@@ -370,7 +369,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
         parent=task,
     )
     prompt.save_to_file()
-    pb = prompt_builder_from_ui_name("id::" + prompt.id, task)
+    pb = prompt_builder_from_id("id::" + prompt.id, task)
     assert isinstance(pb, SavedPromptBuilder)
     assert pb.prompt_id() == prompt.id
     assert pb.build_prompt(include_json_instructions=False) == "test_prompt"
@@ -390,7 +389,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
     nested_fine_tune_id = (
         task_with_examples.parent.id + "::" + task_with_examples.id + "::" + finetune.id
     )
-    pb = prompt_builder_from_ui_name(
+    pb = prompt_builder_from_id(
         "fine_tune_prompt::" + nested_fine_tune_id,
         task_with_examples,
     )
@@ -587,3 +586,64 @@ def test_build_prompt_with_json_instructions(tmp_path):
     assert task.instruction in prompt_with_json
     for requirement in task.requirements:
         assert requirement.instruction in prompt_with_json
+
+
+def test_task_run_config_prompt_builder(tmp_path):
+    task = build_test_task(tmp_path)
+
+    run_config = TaskRunConfig(
+        name="test_run_config",
+        parent=task,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id="simple_prompt_builder",
+        ),
+        prompt=Prompt(
+            name="test prompt name",
+            prompt="test prompt content",
+            chain_of_thought_instructions="test step by step",
+        ),
+    )
+    run_config.save_to_file()
+
+    # Construct the eval prompt ID
+    run_config_prompt_id = (
+        f"task_run_config::{task.parent.id}::{task.id}::{run_config.id}"
+    )
+
+    # Test successful creation 2 ways: constructor and ID creation
+    builders = [
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id=run_config_prompt_id
+        ),
+        prompt_builder_from_id(run_config_prompt_id, task),
+    ]
+
+    for builder in builders:
+        assert (
+            builder.build_prompt(include_json_instructions=False)
+            == "test prompt content"
+        )
+        assert builder.chain_of_thought_prompt() == "test step by step"
+        assert builder.prompt_id() == run_config_prompt_id
+
+
+def test_task_run_config_prompt_builder_validation_errors(tmp_path):
+    task = build_test_task(tmp_path)
+
+    # Test invalid format
+    with pytest.raises(ValueError, match="Invalid task run config prompt ID"):
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id="task_run_config::wrong::format"
+        )
+
+    # Test task ID mismatch
+    wrong_task_id = f"task_run_config::{task.parent.id}::wrong_task_id::config_id"
+    with pytest.raises(ValueError, match="Task ID mismatch"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=wrong_task_id)
+
+    # Test eval not found
+    nonexistent_eval = f"task_run_config::{task.parent.id}::{task.id}::nonexistent_id"
+    with pytest.raises(ValueError, match="Task run config ID not found"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=nonexistent_eval)
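
The new tests document the task_run_config prompt ID scheme. A minimal sketch of freezing a prompt on a TaskRunConfig and resolving it back through prompt_builder_from_id, assuming a saved Task with a parent Project; names and values are illustrative:

from kiln_ai.adapters.prompt_builders import prompt_builder_from_id
from kiln_ai.datamodel import Prompt
from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig

# `task` is assumed to be a saved Task with a parent Project (as built by build_test_task).
run_config = TaskRunConfig(
    name="baseline",
    parent=task,
    run_config_properties=RunConfigProperties(
        model_name="gpt-4",
        model_provider_name="openai",
        prompt_id="simple_prompt_builder",
    ),
    # The prompt is frozen onto the run config so it stays stable, e.g. for evals.
    prompt=Prompt(
        name="frozen prompt",
        prompt="frozen prompt content",
        chain_of_thought_instructions="think step by step",
    ),
)
run_config.save_to_file()

# IDs of the form task_run_config::<project id>::<task id>::<run config id> resolve to that frozen prompt.
prompt_id = f"task_run_config::{task.parent.id}::{task.id}::{run_config.id}"
builder = prompt_builder_from_id(prompt_id, task)
assert builder.build_prompt(include_json_instructions=False) == "frozen prompt content"
assert builder.chain_of_thought_prompt() == "think step by step"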