kiln-ai 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in its public registry. It is provided for informational purposes only.

Potentially problematic release.



Files changed (63)
  1. kiln_ai/adapters/adapter_registry.py +12 -13
  2. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  3. kiln_ai/adapters/eval/base_eval.py +164 -0
  4. kiln_ai/adapters/eval/eval_runner.py +267 -0
  5. kiln_ai/adapters/eval/g_eval.py +367 -0
  6. kiln_ai/adapters/eval/registry.py +16 -0
  7. kiln_ai/adapters/eval/test_base_eval.py +324 -0
  8. kiln_ai/adapters/eval/test_eval_runner.py +640 -0
  9. kiln_ai/adapters/eval/test_g_eval.py +497 -0
  10. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  11. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  12. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +1 -1
  13. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  14. kiln_ai/adapters/ml_model_list.py +141 -29
  15. kiln_ai/adapters/model_adapters/base_adapter.py +50 -35
  16. kiln_ai/adapters/model_adapters/langchain_adapters.py +27 -20
  17. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -1
  18. kiln_ai/adapters/model_adapters/openai_model_adapter.py +93 -50
  19. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  20. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +7 -14
  21. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +55 -64
  22. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  23. kiln_ai/adapters/model_adapters/test_structured_output.py +36 -30
  24. kiln_ai/adapters/ollama_tools.py +0 -1
  25. kiln_ai/adapters/prompt_builders.py +80 -42
  26. kiln_ai/adapters/repair/repair_task.py +9 -21
  27. kiln_ai/adapters/repair/test_repair_task.py +3 -3
  28. kiln_ai/adapters/run_output.py +3 -0
  29. kiln_ai/adapters/test_adapter_registry.py +10 -10
  30. kiln_ai/adapters/test_generate_docs.py +6 -6
  31. kiln_ai/adapters/test_ollama_tools.py +0 -1
  32. kiln_ai/adapters/test_prompt_adaptors.py +17 -14
  33. kiln_ai/adapters/test_prompt_builders.py +91 -31
  34. kiln_ai/datamodel/__init__.py +50 -952
  35. kiln_ai/datamodel/datamodel_enums.py +58 -0
  36. kiln_ai/datamodel/dataset_filters.py +114 -0
  37. kiln_ai/datamodel/dataset_split.py +170 -0
  38. kiln_ai/datamodel/eval.py +298 -0
  39. kiln_ai/datamodel/finetune.py +105 -0
  40. kiln_ai/datamodel/json_schema.py +6 -0
  41. kiln_ai/datamodel/project.py +23 -0
  42. kiln_ai/datamodel/prompt.py +37 -0
  43. kiln_ai/datamodel/prompt_id.py +83 -0
  44. kiln_ai/datamodel/strict_mode.py +24 -0
  45. kiln_ai/datamodel/task.py +181 -0
  46. kiln_ai/datamodel/task_output.py +321 -0
  47. kiln_ai/datamodel/task_run.py +164 -0
  48. kiln_ai/datamodel/test_basemodel.py +10 -11
  49. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  50. kiln_ai/datamodel/test_dataset_split.py +32 -8
  51. kiln_ai/datamodel/test_datasource.py +3 -2
  52. kiln_ai/datamodel/test_eval_model.py +635 -0
  53. kiln_ai/datamodel/test_example_models.py +9 -13
  54. kiln_ai/datamodel/test_json_schema.py +23 -0
  55. kiln_ai/datamodel/test_models.py +2 -2
  56. kiln_ai/datamodel/test_prompt_id.py +129 -0
  57. kiln_ai/datamodel/test_task.py +159 -0
  58. kiln_ai/utils/config.py +6 -1
  59. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +37 -1
  60. kiln_ai-0.12.0.dist-info/RECORD +100 -0
  61. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  62. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
  63. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/model_adapters/openai_model_adapter.py (+93 -50)

@@ -9,18 +9,23 @@ from openai.types.chat import (
 )

 import kiln_ai.datamodel as datamodel
-from kiln_ai.adapters.ml_model_list import StructuredOutputMode
+from kiln_ai.adapters.ml_model_list import (
+    KilnModelProvider,
+    ModelProviderName,
+    StructuredOutputMode,
+)
 from kiln_ai.adapters.model_adapters.base_adapter import (
     COT_FINAL_ANSWER_PROMPT,
-    AdapterInfo,
+    AdapterConfig,
     BaseAdapter,
-    BasePromptBuilder,
     RunOutput,
 )
 from kiln_ai.adapters.model_adapters.openai_compatible_config import (
     OpenAICompatibleConfig,
 )
 from kiln_ai.adapters.parsers.json_parser import parse_json_string
+from kiln_ai.datamodel import PromptGenerators, PromptId
+from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error


@@ -29,8 +34,8 @@ class OpenAICompatibleAdapter(BaseAdapter):
         self,
         config: OpenAICompatibleConfig,
         kiln_task: datamodel.Task,
-        prompt_builder: BasePromptBuilder | None = None,
-        tags: list[str] | None = None,
+        prompt_id: PromptId | None = None,
+        base_adapter_config: AdapterConfig | None = None,
     ):
         self.config = config
         self.client = AsyncOpenAI(
@@ -39,12 +44,16 @@ class OpenAICompatibleAdapter(BaseAdapter):
             default_headers=config.default_headers,
         )

-        super().__init__(
-            kiln_task,
+        run_config = RunConfig(
+            task=kiln_task,
             model_name=config.model_name,
             model_provider_name=config.provider_name,
-            prompt_builder=prompt_builder,
-            tags=tags,
+            prompt_id=prompt_id or PromptGenerators.SIMPLE,
+        )
+
+        super().__init__(
+            run_config=run_config,
+            config=base_adapter_config,
         )

     async def _run(self, input: Dict | str) -> RunOutput:
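
Taken together, these constructor hunks change the public call shape: callers now pass a PromptId and an optional AdapterConfig instead of a prompt builder and tag list, and the adapter bundles everything into a RunConfig before delegating to BaseAdapter. A minimal sketch of the new call site, assuming placeholder credentials and an in-memory Task (the field values below are illustrative, not part of this release):

import kiln_ai.datamodel as datamodel
from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
from kiln_ai.adapters.model_adapters.openai_compatible_config import OpenAICompatibleConfig
from kiln_ai.adapters.model_adapters.openai_model_adapter import OpenAICompatibleAdapter

# Illustrative task; real projects load an existing Task from disk,
# and the Task fields shown here are assumptions for the sketch.
task = datamodel.Task(name="Example task", instruction="Summarize the input in one sentence.")

config = OpenAICompatibleConfig(
    api_key="placeholder-key",                # placeholder, not a real credential
    base_url="https://openrouter.ai/api/v1",
    model_name="example-model",
    provider_name="openrouter",
    default_headers={},
)

adapter = OpenAICompatibleAdapter(
    config=config,
    kiln_task=task,
    prompt_id="simple_prompt_builder",        # omitted -> PromptGenerators.SIMPLE
    base_adapter_config=AdapterConfig(default_tags=["example"]),
)
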
@@ -93,21 +102,8 @@ class OpenAICompatibleAdapter(BaseAdapter):
             ]
         )

-        # OpenRouter specific options for reasoning models
-        extra_body = {}
-        require_or_reasoning = (
-            self.config.openrouter_style_reasoning and provider.reasoning_capable
-        )
-        if require_or_reasoning:
-            extra_body["include_reasoning"] = True
-            # Filter to providers that support the reasoning parameter
-            extra_body["provider"] = {
-                "require_parameters": True,
-                # Ugly to have these here, but big range of quality of R1 providers
-                "order": ["Fireworks", "Together"],
-                # fp8 quants are awful
-                "ignore": ["DeepInfra"],
-            }
+        # Build custom request params based on model provider
+        extra_body = self.build_extra_body(provider)

         # Main completion call
         response_format_options = await self.response_format_options()
@@ -115,6 +111,8 @@ class OpenAICompatibleAdapter(BaseAdapter):
             model=provider.provider_options["model"],
             messages=messages,
             extra_body=extra_body,
+            logprobs=self.base_adapter_config.top_logprobs is not None,
+            top_logprobs=self.base_adapter_config.top_logprobs,
             **response_format_options,
         )

@@ -133,9 +131,14 @@ class OpenAICompatibleAdapter(BaseAdapter):
         )

         message = response.choices[0].message
+        logprobs = response.choices[0].logprobs
+
+        # Check logprobs worked, if requested
+        if self.base_adapter_config.top_logprobs is not None and logprobs is None:
+            raise RuntimeError("Logprobs were required, but no logprobs were returned.")

-        # Save reasoning if it exists (OpenRouter specific format)
-        if require_or_reasoning:
+        # Save reasoning if it exists (OpenRouter specific api response field)
+        if provider.require_openrouter_reasoning:
             if (
                 hasattr(message, "reasoning") and message.reasoning  # pyright: ignore
             ):
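
The logprob support spans these three hunks: the request sets logprobs/top_logprobs from the new AdapterConfig, and the response path raises if logprobs were requested but not returned. A hedged sketch of the config side, using only fields that appear in this diff (defaults and the full field list may differ):

from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig

# Request the top 5 logprobs per token; per the hunks above, the adapter then
# passes logprobs=True / top_logprobs=5 to the completion call and raises a
# RuntimeError if the provider returns no logprobs at all.
adapter_config = AdapterConfig(
    default_tags=["eval"],  # replaces the old `tags` constructor argument
    top_logprobs=5,
)
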
@@ -164,26 +167,19 @@ class OpenAICompatibleAdapter(BaseAdapter):
         if not isinstance(response_content, str):
             raise RuntimeError(f"response is not a string: {response_content}")

+        # Parse to dict if we have structured output
+        output: Dict | str = response_content
         if self.has_structured_output():
-            structured_response = parse_json_string(response_content)
-            return RunOutput(
-                output=structured_response,
-                intermediate_outputs=intermediate_outputs,
-            )
+            output = parse_json_string(response_content)

         return RunOutput(
-            output=response_content,
+            output=output,
             intermediate_outputs=intermediate_outputs,
+            output_logprobs=logprobs,
         )

-    def adapter_info(self) -> AdapterInfo:
-        return AdapterInfo(
-            model_name=self.model_name,
-            model_provider=self.model_provider_name,
-            adapter_name="kiln_openai_compatible_adapter",
-            prompt_builder_name=self.prompt_builder.__class__.prompt_builder_name(),
-            prompt_id=self.prompt_builder.prompt_id(),
-        )
+    def adapter_name(self) -> str:
+        return "kiln_openai_compatible_adapter"

     async def response_format_options(self) -> dict[str, Any]:
         # Unstructured if task isn't structured
@@ -195,7 +191,7 @@ class OpenAICompatibleAdapter(BaseAdapter):
             case StructuredOutputMode.json_mode:
                 return {"response_format": {"type": "json_object"}}
             case StructuredOutputMode.json_schema:
-                output_schema = self.kiln_task.output_schema()
+                output_schema = self.task().output_schema()
                 return {
                     "response_format": {
                         "type": "json_schema",
@@ -205,8 +201,10 @@ class OpenAICompatibleAdapter(BaseAdapter):
                        },
                    }
                }
+            case StructuredOutputMode.function_calling_weak:
+                return self.tool_call_params(strict=False)
             case StructuredOutputMode.function_calling:
-                return self.tool_call_params()
+                return self.tool_call_params(strict=True)
             case StructuredOutputMode.json_instructions:
                 # JSON done via instructions in prompt, not the API response format. Do not ask for json_object (see option below).
                 return {}
@@ -215,28 +213,32 @@ class OpenAICompatibleAdapter(BaseAdapter):
                 return {"response_format": {"type": "json_object"}}
             case StructuredOutputMode.default:
                 # Default to function calling -- it's older than the other modes. Higher compatibility.
-                return self.tool_call_params()
+                return self.tool_call_params(strict=True)
             case _:
                 raise_exhaustive_enum_error(provider.structured_output_mode)

-    def tool_call_params(self) -> dict[str, Any]:
+    def tool_call_params(self, strict: bool) -> dict[str, Any]:
         # Add additional_properties: false to the schema (OpenAI requires this for some models)
-        output_schema = self.kiln_task.output_schema()
+        output_schema = self.task().output_schema()
         if not isinstance(output_schema, dict):
             raise ValueError(
                 "Invalid output schema for this task. Can not use tool calls."
             )
         output_schema["additionalProperties"] = False

+        function_params = {
+            "name": "task_response",
+            "parameters": output_schema,
+        }
+        # This should be on, but we allow setting function_calling_weak for APIs that don't support it.
+        if strict:
+            function_params["strict"] = True
+
         return {
             "tools": [
                 {
                     "type": "function",
-                    "function": {
-                        "name": "task_response",
-                        "parameters": output_schema,
-                        "strict": True,
-                    },
+                    "function": function_params,
                 }
             ],
             "tool_choice": {
@@ -244,3 +246,44 @@ class OpenAICompatibleAdapter(BaseAdapter):
                 "function": {"name": "task_response"},
             },
         }
+
+    def build_extra_body(self, provider: KilnModelProvider) -> dict[str, Any]:
+        # TODO P1: Don't love having this logic here. But it's a usability improvement
+        # so better to keep it than exclude it. Should figure out how I want to isolate
+        # this sort of logic so it's config driven and can be overridden
+
+        extra_body = {}
+        provider_options = {}
+
+        if provider.require_openrouter_reasoning:
+            # https://openrouter.ai/docs/use-cases/reasoning-tokens
+            extra_body["reasoning"] = {
+                "exclude": False,
+            }
+
+        if provider.r1_openrouter_options:
+            # Require providers that support the reasoning parameter
+            provider_options["require_parameters"] = True
+            # Prefer R1 providers with reasonable perf/quants
+            provider_options["order"] = ["Fireworks", "Together"]
+            # R1 providers with unreasonable quants
+            provider_options["ignore"] = ["DeepInfra"]
+
+        # Only set of this request is to get logprobs.
+        if (
+            provider.logprobs_openrouter_options
+            and self.base_adapter_config.top_logprobs is not None
+        ):
+            # Don't let OpenRouter choose a provider that doesn't support logprobs.
+            provider_options["require_parameters"] = True
+            # DeepInfra silently fails to return logprobs consistently.
+            provider_options["ignore"] = ["DeepInfra"]
+
+        if provider.openrouter_skip_required_parameters:
+            # Oddball case, R1 14/8/1.5B fail with this param, even though they support thinking params.
+            provider_options["require_parameters"] = False
+
+        if len(provider_options) > 0:
+            extra_body["provider"] = provider_options
+
+        return extra_body
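
build_extra_body centralizes the OpenRouter-specific request options that were previously inlined in _run. As a rough illustration of what it produces for an OpenRouter R1-style model with require_openrouter_reasoning and r1_openrouter_options set, and no logprobs requested (derived from the code above, not an exhaustive spec):

# Approximate return value of build_extra_body(provider) in that case:
extra_body = {
    "reasoning": {"exclude": False},
    "provider": {
        "require_parameters": True,
        "order": ["Fireworks", "Together"],
        "ignore": ["DeepInfra"],
    },
}
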
kiln_ai/adapters/model_adapters/test_base_adapter.py (+22 -13)

@@ -3,8 +3,9 @@ from unittest.mock import MagicMock, patch
 import pytest

 from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
-from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo, BaseAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
 from kiln_ai.datamodel import Task
+from kiln_ai.datamodel.task import RunConfig


 class MockAdapter(BaseAdapter):
@@ -13,13 +14,8 @@ class MockAdapter(BaseAdapter):
     async def _run(self, input):
         return None

-    def adapter_info(self) -> AdapterInfo:
-        return AdapterInfo(
-            adapter_name="test",
-            model_name=self.model_name,
-            model_provider=self.model_provider_name,
-            prompt_builder_name="test",
-        )
+    def adapter_name(self) -> str:
+        return "test"


 @pytest.fixture
@@ -37,9 +33,12 @@ def base_task():
 @pytest.fixture
 def adapter(base_task):
     return MockAdapter(
-        kiln_task=base_task,
-        model_name="test_model",
-        model_provider_name="test_provider",
+        run_config=RunConfig(
+            task=base_task,
+            model_name="test_model",
+            model_provider_name="test_provider",
+            prompt_id="simple_prompt_builder",
+        ),
     )


@@ -85,7 +84,12 @@ async def test_model_provider_missing_names(base_task):
     """Test error when model or provider name is missing"""
     # Test with missing model name
     adapter = MockAdapter(
-        kiln_task=base_task, model_name="", model_provider_name="test_provider"
+        run_config=RunConfig(
+            task=base_task,
+            model_name="",
+            model_provider_name="",
+            prompt_id="simple_prompt_builder",
+        ),
     )
     with pytest.raises(
         ValueError, match="model_name and model_provider_name must be provided"
@@ -94,7 +98,12 @@ async def test_model_provider_missing_names(base_task):

     # Test with missing provider name
     adapter = MockAdapter(
-        kiln_task=base_task, model_name="test_model", model_provider_name=""
+        run_config=RunConfig(
+            task=base_task,
+            model_name="test_model",
+            model_provider_name="",
+            prompt_id="simple_prompt_builder",
+        ),
     )
     with pytest.raises(
         ValueError, match="model_name and model_provider_name must be provided"
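
These fixtures show the new construction pattern for adapters in tests: a single RunConfig object instead of loose keyword arguments. For reference, a standalone RunConfig built from the fields used in this diff (the Task values are illustrative and its required fields may differ):

from kiln_ai.datamodel import Task
from kiln_ai.datamodel.task import RunConfig

# Illustrative task; field values are assumptions for the sketch.
task = Task(name="Example task", instruction="Answer briefly.")

run_config = RunConfig(
    task=task,
    model_name="llama_3_1_8b",
    model_provider_name="ollama",
    prompt_id="simple_prompt_builder",
)
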
kiln_ai/adapters/model_adapters/test_langchain_adapter.py (+7 -14)

@@ -18,8 +18,8 @@ from kiln_ai.adapters.model_adapters.langchain_adapters import (
     LangchainAdapter,
     langchain_model_from_provider,
 )
-from kiln_ai.adapters.prompt_builders import SimpleChainOfThoughtPromptBuilder
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
+from kiln_ai.datamodel.task import RunConfig


 @pytest.fixture
@@ -56,9 +56,8 @@ def test_langchain_adapter_infer_model_name(tmp_path):

     lca = LangchainAdapter(kiln_task=task, custom_model=custom)

-    model_info = lca.adapter_info()
-    assert model_info.model_name == "custom.langchain:llama-3.1-8b-instant"
-    assert model_info.model_provider == "custom.langchain:ChatGroq"
+    assert lca.run_config.model_name == "custom.langchain:llama-3.1-8b-instant"
+    assert lca.run_config.model_provider_name == "custom.langchain:ChatGroq"


 def test_langchain_adapter_info(tmp_path):
@@ -66,10 +65,9 @@ def test_langchain_adapter_info(tmp_path):

     lca = LangchainAdapter(kiln_task=task, model_name="llama_3_1_8b", provider="ollama")

-    model_info = lca.adapter_info()
-    assert model_info.adapter_name == "kiln_langchain_adapter"
-    assert model_info.model_name == "llama_3_1_8b"
-    assert model_info.model_provider == "ollama"
+    assert lca.adapter_name() == "kiln_langchain_adapter"
+    assert lca.run_config.model_name == "llama_3_1_8b"
+    assert lca.run_config.model_provider_name == "ollama"


 async def test_langchain_adapter_with_cot(tmp_path):
@@ -81,7 +79,7 @@ async def test_langchain_adapter_with_cot(tmp_path):
         kiln_task=task,
         model_name="llama_3_1_8b",
         provider="ollama",
-        prompt_builder=SimpleChainOfThoughtPromptBuilder(task),
+        prompt_id="simple_chain_of_thought_prompt_builder",
     )

     # Mock the base model and its invoke method
@@ -324,11 +322,6 @@ async def test_langchain_adapter_model_no_structured_output_support(tmp_path):

 import pytest

-from kiln_ai.adapters.ml_model_list import KilnModelProvider, ModelProviderName
-from kiln_ai.adapters.model_adapters.langchain_adapters import (
-    langchain_model_from_provider,
-)
-

 @pytest.mark.parametrize(
     "provider_name",
kiln_ai/adapters/model_adapters/test_openai_model_adapter.py (+55 -64)

@@ -5,7 +5,7 @@ import pytest
 from openai import AsyncOpenAI

 from kiln_ai.adapters.ml_model_list import StructuredOutputMode
-from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo, BasePromptBuilder
+from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
 from kiln_ai.adapters.model_adapters.openai_compatible_config import (
     OpenAICompatibleConfig,
 )
@@ -37,63 +37,47 @@ def mock_task(tmp_path):
     return task


-@pytest.fixture
-def mock_prompt_builder():
-    builder = Mock(spec=BasePromptBuilder)
-    type(builder).prompt_builder_name = Mock(return_value="test_prompt_builder")
-    builder.prompt_id = Mock(return_value="test_prompt_id")
-    return builder
-
-
 @pytest.fixture
 def config():
     return OpenAICompatibleConfig(
         api_key="test_key",
         base_url="https://api.test.com",
         model_name="test-model",
-        provider_name="test-provider",
+        provider_name="openrouter",
         default_headers={"X-Test": "test"},
     )


-def test_initialization(config, mock_task, mock_prompt_builder):
+def test_initialization(config, mock_task):
     adapter = OpenAICompatibleAdapter(
         config=config,
         kiln_task=mock_task,
-        prompt_builder=mock_prompt_builder,
-        tags=["test-tag"],
+        prompt_id="simple_prompt_builder",
+        base_adapter_config=AdapterConfig(default_tags=["test-tag"]),
     )

     assert isinstance(adapter.client, AsyncOpenAI)
     assert adapter.config == config
-    assert adapter.kiln_task == mock_task
-    assert adapter.prompt_builder == mock_prompt_builder
-    assert adapter.default_tags == ["test-tag"]
-    assert adapter.model_name == config.model_name
-    assert adapter.model_provider_name == config.provider_name
+    assert adapter.run_config.task == mock_task
+    assert adapter.run_config.prompt_id == "simple_prompt_builder"
+    assert adapter.base_adapter_config.default_tags == ["test-tag"]
+    assert adapter.run_config.model_name == config.model_name
+    assert adapter.run_config.model_provider_name == config.provider_name


-def test_adapter_info(config, mock_task, mock_prompt_builder):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+def test_adapter_info(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

-    info = adapter.adapter_info()
-    assert isinstance(info, AdapterInfo)
-    assert info.model_name == config.model_name
-    assert info.model_provider == config.provider_name
-    assert info.adapter_name == "kiln_openai_compatible_adapter"
-    assert info.prompt_builder_name == "base_prompt_builder"
-    assert info.prompt_id == "test_prompt_id"
+    assert adapter.adapter_name() == "kiln_openai_compatible_adapter"
+
+    assert adapter.run_config.model_name == config.model_name
+    assert adapter.run_config.model_provider_name == config.provider_name
+    assert adapter.run_config.prompt_id == "simple_prompt_builder"


 @pytest.mark.asyncio
-async def test_response_format_options_unstructured(
-    config, mock_task, mock_prompt_builder
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_unstructured(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     # Mock has_structured_output to return False
     with patch.object(adapter, "has_structured_output", return_value=False):
@@ -109,12 +93,8 @@ async def test_response_format_options_unstructured(
     ],
 )
 @pytest.mark.asyncio
-async def test_response_format_options_json_mode(
-    config, mock_task, mock_prompt_builder, mode
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_json_mode(config, mock_task, mode):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -134,12 +114,8 @@ async def test_response_format_options_json_mode(
     ],
 )
 @pytest.mark.asyncio
-async def test_response_format_options_function_calling(
-    config, mock_task, mock_prompt_builder, mode
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_function_calling(config, mock_task, mode):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -153,12 +129,8 @@ async def test_response_format_options_function_calling(


 @pytest.mark.asyncio
-async def test_response_format_options_json_instructions(
-    config, mock_task, mock_prompt_builder
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_json_instructions(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -172,12 +144,8 @@ async def test_response_format_options_json_instructions(


 @pytest.mark.asyncio
-async def test_response_format_options_json_schema(
-    config, mock_task, mock_prompt_builder
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_json_schema(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -198,12 +166,35 @@ async def test_response_format_options_json_schema(
             }
         }

-def test_tool_call_params(config, mock_task, mock_prompt_builder):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+def test_tool_call_params_weak(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)
+
+    params = adapter.tool_call_params(strict=False)
+    expected_schema = mock_task.output_schema()
+    expected_schema["additionalProperties"] = False
+
+    assert params == {
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "task_response",
+                    "parameters": expected_schema,
+                },
+            }
+        ],
+        "tool_choice": {
+            "type": "function",
+            "function": {"name": "task_response"},
+        },
+    }
+
+
+def test_tool_call_params_strict(config, mock_task):
+    config.provider_name = "openai"
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

-    params = adapter.tool_call_params()
+    params = adapter.tool_call_params(strict=True)
     expected_schema = mock_task.output_schema()
     expected_schema["additionalProperties"] = False

kiln_ai/adapters/model_adapters/test_saving_adapter_results.py (+41 -19)

@@ -3,7 +3,6 @@ from unittest.mock import patch
 import pytest

 from kiln_ai.adapters.model_adapters.base_adapter import (
-    AdapterInfo,
     BaseAdapter,
     RunOutput,
 )
@@ -13,6 +12,7 @@ from kiln_ai.datamodel import (
     Project,
     Task,
 )
+from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.config import Config


@@ -20,14 +20,8 @@ class MockAdapter(BaseAdapter):
     async def _run(self, input: dict | str) -> dict | str:
         return RunOutput(output="Test output", intermediate_outputs=None)

-    def adapter_info(self) -> AdapterInfo:
-        return AdapterInfo(
-            adapter_name="mock_adapter",
-            model_name="mock_model",
-            model_provider="mock_provider",
-            prompt_builder_name="mock_prompt_builder",
-            prompt_id="mock_prompt_id",
-        )
+    def adapter_name(self) -> str:
+        return "mock_adapter"


 @pytest.fixture
@@ -45,7 +39,14 @@ def test_task(tmp_path):

 @pytest.fixture
 def adapter(test_task):
-    return MockAdapter(test_task, model_name="phi_3_5", model_provider_name="ollama")
+    return MockAdapter(
+        run_config=RunConfig(
+            task=test_task,
+            model_name="phi_3_5",
+            model_provider_name="ollama",
+            prompt_id="simple_chain_of_thought_prompt_builder",
+        ),
+    )


 def test_save_run_isolation(test_task, adapter):
@@ -94,13 +95,12 @@ def test_save_run_isolation(test_task, adapter):
     assert reloaded_output.source.type == DataSourceType.synthetic
     assert reloaded_output.rating is None
     assert reloaded_output.source.properties["adapter_name"] == "mock_adapter"
-    assert reloaded_output.source.properties["model_name"] == "mock_model"
-    assert reloaded_output.source.properties["model_provider"] == "mock_provider"
+    assert reloaded_output.source.properties["model_name"] == "phi_3_5"
+    assert reloaded_output.source.properties["model_provider"] == "ollama"
     assert (
-        reloaded_output.source.properties["prompt_builder_name"]
-        == "mock_prompt_builder"
+        reloaded_output.source.properties["prompt_id"]
+        == "simple_chain_of_thought_prompt_builder"
     )
-    assert reloaded_output.source.properties["prompt_id"] == "mock_prompt_id"
     # Run again, with same input and different output. Should create a new TaskRun.
     different_run_output = RunOutput(
         output="Different output", intermediate_outputs=None
@@ -118,7 +118,7 @@ def test_save_run_isolation(test_task, adapter):
         properties={
             "model_name": "mock_model",
             "model_provider": "mock_provider",
-            "prompt_builder_name": "mock_prompt_builder",
+            "prompt_id": "mock_prompt_builder",
             "adapter_name": "mock_adapter",
         },
     ),
@@ -178,6 +178,25 @@ async def test_autosave_false(test_task, adapter):
         assert run.id is None


+@pytest.mark.asyncio
+async def test_autosave_true_with_disabled(test_task, adapter):
+    with patch("kiln_ai.utils.config.Config.shared") as mock_shared:
+        mock_config = mock_shared.return_value
+        mock_config.autosave_runs = True
+        mock_config.user_id = "test_user"
+
+        input_data = "Test input"
+
+        adapter.base_adapter_config.allow_saving = False
+        run = await adapter.invoke(input_data)
+
+        # Check that no runs were saved
+        assert len(test_task.runs()) == 0
+
+        # Check that the run ID is not set
+        assert run.id is None
+
+
 @pytest.mark.asyncio
 async def test_autosave_true(test_task, adapter):
     with patch("kiln_ai.utils.config.Config.shared") as mock_shared:
@@ -202,6 +221,9 @@ async def test_autosave_true(test_task, adapter):
         assert output.output == "Test output"
         assert output.source.type == DataSourceType.synthetic
         assert output.source.properties["adapter_name"] == "mock_adapter"
-        assert output.source.properties["model_name"] == "mock_model"
-        assert output.source.properties["model_provider"] == "mock_provider"
-        assert output.source.properties["prompt_builder_name"] == "mock_prompt_builder"
+        assert output.source.properties["model_name"] == "phi_3_5"
+        assert output.source.properties["model_provider"] == "ollama"
+        assert (
+            output.source.properties["prompt_id"]
+            == "simple_chain_of_thought_prompt_builder"
+        )