kiln-ai 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic.
- kiln_ai/adapters/adapter_registry.py +12 -13
- kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
- kiln_ai/adapters/eval/base_eval.py +164 -0
- kiln_ai/adapters/eval/eval_runner.py +267 -0
- kiln_ai/adapters/eval/g_eval.py +367 -0
- kiln_ai/adapters/eval/registry.py +16 -0
- kiln_ai/adapters/eval/test_base_eval.py +324 -0
- kiln_ai/adapters/eval/test_eval_runner.py +640 -0
- kiln_ai/adapters/eval/test_g_eval.py +497 -0
- kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
- kiln_ai/adapters/ml_model_list.py +141 -29
- kiln_ai/adapters/model_adapters/base_adapter.py +50 -35
- kiln_ai/adapters/model_adapters/langchain_adapters.py +27 -20
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -1
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +93 -50
- kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
- kiln_ai/adapters/model_adapters/test_langchain_adapter.py +7 -14
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +55 -64
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
- kiln_ai/adapters/model_adapters/test_structured_output.py +36 -30
- kiln_ai/adapters/ollama_tools.py +0 -1
- kiln_ai/adapters/prompt_builders.py +80 -42
- kiln_ai/adapters/repair/repair_task.py +9 -21
- kiln_ai/adapters/repair/test_repair_task.py +3 -3
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +10 -10
- kiln_ai/adapters/test_generate_docs.py +6 -6
- kiln_ai/adapters/test_ollama_tools.py +0 -1
- kiln_ai/adapters/test_prompt_adaptors.py +17 -14
- kiln_ai/adapters/test_prompt_builders.py +91 -31
- kiln_ai/datamodel/__init__.py +50 -952
- kiln_ai/datamodel/datamodel_enums.py +58 -0
- kiln_ai/datamodel/dataset_filters.py +114 -0
- kiln_ai/datamodel/dataset_split.py +170 -0
- kiln_ai/datamodel/eval.py +298 -0
- kiln_ai/datamodel/finetune.py +105 -0
- kiln_ai/datamodel/json_schema.py +6 -0
- kiln_ai/datamodel/project.py +23 -0
- kiln_ai/datamodel/prompt.py +37 -0
- kiln_ai/datamodel/prompt_id.py +83 -0
- kiln_ai/datamodel/strict_mode.py +24 -0
- kiln_ai/datamodel/task.py +181 -0
- kiln_ai/datamodel/task_output.py +321 -0
- kiln_ai/datamodel/task_run.py +164 -0
- kiln_ai/datamodel/test_basemodel.py +10 -11
- kiln_ai/datamodel/test_dataset_filters.py +71 -0
- kiln_ai/datamodel/test_dataset_split.py +32 -8
- kiln_ai/datamodel/test_datasource.py +3 -2
- kiln_ai/datamodel/test_eval_model.py +635 -0
- kiln_ai/datamodel/test_example_models.py +9 -13
- kiln_ai/datamodel/test_json_schema.py +23 -0
- kiln_ai/datamodel/test_models.py +2 -2
- kiln_ai/datamodel/test_prompt_id.py +129 -0
- kiln_ai/datamodel/test_task.py +159 -0
- kiln_ai/utils/config.py +6 -1
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +37 -1
- kiln_ai-0.12.0.dist-info/RECORD +100 -0
- kiln_ai-0.11.1.dist-info/RECORD +0 -76
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/model_adapters/openai_model_adapter.py

@@ -9,18 +9,23 @@ from openai.types.chat import (
 )

 import kiln_ai.datamodel as datamodel
-from kiln_ai.adapters.ml_model_list import
+from kiln_ai.adapters.ml_model_list import (
+    KilnModelProvider,
+    ModelProviderName,
+    StructuredOutputMode,
+)
 from kiln_ai.adapters.model_adapters.base_adapter import (
     COT_FINAL_ANSWER_PROMPT,
-
+    AdapterConfig,
     BaseAdapter,
-    BasePromptBuilder,
     RunOutput,
 )
 from kiln_ai.adapters.model_adapters.openai_compatible_config import (
     OpenAICompatibleConfig,
 )
 from kiln_ai.adapters.parsers.json_parser import parse_json_string
+from kiln_ai.datamodel import PromptGenerators, PromptId
+from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error


@@ -29,8 +34,8 @@ class OpenAICompatibleAdapter(BaseAdapter):
         self,
         config: OpenAICompatibleConfig,
         kiln_task: datamodel.Task,
-
-
+        prompt_id: PromptId | None = None,
+        base_adapter_config: AdapterConfig | None = None,
     ):
         self.config = config
         self.client = AsyncOpenAI(
@@ -39,12 +44,16 @@ class OpenAICompatibleAdapter(BaseAdapter):
             default_headers=config.default_headers,
         )

-
-            kiln_task,
+        run_config = RunConfig(
+            task=kiln_task,
             model_name=config.model_name,
             model_provider_name=config.provider_name,
-
-
+            prompt_id=prompt_id or PromptGenerators.SIMPLE,
+        )
+
+        super().__init__(
+            run_config=run_config,
+            config=base_adapter_config,
         )

     async def _run(self, input: Dict | str) -> RunOutput:
@@ -93,21 +102,8 @@ class OpenAICompatibleAdapter(BaseAdapter):
             ]
         )

-        #
-        extra_body =
-        require_or_reasoning = (
-            self.config.openrouter_style_reasoning and provider.reasoning_capable
-        )
-        if require_or_reasoning:
-            extra_body["include_reasoning"] = True
-            # Filter to providers that support the reasoning parameter
-            extra_body["provider"] = {
-                "require_parameters": True,
-                # Ugly to have these here, but big range of quality of R1 providers
-                "order": ["Fireworks", "Together"],
-                # fp8 quants are awful
-                "ignore": ["DeepInfra"],
-            }
+        # Build custom request params based on model provider
+        extra_body = self.build_extra_body(provider)

         # Main completion call
         response_format_options = await self.response_format_options()
@@ -115,6 +111,8 @@ class OpenAICompatibleAdapter(BaseAdapter):
             model=provider.provider_options["model"],
             messages=messages,
             extra_body=extra_body,
+            logprobs=self.base_adapter_config.top_logprobs is not None,
+            top_logprobs=self.base_adapter_config.top_logprobs,
             **response_format_options,
         )

@@ -133,9 +131,14 @@ class OpenAICompatibleAdapter(BaseAdapter):
         )

         message = response.choices[0].message
+        logprobs = response.choices[0].logprobs
+
+        # Check logprobs worked, if requested
+        if self.base_adapter_config.top_logprobs is not None and logprobs is None:
+            raise RuntimeError("Logprobs were required, but no logprobs were returned.")

-        # Save reasoning if it exists (OpenRouter specific
-        if
+        # Save reasoning if it exists (OpenRouter specific api response field)
+        if provider.require_openrouter_reasoning:
             if (
                 hasattr(message, "reasoning") and message.reasoning  # pyright: ignore
             ):
@@ -164,26 +167,19 @@ class OpenAICompatibleAdapter(BaseAdapter):
         if not isinstance(response_content, str):
             raise RuntimeError(f"response is not a string: {response_content}")

+        # Parse to dict if we have structured output
+        output: Dict | str = response_content
         if self.has_structured_output():
-
-            return RunOutput(
-                output=structured_response,
-                intermediate_outputs=intermediate_outputs,
-            )
+            output = parse_json_string(response_content)

         return RunOutput(
-            output=
+            output=output,
             intermediate_outputs=intermediate_outputs,
+            output_logprobs=logprobs,
         )

-    def
-        return
-            model_name=self.model_name,
-            model_provider=self.model_provider_name,
-            adapter_name="kiln_openai_compatible_adapter",
-            prompt_builder_name=self.prompt_builder.__class__.prompt_builder_name(),
-            prompt_id=self.prompt_builder.prompt_id(),
-        )
+    def adapter_name(self) -> str:
+        return "kiln_openai_compatible_adapter"

     async def response_format_options(self) -> dict[str, Any]:
         # Unstructured if task isn't structured
@@ -195,7 +191,7 @@ class OpenAICompatibleAdapter(BaseAdapter):
             case StructuredOutputMode.json_mode:
                 return {"response_format": {"type": "json_object"}}
             case StructuredOutputMode.json_schema:
-                output_schema = self.
+                output_schema = self.task().output_schema()
                 return {
                     "response_format": {
                         "type": "json_schema",
@@ -205,8 +201,10 @@ class OpenAICompatibleAdapter(BaseAdapter):
                         },
                     }
                 }
+            case StructuredOutputMode.function_calling_weak:
+                return self.tool_call_params(strict=False)
             case StructuredOutputMode.function_calling:
-                return self.tool_call_params()
+                return self.tool_call_params(strict=True)
             case StructuredOutputMode.json_instructions:
                 # JSON done via instructions in prompt, not the API response format. Do not ask for json_object (see option below).
                 return {}
@@ -215,28 +213,32 @@ class OpenAICompatibleAdapter(BaseAdapter):
                 return {"response_format": {"type": "json_object"}}
             case StructuredOutputMode.default:
                 # Default to function calling -- it's older than the other modes. Higher compatibility.
-                return self.tool_call_params()
+                return self.tool_call_params(strict=True)
             case _:
                 raise_exhaustive_enum_error(provider.structured_output_mode)

-    def tool_call_params(self) -> dict[str, Any]:
+    def tool_call_params(self, strict: bool) -> dict[str, Any]:
         # Add additional_properties: false to the schema (OpenAI requires this for some models)
-        output_schema = self.
+        output_schema = self.task().output_schema()
         if not isinstance(output_schema, dict):
             raise ValueError(
                 "Invalid output schema for this task. Can not use tool calls."
             )
         output_schema["additionalProperties"] = False

+        function_params = {
+            "name": "task_response",
+            "parameters": output_schema,
+        }
+        # This should be on, but we allow setting function_calling_weak for APIs that don't support it.
+        if strict:
+            function_params["strict"] = True
+
         return {
             "tools": [
                 {
                     "type": "function",
-                    "function":
-                        "name": "task_response",
-                        "parameters": output_schema,
-                        "strict": True,
-                    },
+                    "function": function_params,
                 }
             ],
             "tool_choice": {
@@ -244,3 +246,44 @@ class OpenAICompatibleAdapter(BaseAdapter):
                 "function": {"name": "task_response"},
             },
         }
+
+    def build_extra_body(self, provider: KilnModelProvider) -> dict[str, Any]:
+        # TODO P1: Don't love having this logic here. But it's a usability improvement
+        # so better to keep it than exclude it. Should figure out how I want to isolate
+        # this sort of logic so it's config driven and can be overridden
+
+        extra_body = {}
+        provider_options = {}
+
+        if provider.require_openrouter_reasoning:
+            # https://openrouter.ai/docs/use-cases/reasoning-tokens
+            extra_body["reasoning"] = {
+                "exclude": False,
+            }
+
+        if provider.r1_openrouter_options:
+            # Require providers that support the reasoning parameter
+            provider_options["require_parameters"] = True
+            # Prefer R1 providers with reasonable perf/quants
+            provider_options["order"] = ["Fireworks", "Together"]
+            # R1 providers with unreasonable quants
+            provider_options["ignore"] = ["DeepInfra"]
+
+        # Only set of this request is to get logprobs.
+        if (
+            provider.logprobs_openrouter_options
+            and self.base_adapter_config.top_logprobs is not None
+        ):
+            # Don't let OpenRouter choose a provider that doesn't support logprobs.
+            provider_options["require_parameters"] = True
+            # DeepInfra silently fails to return logprobs consistently.
+            provider_options["ignore"] = ["DeepInfra"]
+
+        if provider.openrouter_skip_required_parameters:
+            # Oddball case, R1 14/8/1.5B fail with this param, even though they support thinking params.
+            provider_options["require_parameters"] = False
+
+        if len(provider_options) > 0:
+            extra_body["provider"] = provider_options
+
+        return extra_body
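For orientation, here is a minimal usage sketch of the new constructor surface shown above, where prompt_id and base_adapter_config replace the old prompt_builder argument. The module path for OpenAICompatibleAdapter and the AdapterConfig field names are assumptions inferred from this diff and its tests, not a confirmed 0.12.0 API.

# Hypothetical sketch based on the diff above; names and defaults may differ in the release.
from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
from kiln_ai.adapters.model_adapters.openai_compatible_config import OpenAICompatibleConfig
from kiln_ai.adapters.model_adapters.openai_model_adapter import OpenAICompatibleAdapter  # assumed module path

config = OpenAICompatibleConfig(
    api_key="test_key",
    base_url="https://api.test.com",
    model_name="test-model",
    provider_name="openrouter",
    default_headers={"X-Test": "test"},
)

adapter = OpenAICompatibleAdapter(
    config=config,
    kiln_task=task,  # an existing kiln_ai.datamodel.Task (placeholder here)
    prompt_id="simple_prompt_builder",  # optional; falls back to PromptGenerators.SIMPLE
    base_adapter_config=AdapterConfig(default_tags=["test-tag"]),  # fields assumed from the tests
)

# The run configuration is now exposed on the adapter instead of an AdapterInfo object:
assert adapter.run_config.model_name == "test-model"
assert adapter.adapter_name() == "kiln_openai_compatible_adapter"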
kiln_ai/adapters/model_adapters/test_base_adapter.py

@@ -3,8 +3,9 @@ from unittest.mock import MagicMock, patch
 import pytest

 from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
-from kiln_ai.adapters.model_adapters.base_adapter import
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
 from kiln_ai.datamodel import Task
+from kiln_ai.datamodel.task import RunConfig


 class MockAdapter(BaseAdapter):
@@ -13,13 +14,8 @@ class MockAdapter(BaseAdapter):
     async def _run(self, input):
         return None

-    def
-        return
-            adapter_name="test",
-            model_name=self.model_name,
-            model_provider=self.model_provider_name,
-            prompt_builder_name="test",
-        )
+    def adapter_name(self) -> str:
+        return "test"


 @pytest.fixture
@@ -37,9 +33,12 @@ def base_task():
 @pytest.fixture
 def adapter(base_task):
     return MockAdapter(
-
-
-
+        run_config=RunConfig(
+            task=base_task,
+            model_name="test_model",
+            model_provider_name="test_provider",
+            prompt_id="simple_prompt_builder",
+        ),
     )


@@ -85,7 +84,12 @@ async def test_model_provider_missing_names(base_task):
     """Test error when model or provider name is missing"""
     # Test with missing model name
     adapter = MockAdapter(
-
+        run_config=RunConfig(
+            task=base_task,
+            model_name="",
+            model_provider_name="",
+            prompt_id="simple_prompt_builder",
+        ),
     )
     with pytest.raises(
         ValueError, match="model_name and model_provider_name must be provided"
@@ -94,7 +98,12 @@ async def test_model_provider_missing_names(base_task):

     # Test with missing provider name
     adapter = MockAdapter(
-
+        run_config=RunConfig(
+            task=base_task,
+            model_name="test_model",
+            model_provider_name="",
+            prompt_id="simple_prompt_builder",
+        ),
     )
     with pytest.raises(
         ValueError, match="model_name and model_provider_name must be provided"
kiln_ai/adapters/model_adapters/test_langchain_adapter.py

@@ -18,8 +18,8 @@ from kiln_ai.adapters.model_adapters.langchain_adapters import (
     LangchainAdapter,
     langchain_model_from_provider,
 )
-from kiln_ai.adapters.prompt_builders import SimpleChainOfThoughtPromptBuilder
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
+from kiln_ai.datamodel.task import RunConfig


 @pytest.fixture
@@ -56,9 +56,8 @@ def test_langchain_adapter_infer_model_name(tmp_path):

     lca = LangchainAdapter(kiln_task=task, custom_model=custom)

-
-    assert
-    assert model_info.model_provider == "custom.langchain:ChatGroq"
+    assert lca.run_config.model_name == "custom.langchain:llama-3.1-8b-instant"
+    assert lca.run_config.model_provider_name == "custom.langchain:ChatGroq"


 def test_langchain_adapter_info(tmp_path):
@@ -66,10 +65,9 @@ def test_langchain_adapter_info(tmp_path):

     lca = LangchainAdapter(kiln_task=task, model_name="llama_3_1_8b", provider="ollama")

-
-    assert
-    assert
-    assert model_info.model_provider == "ollama"
+    assert lca.adapter_name() == "kiln_langchain_adapter"
+    assert lca.run_config.model_name == "llama_3_1_8b"
+    assert lca.run_config.model_provider_name == "ollama"


 async def test_langchain_adapter_with_cot(tmp_path):
@@ -81,7 +79,7 @@ async def test_langchain_adapter_with_cot(tmp_path):
         kiln_task=task,
         model_name="llama_3_1_8b",
         provider="ollama",
-
+        prompt_id="simple_chain_of_thought_prompt_builder",
     )

     # Mock the base model and its invoke method
@@ -324,11 +322,6 @@ async def test_langchain_adapter_model_no_structured_output_support(tmp_path):

 import pytest

-from kiln_ai.adapters.ml_model_list import KilnModelProvider, ModelProviderName
-from kiln_ai.adapters.model_adapters.langchain_adapters import (
-    langchain_model_from_provider,
-)
-

 @pytest.mark.parametrize(
     "provider_name",
kiln_ai/adapters/model_adapters/test_openai_model_adapter.py

@@ -5,7 +5,7 @@ import pytest
 from openai import AsyncOpenAI

 from kiln_ai.adapters.ml_model_list import StructuredOutputMode
-from kiln_ai.adapters.model_adapters.base_adapter import
+from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
 from kiln_ai.adapters.model_adapters.openai_compatible_config import (
     OpenAICompatibleConfig,
 )
@@ -37,63 +37,47 @@ def mock_task(tmp_path):
     return task


-@pytest.fixture
-def mock_prompt_builder():
-    builder = Mock(spec=BasePromptBuilder)
-    type(builder).prompt_builder_name = Mock(return_value="test_prompt_builder")
-    builder.prompt_id = Mock(return_value="test_prompt_id")
-    return builder
-
-
 @pytest.fixture
 def config():
     return OpenAICompatibleConfig(
         api_key="test_key",
         base_url="https://api.test.com",
         model_name="test-model",
-        provider_name="
+        provider_name="openrouter",
         default_headers={"X-Test": "test"},
     )


-def test_initialization(config, mock_task
+def test_initialization(config, mock_task):
     adapter = OpenAICompatibleAdapter(
         config=config,
         kiln_task=mock_task,
-
-
+        prompt_id="simple_prompt_builder",
+        base_adapter_config=AdapterConfig(default_tags=["test-tag"]),
     )

     assert isinstance(adapter.client, AsyncOpenAI)
     assert adapter.config == config
-    assert adapter.
-    assert adapter.
-    assert adapter.default_tags == ["test-tag"]
-    assert adapter.model_name == config.model_name
-    assert adapter.model_provider_name == config.provider_name
+    assert adapter.run_config.task == mock_task
+    assert adapter.run_config.prompt_id == "simple_prompt_builder"
+    assert adapter.base_adapter_config.default_tags == ["test-tag"]
+    assert adapter.run_config.model_name == config.model_name
+    assert adapter.run_config.model_provider_name == config.provider_name


-def test_adapter_info(config, mock_task
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+def test_adapter_info(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

-
-
-    assert
-    assert
-    assert
-    assert info.prompt_builder_name == "base_prompt_builder"
-    assert info.prompt_id == "test_prompt_id"
+    assert adapter.adapter_name() == "kiln_openai_compatible_adapter"
+
+    assert adapter.run_config.model_name == config.model_name
+    assert adapter.run_config.model_provider_name == config.provider_name
+    assert adapter.run_config.prompt_id == "simple_prompt_builder"


 @pytest.mark.asyncio
-async def test_response_format_options_unstructured(
-    config, mock_task
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_unstructured(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     # Mock has_structured_output to return False
     with patch.object(adapter, "has_structured_output", return_value=False):
@@ -109,12 +93,8 @@ async def test_response_format_options_unstructured(
     ],
 )
 @pytest.mark.asyncio
-async def test_response_format_options_json_mode(
-    config, mock_task
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_json_mode(config, mock_task, mode):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -134,12 +114,8 @@ async def test_response_format_options_json_mode(
     ],
 )
 @pytest.mark.asyncio
-async def test_response_format_options_function_calling(
-    config, mock_task
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_function_calling(config, mock_task, mode):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -153,12 +129,8 @@ async def test_response_format_options_function_calling(


 @pytest.mark.asyncio
-async def test_response_format_options_json_instructions(
-    config, mock_task
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_json_instructions(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -172,12 +144,8 @@ async def test_response_format_options_json_instructions(


 @pytest.mark.asyncio
-async def test_response_format_options_json_schema(
-    config, mock_task
-):
-    adapter = OpenAICompatibleAdapter(
-        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
-    )
+async def test_response_format_options_json_schema(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

     with (
         patch.object(adapter, "has_structured_output", return_value=True),
@@ -198,12 +166,35 @@ async def test_response_format_options_json_schema(
     }


-def
-    adapter = OpenAICompatibleAdapter(
-
-    )
+def test_tool_call_params_weak(config, mock_task):
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)
+
+    params = adapter.tool_call_params(strict=False)
+    expected_schema = mock_task.output_schema()
+    expected_schema["additionalProperties"] = False
+
+    assert params == {
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "task_response",
+                    "parameters": expected_schema,
+                },
+            }
+        ],
+        "tool_choice": {
+            "type": "function",
+            "function": {"name": "task_response"},
+        },
+    }
+
+
+def test_tool_call_params_strict(config, mock_task):
+    config.provider_name = "openai"
+    adapter = OpenAICompatibleAdapter(config=config, kiln_task=mock_task)

-    params = adapter.tool_call_params()
+    params = adapter.tool_call_params(strict=True)
     expected_schema = mock_task.output_schema()
     expected_schema["additionalProperties"] = False

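The two new tool-call tests pin down what the strict flag changes. Roughly, as a sketch inferred from the adapter diff above (not a verbatim excerpt of the release):

# strict=True adds "strict": True to the function definition; strict=False omits it
# for APIs that reject the parameter (StructuredOutputMode.function_calling_weak).
params_strict = adapter.tool_call_params(strict=True)
assert params_strict["tools"][0]["function"]["strict"] is True

params_weak = adapter.tool_call_params(strict=False)
assert "strict" not in params_weak["tools"][0]["function"]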
kiln_ai/adapters/model_adapters/test_saving_adapter_results.py

@@ -3,7 +3,6 @@ from unittest.mock import patch
 import pytest

 from kiln_ai.adapters.model_adapters.base_adapter import (
-    AdapterInfo,
     BaseAdapter,
     RunOutput,
 )
@@ -13,6 +12,7 @@ from kiln_ai.datamodel import (
     Project,
     Task,
 )
+from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.config import Config


@@ -20,14 +20,8 @@ class MockAdapter(BaseAdapter):
     async def _run(self, input: dict | str) -> dict | str:
         return RunOutput(output="Test output", intermediate_outputs=None)

-    def
-        return
-            adapter_name="mock_adapter",
-            model_name="mock_model",
-            model_provider="mock_provider",
-            prompt_builder_name="mock_prompt_builder",
-            prompt_id="mock_prompt_id",
-        )
+    def adapter_name(self) -> str:
+        return "mock_adapter"


 @pytest.fixture
@@ -45,7 +39,14 @@ def test_task(tmp_path):

 @pytest.fixture
 def adapter(test_task):
-    return MockAdapter(
+    return MockAdapter(
+        run_config=RunConfig(
+            task=test_task,
+            model_name="phi_3_5",
+            model_provider_name="ollama",
+            prompt_id="simple_chain_of_thought_prompt_builder",
+        ),
+    )


 def test_save_run_isolation(test_task, adapter):
@@ -94,13 +95,12 @@ def test_save_run_isolation(test_task, adapter):
     assert reloaded_output.source.type == DataSourceType.synthetic
     assert reloaded_output.rating is None
     assert reloaded_output.source.properties["adapter_name"] == "mock_adapter"
-    assert reloaded_output.source.properties["model_name"] == "
-    assert reloaded_output.source.properties["model_provider"] == "
+    assert reloaded_output.source.properties["model_name"] == "phi_3_5"
+    assert reloaded_output.source.properties["model_provider"] == "ollama"
     assert (
-        reloaded_output.source.properties["
-        == "
+        reloaded_output.source.properties["prompt_id"]
+        == "simple_chain_of_thought_prompt_builder"
     )
-    assert reloaded_output.source.properties["prompt_id"] == "mock_prompt_id"
     # Run again, with same input and different output. Should create a new TaskRun.
     different_run_output = RunOutput(
         output="Different output", intermediate_outputs=None
@@ -118,7 +118,7 @@ def test_save_run_isolation(test_task, adapter):
             properties={
                 "model_name": "mock_model",
                 "model_provider": "mock_provider",
-                "
+                "prompt_id": "mock_prompt_builder",
                 "adapter_name": "mock_adapter",
             },
         ),
@@ -178,6 +178,25 @@ async def test_autosave_false(test_task, adapter):
     assert run.id is None


+@pytest.mark.asyncio
+async def test_autosave_true_with_disabled(test_task, adapter):
+    with patch("kiln_ai.utils.config.Config.shared") as mock_shared:
+        mock_config = mock_shared.return_value
+        mock_config.autosave_runs = True
+        mock_config.user_id = "test_user"
+
+        input_data = "Test input"
+
+        adapter.base_adapter_config.allow_saving = False
+        run = await adapter.invoke(input_data)
+
+        # Check that no runs were saved
+        assert len(test_task.runs()) == 0
+
+        # Check that the run ID is not set
+        assert run.id is None
+
+
 @pytest.mark.asyncio
 async def test_autosave_true(test_task, adapter):
     with patch("kiln_ai.utils.config.Config.shared") as mock_shared:
@@ -202,6 +221,9 @@ async def test_autosave_true(test_task, adapter):
     assert output.output == "Test output"
     assert output.source.type == DataSourceType.synthetic
     assert output.source.properties["adapter_name"] == "mock_adapter"
-    assert output.source.properties["model_name"] == "
-    assert output.source.properties["model_provider"] == "
-    assert
+    assert output.source.properties["model_name"] == "phi_3_5"
+    assert output.source.properties["model_provider"] == "ollama"
+    assert (
+        output.source.properties["prompt_id"]
+        == "simple_chain_of_thought_prompt_builder"
+    )
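The new test_autosave_true_with_disabled case implies a run can be kept out of the dataset even when autosave is on. A rough sketch of that flow, assuming allow_saving is a field on the adapter's base_adapter_config as the test suggests (the test only mutates it after construction, so constructor support is not confirmed by this diff):

# Hypothetical sketch: suppress saving for a single adapter even with autosave_runs enabled.
# Runs inside an async test, mirroring the fixtures shown above.
adapter = MockAdapter(
    run_config=RunConfig(
        task=test_task,
        model_name="phi_3_5",
        model_provider_name="ollama",
        prompt_id="simple_chain_of_thought_prompt_builder",
    ),
)
adapter.base_adapter_config.allow_saving = False  # per the test above
run = await adapter.invoke("Test input")
assert run.id is None                 # nothing persisted
assert len(test_task.runs()) == 0     # the task's dataset is untouched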