kiln-ai 0.8.1__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +7 -7
- kiln_ai/adapters/adapter_registry.py +77 -5
- kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
- kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
- kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
- kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
- kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
- kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +113 -21
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
- kiln_ai/adapters/ml_model_list.py +323 -94
- kiln_ai/adapters/model_adapters/__init__.py +18 -0
- kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
- kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
- kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
- kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
- kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
- kiln_ai/adapters/parsers/__init__.py +10 -0
- kiln_ai/adapters/parsers/base_parser.py +12 -0
- kiln_ai/adapters/parsers/json_parser.py +37 -0
- kiln_ai/adapters/parsers/parser_registry.py +19 -0
- kiln_ai/adapters/parsers/r1_parser.py +69 -0
- kiln_ai/adapters/parsers/test_json_parser.py +81 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
- kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
- kiln_ai/adapters/prompt_builders.py +126 -20
- kiln_ai/adapters/provider_tools.py +91 -36
- kiln_ai/adapters/repair/repair_task.py +17 -6
- kiln_ai/adapters/repair/test_repair_task.py +4 -4
- kiln_ai/adapters/run_output.py +8 -0
- kiln_ai/adapters/test_adapter_registry.py +177 -0
- kiln_ai/adapters/test_generate_docs.py +69 -0
- kiln_ai/adapters/test_prompt_adaptors.py +8 -4
- kiln_ai/adapters/test_prompt_builders.py +190 -29
- kiln_ai/adapters/test_provider_tools.py +268 -46
- kiln_ai/datamodel/__init__.py +193 -12
- kiln_ai/datamodel/basemodel.py +31 -11
- kiln_ai/datamodel/json_schema.py +8 -3
- kiln_ai/datamodel/model_cache.py +8 -3
- kiln_ai/datamodel/test_basemodel.py +81 -2
- kiln_ai/datamodel/test_dataset_split.py +100 -3
- kiln_ai/datamodel/test_example_models.py +25 -4
- kiln_ai/datamodel/test_model_cache.py +24 -0
- kiln_ai/datamodel/test_model_perf.py +125 -0
- kiln_ai/datamodel/test_models.py +129 -0
- kiln_ai/utils/exhaustive_error.py +6 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
- kiln_ai-0.11.1.dist-info/RECORD +76 -0
- kiln_ai-0.8.1.dist-info/RECORD +0 -58
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py}
RENAMED

@@ -7,21 +7,31 @@ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_fireworks import ChatFireworks
 from langchain_groq import ChatGroq
 from langchain_ollama import ChatOllama
-from langchain_openai import ChatOpenAI

-from kiln_ai.adapters.
+from kiln_ai.adapters.ml_model_list import (
+    KilnModelProvider,
+    ModelProviderName,
+    StructuredOutputMode,
+)
+from kiln_ai.adapters.model_adapters.base_adapter import COT_FINAL_ANSWER_PROMPT
+from kiln_ai.adapters.model_adapters.langchain_adapters import (
     LangchainAdapter,
-    get_structured_output_options,
     langchain_model_from_provider,
 )
-from kiln_ai.adapters.ml_model_list import KilnModelProvider, ModelProviderName
 from kiln_ai.adapters.prompt_builders import SimpleChainOfThoughtPromptBuilder
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task


-
-
-
+@pytest.fixture
+def mock_adapter(tmp_path):
+    return LangchainAdapter(
+        kiln_task=build_test_task(tmp_path),
+        model_name="llama_3_1_8b",
+        provider="ollama",
+    )
+
+
+def test_langchain_adapter_munge_response(mock_adapter):
     # Mistral Large tool calling format is a bit different
     response = {
         "name": "task_response",
@@ -30,12 +40,12 @@ def test_langchain_adapter_munge_response(tmp_path):
             "punchline": "Because she wanted to be a moo-sician!",
         },
     }
-    munged =
+    munged = mock_adapter._munge_response(response)
     assert munged["setup"] == "Why did the cow join a band?"
     assert munged["punchline"] == "Because she wanted to be a moo-sician!"

     # non mistral format should continue to work
-    munged =
+    munged = mock_adapter._munge_response(response["arguments"])
     assert munged["setup"] == "Why did the cow join a band?"
     assert munged["punchline"] == "Because she wanted to be a moo-sician!"

@@ -89,9 +99,7 @@ async def test_langchain_adapter_with_cot(tmp_path):

     # Patch both the langchain_model_from function and self.model()
     with (
-        patch(
-            "kiln_ai.adapters.langchain_adapters.langchain_model_from", mock_model_from
-        ),
+        patch.object(LangchainAdapter, "langchain_model_from", mock_model_from),
         patch.object(LangchainAdapter, "model", return_value=mock_model_instance),
     ):
         response = await lca._run("test input")
@@ -121,8 +129,8 @@ async def test_langchain_adapter_with_cot(tmp_path):
     invoke_args = mock_model_instance.ainvoke.call_args[0][0]
     assert isinstance(invoke_args[3], AIMessage)
     assert "Chain of thought reasoning..." in invoke_args[3].content
-    assert isinstance(invoke_args[4],
-    assert
+    assert isinstance(invoke_args[4], HumanMessage)
+    assert COT_FINAL_ANSWER_PROMPT in invoke_args[4].content

     assert (
         response.intermediate_outputs["chain_of_thought"]
@@ -131,46 +139,28 @@ async def test_langchain_adapter_with_cot(tmp_path):
     assert response.output == {"count": 1}


-
+@pytest.mark.parametrize(
+    "structured_output_mode,expected_method",
+    [
+        (StructuredOutputMode.function_calling, "function_calling"),
+        (StructuredOutputMode.json_mode, "json_mode"),
+        (StructuredOutputMode.json_schema, "json_schema"),
+        (StructuredOutputMode.json_instruction_and_object, "json_mode"),
+        (StructuredOutputMode.default, None),
+    ],
+)
+async def test_get_structured_output_options(
+    mock_adapter, structured_output_mode, expected_method
+):
     # Mock the provider response
     mock_provider = MagicMock()
-    mock_provider.
-        "langchain": {
-            "with_structured_output_options": {
-                "force_json_response": True,
-                "max_retries": 3,
-            }
-        }
-    }
+    mock_provider.structured_output_mode = structured_output_mode

-    #
-
-        "kiln_ai.adapters.langchain_adapters.kiln_model_provider_from",
-        AsyncMock(return_value=mock_provider),
-    ):
-        options = await get_structured_output_options("model_name", "provider")
-        assert options == {"force_json_response": True, "max_retries": 3}
+    # Mock adapter.model_provider()
+    mock_adapter.model_provider = MagicMock(return_value=mock_provider)

-
-
-        "kiln_ai.adapters.langchain_adapters.kiln_model_provider_from",
-        AsyncMock(return_value=None),
-    ):
-        options = await get_structured_output_options("model_name", "provider")
-        assert options == {}
-
-
-@pytest.mark.asyncio
-async def test_langchain_model_from_provider_openai():
-    provider = KilnModelProvider(
-        name=ModelProviderName.openai, provider_options={"model": "gpt-4"}
-    )
-
-    with patch("kiln_ai.adapters.langchain_adapters.Config.shared") as mock_config:
-        mock_config.return_value.open_ai_api_key = "test_key"
-        model = await langchain_model_from_provider(provider, "gpt-4")
-        assert isinstance(model, ChatOpenAI)
-        assert model.model_name == "gpt-4"
+    options = mock_adapter.get_structured_output_options("model_name", "provider")
+    assert options.get("method") == expected_method


 @pytest.mark.asyncio
@@ -179,7 +169,9 @@ async def test_langchain_model_from_provider_groq():
         name=ModelProviderName.groq, provider_options={"model": "mixtral-8x7b"}
     )

-    with patch(
+    with patch(
+        "kiln_ai.adapters.model_adapters.langchain_adapters.Config.shared"
+    ) as mock_config:
         mock_config.return_value.groq_api_key = "test_key"
         model = await langchain_model_from_provider(provider, "mixtral-8x7b")
         assert isinstance(model, ChatGroq)
@@ -193,7 +185,9 @@ async def test_langchain_model_from_provider_bedrock():
         provider_options={"model": "anthropic.claude-v2", "region_name": "us-east-1"},
     )

-    with patch(
+    with patch(
+        "kiln_ai.adapters.model_adapters.langchain_adapters.Config.shared"
+    ) as mock_config:
         mock_config.return_value.bedrock_access_key = "test_access"
         mock_config.return_value.bedrock_secret_key = "test_secret"
         model = await langchain_model_from_provider(provider, "anthropic.claude-v2")
@@ -208,7 +202,9 @@ async def test_langchain_model_from_provider_fireworks():
         name=ModelProviderName.fireworks_ai, provider_options={"model": "mixtral-8x7b"}
     )

-    with patch(
+    with patch(
+        "kiln_ai.adapters.model_adapters.langchain_adapters.Config.shared"
+    ) as mock_config:
         mock_config.return_value.fireworks_api_key = "test_key"
         model = await langchain_model_from_provider(provider, "mixtral-8x7b")
         assert isinstance(model, ChatFireworks)
@@ -224,15 +220,15 @@ async def test_langchain_model_from_provider_ollama():
     mock_connection = MagicMock()
     with (
         patch(
-            "kiln_ai.adapters.langchain_adapters.get_ollama_connection",
+            "kiln_ai.adapters.model_adapters.langchain_adapters.get_ollama_connection",
             return_value=AsyncMock(return_value=mock_connection),
         ),
         patch(
-            "kiln_ai.adapters.langchain_adapters.ollama_model_installed",
+            "kiln_ai.adapters.model_adapters.langchain_adapters.ollama_model_installed",
             return_value=True,
         ),
         patch(
-            "kiln_ai.adapters.langchain_adapters.ollama_base_url",
+            "kiln_ai.adapters.model_adapters.langchain_adapters.ollama_base_url",
             return_value="http://localhost:11434",
         ),
     ):
@@ -283,33 +279,27 @@ async def test_langchain_adapter_model_structured_output(tmp_path):
     mock_model.with_structured_output = MagicMock(return_value="structured_model")

     adapter = LangchainAdapter(
-        kiln_task=task, model_name="test_model", provider="
+        kiln_task=task, model_name="test_model", provider="ollama"
     )
+    adapter.get_structured_output_options = MagicMock(
+        return_value={"option1": "value1"}
+    )
+    adapter.langchain_model_from = AsyncMock(return_value=mock_model)

-
-
-
-
-
-
-            "
-
-
-
-
-
-
-
-            {
-                "type": "object",
-                "properties": {"count": {"type": "integer"}},
-                "title": "task_response",
-                "description": "A response from the task",
-            },
-            include_raw=True,
-            option1="value1",
-        )
-        assert model == "structured_model"
+    model = await adapter.model()
+
+    # Verify the model was configured with structured output
+    mock_model.with_structured_output.assert_called_once_with(
+        {
+            "type": "object",
+            "properties": {"count": {"type": "integer"}},
+            "title": "task_response",
+            "description": "A response from the task",
+        },
+        include_raw=True,
+        option1="value1",
+    )
+    assert model == "structured_model"


 @pytest.mark.asyncio
@@ -324,12 +314,37 @@ async def test_langchain_adapter_model_no_structured_output_support(tmp_path):
     del mock_model.with_structured_output

     adapter = LangchainAdapter(
-        kiln_task=task, model_name="test_model", provider="
+        kiln_task=task, model_name="test_model", provider="ollama"
     )
+    adapter.langchain_model_from = AsyncMock(return_value=mock_model)

-    with
-
-
-
-
+    with pytest.raises(ValueError, match="does not support structured output"):
+        await adapter.model()
+
+
+import pytest
+
+from kiln_ai.adapters.ml_model_list import KilnModelProvider, ModelProviderName
+from kiln_ai.adapters.model_adapters.langchain_adapters import (
+    langchain_model_from_provider,
+)
+
+
+@pytest.mark.parametrize(
+    "provider_name",
+    [
+        (ModelProviderName.openai),
+        (ModelProviderName.openai_compatible),
+        (ModelProviderName.openrouter),
+    ],
+)
+@pytest.mark.asyncio
+async def test_langchain_model_from_provider_unsupported_providers(provider_name):
+    # Arrange
+    provider = KilnModelProvider(
+        name=provider_name, provider_options={}, structured_output_mode="default"
+    )
+
+    # Assert unsupported providers raise an error
+    with pytest.raises(ValueError):
+        await langchain_model_from_provider(provider, "test-model")
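The parametrized test_get_structured_output_options added above pins down how the adapter maps Kiln's StructuredOutputMode values to the `method` argument accepted by langchain's with_structured_output. A minimal sketch of that mapping, using plain strings instead of the real enum; the helper name and dictionary below are illustrative, not part of the kiln_ai API:

from typing import Optional

# Mode-to-method mapping distilled from the parametrized cases in the test above.
_MODE_TO_METHOD = {
    "function_calling": "function_calling",
    "json_mode": "json_mode",
    "json_schema": "json_schema",
    "json_instruction_and_object": "json_mode",
    "default": None,
}


def structured_output_method(mode: str) -> Optional[str]:
    """Return the with_structured_output 'method' implied by a structured output mode."""
    return _MODE_TO_METHOD.get(mode)


assert structured_output_method("json_instruction_and_object") == "json_mode"
assert structured_output_method("default") is None

Note that both json_mode and json_instruction_and_object resolve to langchain's "json_mode" method, while default leaves the method unset.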
kiln_ai/adapters/model_adapters/test_openai_model_adapter.py
ADDED

@@ -0,0 +1,225 @@
+import json
+from unittest.mock import Mock, patch
+
+import pytest
+from openai import AsyncOpenAI
+
+from kiln_ai.adapters.ml_model_list import StructuredOutputMode
+from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo, BasePromptBuilder
+from kiln_ai.adapters.model_adapters.openai_compatible_config import (
+    OpenAICompatibleConfig,
+)
+from kiln_ai.adapters.model_adapters.openai_model_adapter import OpenAICompatibleAdapter
+from kiln_ai.datamodel import Project, Task
+
+
+@pytest.fixture
+def mock_task(tmp_path):
+    # Create a project first since Task requires a parent
+    project_path = tmp_path / "test_project" / "project.kiln"
+    project_path.parent.mkdir()
+
+    project = Project(name="Test Project", path=str(project_path))
+    project.save_to_file()
+
+    schema = {
+        "type": "object",
+        "properties": {"test": {"type": "string"}},
+    }
+
+    task = Task(
+        name="Test Task",
+        instruction="Test instruction",
+        parent=project,
+        output_json_schema=json.dumps(schema),
+    )
+    task.save_to_file()
+    return task
+
+
+@pytest.fixture
+def mock_prompt_builder():
+    builder = Mock(spec=BasePromptBuilder)
+    type(builder).prompt_builder_name = Mock(return_value="test_prompt_builder")
+    builder.prompt_id = Mock(return_value="test_prompt_id")
+    return builder
+
+
+@pytest.fixture
+def config():
+    return OpenAICompatibleConfig(
+        api_key="test_key",
+        base_url="https://api.test.com",
+        model_name="test-model",
+        provider_name="test-provider",
+        default_headers={"X-Test": "test"},
+    )
+
+
+def test_initialization(config, mock_task, mock_prompt_builder):
+    adapter = OpenAICompatibleAdapter(
+        config=config,
+        kiln_task=mock_task,
+        prompt_builder=mock_prompt_builder,
+        tags=["test-tag"],
+    )
+
+    assert isinstance(adapter.client, AsyncOpenAI)
+    assert adapter.config == config
+    assert adapter.kiln_task == mock_task
+    assert adapter.prompt_builder == mock_prompt_builder
+    assert adapter.default_tags == ["test-tag"]
+    assert adapter.model_name == config.model_name
+    assert adapter.model_provider_name == config.provider_name
+
+
+def test_adapter_info(config, mock_task, mock_prompt_builder):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    info = adapter.adapter_info()
+    assert isinstance(info, AdapterInfo)
+    assert info.model_name == config.model_name
+    assert info.model_provider == config.provider_name
+    assert info.adapter_name == "kiln_openai_compatible_adapter"
+    assert info.prompt_builder_name == "base_prompt_builder"
+    assert info.prompt_id == "test_prompt_id"
+
+
+@pytest.mark.asyncio
+async def test_response_format_options_unstructured(
+    config, mock_task, mock_prompt_builder
+):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    # Mock has_structured_output to return False
+    with patch.object(adapter, "has_structured_output", return_value=False):
+        options = await adapter.response_format_options()
+        assert options == {}
+
+
+@pytest.mark.parametrize(
+    "mode",
+    [
+        StructuredOutputMode.json_mode,
+        StructuredOutputMode.json_instruction_and_object,
+    ],
+)
+@pytest.mark.asyncio
+async def test_response_format_options_json_mode(
+    config, mock_task, mock_prompt_builder, mode
+):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    with (
+        patch.object(adapter, "has_structured_output", return_value=True),
+        patch.object(adapter, "model_provider") as mock_provider,
+    ):
+        mock_provider.return_value.structured_output_mode = mode
+
+        options = await adapter.response_format_options()
+        assert options == {"response_format": {"type": "json_object"}}
+
+
+@pytest.mark.parametrize(
+    "mode",
+    [
+        StructuredOutputMode.default,
+        StructuredOutputMode.function_calling,
+    ],
+)
+@pytest.mark.asyncio
+async def test_response_format_options_function_calling(
+    config, mock_task, mock_prompt_builder, mode
+):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    with (
+        patch.object(adapter, "has_structured_output", return_value=True),
+        patch.object(adapter, "model_provider") as mock_provider,
+    ):
+        mock_provider.return_value.structured_output_mode = mode
+
+        options = await adapter.response_format_options()
+        assert "tools" in options
+        # full tool structure validated below
+
+
+@pytest.mark.asyncio
+async def test_response_format_options_json_instructions(
+    config, mock_task, mock_prompt_builder
+):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    with (
+        patch.object(adapter, "has_structured_output", return_value=True),
+        patch.object(adapter, "model_provider") as mock_provider,
+    ):
+        mock_provider.return_value.structured_output_mode = (
+            StructuredOutputMode.json_instructions
+        )
+        options = await adapter.response_format_options()
+        assert options == {}
+
+
+@pytest.mark.asyncio
+async def test_response_format_options_json_schema(
+    config, mock_task, mock_prompt_builder
+):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    with (
+        patch.object(adapter, "has_structured_output", return_value=True),
+        patch.object(adapter, "model_provider") as mock_provider,
+    ):
+        mock_provider.return_value.structured_output_mode = (
+            StructuredOutputMode.json_schema
+        )
+        options = await adapter.response_format_options()
+        assert options == {
+            "response_format": {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "task_response",
+                    "schema": mock_task.output_schema(),
+                },
+            }
+        }
+
+
+def test_tool_call_params(config, mock_task, mock_prompt_builder):
+    adapter = OpenAICompatibleAdapter(
+        config=config, kiln_task=mock_task, prompt_builder=mock_prompt_builder
+    )
+
+    params = adapter.tool_call_params()
+    expected_schema = mock_task.output_schema()
+    expected_schema["additionalProperties"] = False
+
+    assert params == {
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "task_response",
+                    "parameters": expected_schema,
+                    "strict": True,
+                },
+            }
+        ],
+        "tool_choice": {
+            "type": "function",
+            "function": {"name": "task_response"},
+        },
+    }
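test_tool_call_params above fixes the exact request payload the OpenAI-compatible adapter is expected to build when it uses tool calling for structured output. The sketch below reproduces that payload as a standalone dictionary; the schema is a stand-in for mock_task.output_schema(), and the variable names are illustrative only:

import json

# Stand-in for the task's output schema, with additionalProperties disabled as the test expects.
output_schema = {
    "type": "object",
    "properties": {"test": {"type": "string"}},
    "additionalProperties": False,
}

# Shape asserted in test_tool_call_params: a single forced "task_response" function call.
tool_call_params = {
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "task_response",
                "parameters": output_schema,
                "strict": True,
            },
        }
    ],
    "tool_choice": {
        "type": "function",
        "function": {"name": "task_response"},
    },
}

print(json.dumps(tool_call_params, indent=2))

Forcing tool_choice to the single task_response function is what lets the adapter read the tool-call arguments back as the task's structured output.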
kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py}
RENAMED

@@ -2,7 +2,11 @@ from unittest.mock import patch

 import pytest

-from kiln_ai.adapters.base_adapter import
+from kiln_ai.adapters.model_adapters.base_adapter import (
+    AdapterInfo,
+    BaseAdapter,
+    RunOutput,
+)
 from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
@@ -22,6 +26,7 @@ class MockAdapter(BaseAdapter):
             model_name="mock_model",
             model_provider="mock_provider",
             prompt_builder_name="mock_prompt_builder",
+            prompt_id="mock_prompt_id",
         )


@@ -38,8 +43,12 @@ def test_task(tmp_path):
     return task


-
-
+@pytest.fixture
+def adapter(test_task):
+    return MockAdapter(test_task, model_name="phi_3_5", model_provider_name="ollama")
+
+
+def test_save_run_isolation(test_task, adapter):
     input_data = "Test input"
     output_data = "Test output"
     run_output = RunOutput(
@@ -91,7 +100,7 @@ def test_save_run_isolation(test_task):
         reloaded_output.source.properties["prompt_builder_name"]
         == "mock_prompt_builder"
     )
-
+    assert reloaded_output.source.properties["prompt_id"] == "mock_prompt_id"
     # Run again, with same input and different output. Should create a new TaskRun.
     different_run_output = RunOutput(
         output="Different output", intermediate_outputs=None
@@ -101,13 +110,6 @@ def test_save_run_isolation(test_task):
     assert len(test_task.runs()) == 2
     assert "Different output" in set(run.output.output for run in test_task.runs())

-    # run again with same input and same output. Should not create a new TaskRun.
-    task_output = adapter.generate_run(input_data, None, run_output)
-    task_output.save_to_file()
-    assert len(test_task.runs()) == 2
-    assert "Different output" in set(run.output.output for run in test_task.runs())
-    assert output_data in set(run.output.output for run in test_task.runs())
-
     # run again with input of different type. Should create a new TaskRun and TaskOutput.
     task_output = adapter.generate_run(
         input_data,
@@ -130,14 +132,41 @@ def test_save_run_isolation(test_task):
     assert output_data in set(run.output.output for run in test_task.runs())


+def test_generate_run_non_ascii(test_task, adapter):
+    input_data = {"key": "input with non-ascii character: 你好"}
+    output_data = {"key": "output with non-ascii character: 你好"}
+    run_output = RunOutput(
+        output=output_data,
+        intermediate_outputs=None,
+    )
+
+    task_run = adapter.generate_run(
+        input=input_data, input_source=None, run_output=run_output
+    )
+    task_run.save_to_file()
+
+    # as these values are saved as strings, they should properly represent the non-ascii characters
+    assert task_run.input == '{"key": "input with non-ascii character: 你好"}'
+    assert task_run.output.output == '{"key": "output with non-ascii character: 你好"}'
+
+    # check that the stringified unicode strings can be read back from the file
+    reloaded_task = Task.load_from_file(test_task.path)
+    reloaded_runs = reloaded_task.runs()
+    assert len(reloaded_runs) == 1
+    reloaded_run = reloaded_runs[0]
+    assert reloaded_run.input == '{"key": "input with non-ascii character: 你好"}'
+    assert (
+        reloaded_run.output.output == '{"key": "output with non-ascii character: 你好"}'
+    )
+
+
 @pytest.mark.asyncio
-async def test_autosave_false(test_task):
+async def test_autosave_false(test_task, adapter):
     with patch("kiln_ai.utils.config.Config.shared") as mock_shared:
         mock_config = mock_shared.return_value
         mock_config.autosave_runs = False
         mock_config.user_id = "test_user"

-        adapter = MockAdapter(test_task)
         input_data = "Test input"

         run = await adapter.invoke(input_data)
@@ -150,13 +179,12 @@ async def test_autosave_false(test_task):


 @pytest.mark.asyncio
-async def test_autosave_true(test_task):
+async def test_autosave_true(test_task, adapter):
     with patch("kiln_ai.utils.config.Config.shared") as mock_shared:
         mock_config = mock_shared.return_value
         mock_config.autosave_runs = True
         mock_config.user_id = "test_user"

-        adapter = MockAdapter(test_task)
         input_data = "Test input"

         run = await adapter.invoke(input_data)