kiln-ai 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +22 -44
- kiln_ai/adapters/chat/__init__.py +8 -0
- kiln_ai/adapters/chat/chat_formatter.py +233 -0
- kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
- kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
- kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
- kiln_ai/adapters/data_gen/test_data_gen_task.py +330 -40
- kiln_ai/adapters/eval/base_eval.py +7 -6
- kiln_ai/adapters/eval/eval_runner.py +9 -2
- kiln_ai/adapters/eval/g_eval.py +40 -17
- kiln_ai/adapters/eval/test_base_eval.py +174 -17
- kiln_ai/adapters/eval/test_eval_runner.py +3 -0
- kiln_ai/adapters/eval/test_g_eval.py +116 -5
- kiln_ai/adapters/fine_tune/base_finetune.py +3 -8
- kiln_ai/adapters/fine_tune/dataset_formatter.py +135 -273
- kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +6 -11
- kiln_ai/adapters/fine_tune/together_finetune.py +13 -2
- kiln_ai/adapters/ml_model_list.py +370 -84
- kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
- kiln_ai/adapters/model_adapters/litellm_adapter.py +88 -97
- kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
- kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -61
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +104 -21
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
- kiln_ai/adapters/parsers/parser_registry.py +0 -2
- kiln_ai/adapters/parsers/r1_parser.py +0 -1
- kiln_ai/adapters/prompt_builders.py +0 -16
- kiln_ai/adapters/provider_tools.py +27 -9
- kiln_ai/adapters/remote_config.py +66 -0
- kiln_ai/adapters/repair/repair_task.py +1 -6
- kiln_ai/adapters/repair/test_repair_task.py +24 -3
- kiln_ai/adapters/test_adapter_registry.py +88 -28
- kiln_ai/adapters/test_ml_model_list.py +176 -0
- kiln_ai/adapters/test_prompt_adaptors.py +17 -7
- kiln_ai/adapters/test_prompt_builders.py +3 -16
- kiln_ai/adapters/test_provider_tools.py +69 -20
- kiln_ai/adapters/test_remote_config.py +100 -0
- kiln_ai/datamodel/__init__.py +0 -2
- kiln_ai/datamodel/datamodel_enums.py +38 -13
- kiln_ai/datamodel/eval.py +32 -0
- kiln_ai/datamodel/finetune.py +12 -8
- kiln_ai/datamodel/task.py +68 -7
- kiln_ai/datamodel/task_output.py +0 -2
- kiln_ai/datamodel/task_run.py +0 -2
- kiln_ai/datamodel/test_basemodel.py +2 -1
- kiln_ai/datamodel/test_dataset_split.py +0 -8
- kiln_ai/datamodel/test_eval_model.py +146 -4
- kiln_ai/datamodel/test_models.py +33 -10
- kiln_ai/datamodel/test_task.py +168 -2
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/dataset_import.py +1 -1
- kiln_ai/utils/logging.py +166 -0
- kiln_ai/utils/test_config.py +23 -0
- kiln_ai/utils/test_dataset_import.py +30 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
- kiln_ai-0.18.0.dist-info/RECORD +115 -0
- kiln_ai-0.16.0.dist-info/RECORD +0 -108
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/test_ml_model_list.py
ADDED
@@ -0,0 +1,176 @@
+import pytest
+
+from kiln_ai.adapters.ml_model_list import (
+    ModelName,
+    default_structured_output_mode_for_model_provider,
+    get_model_by_name,
+)
+from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode
+
+
+class TestDefaultStructuredOutputModeForModelProvider:
+    """Test cases for default_structured_output_mode_for_model_provider function"""
+
+    def test_valid_model_and_provider_returns_provider_mode(self):
+        """Test that valid model and provider returns the provider's structured output mode"""
+        # GPT 4.1 has OpenAI provider with json_schema mode
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_valid_model_different_provider_modes(self):
+        """Test that different providers for the same model return different modes"""
+        # Claude 3.5 Sonnet has different modes for different providers
+        # Anthropic provider uses function_calling
+        result_anthropic = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.anthropic,
+        )
+        assert result_anthropic == StructuredOutputMode.function_calling
+
+        # Vertex provider uses function_calling_weak
+        result_vertex = default_structured_output_mode_for_model_provider(
+            model_name="claude_3_5_sonnet",
+            provider=ModelProviderName.vertex,
+        )
+        assert result_vertex == StructuredOutputMode.function_calling_weak
+
+    def test_invalid_model_name_returns_default(self):
+        """Test that invalid model name returns the default value"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_invalid_model_name_returns_custom_default(self):
+        """Test that invalid model name returns custom default when specified"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="invalid_model_name",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+        )
+        assert result == custom_default
+
+    def test_valid_model_invalid_provider_returns_default(self):
+        """Test that valid model but invalid provider returns default"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.gemini_api,  # GPT 4.1 doesn't have gemini_api provider
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_returns_default(self):
+        """Test that when provider's mode is in disallowed_modes, returns default"""
+        # GPT 4.1 OpenAI provider uses json_schema, but we disallow it
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_disallowed_modes_with_custom_default(self):
+        """Test disallowed modes with custom default value"""
+        custom_default = StructuredOutputMode.json_instructions
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            default=custom_default,
+            disallowed_modes=[StructuredOutputMode.json_schema],
+        )
+        assert result == custom_default
+
+    def test_empty_disallowed_modes_list(self):
+        """Test that empty disallowed_modes list works correctly"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[],
+        )
+        assert result == StructuredOutputMode.json_schema
+
+    def test_multiple_disallowed_modes(self):
+        """Test with multiple disallowed modes"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1",
+            provider=ModelProviderName.openai,
+            disallowed_modes=[
+                StructuredOutputMode.json_schema,
+                StructuredOutputMode.function_calling,
+            ],
+        )
+        assert result == StructuredOutputMode.default
+
+    def test_reasoning_model_with_different_providers(self):
+        """Test reasoning models that have different structured output modes"""
+        # DeepSeek R1 uses json_instructions for reasoning
+        result = default_structured_output_mode_for_model_provider(
+            model_name="deepseek_r1",
+            provider=ModelProviderName.openrouter,
+        )
+        assert result == StructuredOutputMode.json_instructions
+
+    @pytest.mark.parametrize(
+        "model_name,provider,expected_mode",
+        [
+            ("gpt_4o", ModelProviderName.openai, StructuredOutputMode.json_schema),
+            (
+                "claude_3_5_haiku",
+                ModelProviderName.anthropic,
+                StructuredOutputMode.function_calling,
+            ),
+            (
+                "gemini_2_5_pro",
+                ModelProviderName.gemini_api,
+                StructuredOutputMode.json_schema,
+            ),
+            ("llama_3_1_8b", ModelProviderName.groq, StructuredOutputMode.default),
+            (
+                "qwq_32b",
+                ModelProviderName.fireworks_ai,
+                StructuredOutputMode.json_instructions,
+            ),
+        ],
+    )
+    def test_parametrized_valid_combinations(self, model_name, provider, expected_mode):
+        """Test multiple valid model/provider combinations"""
+        result = default_structured_output_mode_for_model_provider(
+            model_name=model_name,
+            provider=provider,
+        )
+        assert result == expected_mode
+
+    def test_model_with_single_provider(self):
+        """Test model that only has one provider"""
+        # Find a model with only one provider for this test
+        model = get_model_by_name(ModelName.gpt_4_1_nano)
+        assert len(model.providers) >= 1  # Verify it has providers
+
+        first_provider = model.providers[0]
+        result = default_structured_output_mode_for_model_provider(
+            model_name="gpt_4_1_nano",
+            provider=first_provider.name,
+        )
+        assert result == first_provider.structured_output_mode
+
+
+def test_uncensored():
+    """Test that uncensored is set correctly"""
+    model = get_model_by_name(ModelName.grok_3_mini)
+    for provider in model.providers:
+        assert provider.uncensored
+        assert not provider.suggested_for_uncensored_data_gen
+
+    model = get_model_by_name(ModelName.gpt_4_1_nano)
+    for provider in model.providers:
+        assert not provider.uncensored
+        assert not provider.suggested_for_uncensored_data_gen
+
+    model = get_model_by_name(ModelName.grok_4)
+    for provider in model.providers:
+        assert provider.uncensored
+        assert provider.suggested_for_uncensored_data_gen
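The new test module above pins down the behaviour of `default_structured_output_mode_for_model_provider`: look up the model, find the matching provider, and fall back to a default when the model or provider is missing or the provider's preferred mode is disallowed. Below is a minimal sketch consistent with those tests; the `built_in_models` lookup and the `.name` / `.structured_output_mode` attributes are assumptions inferred from the tests, not code copied from the package.

```python
# Illustrative sketch only, inferred from the tests above; not the shipped implementation.
from kiln_ai.adapters.ml_model_list import built_in_models  # list of model definitions
from kiln_ai.datamodel.datamodel_enums import ModelProviderName, StructuredOutputMode


def default_structured_output_mode_sketch(
    model_name: str,
    provider: ModelProviderName,
    default: StructuredOutputMode = StructuredOutputMode.default,
    disallowed_modes: list[StructuredOutputMode] | None = None,
) -> StructuredOutputMode:
    disallowed = disallowed_modes or []
    # Unknown model names fall back to the default (see test_invalid_model_name_*).
    model = next((m for m in built_in_models if m.name == model_name), None)
    if model is None:
        return default
    for model_provider in model.providers:
        if model_provider.name == provider:
            mode = model_provider.structured_output_mode
            # A disallowed provider mode also falls back to the default.
            return default if mode in disallowed else mode
    # The model exists but is not offered by this provider.
    return default
```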
kiln_ai/adapters/test_prompt_adaptors.py
CHANGED
@@ -13,11 +13,8 @@ from kiln_ai.adapters.model_adapters.litellm_adapter import (
     LiteLlmConfig,
 )
 from kiln_ai.adapters.ollama_tools import ollama_online
-from kiln_ai.adapters.prompt_builders import (
-    BasePromptBuilder,
-    SimpleChainOfThoughtPromptBuilder,
-)
 from kiln_ai.datamodel import PromptId
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 def get_all_models_and_providers():
@@ -124,8 +121,12 @@ async def test_mock_returning_run(tmp_path):
 
     adapter = LiteLlmAdapter(
         config=LiteLlmConfig(
-
-
+            run_config_properties=RunConfigProperties(
+                model_name="custom_model",
+                model_provider_name="ollama",
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            ),
             base_url="http://localhost:11434",
             additional_body_options={"api_key": "test_key"},
         ),
@@ -145,6 +146,9 @@ async def test_mock_returning_run(tmp_path):
         "model_name": "custom_model",
         "model_provider": "ollama",
         "prompt_id": "simple_prompt_builder",
+        "structured_output_mode": "json_schema",
+        "temperature": 1.0,
+        "top_p": 1.0,
     }
 
 
@@ -212,7 +216,13 @@ async def run_simple_task(
     prompt_id: PromptId | None = None,
 ) -> datamodel.TaskRun:
     adapter = adapter_for_task(
-        task,
+        task,
+        RunConfigProperties(
+            structured_output_mode="json_schema",
+            model_name=model_name,
+            model_provider_name=provider,
+            prompt_id=prompt_id or "simple_prompt_builder",
+        ),
     )
 
     run = await adapter.invoke(
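The `test_prompt_adaptors.py` changes above show the new calling convention: instead of passing prompt-builder objects, callers hand `adapter_for_task` a `RunConfigProperties` that now also carries `structured_output_mode`. A hedged usage sketch follows; the model and provider values are placeholders, and the exact `invoke` arguments are truncated in the diff, so a single input is assumed.

```python
# Usage sketch based on the diff above; not taken verbatim from the package.
from kiln_ai import datamodel
from kiln_ai.adapters.adapter_registry import adapter_for_task
from kiln_ai.datamodel.task import RunConfigProperties


async def run_task_with_run_config(task: datamodel.Task, input_text: str) -> datamodel.TaskRun:
    adapter = adapter_for_task(
        task,
        RunConfigProperties(
            model_name="llama_3_1_8b",          # placeholder model
            model_provider_name="ollama",       # placeholder provider
            prompt_id="simple_prompt_builder",
            structured_output_mode="json_schema",
        ),
    )
    # The diff truncates invoke()'s arguments; a single input is assumed here.
    return await adapter.invoke(input_text)
```

The expected-properties hunk above also shows `temperature` and `top_p` being persisted with values of 1.0, which suggests they are new fields on the run configuration with defaults of 1.0.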
kiln_ai/adapters/test_prompt_builders.py
CHANGED
@@ -27,7 +27,6 @@ from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,
     Finetune,
-    FinetuneDataStrategy,
     Project,
     Prompt,
     Task,
@@ -36,6 +35,7 @@ from kiln_ai.datamodel import (
     TaskRun,
     Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig
 
 logger = logging.getLogger(__name__)
@@ -54,9 +54,6 @@ def test_simple_prompt_builder(tmp_path):
     assert "1) " + task.requirements[0].instruction in prompt
     assert "2) " + task.requirements[1].instruction in prompt
     assert "3) " + task.requirements[2].instruction in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt
 
 
@@ -93,20 +90,9 @@ def test_simple_prompt_builder_structured_output(tmp_path):
     input = "Cows"
     prompt = builder.build_prompt(include_json_instructions=False)
     assert "You are an assistant which tells a joke, given a subject." in prompt
-
-    user_msg = builder.build_user_message(input)
-    assert input in user_msg
     assert input not in prompt
 
 
-def test_simple_prompt_builder_structured_input_non_ascii(tmp_path):
-    task = build_structured_output_test_task(tmp_path)
-    builder = SimplePromptBuilder(task=task)
-    input = {"key": "你好👋"}
-    user_msg = builder.build_user_message(input)
-    assert "你好👋" in user_msg
-
-
 @pytest.fixture
 def task_with_examples(tmp_path):
     # Create a project and task hierarchy
@@ -404,7 +390,7 @@ def test_prompt_builder_from_id(task_with_examples):
         base_model_id="test_base_model_id",
         dataset_split_id="asdf",
         provider="test_provider",
-        data_strategy=
+        data_strategy=ChatStrategy.two_message_cot,
     )
     finetune.save_to_file()
     nested_fine_tune_id = (
@@ -619,6 +605,7 @@ def test_task_run_config_prompt_builder(tmp_path):
             model_name="gpt-4",
             model_provider_name="openai",
             prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
         ),
         prompt=Prompt(
            name="test prompt name",
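As the hunk above shows, `Finetune.data_strategy` is now set from `ChatStrategy` rather than the removed `FinetuneDataStrategy`. The member names visible in this file and in the provider-tools diff below are `single_turn`, `two_message_cot`, and `single_turn_r1_thinking`. The stand-in below is purely illustrative; the string values are an assumption that the new enum keeps the old `FinetuneDataStrategy` values for backwards compatibility.

```python
# Illustrative stand-in for kiln_ai.datamodel.datamodel_enums.ChatStrategy.
# Member names come from the diffs; the string values are assumed, not confirmed.
from enum import Enum


class ChatStrategySketch(str, Enum):
    single_turn = "final_only"
    two_message_cot = "final_and_intermediate"
    single_turn_r1_thinking = "final_and_intermediate_r1_compatible"
```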
kiln_ai/adapters/test_provider_tools.py
CHANGED
@@ -18,7 +18,7 @@ from kiln_ai.adapters.provider_tools import (
     finetune_provider_model,
     get_model_and_provider,
     kiln_model_provider_from,
-
+    lite_llm_config_for_openai_compatible,
     lite_llm_provider_model,
     parse_custom_model_id,
     provider_enabled,
@@ -27,10 +27,11 @@ from kiln_ai.adapters.provider_tools import (
 )
 from kiln_ai.datamodel import (
     Finetune,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
+from kiln_ai.datamodel.task import RunConfigProperties
 
 
 @pytest.fixture(autouse=True)
@@ -71,7 +72,7 @@ def mock_finetune():
         finetune.provider = ModelProviderName.openai
         finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy =
+        finetune.data_strategy = ChatStrategy.single_turn
         mock.return_value = finetune
         yield mock
 
@@ -83,7 +84,7 @@ def mock_finetune_final_and_intermediate():
         finetune.provider = ModelProviderName.openai
         finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy =
+        finetune.data_strategy = ChatStrategy.two_message_cot
         mock.return_value = finetune
         yield mock
 
@@ -95,9 +96,7 @@ def mock_finetune_r1_compatible():
         finetune.provider = ModelProviderName.ollama
         finetune.fine_tune_model_id = "ft:deepseek-r1:671b:custom:model-123"
         finetune.structured_output_mode = StructuredOutputMode.json_schema
-        finetune.data_strategy = (
-            FinetuneDataStrategy.final_and_intermediate_r1_compatible
-        )
+        finetune.data_strategy = ChatStrategy.single_turn_r1_thinking
         mock.return_value = finetune
         yield mock
 
@@ -357,6 +356,7 @@ async def test_kiln_model_provider_from_custom_model_valid(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "custom_model"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions
 
 
 @pytest.mark.asyncio
@@ -374,6 +374,7 @@ async def test_kiln_model_provider_from_custom_registry(mock_config):
     assert provider.supports_data_gen is False
     assert provider.untested_model is True
     assert provider.model_id == "gpt-4-turbo"
+    assert provider.structured_output_mode == StructuredOutputMode.json_instructions
 
 
 @pytest.mark.asyncio
@@ -474,7 +475,7 @@ def test_finetune_provider_model_success_final_and_intermediate(
     assert provider.name == ModelProviderName.openai
     assert provider.model_id == "ft:gpt-3.5-turbo:custom:model-123"
     assert provider.structured_output_mode == StructuredOutputMode.json_schema
-    assert provider.reasoning_capable is
+    assert provider.reasoning_capable is False
     assert provider.parser == None
 
 
@@ -580,7 +581,7 @@ def test_finetune_provider_model_structured_mode(
     finetune.provider = provider_name
     finetune.fine_tune_model_id = "fireworks-model-123"
     finetune.structured_output_mode = structured_output_mode
-    finetune.data_strategy =
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune
 
     provider = finetune_provider_model("project-123::task-456::finetune-789")
@@ -596,10 +597,20 @@ def test_openai_compatible_provider_config(mock_shared_config):
     """Test successful creation of an OpenAI compatible provider"""
     model_id = "test_provider::gpt-4"
 
-    config =
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai_compatible,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )
 
-    assert
-
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "test-key"}
     assert config.base_url == "https://api.test.com"
 
@@ -621,10 +632,20 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
     """Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
     model_id = "no_key_provider::gpt-4"
 
-    config =
+    config = lite_llm_config_for_openai_compatible(
+        RunConfigProperties(
+            model_name=model_id,
+            model_provider_name=ModelProviderName.openai,
+            prompt_id="simple_prompt_builder",
+            structured_output_mode="json_schema",
+        )
+    )
 
-    assert
-
+    assert (
+        config.run_config_properties.model_provider_name
+        == ModelProviderName.openai_compatible
+    )
+    assert config.run_config_properties.model_name == "gpt-4"
     assert config.additional_body_options == {"api_key": "NA"}
     assert config.base_url == "https://api.nokey.com"
 
@@ -632,7 +653,14 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
 def test_lite_llm_config_invalid_id():
     """Test handling of invalid model ID format"""
     with pytest.raises(ValueError) as exc_info:
-
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="invalid-id-format",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "Invalid openai compatible model ID: invalid-id-format"
     )
@@ -643,14 +671,28 @@ def test_lite_llm_config_no_providers(mock_shared_config):
     mock_shared_config.return_value.openai_compatible_providers = None
 
     with pytest.raises(ValueError) as exc_info:
-
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert str(exc_info.value) == "OpenAI compatible provider test_provider not found"
 
 
 def test_lite_llm_config_provider_not_found(mock_shared_config):
     """Test handling of non-existent provider"""
     with pytest.raises(ValueError) as exc_info:
-
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="unknown_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value) == "OpenAI compatible provider unknown_provider not found"
     )
@@ -666,7 +708,14 @@ def test_lite_llm_config_no_base_url(mock_shared_config):
     ]
 
     with pytest.raises(ValueError) as exc_info:
-
+        lite_llm_config_for_openai_compatible(
+            RunConfigProperties(
+                model_name="test_provider::gpt-4",
+                model_provider_name=ModelProviderName.openai_compatible,
+                prompt_id="simple_prompt_builder",
+                structured_output_mode="json_schema",
+            )
+        )
     assert (
         str(exc_info.value)
         == "OpenAI compatible provider test_provider has no base URL"
@@ -867,7 +916,7 @@ def test_finetune_provider_model_vertex_ai(mock_project, mock_task, mock_finetun
     finetune.provider = ModelProviderName.vertex
     finetune.fine_tune_model_id = "projects/123/locations/us-central1/endpoints/456"
     finetune.structured_output_mode = StructuredOutputMode.json_mode
-    finetune.data_strategy =
+    finetune.data_strategy = ChatStrategy.single_turn
     mock_finetune.return_value = finetune
 
     provider = finetune_provider_model("project-123::task-456::finetune-789")
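The provider-tools tests above outline what `lite_llm_config_for_openai_compatible` appears to do with a `provider::model` ID: split it, look the provider up in the user's OpenAI-compatible provider list, and return a LiteLLM config carrying the bare model name, the provider's base URL, and an API key of `"NA"` when none is configured. The following is a hypothetical reconstruction using stand-in types; the real signatures and config classes may differ.

```python
# Hypothetical reconstruction of the behaviour exercised by the tests above.
from dataclasses import dataclass, replace


@dataclass
class RunConfigPropsStandIn:      # stand-in for kiln's RunConfigProperties
    model_name: str
    model_provider_name: str
    prompt_id: str
    structured_output_mode: str


@dataclass
class LiteLlmConfigStandIn:       # stand-in for kiln's LiteLlmConfig
    run_config_properties: RunConfigPropsStandIn
    base_url: str
    additional_body_options: dict


def openai_compatible_config_sketch(
    props: RunConfigPropsStandIn, providers: list[dict] | None
) -> LiteLlmConfigStandIn:
    try:
        provider_name, model_name = props.model_name.split("::", maxsplit=1)
    except ValueError:
        raise ValueError(f"Invalid openai compatible model ID: {props.model_name}")
    provider = next((p for p in providers or [] if p["name"] == provider_name), None)
    if provider is None:
        raise ValueError(f"OpenAI compatible provider {provider_name} not found")
    if not provider.get("base_url"):
        raise ValueError(f"OpenAI compatible provider {provider_name} has no base URL")
    return LiteLlmConfigStandIn(
        run_config_properties=replace(
            props,
            model_name=model_name,
            model_provider_name="openai_compatible",
        ),
        base_url=provider["base_url"],
        # Some providers need no key, but LiteLLM requires one, so pass "NA".
        additional_body_options={"api_key": provider.get("api_key") or "NA"},
    )
```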
kiln_ai/adapters/test_remote_config.py
ADDED
@@ -0,0 +1,100 @@
+import asyncio
+import os
+from unittest.mock import patch
+
+import pytest
+
+from kiln_ai.adapters.ml_model_list import built_in_models
+from kiln_ai.adapters.remote_config import (
+    deserialize_config,
+    dump_builtin_config,
+    load_from_url,
+    load_remote_models,
+    serialize_config,
+)
+
+
+def test_round_trip(tmp_path):
+    path = tmp_path / "models.json"
+    serialize_config(built_in_models, path)
+    loaded = deserialize_config(path)
+    assert [m.model_dump(mode="json") for m in loaded] == [
+        m.model_dump(mode="json") for m in built_in_models
+    ]
+
+
+def test_load_from_url():
+    sample = [built_in_models[0].model_dump(mode="json")]
+
+    class FakeResponse:
+        def raise_for_status(self):
+            pass
+
+        def json(self):
+            return {"model_list": sample}
+
+    with patch(
+        "kiln_ai.adapters.remote_config.requests.get", return_value=FakeResponse()
+    ):
+        models = load_from_url("http://example.com/models.json")
+    assert [m.model_dump(mode="json") for m in models] == sample
+
+
+def test_dump_builtin_config(tmp_path):
+    path = tmp_path / "out.json"
+    dump_builtin_config(path)
+    loaded = deserialize_config(path)
+    assert [m.model_dump(mode="json") for m in loaded] == [
+        m.model_dump(mode="json") for m in built_in_models
+    ]
+
+
+@pytest.mark.asyncio
+async def test_load_remote_models_success(monkeypatch):
+    del os.environ["KILN_SKIP_REMOTE_MODEL_LIST"]
+    original = built_in_models.copy()
+    sample_models = [built_in_models[0]]
+
+    def fake_fetch(url):
+        return sample_models
+
+    monkeypatch.setattr("kiln_ai.adapters.remote_config.load_from_url", fake_fetch)
+
+    load_remote_models("http://example.com/models.json")
+    await asyncio.sleep(0.01)
+    assert built_in_models == sample_models
+    built_in_models[:] = original
+
+
+@pytest.mark.asyncio
+async def test_load_remote_models_failure(monkeypatch):
+    original = built_in_models.copy()
+
+    def fake_fetch(url):
+        raise RuntimeError("fail")
+
+    monkeypatch.setattr("kiln_ai.adapters.remote_config.load_from_url", fake_fetch)
+
+    load_remote_models("http://example.com/models.json")
+    await asyncio.sleep(0.01)
+    assert built_in_models == original
+
+
+def test_deserialize_config_with_extra_keys(tmp_path):
+    # Take a valid model and add an extra key, ensure it is ignored and still loads
+    import json
+
+    from kiln_ai.adapters.ml_model_list import built_in_models
+
+    model_dict = built_in_models[0].model_dump(mode="json")
+    model_dict["extra_key"] = "should be ignored or error"
+    model_dict["providers"][0]["extra_key"] = "should be ignored or error"
+    data = {"model_list": [model_dict]}
+    path = tmp_path / "extra.json"
+    path.write_text(json.dumps(data))
+    # Should NOT raise, and extra key should be ignored
+    models = deserialize_config(path)
+    assert hasattr(models[0], "family")
+    assert not hasattr(models[0], "extra_key")
+    assert hasattr(models[0], "providers")
+    assert not hasattr(models[0].providers[0], "extra_key")
kiln_ai/datamodel/__init__.py
CHANGED
@@ -13,7 +13,6 @@ from __future__ import annotations
 
 from kiln_ai.datamodel import dataset_split, eval, strict_mode
 from kiln_ai.datamodel.datamodel_enums import (
-    FinetuneDataStrategy,
     FineTuneStatusType,
     Priority,
     StructuredOutputMode,
@@ -71,7 +70,6 @@ __all__ = [
     "Prompt",
     "TaskOutputRating",
     "StructuredOutputMode",
-    "FinetuneDataStrategy",
     "PromptId",
     "PromptGenerators",
     "prompt_generator_values",