kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +22 -44
- kiln_ai/adapters/chat/__init__.py +8 -0
- kiln_ai/adapters/chat/chat_formatter.py +234 -0
- kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
- kiln_ai/adapters/eval/base_eval.py +8 -6
- kiln_ai/adapters/eval/eval_runner.py +4 -1
- kiln_ai/adapters/eval/g_eval.py +23 -5
- kiln_ai/adapters/eval/test_base_eval.py +166 -15
- kiln_ai/adapters/eval/test_eval_runner.py +3 -0
- kiln_ai/adapters/eval/test_g_eval.py +1 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
- kiln_ai/adapters/fine_tune/dataset_formatter.py +138 -272
- kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
- kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
- kiln_ai/adapters/ml_model_list.py +80 -43
- kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
- kiln_ai/adapters/model_adapters/litellm_adapter.py +79 -97
- kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
- kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -60
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +56 -21
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
- kiln_ai/adapters/prompt_builders.py +0 -16
- kiln_ai/adapters/provider_tools.py +27 -9
- kiln_ai/adapters/repair/test_repair_task.py +24 -3
- kiln_ai/adapters/test_adapter_registry.py +88 -28
- kiln_ai/adapters/test_ml_model_list.py +158 -0
- kiln_ai/adapters/test_prompt_adaptors.py +17 -3
- kiln_ai/adapters/test_prompt_builders.py +3 -16
- kiln_ai/adapters/test_provider_tools.py +69 -20
- kiln_ai/datamodel/__init__.py +0 -2
- kiln_ai/datamodel/datamodel_enums.py +38 -13
- kiln_ai/datamodel/finetune.py +12 -7
- kiln_ai/datamodel/task.py +68 -7
- kiln_ai/datamodel/test_basemodel.py +2 -1
- kiln_ai/datamodel/test_dataset_split.py +0 -8
- kiln_ai/datamodel/test_models.py +33 -10
- kiln_ai/datamodel/test_task.py +168 -2
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/dataset_import.py +1 -1
- kiln_ai/utils/logging.py +165 -0
- kiln_ai/utils/test_config.py +23 -0
- kiln_ai/utils/test_dataset_import.py +30 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/RECORD +54 -49
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/fine_tune/test_fireworks_finetune.py
@@ -13,11 +13,11 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
 from kiln_ai.datamodel import Finetune as FinetuneModel
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
 from kiln_ai.utils.config import Config
 
@@ -232,8 +232,8 @@ def mock_task():
 @pytest.mark.parametrize(
     "data_strategy,thinking_instructions",
     [
-        (FinetuneDataStrategy.final_and_intermediate, "thinking instructions"),
-        (FinetuneDataStrategy.final_only, None),
+        (ChatStrategy.two_message_cot, "thinking instructions"),
+        (ChatStrategy.single_turn, None),
     ],
 )
 async def test_generate_and_upload_jsonl_success(
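Note: across the test files in this release, the removed FinetuneDataStrategy enum is replaced by ChatStrategy from kiln_ai.datamodel.datamodel_enums. A minimal migration sketch, assuming the member mapping implied by these hunks (final_only to single_turn, final_and_intermediate to two_message_cot):

from kiln_ai.datamodel.datamodel_enums import ChatStrategy

# Assumed mapping from the old enum to the new one:
#   FinetuneDataStrategy.final_only             -> ChatStrategy.single_turn
#   FinetuneDataStrategy.final_and_intermediate -> ChatStrategy.two_message_cot

strategy = ChatStrategy.two_message_cot
# Two-message COT runs carry thinking instructions; single-turn runs do not.
thinking_instructions = (
    "thinking instructions" if strategy == ChatStrategy.two_message_cot else None
)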
kiln_ai/adapters/fine_tune/test_openai_finetune.py
@@ -12,11 +12,11 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
 from kiln_ai.datamodel import Finetune as FinetuneModel
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
 from kiln_ai.utils.config import Config
 
@@ -35,7 +35,7 @@ def openai_finetune(tmp_path):
             system_message="Test system message",
             fine_tune_model_id="ft-123",
             path=tmp_file,
-            data_strategy=FinetuneDataStrategy.final_only,
+            data_strategy=ChatStrategy.single_turn,
         ),
     )
     return finetune
@@ -247,7 +247,7 @@ async def test_generate_and_upload_jsonl_success(
     mock_formatter.dump_to_file.assert_called_once_with(
         "train",
         DatasetFormat.OPENAI_CHAT_JSONL,
-        FinetuneDataStrategy.final_only,
+        ChatStrategy.single_turn,
     )
 
     # Verify file was opened and uploaded
@@ -299,7 +299,7 @@ async def test_generate_and_upload_jsonl_schema_success(
     mock_formatter.dump_to_file.assert_called_once_with(
         "train",
         DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
-        FinetuneDataStrategy.final_only,
+        ChatStrategy.single_turn,
     )
 
     # Verify file was opened and uploaded
@@ -555,8 +555,8 @@ async def test_status_updates_latest_status(openai_finetune, mock_response):
 @pytest.mark.parametrize(
     "data_strategy,thinking_instructions",
     [
-        (FinetuneDataStrategy.final_and_intermediate, "Custom thinking instructions"),
-        (FinetuneDataStrategy.final_only, None),
+        (ChatStrategy.two_message_cot, "Custom thinking instructions"),
+        (ChatStrategy.single_turn, None),
     ],
 )
 async def test_generate_and_upload_jsonl_with_data_strategy(
kiln_ai/adapters/fine_tune/test_together_finetune.py
@@ -183,6 +183,7 @@ async def test_status_job_states(
     # Mock the retrieve method of the fine_tuning object
     mock_job = MagicMock()
     mock_job.status = together_status
+    mock_job.output_name = None
     mock_together_client.fine_tuning.retrieve.return_value = mock_job
 
     status = await together_finetune.status()
kiln_ai/adapters/fine_tune/test_vertex_finetune.py
@@ -12,11 +12,11 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.vertex_finetune import VertexFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
 from kiln_ai.datamodel import Finetune as FinetuneModel
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
 from kiln_ai.utils.config import Config
 
@@ -35,7 +35,7 @@ def vertex_finetune(tmp_path):
             system_message="Test system message",
             fine_tune_model_id="ft-123",
             path=tmp_file,
-            data_strategy=FinetuneDataStrategy.final_only,
+            data_strategy=ChatStrategy.single_turn,
         ),
     )
     return finetune
@@ -252,8 +252,8 @@ async def test_status_model_id_update_exception(vertex_finetune, mock_response):
 @pytest.mark.parametrize(
     "data_strategy,thinking_instructions",
     [
-        (FinetuneDataStrategy.final_and_intermediate, "Custom thinking instructions"),
-        (FinetuneDataStrategy.final_only, None),
+        (ChatStrategy.two_message_cot, "Custom thinking instructions"),
+        (ChatStrategy.single_turn, None),
     ],
 )
 async def test_generate_and_upload_jsonl(
kiln_ai/adapters/fine_tune/together_finetune.py
@@ -66,6 +66,12 @@ class TogetherFinetune(BaseFinetuneAdapter):
         # retrieve the fine-tuning job
         together_finetune = self.client.fine_tuning.retrieve(id=fine_tuning_job_id)
 
+        # update the fine tune model ID if it has changed (sometimes it's not set at training time)
+        if self.datamodel.fine_tune_model_id != together_finetune.output_name:
+            self.datamodel.fine_tune_model_id = together_finetune.output_name
+            if self.datamodel.path:
+                self.datamodel.save_to_file()
+
         status = together_finetune.status
         if status in _pending_statuses:
             return FineTuneStatus(
@@ -135,8 +141,13 @@ class TogetherFinetune(BaseFinetuneAdapter):
             **self._build_finetune_parameters(),
         )
 
-        # 2 different IDs, output_name is the name of the model that results from the fine-tune job, the
+        # 2 different IDs: output_name is the name of the model that results from the fine-tune job, while the id is the ID of the fine-tune job itself
+        if not together_finetune.id:
+            raise ValueError(
+                "Together failed to return a fine-tune job ID. While the tuning job was dispatched, Kiln never received the ID so it won't be able to reference it. Check for errors before dispatching more jobs."
+            )
         self.datamodel.provider_id = together_finetune.id
+        # The output name is sometimes returned here; save it if it is. It might also be populated later by a status call.
         self.datamodel.fine_tune_model_id = together_finetune.output_name
 
         if self.datamodel.path:
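Taken together, these two hunks separate Together's fine-tune job ID (id, stored as provider_id) from the resulting model name (output_name, stored as fine_tune_model_id), which Together may only assign after training starts. A sketch of the resulting polling behavior, using only the client attributes visible above (retrieve(), .id, .output_name); the helper name is hypothetical:

def sync_fine_tune_model_id(datamodel, client, fine_tuning_job_id: str) -> None:
    # Hypothetical helper mirroring the status() logic above.
    job = client.fine_tuning.retrieve(id=fine_tuning_job_id)
    # output_name can be None at dispatch time, so each poll re-checks it
    # and persists any change back to the datamodel file.
    if datamodel.fine_tune_model_id != job.output_name:
        datamodel.fine_tune_model_id = job.output_name
        if datamodel.path:
            datamodel.save_to_file()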
kiln_ai/adapters/ml_model_list.py
@@ -3,7 +3,11 @@ from typing import Dict, List, Literal
 
 from pydantic import BaseModel
 
-from kiln_ai.datamodel import StructuredOutputMode
+from kiln_ai.datamodel.datamodel_enums import (
+    ChatStrategy,
+    ModelProviderName,
+    StructuredOutputMode,
+)
 
 """
 Provides model configuration and management for various LLM providers and models.
@@ -12,28 +12,6 @@ including configuration, validation, and instantiation of language models.
 """
 
 
-class ModelProviderName(str, Enum):
-    """
-    Enumeration of supported AI model providers.
-    """
-
-    openai = "openai"
-    groq = "groq"
-    amazon_bedrock = "amazon_bedrock"
-    ollama = "ollama"
-    openrouter = "openrouter"
-    fireworks_ai = "fireworks_ai"
-    kiln_fine_tune = "kiln_fine_tune"
-    kiln_custom_registry = "kiln_custom_registry"
-    openai_compatible = "openai_compatible"
-    anthropic = "anthropic"
-    gemini_api = "gemini_api"
-    azure_openai = "azure_openai"
-    huggingface = "huggingface"
-    vertex = "vertex"
-    together_ai = "together_ai"
-
-
 class ModelFamily(str, Enum):
     """
     Enumeration of supported model families/architectures.
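ModelProviderName now lives in kiln_ai.datamodel.datamodel_enums rather than being defined here; since ml_model_list imports it (see the hunk above), existing imports should keep resolving. A quick sanity check, assuming that import is a straight re-export:

from kiln_ai.adapters.ml_model_list import ModelProviderName as FromModelList
from kiln_ai.datamodel.datamodel_enums import ModelProviderName as FromEnums

# Both import paths should resolve to the same enum class after the move.
assert FromModelList is FromEnums
assert FromEnums.together_ai.value == "together_ai"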
@@ -102,6 +84,8 @@ class ModelName(str, Enum):
     claude_3_5_sonnet = "claude_3_5_sonnet"
     claude_3_7_sonnet = "claude_3_7_sonnet"
     claude_3_7_sonnet_thinking = "claude_3_7_sonnet_thinking"
+    claude_sonnet_4 = "claude_sonnet_4"
+    claude_opus_4 = "claude_opus_4"
     gemini_1_5_flash = "gemini_1_5_flash"
     gemini_1_5_flash_8b = "gemini_1_5_flash_8b"
     gemini_1_5_pro = "gemini_1_5_pro"
@@ -174,6 +158,7 @@ class KilnModelProvider(BaseModel):
         structured_output_mode: The mode we should use to call the model for structured output, if it was trained with structured output.
         parser: A parser to use for the model, if applicable
         reasoning_capable: Whether the model is designed to output thinking in a structured format (eg <think></think>). If so we don't use COT across 2 calls, and ask for thinking and final response in the same call.
+        tuned_chat_strategy: Used when a model is finetuned with a specific chat strategy, and it's best to use it at call time.
     """
 
     name: ModelProviderName
@@ -189,6 +174,7 @@ class KilnModelProvider(BaseModel):
     reasoning_capable: bool = False
     supports_logprobs: bool = False
     suggested_for_evals: bool = False
+    tuned_chat_strategy: ChatStrategy | None = None
 
     # TODO P1: Need a more generalized way to handle custom provider parameters.
     # Making them quite declarative here for now, isolating provider specific logic
@@ -666,14 +652,12 @@ built_in_models: List[KilnModel] = [
                 structured_output_mode=StructuredOutputMode.function_calling,
                 model_id="anthropic/claude-3.7-sonnet",
                 suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.anthropic,
                 model_id="claude-3-7-sonnet-20250219",
                 structured_output_mode=StructuredOutputMode.function_calling,
                 suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
         ],
     ),
@@ -700,6 +684,46 @@ built_in_models: List[KilnModel] = [
             ),
         ],
     ),
+    # Claude Sonnet 4
+    KilnModel(
+        family=ModelFamily.claude,
+        name=ModelName.claude_sonnet_4,
+        friendly_name="Claude Sonnet 4",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="anthropic/claude-sonnet-4",
+                structured_output_mode=StructuredOutputMode.function_calling,
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.anthropic,
+                model_id="claude-sonnet-4-20250514",
+                structured_output_mode=StructuredOutputMode.function_calling,
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
+            ),
+        ],
+    ),
+    # Claude Opus 4
+    KilnModel(
+        family=ModelFamily.claude,
+        name=ModelName.claude_opus_4,
+        friendly_name="Claude Opus 4",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="anthropic/claude-opus-4",
+                structured_output_mode=StructuredOutputMode.function_calling,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.anthropic,
+                model_id="claude-opus-4-20250514",
+                structured_output_mode=StructuredOutputMode.function_calling,
+            ),
+        ],
+    ),
     # Gemini 2.5 Pro
     KilnModel(
         family=ModelFamily.gemini,
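Once registered in built_in_models, the new Claude 4 entries resolve through the existing lookup helper:

from kiln_ai.adapters.ml_model_list import ModelName, get_model_by_name

model = get_model_by_name(ModelName.claude_sonnet_4)
assert model.friendly_name == "Claude Sonnet 4"
# Each entry lists the providers it can run on (OpenRouter and Anthropic here).
provider_names = [provider.name for provider in model.providers]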
@@ -920,7 +944,7 @@ built_in_models: List[KilnModel] = [
                 model_id="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
                 supports_data_gen=False,
                 structured_output_mode=StructuredOutputMode.function_calling_weak,
-                provider_finetune_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
+                provider_finetune_id="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
             ),
         ],
     ),
@@ -939,8 +963,7 @@ built_in_models: List[KilnModel] = [
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 supports_data_gen=False,
-
-                structured_output_mode=StructuredOutputMode.function_calling_weak,
+                structured_output_mode=StructuredOutputMode.json_schema,
                 model_id="meta-llama/llama-3.1-70b-instruct",
                 supports_logprobs=True,
                 logprobs_openrouter_options=True,
@@ -961,7 +984,7 @@ built_in_models: List[KilnModel] = [
                 model_id="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
                 supports_data_gen=False,
                 structured_output_mode=StructuredOutputMode.function_calling_weak,
-                provider_finetune_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
+                provider_finetune_id="meta-llama/Meta-Llama-3.1-70B-Instruct-Reference",
             ),
         ],
     ),
@@ -1068,10 +1091,6 @@ built_in_models: List[KilnModel] = [
                 supports_structured_output=False,
                 supports_data_gen=False,
             ),
-            KilnModelProvider(
-                name=ModelProviderName.together_ai,
-                provider_finetune_id="meta-llama/Llama-3.2-1B-Instruct",
-            ),
         ],
     ),
     # Llama 3.2 3B
@@ -1108,7 +1127,6 @@ built_in_models: List[KilnModel] = [
                 model_id="meta-llama/Llama-3.2-3B-Instruct-Turbo",
                 supports_structured_output=False,
                 supports_data_gen=False,
-                provider_finetune_id="meta-llama/Llama-3.2-3B-Instruct",
             ),
         ],
     ),
@@ -1158,10 +1176,6 @@ built_in_models: List[KilnModel] = [
         name=ModelName.llama_3_2_90b,
         friendly_name="Llama 3.2 90B",
         providers=[
-            KilnModelProvider(
-                name=ModelProviderName.groq,
-                model_id="llama-3.2-90b-vision-preview",
-            ),
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 structured_output_mode=StructuredOutputMode.json_instruction_and_object,
@@ -1434,11 +1448,6 @@ built_in_models: List[KilnModel] = [
                 structured_output_mode=StructuredOutputMode.json_instruction_and_object,
                 model_id="google/gemma-3-27b-it",
             ),
-            KilnModelProvider(
-                name=ModelProviderName.huggingface,
-                model_id="google/gemma-3-27b-it",
-                structured_output_mode=StructuredOutputMode.json_instructions,
-            ),
         ],
     ),
     # Mixtral 8x7B
@@ -2294,3 +2303,31 @@ def get_model_by_name(name: ModelName) -> KilnModel:
         if model.name == name:
             return model
     raise ValueError(f"Model {name} not found in the list of built-in models")
+
+
+def default_structured_output_mode_for_model_provider(
+    model_name: str,
+    provider: ModelProviderName,
+    default: StructuredOutputMode = StructuredOutputMode.default,
+    disallowed_modes: List[StructuredOutputMode] = [],
+) -> StructuredOutputMode:
+    """
+    We don't expose setting this manually in the UI, so pull a recommended mode from ml_model_list
+    """
+    try:
+        # Convert string to ModelName enum
+        model_name_enum = ModelName(model_name)
+        model = get_model_by_name(model_name_enum)
+    except (ValueError, KeyError):
+        # If model not found, return default
+        return default
+
+    # Find the provider within the model's providers
+    for model_provider in model.providers:
+        if model_provider.name == provider:
+            mode = model_provider.structured_output_mode
+            if mode not in disallowed_modes:
+                return mode
+
+    # If provider not found, return default
+    return default
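The new helper lets callers resolve a recommended structured output mode without configuring it by hand. A usage sketch, with model and provider names taken from entries elsewhere in this diff:

from kiln_ai.adapters.ml_model_list import (
    ModelProviderName,
    StructuredOutputMode,
    default_structured_output_mode_for_model_provider,
)

# Known model/provider: returns the mode from its KilnModelProvider entry,
# e.g. function_calling for Claude Sonnet 4 on Anthropic.
mode = default_structured_output_mode_for_model_provider(
    "claude_sonnet_4", ModelProviderName.anthropic
)

# Unknown model names fall back to the supplied default.
fallback = default_structured_output_mode_for_model_provider(
    "not_a_real_model",
    ModelProviderName.openai,
    default=StructuredOutputMode.json_schema,
)

# disallowed_modes skips the recommended mode if a caller can't support it;
# here the lookup falls through to the default since function_calling is disallowed.
constrained = default_structured_output_mode_for_model_provider(
    "claude_sonnet_4",
    ModelProviderName.anthropic,
    disallowed_modes=[StructuredOutputMode.function_calling],
)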
kiln_ai/adapters/model_adapters/base_adapter.py
@@ -1,11 +1,17 @@
 import json
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
-from typing import Dict,
+from typing import Dict, Tuple
 
-import
-
-
+from kiln_ai.adapters.chat.chat_formatter import (
+    ChatFormatter,
+    get_chat_formatter,
+)
+from kiln_ai.adapters.ml_model_list import (
+    KilnModelProvider,
+    StructuredOutputMode,
+    default_structured_output_mode_for_model_provider,
+)
 from kiln_ai.adapters.parsers.json_parser import parse_json_string
 from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
 from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
@@ -20,6 +26,7 @@ from kiln_ai.datamodel import (
     TaskRun,
     Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
 from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.config import Config
@@ -38,9 +45,6 @@ class AdapterConfig:
     default_tags: list[str] | None = None
 
 
-COT_FINAL_ANSWER_PROMPT = "Considering the above, return a final result."
-
-
 class BaseAdapter(metaclass=ABCMeta):
     """Base class for AI model adapters that handle task execution.
 
@@ -61,6 +65,7 @@ class BaseAdapter(metaclass=ABCMeta):
         config: AdapterConfig | None = None,
     ):
         self.run_config = run_config
+        self.update_run_config_unknown_structured_output_mode()
         self.prompt_builder = prompt_builder_from_id(
             run_config.prompt_id, run_config.task
         )
@@ -188,10 +193,10 @@ class BaseAdapter(metaclass=ABCMeta):
 
     def build_prompt(self) -> str:
         # The prompt builder needs to know if we want to inject formatting instructions
-        provider = self.model_provider()
+        structured_output_mode = self.run_config.structured_output_mode
         add_json_instructions = self.has_structured_output() and (
-            provider.structured_output_mode == StructuredOutputMode.json_instructions
-            or provider.structured_output_mode
+            structured_output_mode == StructuredOutputMode.json_instructions
+            or structured_output_mode
             == StructuredOutputMode.json_instruction_and_object
         )
 
@@ -199,26 +204,51 @@ class BaseAdapter(metaclass=ABCMeta):
             include_json_instructions=add_json_instructions
         )
 
-    def
-
-
-        # Determine the run strategy for COT prompting. 3 options:
-        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instuctions as a message.
-        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
-        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
+    def build_chat_formatter(self, input: Dict | str) -> ChatFormatter:
+        # Determine the chat strategy to use based on the prompt the user selected, the model's capabilities, and if the model was finetuned with a specific chat strategy.
+
         cot_prompt = self.prompt_builder.chain_of_thought_prompt()
-
+        system_message = self.build_prompt()
+
+        # If no COT prompt, use the single turn strategy. Even when a tuned strategy is set, as the tuned strategy is either already single turn, or won't work without a COT prompt.
+        if not cot_prompt:
+            return get_chat_formatter(
+                strategy=ChatStrategy.single_turn,
+                system_message=system_message,
+                user_input=input,
+            )
 
-
-
+        # Some models like finetunes are trained with a specific chat strategy. Use that.
+        # However, don't use that if it is single turn. The user selected a COT prompt, and we give explicit prompt selection priority over the tuned strategy.
+        tuned_chat_strategy = self.model_provider().tuned_chat_strategy
+        if tuned_chat_strategy and tuned_chat_strategy != ChatStrategy.single_turn:
+            return get_chat_formatter(
+                strategy=tuned_chat_strategy,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
+
+        # Pick the best chat strategy for the model given it has a cot prompt.
+        reasoning_capable = self.model_provider().reasoning_capable
+        if reasoning_capable:
+            # "Thinking" LLM designed to output thinking in a structured format. We'll use its native format.
             # A simple message with the COT prompt appended to the message list is sufficient
-            return
-
-
-
-
+            return get_chat_formatter(
+                strategy=ChatStrategy.single_turn_r1_thinking,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
         else:
-
+            # Unstructured output with COT
+            # Two calls to separate the thinking from the final response
+            return get_chat_formatter(
+                strategy=ChatStrategy.two_message_cot,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
 
     # create a run and task output
     def generate_run(
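build_chat_formatter reduces to a small decision ladder: no COT prompt means single turn; a non-single-turn tuned strategy wins next; otherwise reasoning-capable models get the R1-style single call and everything else gets the two-message COT flow. A condensed sketch of just that selection logic, assuming the ChatStrategy members shown above:

from kiln_ai.datamodel.datamodel_enums import ChatStrategy

def pick_chat_strategy(
    cot_prompt: str | None,
    tuned_chat_strategy: ChatStrategy | None,
    reasoning_capable: bool,
) -> ChatStrategy:
    # Mirrors the ladder in build_chat_formatter above; sketch only.
    if not cot_prompt:
        return ChatStrategy.single_turn
    if tuned_chat_strategy and tuned_chat_strategy != ChatStrategy.single_turn:
        return tuned_chat_strategy
    if reasoning_capable:
        return ChatStrategy.single_turn_r1_thinking
    return ChatStrategy.two_message_cot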
@@ -272,5 +302,22 @@ class BaseAdapter(metaclass=ABCMeta):
         props["model_name"] = self.run_config.model_name
         props["model_provider"] = self.run_config.model_provider_name
         props["prompt_id"] = self.run_config.prompt_id
+        props["structured_output_mode"] = self.run_config.structured_output_mode
+        props["temperature"] = self.run_config.temperature
+        props["top_p"] = self.run_config.top_p
 
         return props
+
+    def update_run_config_unknown_structured_output_mode(self) -> None:
+        structured_output_mode = self.run_config.structured_output_mode
+
+        # Old datamodels didn't save the structured output mode. Some clients (tests, end users) might not set it.
+        # Look up our recommended mode from ml_model_list if we have one
+        if structured_output_mode == StructuredOutputMode.unknown:
+            new_run_config = self.run_config.model_copy(deep=True)
+            structured_output_mode = default_structured_output_mode_for_model_provider(
+                self.run_config.model_name,
+                self.run_config.model_provider_name,
+            )
+            new_run_config.structured_output_mode = structured_output_mode
+            self.run_config = new_run_config