kiln-ai 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/chat/chat_formatter.py +0 -1
- kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
- kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
- kiln_ai/adapters/data_gen/test_data_gen_task.py +311 -34
- kiln_ai/adapters/eval/base_eval.py +6 -7
- kiln_ai/adapters/eval/eval_runner.py +5 -1
- kiln_ai/adapters/eval/g_eval.py +17 -12
- kiln_ai/adapters/eval/test_base_eval.py +8 -2
- kiln_ai/adapters/eval/test_g_eval.py +115 -5
- kiln_ai/adapters/fine_tune/base_finetune.py +1 -6
- kiln_ai/adapters/fine_tune/dataset_formatter.py +1 -5
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +1 -1
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +2 -7
- kiln_ai/adapters/fine_tune/together_finetune.py +1 -1
- kiln_ai/adapters/ml_model_list.py +293 -44
- kiln_ai/adapters/model_adapters/litellm_adapter.py +9 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -1
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +48 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +3 -3
- kiln_ai/adapters/parsers/parser_registry.py +0 -2
- kiln_ai/adapters/parsers/r1_parser.py +0 -1
- kiln_ai/adapters/remote_config.py +66 -0
- kiln_ai/adapters/repair/repair_task.py +1 -6
- kiln_ai/adapters/test_ml_model_list.py +18 -0
- kiln_ai/adapters/test_prompt_adaptors.py +0 -4
- kiln_ai/adapters/test_remote_config.py +100 -0
- kiln_ai/datamodel/eval.py +32 -0
- kiln_ai/datamodel/finetune.py +0 -1
- kiln_ai/datamodel/task_output.py +0 -2
- kiln_ai/datamodel/task_run.py +0 -2
- kiln_ai/datamodel/test_eval_model.py +146 -4
- kiln_ai/utils/logging.py +4 -3
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/RECORD +36 -34
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/adapters/ml_model_list.py:

```diff
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import
+from typing import List, Literal
 
 from pydantic import BaseModel
 
@@ -33,6 +33,7 @@ class ModelFamily(str, Enum):
     deepseek = "deepseek"
     dolphin = "dolphin"
     grok = "grok"
+    kimi = "kimi"
 
 
 # Where models have instruct and raw versions, instruct is default and raw is specified
@@ -50,6 +51,8 @@ class ModelName(str, Enum):
     llama_3_2_11b = "llama_3_2_11b"
     llama_3_2_90b = "llama_3_2_90b"
     llama_3_3_70b = "llama_3_3_70b"
+    llama_4_maverick = "llama_4_maverick"
+    llama_4_scout = "llama_4_scout"
     gpt_4o_mini = "gpt_4o_mini"
     gpt_4o = "gpt_4o"
     gpt_4_1 = "gpt_4_1"
@@ -73,6 +76,9 @@ class ModelName(str, Enum):
     phi_4_mini = "phi_4_mini"
     mistral_large = "mistral_large"
     mistral_nemo = "mistral_nemo"
+    mistral_small_3 = "mistral_small_3"
+    magistral_medium = "magistral_medium"
+    magistral_medium_thinking = "magistral_medium_thinking"
     gemma_2_2b = "gemma_2_2b"
     gemma_2_9b = "gemma_2_9b"
     gemma_2_27b = "gemma_2_27b"
@@ -80,6 +86,8 @@ class ModelName(str, Enum):
     gemma_3_4b = "gemma_3_4b"
     gemma_3_12b = "gemma_3_12b"
     gemma_3_27b = "gemma_3_27b"
+    gemma_3n_2b = "gemma_3n_2b"
+    gemma_3n_4b = "gemma_3n_4b"
     claude_3_5_haiku = "claude_3_5_haiku"
     claude_3_5_sonnet = "claude_3_5_sonnet"
     claude_3_7_sonnet = "claude_3_7_sonnet"
@@ -93,6 +101,7 @@ class ModelName(str, Enum):
     gemini_2_0_flash_lite = "gemini_2_0_flash_lite"
     gemini_2_5_pro = "gemini_2_5_pro"
     gemini_2_5_flash = "gemini_2_5_flash"
+    gemini_2_5_flash_lite = "gemini_2_5_flash_lite"
     nemotron_70b = "nemotron_70b"
     mixtral_8x7b = "mixtral_8x7b"
     qwen_2p5_7b = "qwen_2p5_7b"
@@ -101,7 +110,7 @@ class ModelName(str, Enum):
     qwq_32b = "qwq_32b"
     deepseek_3 = "deepseek_3"
     deepseek_r1 = "deepseek_r1"
-
+    deepseek_r1_0528 = "deepseek_r1_0528"
     deepseek_r1_distill_qwen_32b = "deepseek_r1_distill_qwen_32b"
     deepseek_r1_distill_llama_70b = "deepseek_r1_distill_llama_70b"
     deepseek_r1_distill_qwen_14b = "deepseek_r1_distill_qwen_14b"
@@ -110,6 +119,9 @@ class ModelName(str, Enum):
     deepseek_r1_distill_llama_8b = "deepseek_r1_distill_llama_8b"
     dolphin_2_9_8x22b = "dolphin_2_9_8x22b"
     grok_2 = "grok_2"
+    grok_3 = "grok_3"
+    grok_3_mini = "grok_3_mini"
+    grok_4 = "grok_4"
     qwen_3_0p6b = "qwen_3_0p6b"
     qwen_3_0p6b_no_thinking = "qwen_3_0p6b_no_thinking"
     qwen_3_1p7b = "qwen_3_1p7b"
@@ -126,6 +138,7 @@ class ModelName(str, Enum):
     qwen_3_32b_no_thinking = "qwen_3_32b_no_thinking"
     qwen_3_235b_a22b = "qwen_3_235b_a22b"
     qwen_3_235b_a22b_no_thinking = "qwen_3_235b_a22b_no_thinking"
+    kimi_k2 = "kimi_k2"
 
 
 class ModelParserID(str, Enum):
@@ -174,6 +187,8 @@ class KilnModelProvider(BaseModel):
     reasoning_capable: bool = False
     supports_logprobs: bool = False
     suggested_for_evals: bool = False
+    uncensored: bool = False
+    suggested_for_uncensored_data_gen: bool = False
     tuned_chat_strategy: ChatStrategy | None = None
 
     # TODO P1: Need a more generalized way to handle custom provider parameters.
```
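
The two new flags on `KilnModelProvider` (`uncensored` and `suggested_for_uncensored_data_gen`) are plain boolean pydantic fields, so downstream code can filter the registry with ordinary attribute checks. A minimal sketch of such a filter; the helper name and return shape are illustrative, not part of the package API:

```python
from kiln_ai.adapters.ml_model_list import KilnModel, KilnModelProvider, built_in_models


def uncensored_data_gen_options() -> list[tuple[KilnModel, KilnModelProvider]]:
    """Collect (model, provider) pairs flagged for uncensored data generation.

    Illustrative helper: walks built_in_models and reads the two boolean
    flags added in 0.18.0.
    """
    options = []
    for model in built_in_models:
        for provider in model.providers:
            if provider.uncensored and provider.suggested_for_uncensored_data_gen:
                options.append((model, provider))
    return options


for model, provider in uncensored_data_gen_options():
    print(f"{model.friendly_name} via {provider.name}")
```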
kiln_ai/adapters/ml_model_list.py (continued):

```diff
@@ -250,22 +265,16 @@ built_in_models: List[KilnModel] = [
                 provider_finetune_id="gpt-4.1-mini-2025-04-14",
                 structured_output_mode=StructuredOutputMode.json_schema,
                 supports_logprobs=True,
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 model_id="openai/gpt-4.1-mini",
                 structured_output_mode=StructuredOutputMode.json_schema,
                 supports_logprobs=True,
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.azure_openai,
                 model_id="gpt-4.1-mini",
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
         ],
     ),
@@ -305,6 +314,8 @@ built_in_models: List[KilnModel] = [
                 provider_finetune_id="gpt-4o-2024-08-06",
                 structured_output_mode=StructuredOutputMode.json_schema,
                 supports_logprobs=True,
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
@@ -312,10 +323,14 @@ built_in_models: List[KilnModel] = [
                 structured_output_mode=StructuredOutputMode.json_schema,
                 supports_logprobs=True,
                 logprobs_openrouter_options=True,
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.azure_openai,
                 model_id="gpt-4o",
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
             ),
         ],
     ),
@@ -651,13 +666,11 @@ built_in_models: List[KilnModel] = [
                 name=ModelProviderName.openrouter,
                 structured_output_mode=StructuredOutputMode.function_calling,
                 model_id="anthropic/claude-3.7-sonnet",
-                suggested_for_data_gen=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.anthropic,
                 model_id="claude-3-7-sonnet-20250219",
                 structured_output_mode=StructuredOutputMode.function_calling,
-                suggested_for_data_gen=True,
             ),
         ],
     ),
@@ -732,24 +745,31 @@ built_in_models: List[KilnModel] = [
         providers=[
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
-                model_id="google/gemini-2.5-pro
+                model_id="google/gemini-2.5-pro",
                 structured_output_mode=StructuredOutputMode.json_schema,
                 suggested_for_data_gen=True,
                 suggested_for_evals=True,
+                reasoning_capable=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.gemini_api,
-                model_id="gemini-2.5-pro
+                model_id="gemini-2.5-pro",
                 structured_output_mode=StructuredOutputMode.json_schema,
                 suggested_for_data_gen=True,
                 suggested_for_evals=True,
+                # TODO: Gemini API doesn't return reasoning here, so we don't ask for it. Strange.
+                # reasoning_capable=True,
+                # thinking_level="medium",
             ),
             KilnModelProvider(
                 name=ModelProviderName.vertex,
-                model_id="gemini-2.5-pro
+                model_id="gemini-2.5-pro",
                 structured_output_mode=StructuredOutputMode.json_schema,
                 suggested_for_data_gen=True,
                 suggested_for_evals=True,
+                # TODO: Vertex doesn't return reasoning here, so we don't ask for it. Strange.
+                # reasoning_capable=True,
+                # thinking_level="medium",
             ),
         ],
     ),
@@ -761,18 +781,23 @@ built_in_models: List[KilnModel] = [
         providers=[
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
-                model_id="google/gemini-2.5-flash
+                model_id="google/gemini-2.5-flash",
                 structured_output_mode=StructuredOutputMode.json_schema,
+                reasoning_capable=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.gemini_api,
-                model_id="gemini-2.5-flash
+                model_id="gemini-2.5-flash",
                 structured_output_mode=StructuredOutputMode.json_schema,
+                reasoning_capable=True,
+                thinking_level="medium",
             ),
             KilnModelProvider(
                 name=ModelProviderName.vertex,
-                model_id="gemini-2.5-flash
+                model_id="gemini-2.5-flash",
                 structured_output_mode=StructuredOutputMode.json_schema,
+                reasoning_capable=True,
+                thinking_level="medium",
             ),
         ],
     ),
@@ -904,6 +929,52 @@ built_in_models: List[KilnModel] = [
             ),
         ],
     ),
+    # Llama 4 Maverick Basic
+    KilnModel(
+        family=ModelFamily.llama,
+        name=ModelName.llama_4_maverick,
+        friendly_name="Llama 4 Maverick",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="meta-llama/llama-4-maverick",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="accounts/fireworks/models/llama4-maverick-instruct-basic",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+        ],
+    ),
+    # Llama 4 Scout Basic
+    KilnModel(
+        family=ModelFamily.llama,
+        name=ModelName.llama_4_scout,
+        friendly_name="Llama 4 Scout",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="meta-llama/llama-4-scout",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="accounts/fireworks/models/llama4-scout-instruct-basic",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+        ],
+    ),
     # Llama 3.1-8b
     KilnModel(
         family=ModelFamily.llama,
@@ -1024,6 +1095,33 @@ built_in_models: List[KilnModel] = [
             ),
         ],
     ),
+    # Magistral Medium (Thinking)
+    KilnModel(
+        family=ModelFamily.mistral,
+        name=ModelName.magistral_medium_thinking,
+        friendly_name="Magistral Medium (Thinking)",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="mistralai/magistral-medium-2506:thinking",
+                structured_output_mode=StructuredOutputMode.json_schema,
+                # Thinking tokens are hidden by Mistral so not "reasoning" from Kiln API POV
+            ),
+        ],
+    ),
+    # Magistral Medium (No Thinking)
+    KilnModel(
+        family=ModelFamily.mistral,
+        name=ModelName.magistral_medium,
+        friendly_name="Magistral Medium (No Thinking)",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="mistralai/magistral-medium-2506",
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+        ],
+    ),
     # Mistral Nemo
     KilnModel(
         family=ModelFamily.mistral,
@@ -1047,16 +1145,22 @@ built_in_models: List[KilnModel] = [
                 name=ModelProviderName.amazon_bedrock,
                 structured_output_mode=StructuredOutputMode.json_instructions,
                 model_id="mistral.mistral-large-2407-v1:0",
+                uncensored=True,
+                suggested_for_uncensored_data_gen=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 structured_output_mode=StructuredOutputMode.json_schema,
                 model_id="mistralai/mistral-large",
+                uncensored=True,
+                suggested_for_uncensored_data_gen=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.ollama,
                 structured_output_mode=StructuredOutputMode.json_schema,
                 model_id="mistral-large",
+                uncensored=True,
+                suggested_for_uncensored_data_gen=True,
             ),
         ],
     ),
@@ -1085,12 +1189,6 @@ built_in_models: List[KilnModel] = [
                 supports_data_gen=False,
                 model_id="llama3.2:1b",
             ),
-            KilnModelProvider(
-                name=ModelProviderName.huggingface,
-                model_id="meta-llama/Llama-3.2-1B-Instruct",
-                supports_structured_output=False,
-                supports_data_gen=False,
-            ),
         ],
     ),
     # Llama 3.2 3B
@@ -1116,12 +1214,6 @@ built_in_models: List[KilnModel] = [
                 supports_data_gen=False,
                 model_id="llama3.2",
             ),
-            KilnModelProvider(
-                name=ModelProviderName.huggingface,
-                model_id="meta-llama/Llama-3.2-3B-Instruct",
-                supports_structured_output=False,
-                supports_data_gen=False,
-            ),
             KilnModelProvider(
                 name=ModelProviderName.together_ai,
                 model_id="meta-llama/Llama-3.2-3B-Instruct-Turbo",
@@ -1450,6 +1542,52 @@ built_in_models: List[KilnModel] = [
             ),
         ],
     ),
+    # Gemma 3n 2B
+    KilnModel(
+        family=ModelFamily.gemma,
+        name=ModelName.gemma_3n_2b,
+        friendly_name="Gemma 3n 2B",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.ollama,
+                model_id="gemma3n:e2b",
+                structured_output_mode=StructuredOutputMode.json_schema,
+                supports_data_gen=False,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.gemini_api,
+                model_id="gemma-3n-e2b-it",
+                supports_structured_output=False,
+                supports_data_gen=False,
+            ),
+        ],
+    ),
+    # Gemma 3n 4B
+    KilnModel(
+        family=ModelFamily.gemma,
+        name=ModelName.gemma_3n_4b,
+        friendly_name="Gemma 3n 4B",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="google/gemma-3n-e4b-it",
+                structured_output_mode=StructuredOutputMode.json_instruction_and_object,
+                supports_data_gen=False,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.ollama,
+                model_id="gemma3n:e4b",
+                supports_data_gen=False,
+                structured_output_mode=StructuredOutputMode.json_schema,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.gemini_api,
+                model_id="gemma-3n-e4b-it",
+                structured_output_mode=StructuredOutputMode.json_instructions,
+                supports_data_gen=False,
+            ),
+        ],
+    ),
     # Mixtral 8x7B
     KilnModel(
         family=ModelFamily.mixtral,
@@ -1587,10 +1725,46 @@ built_in_models: List[KilnModel] = [
                 name=ModelProviderName.openrouter,
                 structured_output_mode=StructuredOutputMode.json_instruction_and_object,
                 model_id="mistralai/mistral-small-24b-instruct-2501",
+                uncensored=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.ollama,
                 model_id="mistral-small:24b",
+                uncensored=True,
+            ),
+        ],
+    ),
+    # DeepSeek R1 0528
+    KilnModel(
+        family=ModelFamily.deepseek,
+        name=ModelName.deepseek_r1_0528,
+        friendly_name="DeepSeek R1 0528",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="deepseek/deepseek-r1-0528",
+                parser=ModelParserID.r1_thinking,
+                structured_output_mode=StructuredOutputMode.json_instructions,
+                reasoning_capable=True,
+                r1_openrouter_options=True,
+                require_openrouter_reasoning=True,
+                supports_data_gen=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="accounts/fireworks/models/deepseek-r1-0528",
+                parser=ModelParserID.r1_thinking,
+                supports_data_gen=True,
+                structured_output_mode=StructuredOutputMode.json_instructions,
+                reasoning_capable=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="deepseek-ai/DeepSeek-R1",  # Note: Together remapped the R1 endpoint to this 0528 model
+                structured_output_mode=StructuredOutputMode.json_instructions,
+                parser=ModelParserID.r1_thinking,
+                reasoning_capable=True,
+                supports_data_gen=True,
             ),
         ],
     ),
```
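
All of the R1 0528 providers attach `parser=ModelParserID.r1_thinking`, the parser that splits an R1-style response into a reasoning block and a final answer. A rough sketch of the pattern, assuming the model wraps its chain of thought in `<think>`/`</think>` tags; Kiln's real parser (`kiln_ai/adapters/parsers/r1_parser.py`) handles more edge cases:

```python
def split_r1_thinking(raw: str) -> tuple[str | None, str]:
    """Split R1-style output into (thinking, answer). Sketch only."""
    open_tag, close_tag = "<think>", "</think>"
    if close_tag not in raw:
        # No closing tag: treat the whole output as the answer.
        return None, raw.strip()
    thinking, _, answer = raw.partition(close_tag)
    thinking = thinking.replace(open_tag, "", 1)
    return thinking.strip(), answer.strip()


thinking, answer = split_r1_thinking("<think>2 + 2 = 4</think>The answer is 4.")
assert thinking == "2 + 2 = 4"
assert answer == "The answer is 4."
```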
kiln_ai/adapters/ml_model_list.py (continued):

```diff
@@ -1623,7 +1797,7 @@ built_in_models: List[KilnModel] = [
     KilnModel(
         family=ModelFamily.deepseek,
         name=ModelName.deepseek_r1,
-        friendly_name="DeepSeek R1",
+        friendly_name="DeepSeek R1 (Original)",
         providers=[
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
@@ -1633,8 +1807,6 @@ built_in_models: List[KilnModel] = [
                 reasoning_capable=True,
                 r1_openrouter_options=True,
                 require_openrouter_reasoning=True,
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.fireworks_ai,
@@ -1642,8 +1814,6 @@ built_in_models: List[KilnModel] = [
                 parser=ModelParserID.r1_thinking,
                 structured_output_mode=StructuredOutputMode.json_instructions,
                 reasoning_capable=True,
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 # I want your RAM
@@ -1652,17 +1822,6 @@ built_in_models: List[KilnModel] = [
                 parser=ModelParserID.r1_thinking,
                 structured_output_mode=StructuredOutputMode.json_instructions,
                 reasoning_capable=True,
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
-            ),
-            KilnModelProvider(
-                name=ModelProviderName.together_ai,
-                model_id="deepseek-ai/DeepSeek-R1",
-                structured_output_mode=StructuredOutputMode.json_instructions,
-                parser=ModelParserID.r1_thinking,
-                reasoning_capable=True,
-                suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
         ],
     ),
@@ -1858,12 +2017,67 @@ built_in_models: List[KilnModel] = [
                 structured_output_mode=StructuredOutputMode.json_schema,
                 supports_data_gen=True,
                 model_id="dolphin-mixtral:8x22b",
+                uncensored=True,
+                suggested_for_uncensored_data_gen=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 supports_data_gen=True,
                 structured_output_mode=StructuredOutputMode.json_instruction_and_object,
                 model_id="cognitivecomputations/dolphin-mixtral-8x22b",
+                uncensored=True,
+                suggested_for_uncensored_data_gen=True,
+            ),
+        ],
+    ),
+    # Grok 4
+    KilnModel(
+        family=ModelFamily.grok,
+        name=ModelName.grok_4,
+        friendly_name="Grok 4",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="x-ai/grok-4",
+                supports_structured_output=True,
+                supports_data_gen=True,
+                structured_output_mode=StructuredOutputMode.json_schema,
+                suggested_for_data_gen=True,
+                uncensored=True,
+                suggested_for_uncensored_data_gen=True,
+            ),
+        ],
+    ),
+    # Grok 3
+    KilnModel(
+        family=ModelFamily.grok,
+        name=ModelName.grok_3,
+        friendly_name="Grok 3",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="x-ai/grok-3",
+                supports_structured_output=True,
+                supports_data_gen=True,
+                structured_output_mode=StructuredOutputMode.json_schema,
+                suggested_for_data_gen=True,
+                uncensored=True,
+            ),
+        ],
+    ),
+    # Grok 3 Mini
+    KilnModel(
+        family=ModelFamily.grok,
+        name=ModelName.grok_3_mini,
+        friendly_name="Grok 3 Mini",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="x-ai/grok-3-mini",
+                supports_structured_output=True,
+                supports_data_gen=True,
+                structured_output_mode=StructuredOutputMode.json_schema,
+                uncensored=True,
             ),
         ],
     ),
@@ -2295,6 +2509,41 @@ built_in_models: List[KilnModel] = [
             ),
         ],
     ),
+    # Kimi K2 Instruct
+    KilnModel(
+        family=ModelFamily.kimi,
+        name=ModelName.kimi_k2,
+        friendly_name="Kimi K2",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="accounts/fireworks/models/kimi-k2-instruct",
+                structured_output_mode=StructuredOutputMode.json_instruction_and_object,
+                # Ignoring json mode for now, so not suggested for evals
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="moonshotai/kimi-k2",
+                structured_output_mode=StructuredOutputMode.json_schema,
+                supports_data_gen=True,
+                suggested_for_evals=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="moonshotai/Kimi-K2-Instruct",
+                supports_data_gen=True,
+                structured_output_mode=StructuredOutputMode.json_instruction_and_object,
+                suggested_for_evals=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.groq,
+                model_id="moonshotai/kimi-k2-instruct",
+                supports_data_gen=True,
+                structured_output_mode=StructuredOutputMode.function_calling,
+                suggested_for_evals=True,
+            ),
+        ],
+    ),
 ]
 
 
```
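
Note that entries like Kimi K2 ship a different `structured_output_mode` per provider (JSON schema on OpenRouter, function calling on Groq), so consumers need the provider-level entry rather than anything on the model itself. A sketch of that lookup; the helper is illustrative, and Kiln ships its own provider resolution:

```python
from kiln_ai.adapters.ml_model_list import (
    ModelName,
    ModelProviderName,
    built_in_models,
)


def provider_entry(model_name: ModelName, provider_name: ModelProviderName):
    """Return the provider-specific config for a built-in model, or None."""
    for model in built_in_models:
        if model.name == model_name:
            for provider in model.providers:
                if provider.name == provider_name:
                    return provider
    return None


entry = provider_entry(ModelName.kimi_k2, ModelProviderName.groq)
if entry is not None:
    # function_calling on Groq; json_schema on OpenRouter.
    print(entry.structured_output_mode)
```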
kiln_ai/adapters/model_adapters/litellm_adapter.py:

```diff
@@ -251,6 +251,10 @@ class LiteLlmAdapter(BaseAdapter):
                 "exclude": False,
             }
 
+        if provider.name == ModelProviderName.openrouter:
+            # Ask OpenRouter to include usage in the response (cost)
+            extra_body["usage"] = {"include": True}
+
         if provider.anthropic_extended_thinking:
             extra_body["thinking"] = {"type": "enabled", "budget_tokens": 4000}
 
```
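
The new `usage` key is OpenRouter's usage-accounting option: when included in the request body, OpenRouter appends token counts and cost to the completion response. A minimal sketch of the equivalent direct LiteLLM call; the model choice and configured API key are assumptions, not taken from this diff:

```python
import litellm

# Ask OpenRouter to append usage accounting (tokens + cost) to the response,
# mirroring what LiteLlmAdapter.build_extra_body now does for OpenRouter.
response = litellm.completion(
    model="openrouter/openai/gpt-4o",  # assumes OPENROUTER_API_KEY is set
    messages=[{"role": "user", "content": "Say hi"}],
    extra_body={"usage": {"include": True}},
)
print(response.usage)  # cost is attached when OpenRouter honors the flag
```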
kiln_ai/adapters/model_adapters/litellm_adapter.py (continued):

```diff
@@ -386,7 +390,12 @@ class LiteLlmAdapter(BaseAdapter):
 
     def usage_from_response(self, response: ModelResponse) -> Usage | None:
         litellm_usage = response.get("usage", None)
+
+        # LiteLLM isn't consistent in how it returns the cost.
         cost = response._hidden_params.get("response_cost", None)
+        if cost is None and litellm_usage:
+            cost = litellm_usage.get("cost", None)
+
         if not litellm_usage and not cost:
             return None
 
```
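
The fallback exists because LiteLLM surfaces cost in two places: its own computed `_hidden_params["response_cost"]`, and, when OpenRouter usage accounting is enabled, a `cost` field on the usage object itself. A condensed sketch of the resolution order:

```python
def resolve_cost(hidden_params: dict, usage: dict | None) -> float | None:
    """Prefer LiteLLM's response_cost, then fall back to usage["cost"]."""
    cost = hidden_params.get("response_cost")
    if cost is None and usage:
        cost = usage.get("cost")
    return cost


# OpenRouter usage-accounting shape: cost rides on the usage object.
assert resolve_cost({}, {"prompt_tokens": 10, "cost": 0.5}) == 0.5
# Standard LiteLLM shape: cost precomputed into _hidden_params.
assert resolve_cost({"response_cost": 0.25}, None) == 0.25
```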
kiln_ai/adapters/model_adapters/test_litellm_adapter.py:

```diff
@@ -4,7 +4,6 @@ import pytest
 
 from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
 from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
-from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
 from kiln_ai.datamodel import Task
 from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.task import RunConfig, RunConfigProperties
@@ -352,6 +352,43 @@ def test_litellm_model_id_unknown_provider(config, mock_task):
         adapter.litellm_model_id()
 
 
+@pytest.mark.parametrize(
+    "provider_name,expected_usage_param",
+    [
+        (ModelProviderName.openrouter, {"usage": {"include": True}}),
+        (ModelProviderName.openai, {}),
+        (ModelProviderName.anthropic, {}),
+        (ModelProviderName.groq, {}),
+    ],
+)
+def test_build_extra_body_openrouter_usage(
+    config, mock_task, provider_name, expected_usage_param
+):
+    """Test build_extra_body includes usage parameter for OpenRouter providers"""
+    adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+
+    # Create a mock provider with the specified name and minimal required attributes
+    mock_provider = Mock()
+    mock_provider.name = provider_name
+    mock_provider.thinking_level = None
+    mock_provider.require_openrouter_reasoning = False
+    mock_provider.anthropic_extended_thinking = False
+    mock_provider.r1_openrouter_options = False
+    mock_provider.logprobs_openrouter_options = False
+    mock_provider.openrouter_skip_required_parameters = False
+
+    # Call build_extra_body
+    extra_body = adapter.build_extra_body(mock_provider)
+
+    # Verify the usage parameter is included only for OpenRouter
+    for key, value in expected_usage_param.items():
+        assert extra_body.get(key) == value
+
+    # Verify non-OpenRouter providers don't have the usage parameter
+    if provider_name != ModelProviderName.openrouter:
+        assert "usage" not in extra_body
+
+
 @pytest.mark.asyncio
 async def test_build_completion_kwargs_custom_temperature_top_p(config, mock_task):
     """Test build_completion_kwargs with custom temperature and top_p values"""
@@ -474,6 +511,17 @@ async def test_build_completion_kwargs(
         ({"prompt_tokens": 10}, None, None),
         # Invalid cost type (should be ignored)
         (None, "0.5", None),
+        # Cost in OpenRouter format
+        (
+            litellm.types.utils.Usage(
+                prompt_tokens=10,
+                completion_tokens=20,
+                total_tokens=30,
+                cost=0.5,
+            ),
+            None,
+            Usage(input_tokens=10, output_tokens=20, total_tokens=30, cost=0.5),
+        ),
     ],
 )
 def test_usage_from_response(config, mock_task, litellm_usage, cost, expected_usage):
```