kiln-ai 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (36)
  1. kiln_ai/adapters/chat/chat_formatter.py +0 -1
  2. kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
  3. kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
  4. kiln_ai/adapters/data_gen/test_data_gen_task.py +311 -34
  5. kiln_ai/adapters/eval/base_eval.py +6 -7
  6. kiln_ai/adapters/eval/eval_runner.py +5 -1
  7. kiln_ai/adapters/eval/g_eval.py +17 -12
  8. kiln_ai/adapters/eval/test_base_eval.py +8 -2
  9. kiln_ai/adapters/eval/test_g_eval.py +115 -5
  10. kiln_ai/adapters/fine_tune/base_finetune.py +1 -6
  11. kiln_ai/adapters/fine_tune/dataset_formatter.py +1 -5
  12. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +1 -1
  13. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +2 -7
  14. kiln_ai/adapters/fine_tune/together_finetune.py +1 -1
  15. kiln_ai/adapters/ml_model_list.py +293 -44
  16. kiln_ai/adapters/model_adapters/litellm_adapter.py +9 -0
  17. kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -1
  18. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +48 -0
  19. kiln_ai/adapters/model_adapters/test_structured_output.py +3 -3
  20. kiln_ai/adapters/parsers/parser_registry.py +0 -2
  21. kiln_ai/adapters/parsers/r1_parser.py +0 -1
  22. kiln_ai/adapters/remote_config.py +66 -0
  23. kiln_ai/adapters/repair/repair_task.py +1 -6
  24. kiln_ai/adapters/test_ml_model_list.py +18 -0
  25. kiln_ai/adapters/test_prompt_adaptors.py +0 -4
  26. kiln_ai/adapters/test_remote_config.py +100 -0
  27. kiln_ai/datamodel/eval.py +32 -0
  28. kiln_ai/datamodel/finetune.py +0 -1
  29. kiln_ai/datamodel/task_output.py +0 -2
  30. kiln_ai/datamodel/task_run.py +0 -2
  31. kiln_ai/datamodel/test_eval_model.py +146 -4
  32. kiln_ai/utils/logging.py +4 -3
  33. {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
  34. {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/RECORD +36 -34
  35. {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
  36. {kiln_ai-0.17.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,5 +1,5 @@
  from enum import Enum
- from typing import Dict, List, Literal
+ from typing import List, Literal

  from pydantic import BaseModel

@@ -33,6 +33,7 @@ class ModelFamily(str, Enum):
  deepseek = "deepseek"
  dolphin = "dolphin"
  grok = "grok"
+ kimi = "kimi"


  # Where models have instruct and raw versions, instruct is default and raw is specified
@@ -50,6 +51,8 @@ class ModelName(str, Enum):
  llama_3_2_11b = "llama_3_2_11b"
  llama_3_2_90b = "llama_3_2_90b"
  llama_3_3_70b = "llama_3_3_70b"
+ llama_4_maverick = "llama_4_maverick"
+ llama_4_scout = "llama_4_scout"
  gpt_4o_mini = "gpt_4o_mini"
  gpt_4o = "gpt_4o"
  gpt_4_1 = "gpt_4_1"
@@ -73,6 +76,9 @@ class ModelName(str, Enum):
  phi_4_mini = "phi_4_mini"
  mistral_large = "mistral_large"
  mistral_nemo = "mistral_nemo"
+ mistral_small_3 = "mistral_small_3"
+ magistral_medium = "magistral_medium"
+ magistral_medium_thinking = "magistral_medium_thinking"
  gemma_2_2b = "gemma_2_2b"
  gemma_2_9b = "gemma_2_9b"
  gemma_2_27b = "gemma_2_27b"
@@ -80,6 +86,8 @@ class ModelName(str, Enum):
  gemma_3_4b = "gemma_3_4b"
  gemma_3_12b = "gemma_3_12b"
  gemma_3_27b = "gemma_3_27b"
+ gemma_3n_2b = "gemma_3n_2b"
+ gemma_3n_4b = "gemma_3n_4b"
  claude_3_5_haiku = "claude_3_5_haiku"
  claude_3_5_sonnet = "claude_3_5_sonnet"
  claude_3_7_sonnet = "claude_3_7_sonnet"
@@ -93,6 +101,7 @@ class ModelName(str, Enum):
  gemini_2_0_flash_lite = "gemini_2_0_flash_lite"
  gemini_2_5_pro = "gemini_2_5_pro"
  gemini_2_5_flash = "gemini_2_5_flash"
+ gemini_2_5_flash_lite = "gemini_2_5_flash_lite"
  nemotron_70b = "nemotron_70b"
  mixtral_8x7b = "mixtral_8x7b"
  qwen_2p5_7b = "qwen_2p5_7b"
@@ -101,7 +110,7 @@ class ModelName(str, Enum):
  qwq_32b = "qwq_32b"
  deepseek_3 = "deepseek_3"
  deepseek_r1 = "deepseek_r1"
- mistral_small_3 = "mistral_small_3"
+ deepseek_r1_0528 = "deepseek_r1_0528"
  deepseek_r1_distill_qwen_32b = "deepseek_r1_distill_qwen_32b"
  deepseek_r1_distill_llama_70b = "deepseek_r1_distill_llama_70b"
  deepseek_r1_distill_qwen_14b = "deepseek_r1_distill_qwen_14b"
@@ -110,6 +119,9 @@ class ModelName(str, Enum):
  deepseek_r1_distill_llama_8b = "deepseek_r1_distill_llama_8b"
  dolphin_2_9_8x22b = "dolphin_2_9_8x22b"
  grok_2 = "grok_2"
+ grok_3 = "grok_3"
+ grok_3_mini = "grok_3_mini"
+ grok_4 = "grok_4"
  qwen_3_0p6b = "qwen_3_0p6b"
  qwen_3_0p6b_no_thinking = "qwen_3_0p6b_no_thinking"
  qwen_3_1p7b = "qwen_3_1p7b"
@@ -126,6 +138,7 @@ class ModelName(str, Enum):
  qwen_3_32b_no_thinking = "qwen_3_32b_no_thinking"
  qwen_3_235b_a22b = "qwen_3_235b_a22b"
  qwen_3_235b_a22b_no_thinking = "qwen_3_235b_a22b_no_thinking"
+ kimi_k2 = "kimi_k2"


  class ModelParserID(str, Enum):
@@ -174,6 +187,8 @@ class KilnModelProvider(BaseModel):
  reasoning_capable: bool = False
  supports_logprobs: bool = False
  suggested_for_evals: bool = False
+ uncensored: bool = False
+ suggested_for_uncensored_data_gen: bool = False
  tuned_chat_strategy: ChatStrategy | None = None

  # TODO P1: Need a more generalized way to handle custom provider parameters.
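Note on the two new KilnModelProvider fields: `uncensored` and `suggested_for_uncensored_data_gen` both default to False and are enabled per provider in the hunks further down. A minimal sketch of how a caller might use them; the helper function below is illustrative only, not part of the package:

    from kiln_ai.adapters.ml_model_list import KilnModel, built_in_models

    def models_for_uncensored_data_gen() -> list[KilnModel]:
        # Keep any built-in model with at least one provider flagged as
        # suggested for uncensored data generation (e.g. Mistral Large,
        # Dolphin 2.9 8x22B and Grok 4 in the hunks below).
        return [
            model
            for model in built_in_models
            if any(p.suggested_for_uncensored_data_gen for p in model.providers)
        ]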
@@ -250,22 +265,16 @@ built_in_models: List[KilnModel] = [
  provider_finetune_id="gpt-4.1-mini-2025-04-14",
  structured_output_mode=StructuredOutputMode.json_schema,
  supports_logprobs=True,
- suggested_for_data_gen=True,
- suggested_for_evals=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.openrouter,
  model_id="openai/gpt-4.1-mini",
  structured_output_mode=StructuredOutputMode.json_schema,
  supports_logprobs=True,
- suggested_for_data_gen=True,
- suggested_for_evals=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.azure_openai,
  model_id="gpt-4.1-mini",
- suggested_for_data_gen=True,
- suggested_for_evals=True,
  ),
  ],
  ),
@@ -305,6 +314,8 @@ built_in_models: List[KilnModel] = [
  provider_finetune_id="gpt-4o-2024-08-06",
  structured_output_mode=StructuredOutputMode.json_schema,
  supports_logprobs=True,
+ suggested_for_data_gen=True,
+ suggested_for_evals=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.openrouter,
@@ -312,10 +323,14 @@ built_in_models: List[KilnModel] = [
  structured_output_mode=StructuredOutputMode.json_schema,
  supports_logprobs=True,
  logprobs_openrouter_options=True,
+ suggested_for_data_gen=True,
+ suggested_for_evals=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.azure_openai,
  model_id="gpt-4o",
+ suggested_for_data_gen=True,
+ suggested_for_evals=True,
  ),
  ],
  ),
@@ -651,13 +666,11 @@ built_in_models: List[KilnModel] = [
  name=ModelProviderName.openrouter,
  structured_output_mode=StructuredOutputMode.function_calling,
  model_id="anthropic/claude-3.7-sonnet",
- suggested_for_data_gen=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.anthropic,
  model_id="claude-3-7-sonnet-20250219",
  structured_output_mode=StructuredOutputMode.function_calling,
- suggested_for_data_gen=True,
  ),
  ],
  ),
@@ -732,24 +745,31 @@ built_in_models: List[KilnModel] = [
  providers=[
  KilnModelProvider(
  name=ModelProviderName.openrouter,
- model_id="google/gemini-2.5-pro-preview-03-25",
+ model_id="google/gemini-2.5-pro",
  structured_output_mode=StructuredOutputMode.json_schema,
  suggested_for_data_gen=True,
  suggested_for_evals=True,
+ reasoning_capable=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.gemini_api,
- model_id="gemini-2.5-pro-preview-03-25",
+ model_id="gemini-2.5-pro",
  structured_output_mode=StructuredOutputMode.json_schema,
  suggested_for_data_gen=True,
  suggested_for_evals=True,
+ # TODO: Gemini API doesn't return reasoning here, so we don't ask for it. Strange.
+ # reasoning_capable=True,
+ # thinking_level="medium",
  ),
  KilnModelProvider(
  name=ModelProviderName.vertex,
- model_id="gemini-2.5-pro-preview-03-25",
+ model_id="gemini-2.5-pro",
  structured_output_mode=StructuredOutputMode.json_schema,
  suggested_for_data_gen=True,
  suggested_for_evals=True,
+ # TODO: Vertex doesn't return reasoning here, so we don't ask for it. Strange.
+ # reasoning_capable=True,
+ # thinking_level="medium",
  ),
  ],
  ),
@@ -761,18 +781,23 @@ built_in_models: List[KilnModel] = [
  providers=[
  KilnModelProvider(
  name=ModelProviderName.openrouter,
- model_id="google/gemini-2.5-flash-preview",
+ model_id="google/gemini-2.5-flash",
  structured_output_mode=StructuredOutputMode.json_schema,
+ reasoning_capable=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.gemini_api,
- model_id="gemini-2.5-flash-preview-04-17",
+ model_id="gemini-2.5-flash",
  structured_output_mode=StructuredOutputMode.json_schema,
+ reasoning_capable=True,
+ thinking_level="medium",
  ),
  KilnModelProvider(
  name=ModelProviderName.vertex,
- model_id="gemini-2.5-flash-preview-04-17",
+ model_id="gemini-2.5-flash",
  structured_output_mode=StructuredOutputMode.json_schema,
+ reasoning_capable=True,
+ thinking_level="medium",
  ),
  ],
  ),
@@ -904,6 +929,52 @@ built_in_models: List[KilnModel] = [
  ),
  ],
  ),
+ # Llama 4 Maverick Basic
+ KilnModel(
+ family=ModelFamily.llama,
+ name=ModelName.llama_4_maverick,
+ friendly_name="Llama 4 Maverick",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="meta-llama/llama-4-maverick",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.fireworks_ai,
+ model_id="accounts/fireworks/models/llama4-maverick-instruct-basic",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.together_ai,
+ model_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ ],
+ ),
+ # Llama 4 Scout Basic
+ KilnModel(
+ family=ModelFamily.llama,
+ name=ModelName.llama_4_scout,
+ friendly_name="Llama 4 Scout",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="meta-llama/llama-4-scout",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.fireworks_ai,
+ model_id="accounts/fireworks/models/llama4-scout-instruct-basic",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.together_ai,
+ model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ ],
+ ),
  # Llama 3.1-8b
  KilnModel(
  family=ModelFamily.llama,
@@ -1024,6 +1095,33 @@ built_in_models: List[KilnModel] = [
  ),
  ],
  ),
+ # Magistral Medium (Thinking)
+ KilnModel(
+ family=ModelFamily.mistral,
+ name=ModelName.magistral_medium_thinking,
+ friendly_name="Magistral Medium (Thinking)",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="mistralai/magistral-medium-2506:thinking",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ # Thinking tokens are hidden by Mistral so not "reasoning" from Kiln API POV
+ ),
+ ],
+ ),
+ # Magistral Medium (No Thinking)
+ KilnModel(
+ family=ModelFamily.mistral,
+ name=ModelName.magistral_medium,
+ friendly_name="Magistral Medium (No Thinking)",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="mistralai/magistral-medium-2506",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ ],
+ ),
  # Mistral Nemo
  KilnModel(
  family=ModelFamily.mistral,
@@ -1047,16 +1145,22 @@ built_in_models: List[KilnModel] = [
  name=ModelProviderName.amazon_bedrock,
  structured_output_mode=StructuredOutputMode.json_instructions,
  model_id="mistral.mistral-large-2407-v1:0",
+ uncensored=True,
+ suggested_for_uncensored_data_gen=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.openrouter,
  structured_output_mode=StructuredOutputMode.json_schema,
  model_id="mistralai/mistral-large",
+ uncensored=True,
+ suggested_for_uncensored_data_gen=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.ollama,
  structured_output_mode=StructuredOutputMode.json_schema,
  model_id="mistral-large",
+ uncensored=True,
+ suggested_for_uncensored_data_gen=True,
  ),
  ],
  ),
@@ -1085,12 +1189,6 @@ built_in_models: List[KilnModel] = [
  supports_data_gen=False,
  model_id="llama3.2:1b",
  ),
- KilnModelProvider(
- name=ModelProviderName.huggingface,
- model_id="meta-llama/Llama-3.2-1B-Instruct",
- supports_structured_output=False,
- supports_data_gen=False,
- ),
  ],
  ),
  # Llama 3.2 3B
@@ -1116,12 +1214,6 @@ built_in_models: List[KilnModel] = [
  supports_data_gen=False,
  model_id="llama3.2",
  ),
- KilnModelProvider(
- name=ModelProviderName.huggingface,
- model_id="meta-llama/Llama-3.2-3B-Instruct",
- supports_structured_output=False,
- supports_data_gen=False,
- ),
  KilnModelProvider(
  name=ModelProviderName.together_ai,
  model_id="meta-llama/Llama-3.2-3B-Instruct-Turbo",
@@ -1450,6 +1542,52 @@ built_in_models: List[KilnModel] = [
  ),
  ],
  ),
+ # Gemma 3n 2B
+ KilnModel(
+ family=ModelFamily.gemma,
+ name=ModelName.gemma_3n_2b,
+ friendly_name="Gemma 3n 2B",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.ollama,
+ model_id="gemma3n:e2b",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ supports_data_gen=False,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.gemini_api,
+ model_id="gemma-3n-e2b-it",
+ supports_structured_output=False,
+ supports_data_gen=False,
+ ),
+ ],
+ ),
+ # Gemma 3n 4B
+ KilnModel(
+ family=ModelFamily.gemma,
+ name=ModelName.gemma_3n_4b,
+ friendly_name="Gemma 3n 4B",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="google/gemma-3n-e4b-it",
+ structured_output_mode=StructuredOutputMode.json_instruction_and_object,
+ supports_data_gen=False,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.ollama,
+ model_id="gemma3n:e4b",
+ supports_data_gen=False,
+ structured_output_mode=StructuredOutputMode.json_schema,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.gemini_api,
+ model_id="gemma-3n-e4b-it",
+ structured_output_mode=StructuredOutputMode.json_instructions,
+ supports_data_gen=False,
+ ),
+ ],
+ ),
  # Mixtral 8x7B
  KilnModel(
  family=ModelFamily.mixtral,
@@ -1587,10 +1725,46 @@ built_in_models: List[KilnModel] = [
  name=ModelProviderName.openrouter,
  structured_output_mode=StructuredOutputMode.json_instruction_and_object,
  model_id="mistralai/mistral-small-24b-instruct-2501",
+ uncensored=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.ollama,
  model_id="mistral-small:24b",
+ uncensored=True,
+ ),
+ ],
+ ),
+ # DeepSeek R1 0528
+ KilnModel(
+ family=ModelFamily.deepseek,
+ name=ModelName.deepseek_r1_0528,
+ friendly_name="DeepSeek R1 0528",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="deepseek/deepseek-r1-0528",
+ parser=ModelParserID.r1_thinking,
+ structured_output_mode=StructuredOutputMode.json_instructions,
+ reasoning_capable=True,
+ r1_openrouter_options=True,
+ require_openrouter_reasoning=True,
+ supports_data_gen=True,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.fireworks_ai,
+ model_id="accounts/fireworks/models/deepseek-r1-0528",
+ parser=ModelParserID.r1_thinking,
+ supports_data_gen=True,
+ structured_output_mode=StructuredOutputMode.json_instructions,
+ reasoning_capable=True,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.together_ai,
+ model_id="deepseek-ai/DeepSeek-R1", # Note: Together remapped the R1 endpoint to this 0528 model
+ structured_output_mode=StructuredOutputMode.json_instructions,
+ parser=ModelParserID.r1_thinking,
+ reasoning_capable=True,
+ supports_data_gen=True,
  ),
  ],
  ),
@@ -1623,7 +1797,7 @@ built_in_models: List[KilnModel] = [
  KilnModel(
  family=ModelFamily.deepseek,
  name=ModelName.deepseek_r1,
- friendly_name="DeepSeek R1",
+ friendly_name="DeepSeek R1 (Original)",
  providers=[
  KilnModelProvider(
  name=ModelProviderName.openrouter,
@@ -1633,8 +1807,6 @@ built_in_models: List[KilnModel] = [
  reasoning_capable=True,
  r1_openrouter_options=True,
  require_openrouter_reasoning=True,
- suggested_for_data_gen=True,
- suggested_for_evals=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.fireworks_ai,
@@ -1642,8 +1814,6 @@ built_in_models: List[KilnModel] = [
  parser=ModelParserID.r1_thinking,
  structured_output_mode=StructuredOutputMode.json_instructions,
  reasoning_capable=True,
- suggested_for_data_gen=True,
- suggested_for_evals=True,
  ),
  KilnModelProvider(
  # I want your RAM
@@ -1652,17 +1822,6 @@ built_in_models: List[KilnModel] = [
  parser=ModelParserID.r1_thinking,
  structured_output_mode=StructuredOutputMode.json_instructions,
  reasoning_capable=True,
- suggested_for_data_gen=True,
- suggested_for_evals=True,
- ),
- KilnModelProvider(
- name=ModelProviderName.together_ai,
- model_id="deepseek-ai/DeepSeek-R1",
- structured_output_mode=StructuredOutputMode.json_instructions,
- parser=ModelParserID.r1_thinking,
- reasoning_capable=True,
- suggested_for_data_gen=True,
- suggested_for_evals=True,
  ),
  ],
  ),
@@ -1858,12 +2017,67 @@ built_in_models: List[KilnModel] = [
  structured_output_mode=StructuredOutputMode.json_schema,
  supports_data_gen=True,
  model_id="dolphin-mixtral:8x22b",
+ uncensored=True,
+ suggested_for_uncensored_data_gen=True,
  ),
  KilnModelProvider(
  name=ModelProviderName.openrouter,
  supports_data_gen=True,
  structured_output_mode=StructuredOutputMode.json_instruction_and_object,
  model_id="cognitivecomputations/dolphin-mixtral-8x22b",
+ uncensored=True,
+ suggested_for_uncensored_data_gen=True,
+ ),
+ ],
+ ),
+ # Grok 4
+ KilnModel(
+ family=ModelFamily.grok,
+ name=ModelName.grok_4,
+ friendly_name="Grok 4",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="x-ai/grok-4",
+ supports_structured_output=True,
+ supports_data_gen=True,
+ structured_output_mode=StructuredOutputMode.json_schema,
+ suggested_for_data_gen=True,
+ uncensored=True,
+ suggested_for_uncensored_data_gen=True,
+ ),
+ ],
+ ),
+ # Grok 3
+ KilnModel(
+ family=ModelFamily.grok,
+ name=ModelName.grok_3,
+ friendly_name="Grok 3",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="x-ai/grok-3",
+ supports_structured_output=True,
+ supports_data_gen=True,
+ structured_output_mode=StructuredOutputMode.json_schema,
+ suggested_for_data_gen=True,
+ uncensored=True,
+ ),
+ ],
+ ),
+ # Grok 3 Mini
+ KilnModel(
+ family=ModelFamily.grok,
+ name=ModelName.grok_3_mini,
+ friendly_name="Grok 3 Mini",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="x-ai/grok-3-mini",
+ supports_structured_output=True,
+ supports_data_gen=True,
+ structured_output_mode=StructuredOutputMode.json_schema,
+ uncensored=True,
  ),
  ],
  ),
@@ -2295,6 +2509,41 @@ built_in_models: List[KilnModel] = [
  ),
  ],
  ),
+ # Kimi K2 Instruct
+ KilnModel(
+ family=ModelFamily.kimi,
+ name=ModelName.kimi_k2,
+ friendly_name="Kimi K2",
+ providers=[
+ KilnModelProvider(
+ name=ModelProviderName.fireworks_ai,
+ model_id="accounts/fireworks/models/kimi-k2-instruct",
+ structured_output_mode=StructuredOutputMode.json_instruction_and_object,
+ # Ignoring json mode for now, so not suggested for evals
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.openrouter,
+ model_id="moonshotai/kimi-k2",
+ structured_output_mode=StructuredOutputMode.json_schema,
+ supports_data_gen=True,
+ suggested_for_evals=True,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.together_ai,
+ model_id="moonshotai/Kimi-K2-Instruct",
+ supports_data_gen=True,
+ structured_output_mode=StructuredOutputMode.json_instruction_and_object,
+ suggested_for_evals=True,
+ ),
+ KilnModelProvider(
+ name=ModelProviderName.groq,
+ model_id="moonshotai/kimi-k2-instruct",
+ supports_data_gen=True,
+ structured_output_mode=StructuredOutputMode.function_calling,
+ suggested_for_evals=True,
+ ),
+ ],
+ ),
  ]
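The new Kimi K2 entry can be sanity-checked against this diff with a short lookup. The snippet is illustrative only and assumes `ModelProviderName` is importable from the same module in which it is used above:

    from kiln_ai.adapters.ml_model_list import (
        ModelName,
        ModelProviderName,
        built_in_models,
    )

    # Find the Kimi K2 model and its OpenRouter provider entry from this diff.
    kimi_k2 = next(m for m in built_in_models if m.name == ModelName.kimi_k2)
    openrouter = next(p for p in kimi_k2.providers if p.name == ModelProviderName.openrouter)
    assert openrouter.model_id == "moonshotai/kimi-k2"
    assert openrouter.suggested_for_evals is True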
@@ -251,6 +251,10 @@ class LiteLlmAdapter(BaseAdapter):
  "exclude": False,
  }

+ if provider.name == ModelProviderName.openrouter:
+ # Ask OpenRouter to include usage in the response (cost)
+ extra_body["usage"] = {"include": True}
+
  if provider.anthropic_extended_thinking:
  extra_body["thinking"] = {"type": "enabled", "budget_tokens": 4000}
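The new OpenRouter branch above asks OpenRouter for usage accounting by adding `usage: {include: true}` to `extra_body`. A rough sketch of the request this produces, assuming LiteLLM forwards `extra_body` to the provider unchanged (the model name and message are placeholders):

    import litellm

    response = litellm.completion(
        model="openrouter/openai/gpt-4o-mini",
        messages=[{"role": "user", "content": "Hello"}],
        extra_body={"usage": {"include": True}},  # OpenRouter usage accounting
    )
    # With usage accounting enabled, OpenRouter reports the request cost inside
    # the usage block, which the adapter reads back in usage_from_response below.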
@@ -386,7 +390,12 @@ class LiteLlmAdapter(BaseAdapter):

  def usage_from_response(self, response: ModelResponse) -> Usage | None:
  litellm_usage = response.get("usage", None)
+
+ # LiteLLM isn't consistent in how it returns the cost.
  cost = response._hidden_params.get("response_cost", None)
+ if cost is None and litellm_usage:
+ cost = litellm_usage.get("cost", None)
+
  if not litellm_usage and not cost:
  return None
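Read in isolation, the fallback order introduced here is: prefer LiteLLM's computed `response_cost` from `_hidden_params`, and only fall back to the provider-reported `cost` inside the usage block (the field OpenRouter populates). An illustrative restatement, not package code:

    def resolve_cost(response) -> float | None:
        # Prefer LiteLLM's own cost calculation when it is present.
        cost = response._hidden_params.get("response_cost", None)
        if cost is None:
            usage = response.get("usage", None)
            if usage:
                # OpenRouter-style responses carry cost inside the usage block.
                cost = usage.get("cost", None)
        return cost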
@@ -4,7 +4,6 @@ import pytest

  from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
  from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter, RunOutput
- from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
  from kiln_ai.datamodel import Task
  from kiln_ai.datamodel.datamodel_enums import ChatStrategy
  from kiln_ai.datamodel.task import RunConfig, RunConfigProperties
@@ -352,6 +352,43 @@ def test_litellm_model_id_unknown_provider(config, mock_task):
  adapter.litellm_model_id()


+ @pytest.mark.parametrize(
+ "provider_name,expected_usage_param",
+ [
+ (ModelProviderName.openrouter, {"usage": {"include": True}}),
+ (ModelProviderName.openai, {}),
+ (ModelProviderName.anthropic, {}),
+ (ModelProviderName.groq, {}),
+ ],
+ )
+ def test_build_extra_body_openrouter_usage(
+ config, mock_task, provider_name, expected_usage_param
+ ):
+ """Test build_extra_body includes usage parameter for OpenRouter providers"""
+ adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
+
+ # Create a mock provider with the specified name and minimal required attributes
+ mock_provider = Mock()
+ mock_provider.name = provider_name
+ mock_provider.thinking_level = None
+ mock_provider.require_openrouter_reasoning = False
+ mock_provider.anthropic_extended_thinking = False
+ mock_provider.r1_openrouter_options = False
+ mock_provider.logprobs_openrouter_options = False
+ mock_provider.openrouter_skip_required_parameters = False
+
+ # Call build_extra_body
+ extra_body = adapter.build_extra_body(mock_provider)
+
+ # Verify the usage parameter is included only for OpenRouter
+ for key, value in expected_usage_param.items():
+ assert extra_body.get(key) == value
+
+ # Verify non-OpenRouter providers don't have the usage parameter
+ if provider_name != ModelProviderName.openrouter:
+ assert "usage" not in extra_body
+
+
  @pytest.mark.asyncio
  async def test_build_completion_kwargs_custom_temperature_top_p(config, mock_task):
  """Test build_completion_kwargs with custom temperature and top_p values"""
@@ -474,6 +511,17 @@ async def test_build_completion_kwargs(
  ({"prompt_tokens": 10}, None, None),
  # Invalid cost type (should be ignored)
  (None, "0.5", None),
+ # Cost in OpenRouter format
+ (
+ litellm.types.utils.Usage(
+ prompt_tokens=10,
+ completion_tokens=20,
+ total_tokens=30,
+ cost=0.5,
+ ),
+ None,
+ Usage(input_tokens=10, output_tokens=20, total_tokens=30, cost=0.5),
+ ),
  ],
  )
  def test_usage_from_response(config, mock_task, litellm_usage, cost, expected_usage):