kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (54)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +234 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
  7. kiln_ai/adapters/eval/base_eval.py +8 -6
  8. kiln_ai/adapters/eval/eval_runner.py +4 -1
  9. kiln_ai/adapters/eval/g_eval.py +23 -5
  10. kiln_ai/adapters/eval/test_base_eval.py +166 -15
  11. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  12. kiln_ai/adapters/eval/test_g_eval.py +1 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +138 -272
  15. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  16. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
  17. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
  18. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  19. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  20. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
  21. kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
  22. kiln_ai/adapters/ml_model_list.py +80 -43
  23. kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
  24. kiln_ai/adapters/model_adapters/litellm_adapter.py +79 -97
  25. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  26. kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -60
  27. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +56 -21
  28. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
  29. kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
  30. kiln_ai/adapters/prompt_builders.py +0 -16
  31. kiln_ai/adapters/provider_tools.py +27 -9
  32. kiln_ai/adapters/repair/test_repair_task.py +24 -3
  33. kiln_ai/adapters/test_adapter_registry.py +88 -28
  34. kiln_ai/adapters/test_ml_model_list.py +158 -0
  35. kiln_ai/adapters/test_prompt_adaptors.py +17 -3
  36. kiln_ai/adapters/test_prompt_builders.py +3 -16
  37. kiln_ai/adapters/test_provider_tools.py +69 -20
  38. kiln_ai/datamodel/__init__.py +0 -2
  39. kiln_ai/datamodel/datamodel_enums.py +38 -13
  40. kiln_ai/datamodel/finetune.py +12 -7
  41. kiln_ai/datamodel/task.py +68 -7
  42. kiln_ai/datamodel/test_basemodel.py +2 -1
  43. kiln_ai/datamodel/test_dataset_split.py +0 -8
  44. kiln_ai/datamodel/test_models.py +33 -10
  45. kiln_ai/datamodel/test_task.py +168 -2
  46. kiln_ai/utils/config.py +3 -2
  47. kiln_ai/utils/dataset_import.py +1 -1
  48. kiln_ai/utils/logging.py +165 -0
  49. kiln_ai/utils/test_config.py +23 -0
  50. kiln_ai/utils/test_dataset_import.py +30 -0
  51. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
  52. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/RECORD +54 -49
  53. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
  54. {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/fine_tune/test_fireworks_finetune.py

@@ -13,11 +13,11 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
 from kiln_ai.datamodel import Finetune as FinetuneModel
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
 from kiln_ai.utils.config import Config

@@ -232,8 +232,8 @@ def mock_task():
 @pytest.mark.parametrize(
     "data_strategy,thinking_instructions",
     [
-        (FinetuneDataStrategy.final_and_intermediate, "thinking instructions"),
-        (FinetuneDataStrategy.final_only, None),
+        (ChatStrategy.two_message_cot, "thinking instructions"),
+        (ChatStrategy.single_turn, None),
     ],
 )
 async def test_generate_and_upload_jsonl_success(
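These test updates track a datamodel rename: FinetuneDataStrategy is replaced by ChatStrategy from kiln_ai.datamodel.datamodel_enums. From the substitutions visible in this diff, final_only maps to single_turn and final_and_intermediate maps to two_message_cot; a third member, single_turn_r1_thinking, appears in the base_adapter.py changes below. A minimal sketch of the new enum follows; the member names come from this diff, but the serialized string values are assumptions (a project may keep the old strings so saved datamodels still deserialize):

from enum import Enum

# Sketch only: member names from this diff; string values are assumed.
class ChatStrategy(str, Enum):
    single_turn = "final_only"  # replaces FinetuneDataStrategy.final_only
    two_message_cot = "final_and_intermediate"  # replaces FinetuneDataStrategy.final_and_intermediate
    single_turn_r1_thinking = "final_and_intermediate_r1_compatible"  # single-call thinking models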
kiln_ai/adapters/fine_tune/test_openai_finetune.py

@@ -12,11 +12,11 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
 from kiln_ai.datamodel import Finetune as FinetuneModel
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
 from kiln_ai.utils.config import Config

@@ -35,7 +35,7 @@ def openai_finetune(tmp_path):
             system_message="Test system message",
             fine_tune_model_id="ft-123",
             path=tmp_file,
-            data_strategy=FinetuneDataStrategy.final_only,
+            data_strategy=ChatStrategy.single_turn,
         ),
     )
     return finetune
@@ -247,7 +247,7 @@ async def test_generate_and_upload_jsonl_success(
     mock_formatter.dump_to_file.assert_called_once_with(
         "train",
         DatasetFormat.OPENAI_CHAT_JSONL,
-        FinetuneDataStrategy.final_only,
+        ChatStrategy.single_turn,
     )

     # Verify file was opened and uploaded
@@ -299,7 +299,7 @@ async def test_generate_and_upload_jsonl_schema_success(
     mock_formatter.dump_to_file.assert_called_once_with(
         "train",
         DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
-        FinetuneDataStrategy.final_only,
+        ChatStrategy.single_turn,
     )

     # Verify file was opened and uploaded
@@ -555,8 +555,8 @@ async def test_status_updates_latest_status(openai_finetune, mock_response):
 @pytest.mark.parametrize(
     "data_strategy,thinking_instructions",
     [
-        (FinetuneDataStrategy.final_and_intermediate, "Custom thinking instructions"),
-        (FinetuneDataStrategy.final_only, None),
+        (ChatStrategy.two_message_cot, "Custom thinking instructions"),
+        (ChatStrategy.single_turn, None),
     ],
 )
 async def test_generate_and_upload_jsonl_with_data_strategy(
kiln_ai/adapters/fine_tune/test_together_finetune.py

@@ -183,6 +183,7 @@ async def test_status_job_states(
     # Mock the retrieve method of the fine_tuning object
     mock_job = MagicMock()
     mock_job.status = together_status
+    mock_job.output_name = None
    mock_together_client.fine_tuning.retrieve.return_value = mock_job

     status = await together_finetune.status()
kiln_ai/adapters/fine_tune/test_vertex_finetune.py

@@ -12,11 +12,11 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.vertex_finetune import VertexFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
-    FinetuneDataStrategy,
     StructuredOutputMode,
     Task,
 )
 from kiln_ai.datamodel import Finetune as FinetuneModel
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
 from kiln_ai.utils.config import Config

@@ -35,7 +35,7 @@ def vertex_finetune(tmp_path):
             system_message="Test system message",
             fine_tune_model_id="ft-123",
             path=tmp_file,
-            data_strategy=FinetuneDataStrategy.final_only,
+            data_strategy=ChatStrategy.single_turn,
         ),
     )
     return finetune
@@ -252,8 +252,8 @@ async def test_status_model_id_update_exception(vertex_finetune, mock_response):
 @pytest.mark.parametrize(
     "data_strategy,thinking_instructions",
     [
-        (FinetuneDataStrategy.final_and_intermediate, "Custom thinking instructions"),
-        (FinetuneDataStrategy.final_only, None),
+        (ChatStrategy.two_message_cot, "Custom thinking instructions"),
+        (ChatStrategy.single_turn, None),
     ],
 )
 async def test_generate_and_upload_jsonl(
kiln_ai/adapters/fine_tune/together_finetune.py

@@ -66,6 +66,12 @@ class TogetherFinetune(BaseFinetuneAdapter):
         # retrieve the fine-tuning job
         together_finetune = self.client.fine_tuning.retrieve(id=fine_tuning_job_id)

+        # update the fine tune model ID if it has changed (sometimes it's not set at training time)
+        if self.datamodel.fine_tune_model_id != together_finetune.output_name:
+            self.datamodel.fine_tune_model_id = together_finetune.output_name
+            if self.datamodel.path:
+                self.datamodel.save_to_file()
+
         status = together_finetune.status
         if status in _pending_statuses:
             return FineTuneStatus(
@@ -135,8 +141,13 @@ class TogetherFinetune(BaseFinetuneAdapter):
             **self._build_finetune_parameters(),
         )

-        # 2 different IDs, output_name is the name of the model that results from the fine-tune job, the finetune_job_id is the ID of the fine-tune job
+        # 2 different IDs, output_name is the name of the model that results from the fine-tune job, while the id is the ID of the fine-tune job itself
+        if not together_finetune.id:
+            raise ValueError(
+                "Together failed to return a fine-tune job ID. While tuning job was dispatched, Kiln never received the ID so won't be able to reference it. Check for errors before dispatching more jobs."
+            )
         self.datamodel.provider_id = together_finetune.id
+        # Output name is sometimes returned here, and save it if it is. But it might be populated later by status call
         self.datamodel.fine_tune_model_id = together_finetune.output_name

         if self.datamodel.path:
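Two behavioral changes here: _start() now fails loudly if Together returns no job ID, and status() backfills fine_tune_model_id onto the datamodel when output_name arrives late. A minimal usage sketch of the backfill, assuming a constructed TogetherFinetune; the helper name, interval, and poll cap are hypothetical, not Kiln API:

import asyncio

# Hypothetical helper (not part of Kiln): poll status() until Together reports
# the output model name, which status() now writes back to the datamodel.
async def wait_for_model_id(finetune, interval_s: float = 30.0, max_polls: int = 120) -> str | None:
    for _ in range(max_polls):
        await finetune.status()  # refreshes datamodel.fine_tune_model_id per the hunk above
        if finetune.datamodel.fine_tune_model_id:
            return finetune.datamodel.fine_tune_model_id
        await asyncio.sleep(interval_s)
    return None  # still training, or the job never produced an output name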
kiln_ai/adapters/ml_model_list.py

@@ -3,7 +3,11 @@ from typing import Dict, List, Literal

 from pydantic import BaseModel

-from kiln_ai.datamodel import StructuredOutputMode
+from kiln_ai.datamodel.datamodel_enums import (
+    ChatStrategy,
+    ModelProviderName,
+    StructuredOutputMode,
+)

 """
 Provides model configuration and management for various LLM providers and models.
@@ -12,28 +16,6 @@ including configuration, validation, and instantiation of language models.
 """


-class ModelProviderName(str, Enum):
-    """
-    Enumeration of supported AI model providers.
-    """
-
-    openai = "openai"
-    groq = "groq"
-    amazon_bedrock = "amazon_bedrock"
-    ollama = "ollama"
-    openrouter = "openrouter"
-    fireworks_ai = "fireworks_ai"
-    kiln_fine_tune = "kiln_fine_tune"
-    kiln_custom_registry = "kiln_custom_registry"
-    openai_compatible = "openai_compatible"
-    anthropic = "anthropic"
-    gemini_api = "gemini_api"
-    azure_openai = "azure_openai"
-    huggingface = "huggingface"
-    vertex = "vertex"
-    together_ai = "together_ai"
-
-
 class ModelFamily(str, Enum):
     """
     Enumeration of supported model families/architectures.
@@ -102,6 +84,8 @@ class ModelName(str, Enum):
     claude_3_5_sonnet = "claude_3_5_sonnet"
     claude_3_7_sonnet = "claude_3_7_sonnet"
     claude_3_7_sonnet_thinking = "claude_3_7_sonnet_thinking"
+    claude_sonnet_4 = "claude_sonnet_4"
+    claude_opus_4 = "claude_opus_4"
     gemini_1_5_flash = "gemini_1_5_flash"
     gemini_1_5_flash_8b = "gemini_1_5_flash_8b"
     gemini_1_5_pro = "gemini_1_5_pro"
@@ -174,6 +158,7 @@ class KilnModelProvider(BaseModel):
         structured_output_mode: The mode we should use to call the model for structured output, if it was trained with structured output.
         parser: A parser to use for the model, if applicable
         reasoning_capable: Whether the model is designed to output thinking in a structured format (eg <think></think>). If so we don't use COT across 2 calls, and ask for thinking and final response in the same call.
+        tuned_chat_strategy: Used when a model is finetuned with a specific chat strategy, and it's best to use it at call time.
     """

     name: ModelProviderName
@@ -189,6 +174,7 @@ class KilnModelProvider(BaseModel):
     reasoning_capable: bool = False
     supports_logprobs: bool = False
     suggested_for_evals: bool = False
+    tuned_chat_strategy: ChatStrategy | None = None

     # TODO P1: Need a more generalized way to handle custom provider parameters.
     # Making them quite declarative here for now, isolating provider specific logic
@@ -666,14 +652,12 @@ built_in_models: List[KilnModel] = [
                 structured_output_mode=StructuredOutputMode.function_calling,
                 model_id="anthropic/claude-3.7-sonnet",
                 suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
             KilnModelProvider(
                 name=ModelProviderName.anthropic,
                 model_id="claude-3-7-sonnet-20250219",
                 structured_output_mode=StructuredOutputMode.function_calling,
                 suggested_for_data_gen=True,
-                suggested_for_evals=True,
             ),
         ],
     ),
@@ -700,6 +684,46 @@ built_in_models: List[KilnModel] = [
             ),
         ],
     ),
+    # Claude Sonnet 4
+    KilnModel(
+        family=ModelFamily.claude,
+        name=ModelName.claude_sonnet_4,
+        friendly_name="Claude Sonnet 4",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="anthropic/claude-sonnet-4",
+                structured_output_mode=StructuredOutputMode.function_calling,
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.anthropic,
+                model_id="claude-sonnet-4-20250514",
+                structured_output_mode=StructuredOutputMode.function_calling,
+                suggested_for_data_gen=True,
+                suggested_for_evals=True,
+            ),
+        ],
+    ),
+    # Claude Opus 4
+    KilnModel(
+        family=ModelFamily.claude,
+        name=ModelName.claude_opus_4,
+        friendly_name="Claude Opus 4",
+        providers=[
+            KilnModelProvider(
+                name=ModelProviderName.openrouter,
+                model_id="anthropic/claude-opus-4",
+                structured_output_mode=StructuredOutputMode.function_calling,
+            ),
+            KilnModelProvider(
+                name=ModelProviderName.anthropic,
+                model_id="claude-opus-4-20250514",
+                structured_output_mode=StructuredOutputMode.function_calling,
+            ),
+        ],
+    ),
     # Gemini 2.5 Pro
     KilnModel(
         family=ModelFamily.gemini,
@@ -920,7 +944,7 @@ built_in_models: List[KilnModel] = [
                 model_id="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
                 supports_data_gen=False,
                 structured_output_mode=StructuredOutputMode.function_calling_weak,
-                provider_finetune_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
+                provider_finetune_id="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
             ),
         ],
     ),
@@ -939,8 +963,7 @@ built_in_models: List[KilnModel] = [
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 supports_data_gen=False,
-                # Need to not pass "strict=True" to the function call to get this to work with logprobs for some reason. Openrouter issue.
-                structured_output_mode=StructuredOutputMode.function_calling_weak,
+                structured_output_mode=StructuredOutputMode.json_schema,
                 model_id="meta-llama/llama-3.1-70b-instruct",
                 supports_logprobs=True,
                 logprobs_openrouter_options=True,
@@ -961,7 +984,7 @@ built_in_models: List[KilnModel] = [
                 model_id="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
                 supports_data_gen=False,
                 structured_output_mode=StructuredOutputMode.function_calling_weak,
-                provider_finetune_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
+                provider_finetune_id="meta-llama/Meta-Llama-3.1-70B-Instruct-Reference",
             ),
         ],
     ),
@@ -1068,10 +1091,6 @@ built_in_models: List[KilnModel] = [
                 supports_structured_output=False,
                 supports_data_gen=False,
             ),
-            KilnModelProvider(
-                name=ModelProviderName.together_ai,
-                provider_finetune_id="meta-llama/Llama-3.2-1B-Instruct",
-            ),
         ],
     ),
     # Llama 3.2 3B
@@ -1108,7 +1127,6 @@ built_in_models: List[KilnModel] = [
                 model_id="meta-llama/Llama-3.2-3B-Instruct-Turbo",
                 supports_structured_output=False,
                 supports_data_gen=False,
-                provider_finetune_id="meta-llama/Llama-3.2-3B-Instruct",
             ),
         ],
     ),
@@ -1158,10 +1176,6 @@ built_in_models: List[KilnModel] = [
         name=ModelName.llama_3_2_90b,
         friendly_name="Llama 3.2 90B",
         providers=[
-            KilnModelProvider(
-                name=ModelProviderName.groq,
-                model_id="llama-3.2-90b-vision-preview",
-            ),
             KilnModelProvider(
                 name=ModelProviderName.openrouter,
                 structured_output_mode=StructuredOutputMode.json_instruction_and_object,
@@ -1434,11 +1448,6 @@ built_in_models: List[KilnModel] = [
                 structured_output_mode=StructuredOutputMode.json_instruction_and_object,
                 model_id="google/gemma-3-27b-it",
             ),
-            KilnModelProvider(
-                name=ModelProviderName.huggingface,
-                model_id="google/gemma-3-27b-it",
-                structured_output_mode=StructuredOutputMode.json_instructions,
-            ),
         ],
     ),
     # Mixtral 8x7B
@@ -2294,3 +2303,31 @@ def get_model_by_name(name: ModelName) -> KilnModel:
         if model.name == name:
             return model
     raise ValueError(f"Model {name} not found in the list of built-in models")
+
+
+def default_structured_output_mode_for_model_provider(
+    model_name: str,
+    provider: ModelProviderName,
+    default: StructuredOutputMode = StructuredOutputMode.default,
+    disallowed_modes: List[StructuredOutputMode] = [],
+) -> StructuredOutputMode:
+    """
+    We don't expose setting this manually in the UI, so pull a recommended mode from ml_model_list
+    """
+    try:
+        # Convert string to ModelName enum
+        model_name_enum = ModelName(model_name)
+        model = get_model_by_name(model_name_enum)
+    except (ValueError, KeyError):
+        # If model not found, return default
+        return default
+
+    # Find the provider within the model's providers
+    for model_provider in model.providers:
+        if model_provider.name == provider:
+            mode = model_provider.structured_output_mode
+            if mode not in disallowed_modes:
+                return mode
+
+    # If provider not found, return default
+    return default
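The new module-level helper gives callers a recommended structured output mode without exposing the choice in the UI. A short usage sketch, using only names that appear in this diff:

from kiln_ai.adapters.ml_model_list import (
    ModelProviderName,
    StructuredOutputMode,
    default_structured_output_mode_for_model_provider,
)

# Known model and provider from this diff: resolves to function_calling.
mode = default_structured_output_mode_for_model_provider(
    "claude_sonnet_4", ModelProviderName.anthropic
)

# Unknown model names fall back to the caller's default.
fallback = default_structured_output_mode_for_model_provider(
    "not_a_model", ModelProviderName.openai, default=StructuredOutputMode.json_schema
)

# disallowed_modes filters the recommendation; if the provider's mode is
# disallowed, the helper falls through to the default.
filtered = default_structured_output_mode_for_model_provider(
    "claude_sonnet_4",
    ModelProviderName.anthropic,
    disallowed_modes=[StructuredOutputMode.function_calling],
)  # returns StructuredOutputMode.default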
kiln_ai/adapters/model_adapters/base_adapter.py

@@ -1,11 +1,17 @@
 import json
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, Literal, Tuple
+from typing import Dict, Tuple

-import jsonschema
-
-from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
+from kiln_ai.adapters.chat.chat_formatter import (
+    ChatFormatter,
+    get_chat_formatter,
+)
+from kiln_ai.adapters.ml_model_list import (
+    KilnModelProvider,
+    StructuredOutputMode,
+    default_structured_output_mode_for_model_provider,
+)
 from kiln_ai.adapters.parsers.json_parser import parse_json_string
 from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
 from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
@@ -20,6 +26,7 @@ from kiln_ai.datamodel import (
     TaskRun,
     Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
 from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.config import Config
@@ -38,9 +45,6 @@ class AdapterConfig:
     default_tags: list[str] | None = None


-COT_FINAL_ANSWER_PROMPT = "Considering the above, return a final result."
-
-
 class BaseAdapter(metaclass=ABCMeta):
     """Base class for AI model adapters that handle task execution.

@@ -61,6 +65,7 @@ class BaseAdapter(metaclass=ABCMeta):
         config: AdapterConfig | None = None,
     ):
         self.run_config = run_config
+        self.update_run_config_unknown_structured_output_mode()
         self.prompt_builder = prompt_builder_from_id(
             run_config.prompt_id, run_config.task
         )
@@ -188,10 +193,10 @@ class BaseAdapter(metaclass=ABCMeta):

     def build_prompt(self) -> str:
         # The prompt builder needs to know if we want to inject formatting instructions
-        provider = self.model_provider()
+        structured_output_mode = self.run_config.structured_output_mode
         add_json_instructions = self.has_structured_output() and (
-            provider.structured_output_mode == StructuredOutputMode.json_instructions
-            or provider.structured_output_mode
+            structured_output_mode == StructuredOutputMode.json_instructions
+            or structured_output_mode
             == StructuredOutputMode.json_instruction_and_object
         )

@@ -199,26 +204,51 @@ class BaseAdapter(metaclass=ABCMeta):
             include_json_instructions=add_json_instructions
         )

-    def run_strategy(
-        self,
-    ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
-        # Determine the run strategy for COT prompting. 3 options:
-        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instuctions as a message.
-        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
-        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
+    def build_chat_formatter(self, input: Dict | str) -> ChatFormatter:
+        # Determine the chat strategy to use based on the prompt the user selected, the model's capabilities, and if the model was finetuned with a specific chat strategy.
+
         cot_prompt = self.prompt_builder.chain_of_thought_prompt()
-        reasoning_capable = self.model_provider().reasoning_capable
+        system_message = self.build_prompt()
+
+        # If no COT prompt, use the single turn strategy. Even when a tuned strategy is set, as the tuned strategy is either already single turn, or won't work without a COT prompt.
+        if not cot_prompt:
+            return get_chat_formatter(
+                strategy=ChatStrategy.single_turn,
+                system_message=system_message,
+                user_input=input,
+            )

-        if cot_prompt and reasoning_capable:
-            # 1: "Thinking" LLM designed to output thinking in a structured format
+        # Some models like finetunes are trained with a specific chat strategy. Use that.
+        # However, don't use that if it is single turn. The user selected a COT prompt, and we give explicit prompt selection priority over the tuned strategy.
+        tuned_chat_strategy = self.model_provider().tuned_chat_strategy
+        if tuned_chat_strategy and tuned_chat_strategy != ChatStrategy.single_turn:
+            return get_chat_formatter(
+                strategy=tuned_chat_strategy,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
+
+        # Pick the best chat strategy for the model given it has a cot prompt.
+        reasoning_capable = self.model_provider().reasoning_capable
+        if reasoning_capable:
+            # "Thinking" LLM designed to output thinking in a structured format. We'll use it's native format.
             # A simple message with the COT prompt appended to the message list is sufficient
-            return "cot_as_message", cot_prompt
-        elif cot_prompt:
-            # 2: Unstructured output with COT
-            # Two calls to separate the thinking from the final response
-            return "cot_two_call", cot_prompt
+            return get_chat_formatter(
+                strategy=ChatStrategy.single_turn_r1_thinking,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
         else:
-            return "basic", None
+            # Unstructured output with COT
+            # Two calls to separate the thinking from the final response
+            return get_chat_formatter(
+                strategy=ChatStrategy.two_message_cot,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )

     # create a run and task output
     def generate_run(
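run_strategy()'s three string-keyed strategies are replaced by ChatFormatter construction with an explicit priority order. Restated standalone for clarity (this mirrors the hunk above for illustration; it is not a Kiln API):

from kiln_ai.datamodel.datamodel_enums import ChatStrategy

# Restatement of build_chat_formatter's selection order.
def select_chat_strategy(
    cot_prompt: str | None,
    tuned_chat_strategy: ChatStrategy | None,
    reasoning_capable: bool,
) -> ChatStrategy:
    if not cot_prompt:
        # No chain-of-thought prompt: single turn, even for tuned models.
        return ChatStrategy.single_turn
    if tuned_chat_strategy and tuned_chat_strategy != ChatStrategy.single_turn:
        # Finetunes keep the strategy they were trained with.
        return tuned_chat_strategy
    if reasoning_capable:
        # R1-style models emit thinking natively in a single call.
        return ChatStrategy.single_turn_r1_thinking
    # Otherwise: two calls, separating thinking from the final answer.
    return ChatStrategy.two_message_cot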
@@ -272,5 +302,22 @@
         props["model_name"] = self.run_config.model_name
         props["model_provider"] = self.run_config.model_provider_name
         props["prompt_id"] = self.run_config.prompt_id
+        props["structured_output_mode"] = self.run_config.structured_output_mode
+        props["temperature"] = self.run_config.temperature
+        props["top_p"] = self.run_config.top_p

         return props
+
+    def update_run_config_unknown_structured_output_mode(self) -> None:
+        structured_output_mode = self.run_config.structured_output_mode
+
+        # Old datamodels didn't save the structured output mode. Some clients (tests, end users) might not set it.
+        # Look up our recommended mode from ml_model_list if we have one
+        if structured_output_mode == StructuredOutputMode.unknown:
+            new_run_config = self.run_config.model_copy(deep=True)
+            structured_output_mode = default_structured_output_mode_for_model_provider(
+                self.run_config.model_name,
+                self.run_config.model_provider_name,
+            )
+            new_run_config.structured_output_mode = structured_output_mode
+            self.run_config = new_run_config
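The new __init__ hook resolves legacy RunConfigs whose structured_output_mode was never persisted. A sketch of the copy-then-mutate pattern it uses (the function name is illustrative; the adapter does this inline on itself):

from kiln_ai.adapters.ml_model_list import (
    StructuredOutputMode,
    default_structured_output_mode_for_model_provider,
)
from kiln_ai.datamodel.task import RunConfig

# Illustrative standalone version of the hook: swap in a deep copy rather
# than mutating the caller's RunConfig in place.
def resolve_unknown_mode(run_config: RunConfig) -> RunConfig:
    if run_config.structured_output_mode != StructuredOutputMode.unknown:
        return run_config
    new_config = run_config.model_copy(deep=True)
    new_config.structured_output_mode = default_structured_output_mode_for_model_provider(
        run_config.model_name,
        run_config.model_provider_name,
    )
    return new_config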