kiln-ai 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +22 -44
- kiln_ai/adapters/chat/__init__.py +8 -0
- kiln_ai/adapters/chat/chat_formatter.py +233 -0
- kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
- kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
- kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
- kiln_ai/adapters/data_gen/test_data_gen_task.py +330 -40
- kiln_ai/adapters/eval/base_eval.py +7 -6
- kiln_ai/adapters/eval/eval_runner.py +9 -2
- kiln_ai/adapters/eval/g_eval.py +40 -17
- kiln_ai/adapters/eval/test_base_eval.py +174 -17
- kiln_ai/adapters/eval/test_eval_runner.py +3 -0
- kiln_ai/adapters/eval/test_g_eval.py +116 -5
- kiln_ai/adapters/fine_tune/base_finetune.py +3 -8
- kiln_ai/adapters/fine_tune/dataset_formatter.py +135 -273
- kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +3 -3
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +6 -11
- kiln_ai/adapters/fine_tune/together_finetune.py +13 -2
- kiln_ai/adapters/ml_model_list.py +370 -84
- kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
- kiln_ai/adapters/model_adapters/litellm_adapter.py +88 -97
- kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
- kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -61
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +104 -21
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
- kiln_ai/adapters/parsers/parser_registry.py +0 -2
- kiln_ai/adapters/parsers/r1_parser.py +0 -1
- kiln_ai/adapters/prompt_builders.py +0 -16
- kiln_ai/adapters/provider_tools.py +27 -9
- kiln_ai/adapters/remote_config.py +66 -0
- kiln_ai/adapters/repair/repair_task.py +1 -6
- kiln_ai/adapters/repair/test_repair_task.py +24 -3
- kiln_ai/adapters/test_adapter_registry.py +88 -28
- kiln_ai/adapters/test_ml_model_list.py +176 -0
- kiln_ai/adapters/test_prompt_adaptors.py +17 -7
- kiln_ai/adapters/test_prompt_builders.py +3 -16
- kiln_ai/adapters/test_provider_tools.py +69 -20
- kiln_ai/adapters/test_remote_config.py +100 -0
- kiln_ai/datamodel/__init__.py +0 -2
- kiln_ai/datamodel/datamodel_enums.py +38 -13
- kiln_ai/datamodel/eval.py +32 -0
- kiln_ai/datamodel/finetune.py +12 -8
- kiln_ai/datamodel/task.py +68 -7
- kiln_ai/datamodel/task_output.py +0 -2
- kiln_ai/datamodel/task_run.py +0 -2
- kiln_ai/datamodel/test_basemodel.py +2 -1
- kiln_ai/datamodel/test_dataset_split.py +0 -8
- kiln_ai/datamodel/test_eval_model.py +146 -4
- kiln_ai/datamodel/test_models.py +33 -10
- kiln_ai/datamodel/test_task.py +168 -2
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/dataset_import.py +1 -1
- kiln_ai/utils/logging.py +166 -0
- kiln_ai/utils/test_config.py +23 -0
- kiln_ai/utils/test_dataset_import.py +30 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
- kiln_ai-0.18.0.dist-info/RECORD +115 -0
- kiln_ai-0.16.0.dist-info/RECORD +0 -108
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/model_adapters/base_adapter.py

@@ -1,11 +1,17 @@
 import json
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
-from typing import Dict,
+from typing import Dict, Tuple
 
-import
-
-
+from kiln_ai.adapters.chat.chat_formatter import (
+    ChatFormatter,
+    get_chat_formatter,
+)
+from kiln_ai.adapters.ml_model_list import (
+    KilnModelProvider,
+    StructuredOutputMode,
+    default_structured_output_mode_for_model_provider,
+)
 from kiln_ai.adapters.parsers.json_parser import parse_json_string
 from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
 from kiln_ai.adapters.parsers.request_formatters import request_formatter_from_id
@@ -20,6 +26,7 @@ from kiln_ai.datamodel import (
     TaskRun,
     Usage,
 )
+from kiln_ai.datamodel.datamodel_enums import ChatStrategy
 from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
 from kiln_ai.datamodel.task import RunConfig
 from kiln_ai.utils.config import Config
@@ -38,9 +45,6 @@ class AdapterConfig:
     default_tags: list[str] | None = None
 
 
-COT_FINAL_ANSWER_PROMPT = "Considering the above, return a final result."
-
-
 class BaseAdapter(metaclass=ABCMeta):
     """Base class for AI model adapters that handle task execution.
 
@@ -61,6 +65,7 @@ class BaseAdapter(metaclass=ABCMeta):
         config: AdapterConfig | None = None,
     ):
         self.run_config = run_config
+        self.update_run_config_unknown_structured_output_mode()
         self.prompt_builder = prompt_builder_from_id(
             run_config.prompt_id, run_config.task
         )
@@ -188,10 +193,10 @@ class BaseAdapter(metaclass=ABCMeta):
 
     def build_prompt(self) -> str:
         # The prompt builder needs to know if we want to inject formatting instructions
-
+        structured_output_mode = self.run_config.structured_output_mode
         add_json_instructions = self.has_structured_output() and (
-
-            or
+            structured_output_mode == StructuredOutputMode.json_instructions
+            or structured_output_mode
             == StructuredOutputMode.json_instruction_and_object
         )
 
@@ -199,26 +204,51 @@ class BaseAdapter(metaclass=ABCMeta):
             include_json_instructions=add_json_instructions
         )
 
-    def
-
-
-        # Determine the run strategy for COT prompting. 3 options:
-        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instuctions as a message.
-        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
-        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
+    def build_chat_formatter(self, input: Dict | str) -> ChatFormatter:
+        # Determine the chat strategy to use based on the prompt the user selected, the model's capabilities, and if the model was finetuned with a specific chat strategy.
+
         cot_prompt = self.prompt_builder.chain_of_thought_prompt()
-
+        system_message = self.build_prompt()
+
+        # If no COT prompt, use the single turn strategy. Even when a tuned strategy is set, as the tuned strategy is either already single turn, or won't work without a COT prompt.
+        if not cot_prompt:
+            return get_chat_formatter(
+                strategy=ChatStrategy.single_turn,
+                system_message=system_message,
+                user_input=input,
+            )
 
-
-
+        # Some models like finetunes are trained with a specific chat strategy. Use that.
+        # However, don't use that if it is single turn. The user selected a COT prompt, and we give explicit prompt selection priority over the tuned strategy.
+        tuned_chat_strategy = self.model_provider().tuned_chat_strategy
+        if tuned_chat_strategy and tuned_chat_strategy != ChatStrategy.single_turn:
+            return get_chat_formatter(
+                strategy=tuned_chat_strategy,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
+
+        # Pick the best chat strategy for the model given it has a cot prompt.
+        reasoning_capable = self.model_provider().reasoning_capable
+        if reasoning_capable:
+            # "Thinking" LLM designed to output thinking in a structured format. We'll use it's native format.
             # A simple message with the COT prompt appended to the message list is sufficient
-            return
-
-
-
-
+            return get_chat_formatter(
+                strategy=ChatStrategy.single_turn_r1_thinking,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
         else:
-
+            # Unstructured output with COT
+            # Two calls to separate the thinking from the final response
+            return get_chat_formatter(
+                strategy=ChatStrategy.two_message_cot,
+                system_message=system_message,
+                user_input=input,
+                thinking_instructions=cot_prompt,
+            )
 
     # create a run and task output
     def generate_run(
@@ -272,5 +302,22 @@ class BaseAdapter(metaclass=ABCMeta):
         props["model_name"] = self.run_config.model_name
         props["model_provider"] = self.run_config.model_provider_name
         props["prompt_id"] = self.run_config.prompt_id
+        props["structured_output_mode"] = self.run_config.structured_output_mode
+        props["temperature"] = self.run_config.temperature
+        props["top_p"] = self.run_config.top_p
 
         return props
+
+    def update_run_config_unknown_structured_output_mode(self) -> None:
+        structured_output_mode = self.run_config.structured_output_mode
+
+        # Old datamodels didn't save the structured output mode. Some clients (tests, end users) might not set it.
+        # Look up our recommended mode from ml_model_list if we have one
+        if structured_output_mode == StructuredOutputMode.unknown:
+            new_run_config = self.run_config.model_copy(deep=True)
+            structured_output_mode = default_structured_output_mode_for_model_provider(
+                self.run_config.model_name,
+                self.run_config.model_provider_name,
+            )
+            new_run_config.structured_output_mode = structured_output_mode
+            self.run_config = new_run_config
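The `build_chat_formatter` method above replaces the old chain-of-thought run-strategy comments with an explicit `ChatStrategy` selection. As a reading aid, here is a condensed sketch of that decision, assuming only the enum values and provider flags shown in the diff; `pick_chat_strategy` is an illustrative name, not part of the package:

```python
from kiln_ai.datamodel.datamodel_enums import ChatStrategy


def pick_chat_strategy(
    cot_prompt: str | None,
    tuned_chat_strategy: ChatStrategy | None,
    reasoning_capable: bool,
) -> ChatStrategy:
    # No chain-of-thought prompt: plain single-turn, even if a tuned strategy exists.
    if not cot_prompt:
        return ChatStrategy.single_turn
    # A finetune's tuned strategy wins, unless it is single turn (the explicit COT prompt takes priority).
    if tuned_chat_strategy and tuned_chat_strategy != ChatStrategy.single_turn:
        return tuned_chat_strategy
    # Reasoning-capable models emit thinking natively, so one R1-style call is enough.
    if reasoning_capable:
        return ChatStrategy.single_turn_r1_thinking
    # Otherwise split thinking and the final answer across two calls.
    return ChatStrategy.two_message_cot
```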
kiln_ai/adapters/model_adapters/litellm_adapter.py

@@ -12,15 +12,13 @@ from kiln_ai.adapters.ml_model_list import (
     StructuredOutputMode,
 )
 from kiln_ai.adapters.model_adapters.base_adapter import (
-    COT_FINAL_ANSWER_PROMPT,
     AdapterConfig,
     BaseAdapter,
     RunOutput,
     Usage,
 )
 from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
-from kiln_ai.datamodel import
-from kiln_ai.datamodel.task import RunConfig
+from kiln_ai.datamodel.task import run_config_from_run_config_properties
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 
 logger = logging.getLogger(__name__)
@@ -31,7 +29,6 @@ class LiteLlmAdapter(BaseAdapter):
         self,
         config: LiteLlmConfig,
         kiln_task: datamodel.Task,
-        prompt_id: PromptId | None = None,
         base_adapter_config: AdapterConfig | None = None,
     ):
         self.config = config
@@ -40,11 +37,10 @@
         self._headers = config.default_headers
         self._litellm_model_id: str | None = None
 
-
+        # Create a RunConfig, adding the task to the RunConfigProperties
+        run_config = run_config_from_run_config_properties(
             task=kiln_task,
-
-            model_provider_name=config.provider_name,
-            prompt_id=prompt_id or PromptGenerators.SIMPLE,
+            run_config_properties=config.run_config_properties,
         )
 
         super().__init__(
@@ -57,79 +53,69 @@
         if not provider.model_id:
             raise ValueError("Model ID is required for OpenAI compatible models")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        # First call for chain of thought
-        # No response format as this request is for "thinking" in plain text
-        # No logprobs as only needed for final answer
+        chat_formatter = self.build_chat_formatter(input)
+
+        prior_output = None
+        prior_message = None
+        response = None
+        turns = 0
+        while True:
+            turns += 1
+            if turns > 10:
+                raise RuntimeError(
+                    "Too many turns. Stopping iteration to avoid using too many tokens."
+                )
+
+            turn = chat_formatter.next_turn(prior_output)
+            if turn is None:
+                break
+
+            skip_response_format = not turn.final_call
+            all_messages = chat_formatter.message_dicts()
             completion_kwargs = await self.build_completion_kwargs(
-                provider,
+                provider,
+                all_messages,
+                self.base_adapter_config.top_logprobs if turn.final_call else None,
+                skip_response_format,
             )
-
+            response = await litellm.acompletion(**completion_kwargs)
             if (
-                not isinstance(
-                or not
-                or len(
-                or not isinstance(
+                not isinstance(response, ModelResponse)
+                or not response.choices
+                or len(response.choices) == 0
+                or not isinstance(response.choices[0], Choices)
             ):
                 raise RuntimeError(
-                    f"Expected ModelResponse with Choices, got {type(
+                    f"Expected ModelResponse with Choices, got {type(response)}."
                 )
-
-
-            intermediate_outputs["chain_of_thought"] = cot_content
-
-            messages.extend(
-                [
-                    {"role": "assistant", "content": cot_content or ""},
-                    {"role": "user", "content": COT_FINAL_ANSWER_PROMPT},
-                ]
-            )
+            prior_message = response.choices[0].message
+            prior_output = prior_message.content
 
-
-
-
-
-
+            # Fallback: Use args of first tool call to task_response if it exists
+            if (
+                not prior_output
+                and hasattr(prior_message, "tool_calls")
+                and prior_message.tool_calls
+            ):
+                tool_call = next(
+                    (
+                        tool_call
+                        for tool_call in prior_message.tool_calls
+                        if tool_call.function.name == "task_response"
+                    ),
+                    None,
+                )
+                if tool_call:
+                    prior_output = tool_call.function.arguments
 
-
-
+            if not prior_output:
+                raise RuntimeError("No output returned from model")
 
-
-
-        if hasattr(response, "error") and response.__getattribute__("error"):
-            raise RuntimeError(
-                f"LLM API returned an error: {response.__getattribute__('error')}"
-            )
+        if response is None or prior_message is None:
+            raise RuntimeError("No response returned from model")
 
-
-            not response.choices
-            or len(response.choices) == 0
-            or not isinstance(response.choices[0], Choices)
-        ):
-            raise RuntimeError(
-                "No message content returned in the response from LLM API"
-            )
+        intermediate_outputs = chat_formatter.intermediate_outputs()
 
-        message = response.choices[0].message
         logprobs = (
             response.choices[0].logprobs
             if hasattr(response.choices[0], "logprobs")
@@ -143,31 +129,15 @@
 
         # Save reasoning if it exists and was parsed by LiteLLM (or openrouter, or anyone upstream)
         if (
-
-            and
-            and
+            prior_message is not None
+            and hasattr(prior_message, "reasoning_content")
+            and prior_message.reasoning_content
+            and len(prior_message.reasoning_content.strip()) > 0
         ):
-            intermediate_outputs["reasoning"] =
+            intermediate_outputs["reasoning"] = prior_message.reasoning_content.strip()
 
         # the string content of the response
-        response_content =
-
-        # Fallback: Use args of first tool call to task_response if it exists
-        if (
-            not response_content
-            and hasattr(message, "tool_calls")
-            and message.tool_calls
-        ):
-            tool_call = next(
-                (
-                    tool_call
-                    for tool_call in message.tool_calls
-                    if tool_call.function.name == "task_response"
-                ),
-                None,
-            )
-            if tool_call:
-                response_content = tool_call.function.arguments
+        response_content = prior_output
 
         if not isinstance(response_content, str):
             raise RuntimeError(f"response is not a string: {response_content}")
@@ -186,8 +156,9 @@
         if not self.has_structured_output():
             return {}
 
-
-
+        structured_output_mode = self.run_config.structured_output_mode
+
+        match structured_output_mode:
             case StructuredOutputMode.json_mode:
                 return {"response_format": {"type": "json_object"}}
             case StructuredOutputMode.json_schema:
@@ -206,16 +177,20 @@
                 # We set response_format to json_object and also set json instructions in the prompt
                 return {"response_format": {"type": "json_object"}}
             case StructuredOutputMode.default:
-
+                provider_name = self.run_config.model_provider_name
+                if provider_name == ModelProviderName.ollama:
                     # Ollama added json_schema to all models: https://ollama.com/blog/structured-outputs
                     return self.json_schema_response_format()
                 else:
                     # Default to function calling -- it's older than the other modes. Higher compatibility.
                     # Strict isn't widely supported yet, so we don't use it by default unless it's OpenAI.
-                    strict =
+                    strict = provider_name == ModelProviderName.openai
                     return self.tool_call_params(strict=strict)
+            case StructuredOutputMode.unknown:
+                # See above, but this case should never happen.
+                raise ValueError("Structured output mode is unknown.")
             case _:
-                raise_exhaustive_enum_error(
+                raise_exhaustive_enum_error(structured_output_mode)
 
     def json_schema_response_format(self) -> dict[str, Any]:
         output_schema = self.task().output_schema()
@@ -276,6 +251,10 @@
             "exclude": False,
         }
 
+        if provider.name == ModelProviderName.openrouter:
+            # Ask OpenRouter to include usage in the response (cost)
+            extra_body["usage"] = {"include": True}
+
         if provider.anthropic_extended_thinking:
             extra_body["thinking"] = {"type": "enabled", "budget_tokens": 4000}
 
@@ -387,6 +366,13 @@
             "messages": messages,
             "api_base": self._api_base,
             "headers": self._headers,
+            "temperature": self.run_config.temperature,
+            "top_p": self.run_config.top_p,
+            # This drops params that are not supported by the model. Only openai params like top_p, temperature -- not litellm params like model, etc.
+            # Not all models and providers support all openai params (for example, o3 doesn't support top_p)
+            # Better to ignore them than to fail the model call.
+            # https://docs.litellm.ai/docs/completion/input
+            "drop_params": True,
             **extra_body,
             **self._additional_body_options,
         }
@@ -404,7 +390,12 @@
 
     def usage_from_response(self, response: ModelResponse) -> Usage | None:
         litellm_usage = response.get("usage", None)
+
+        # LiteLLM isn't consistent in how it returns the cost.
         cost = response._hidden_params.get("response_cost", None)
+        if cost is None and litellm_usage:
+            cost = litellm_usage.get("cost", None)
+
         if not litellm_usage and not cost:
             return None
 
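With the chat formatter in place, the adapter no longer hard-codes a one- or two-call chain-of-thought flow; it loops over the turns the formatter yields, only requesting structured output and logprobs on the final call. Below is a minimal sketch of that loop, assuming the formatter API shown above (`next_turn`, `final_call`, `message_dicts`, `intermediate_outputs`); `call_model` and `run_formatter` are hypothetical stand-ins, with `call_model` playing the role of the `litellm.acompletion` call:

```python
async def run_formatter(chat_formatter, call_model, max_turns: int = 10):
    prior_output = None
    for _ in range(max_turns):
        turn = chat_formatter.next_turn(prior_output)
        if turn is None:
            break  # the formatter is done; prior_output holds the final answer
        # Only the final call needs structured output / logprobs.
        prior_output = await call_model(
            messages=chat_formatter.message_dicts(),
            final_call=turn.final_call,
        )
    else:
        raise RuntimeError("Too many turns. Stopping iteration to avoid using too many tokens.")
    if not prior_output:
        raise RuntimeError("No output returned from model")
    return prior_output, chat_formatter.intermediate_outputs()
```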
kiln_ai/adapters/model_adapters/litellm_config.py

@@ -1,10 +1,11 @@
 from dataclasses import dataclass, field
 
+from kiln_ai.datamodel.task import RunConfigProperties
+
 
 @dataclass
 class LiteLlmConfig:
-
-    provider_name: str
+    run_config_properties: RunConfigProperties
     # If set, over rides the provider-name based URL from litellm
     base_url: str | None = None
     # Headers to send with every request