kiln-ai 0.19.0__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +2 -2
- kiln_ai/adapters/adapter_registry.py +19 -1
- kiln_ai/adapters/chat/chat_formatter.py +8 -12
- kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
- kiln_ai/adapters/docker_model_runner_tools.py +119 -0
- kiln_ai/adapters/eval/base_eval.py +2 -2
- kiln_ai/adapters/eval/eval_runner.py +3 -1
- kiln_ai/adapters/eval/g_eval.py +2 -2
- kiln_ai/adapters/eval/test_base_eval.py +1 -1
- kiln_ai/adapters/eval/test_g_eval.py +3 -4
- kiln_ai/adapters/fine_tune/__init__.py +1 -1
- kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
- kiln_ai/adapters/ml_model_list.py +380 -34
- kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
- kiln_ai/adapters/model_adapters/litellm_adapter.py +383 -79
- kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +406 -1
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
- kiln_ai/adapters/model_adapters/test_structured_output.py +110 -4
- kiln_ai/adapters/parsers/__init__.py +1 -1
- kiln_ai/adapters/provider_tools.py +15 -1
- kiln_ai/adapters/repair/test_repair_task.py +12 -9
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +80 -1
- kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
- kiln_ai/adapters/test_ml_model_list.py +39 -1
- kiln_ai/adapters/test_prompt_adaptors.py +13 -6
- kiln_ai/adapters/test_provider_tools.py +55 -0
- kiln_ai/adapters/test_remote_config.py +98 -0
- kiln_ai/datamodel/__init__.py +23 -21
- kiln_ai/datamodel/datamodel_enums.py +1 -0
- kiln_ai/datamodel/eval.py +1 -1
- kiln_ai/datamodel/external_tool_server.py +298 -0
- kiln_ai/datamodel/json_schema.py +25 -10
- kiln_ai/datamodel/project.py +8 -1
- kiln_ai/datamodel/registry.py +0 -15
- kiln_ai/datamodel/run_config.py +62 -0
- kiln_ai/datamodel/task.py +2 -77
- kiln_ai/datamodel/task_output.py +6 -1
- kiln_ai/datamodel/task_run.py +41 -0
- kiln_ai/datamodel/test_basemodel.py +3 -3
- kiln_ai/datamodel/test_example_models.py +175 -0
- kiln_ai/datamodel/test_external_tool_server.py +691 -0
- kiln_ai/datamodel/test_registry.py +8 -3
- kiln_ai/datamodel/test_task.py +15 -47
- kiln_ai/datamodel/test_tool_id.py +239 -0
- kiln_ai/datamodel/tool_id.py +83 -0
- kiln_ai/tools/__init__.py +8 -0
- kiln_ai/tools/base_tool.py +82 -0
- kiln_ai/tools/built_in_tools/__init__.py +13 -0
- kiln_ai/tools/built_in_tools/math_tools.py +124 -0
- kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
- kiln_ai/tools/mcp_server_tool.py +95 -0
- kiln_ai/tools/mcp_session_manager.py +243 -0
- kiln_ai/tools/test_base_tools.py +199 -0
- kiln_ai/tools/test_mcp_server_tool.py +457 -0
- kiln_ai/tools/test_mcp_session_manager.py +1585 -0
- kiln_ai/tools/test_tool_registry.py +473 -0
- kiln_ai/tools/tool_registry.py +64 -0
- kiln_ai/utils/config.py +22 -0
- kiln_ai/utils/open_ai_types.py +94 -0
- kiln_ai/utils/project_utils.py +17 -0
- kiln_ai/utils/test_config.py +138 -1
- kiln_ai/utils/test_open_ai_types.py +131 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +6 -5
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/RECORD +70 -47
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -25,8 +25,11 @@ from kiln_ai.datamodel import (
|
|
|
25
25
|
)
|
|
26
26
|
from kiln_ai.datamodel.datamodel_enums import ChatStrategy
|
|
27
27
|
from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
|
|
28
|
-
from kiln_ai.datamodel.task import
|
|
28
|
+
from kiln_ai.datamodel.task import RunConfigProperties
|
|
29
|
+
from kiln_ai.tools import KilnToolInterface
|
|
30
|
+
from kiln_ai.tools.tool_registry import tool_from_id
|
|
29
31
|
from kiln_ai.utils.config import Config
|
|
32
|
+
from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
|
|
30
33
|
|
|
31
34
|
|
|
32
35
|
@dataclass
|
|
@@ -48,33 +51,24 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
48
51
|
This abstract class provides the foundation for implementing model-specific adapters
|
|
49
52
|
that can process tasks with structured or unstructured inputs/outputs. It handles
|
|
50
53
|
input/output validation, prompt building, and run tracking.
|
|
51
|
-
|
|
52
|
-
Attributes:
|
|
53
|
-
prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
|
|
54
|
-
kiln_task (Task): The task configuration and metadata
|
|
55
|
-
output_schema (dict | None): JSON schema for validating structured outputs
|
|
56
|
-
input_schema (dict | None): JSON schema for validating structured inputs
|
|
57
54
|
"""
|
|
58
55
|
|
|
59
56
|
def __init__(
|
|
60
57
|
self,
|
|
61
|
-
|
|
58
|
+
task: Task,
|
|
59
|
+
run_config: RunConfigProperties,
|
|
62
60
|
config: AdapterConfig | None = None,
|
|
63
61
|
):
|
|
62
|
+
self.task = task
|
|
64
63
|
self.run_config = run_config
|
|
65
64
|
self.update_run_config_unknown_structured_output_mode()
|
|
66
|
-
self.prompt_builder = prompt_builder_from_id(
|
|
67
|
-
run_config.prompt_id, run_config.task
|
|
68
|
-
)
|
|
65
|
+
self.prompt_builder = prompt_builder_from_id(run_config.prompt_id, task)
|
|
69
66
|
self._model_provider: KilnModelProvider | None = None
|
|
70
67
|
|
|
71
|
-
self.output_schema =
|
|
72
|
-
self.input_schema =
|
|
68
|
+
self.output_schema = task.output_json_schema
|
|
69
|
+
self.input_schema = task.input_json_schema
|
|
73
70
|
self.base_adapter_config = config or AdapterConfig()
|
|
74
71
|
|
|
75
|
-
def task(self) -> Task:
|
|
76
|
-
return self.run_config.task
|
|
77
|
-
|
|
78
72
|
def model_provider(self) -> KilnModelProvider:
|
|
79
73
|
"""
|
|
80
74
|
Lazy load the model provider for this adapter.
|
|
@@ -152,7 +146,11 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
152
146
|
f"response is not a string for non-structured task: {parsed_output.output}"
|
|
153
147
|
)
|
|
154
148
|
|
|
155
|
-
# Validate reasoning content is present
|
|
149
|
+
# Validate reasoning content is present and required
|
|
150
|
+
# We don't require reasoning when using tools as models tend not to return any on the final turn (both Sonnet and Gemini).
|
|
151
|
+
trace_has_toolcalls = parsed_output.trace is not None and any(
|
|
152
|
+
message.get("role", None) == "tool" for message in parsed_output.trace
|
|
153
|
+
)
|
|
156
154
|
if (
|
|
157
155
|
provider.reasoning_capable
|
|
158
156
|
and (
|
|
@@ -163,19 +161,22 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
163
161
|
provider.reasoning_optional_for_structured_output
|
|
164
162
|
and self.has_structured_output()
|
|
165
163
|
)
|
|
164
|
+
and not (trace_has_toolcalls)
|
|
166
165
|
):
|
|
167
166
|
raise RuntimeError(
|
|
168
167
|
"Reasoning is required for this model, but no reasoning was returned."
|
|
169
168
|
)
|
|
170
169
|
|
|
171
170
|
# Generate the run and output
|
|
172
|
-
run = self.generate_run(
|
|
171
|
+
run = self.generate_run(
|
|
172
|
+
input, input_source, parsed_output, usage, run_output.trace
|
|
173
|
+
)
|
|
173
174
|
|
|
174
175
|
# Save the run if configured to do so, and we have a path to save to
|
|
175
176
|
if (
|
|
176
177
|
self.base_adapter_config.allow_saving
|
|
177
178
|
and Config.shared().autosave_runs
|
|
178
|
-
and self.task
|
|
179
|
+
and self.task.path is not None
|
|
179
180
|
):
|
|
180
181
|
run.save_to_file()
|
|
181
182
|
else:
|
|
@@ -261,6 +262,7 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
261
262
|
input_source: DataSource | None,
|
|
262
263
|
run_output: RunOutput,
|
|
263
264
|
usage: Usage | None = None,
|
|
265
|
+
trace: list[ChatCompletionMessageParam] | None = None,
|
|
264
266
|
) -> TaskRun:
|
|
265
267
|
# Convert input and output to JSON strings if they are dictionaries
|
|
266
268
|
input_str = (
|
|
@@ -280,7 +282,7 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
280
282
|
)
|
|
281
283
|
|
|
282
284
|
new_task_run = TaskRun(
|
|
283
|
-
parent=self.task
|
|
285
|
+
parent=self.task,
|
|
284
286
|
input=input_str,
|
|
285
287
|
input_source=input_source,
|
|
286
288
|
output=TaskOutput(
|
|
@@ -289,11 +291,13 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
289
291
|
source=DataSource(
|
|
290
292
|
type=DataSourceType.synthetic,
|
|
291
293
|
properties=self._properties_for_task_output(),
|
|
294
|
+
run_config=self.run_config,
|
|
292
295
|
),
|
|
293
296
|
),
|
|
294
297
|
intermediate_outputs=run_output.intermediate_outputs,
|
|
295
298
|
tags=self.base_adapter_config.default_tags or [],
|
|
296
299
|
usage=usage,
|
|
300
|
+
trace=trace,
|
|
297
301
|
)
|
|
298
302
|
|
|
299
303
|
return new_task_run
|
|
@@ -301,8 +305,10 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
301
305
|
def _properties_for_task_output(self) -> Dict[str, str | int | float]:
|
|
302
306
|
props = {}
|
|
303
307
|
|
|
304
|
-
# adapter info
|
|
305
308
|
props["adapter_name"] = self.adapter_name()
|
|
309
|
+
|
|
310
|
+
# Legacy properties where we save the run_config details into custom properties.
|
|
311
|
+
# These are now also saved in the run_config field.
|
|
306
312
|
props["model_name"] = self.run_config.model_name
|
|
307
313
|
props["model_provider"] = self.run_config.model_provider_name
|
|
308
314
|
props["prompt_id"] = self.run_config.prompt_id
|
|
@@ -325,3 +331,27 @@ class BaseAdapter(metaclass=ABCMeta):
|
|
|
325
331
|
)
|
|
326
332
|
new_run_config.structured_output_mode = structured_output_mode
|
|
327
333
|
self.run_config = new_run_config
|
|
334
|
+
|
|
335
|
+
async def available_tools(self) -> list[KilnToolInterface]:
|
|
336
|
+
tool_config = self.run_config.tools_config
|
|
337
|
+
if tool_config is None or tool_config.tools is None:
|
|
338
|
+
return []
|
|
339
|
+
|
|
340
|
+
project = self.task.parent_project()
|
|
341
|
+
if project is None:
|
|
342
|
+
raise ValueError("Task must have a parent project to resolve tools")
|
|
343
|
+
|
|
344
|
+
project_id = project.id
|
|
345
|
+
if project_id is None:
|
|
346
|
+
raise ValueError("Project must have an ID to resolve tools")
|
|
347
|
+
|
|
348
|
+
tools = [tool_from_id(tool_id, self.task) for tool_id in tool_config.tools]
|
|
349
|
+
|
|
350
|
+
# Check each tool has a unique name
|
|
351
|
+
tool_names = [await tool.name() for tool in tools]
|
|
352
|
+
if len(tool_names) != len(set(tool_names)):
|
|
353
|
+
raise ValueError(
|
|
354
|
+
"Each tool must have a unique name. Either de-select the duplicate tools, or modify their names to describe their unique purpose. Model will struggle if tools do not have descriptive names and tool execution will be undefined."
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
return tools
|