kiln-ai 0.8.1__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +7 -7
- kiln_ai/adapters/adapter_registry.py +77 -5
- kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
- kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
- kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
- kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
- kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
- kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +113 -21
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
- kiln_ai/adapters/ml_model_list.py +323 -94
- kiln_ai/adapters/model_adapters/__init__.py +18 -0
- kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
- kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
- kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
- kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
- kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
- kiln_ai/adapters/parsers/__init__.py +10 -0
- kiln_ai/adapters/parsers/base_parser.py +12 -0
- kiln_ai/adapters/parsers/json_parser.py +37 -0
- kiln_ai/adapters/parsers/parser_registry.py +19 -0
- kiln_ai/adapters/parsers/r1_parser.py +69 -0
- kiln_ai/adapters/parsers/test_json_parser.py +81 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
- kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
- kiln_ai/adapters/prompt_builders.py +126 -20
- kiln_ai/adapters/provider_tools.py +91 -36
- kiln_ai/adapters/repair/repair_task.py +17 -6
- kiln_ai/adapters/repair/test_repair_task.py +4 -4
- kiln_ai/adapters/run_output.py +8 -0
- kiln_ai/adapters/test_adapter_registry.py +177 -0
- kiln_ai/adapters/test_generate_docs.py +69 -0
- kiln_ai/adapters/test_prompt_adaptors.py +8 -4
- kiln_ai/adapters/test_prompt_builders.py +190 -29
- kiln_ai/adapters/test_provider_tools.py +268 -46
- kiln_ai/datamodel/__init__.py +193 -12
- kiln_ai/datamodel/basemodel.py +31 -11
- kiln_ai/datamodel/json_schema.py +8 -3
- kiln_ai/datamodel/model_cache.py +8 -3
- kiln_ai/datamodel/test_basemodel.py +81 -2
- kiln_ai/datamodel/test_dataset_split.py +100 -3
- kiln_ai/datamodel/test_example_models.py +25 -4
- kiln_ai/datamodel/test_model_cache.py +24 -0
- kiln_ai/datamodel/test_model_perf.py +125 -0
- kiln_ai/datamodel/test_models.py +129 -0
- kiln_ai/utils/exhaustive_error.py +6 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
- kiln_ai-0.11.1.dist-info/RECORD +76 -0
- kiln_ai-0.8.1.dist-info/RECORD +0 -58
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/__init__.py
CHANGED
|
@@ -3,31 +3,31 @@
|
|
|
3
3
|
|
|
4
4
|
Adapters are used to connect Kiln to external systems, or to add new functionality to Kiln.
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
Model adapters are used to call AI models, like Ollama, OpenAI, etc.
|
|
7
7
|
|
|
8
8
|
The ml_model_list submodule contains a list of models that can be used for machine learning tasks. More can easily be added, but we keep a list here of models that are known to work well with Kiln's structured data and tool calling systems.
|
|
9
9
|
|
|
10
10
|
The prompt_builders submodule contains classes that build prompts for use with the AI agents.
|
|
11
11
|
|
|
12
12
|
The repair submodule contains an adapter for the repair task.
|
|
13
|
+
|
|
14
|
+
The parser submodule contains parsers for the output of the AI models.
|
|
13
15
|
"""
|
|
14
16
|
|
|
15
17
|
from . import (
|
|
16
|
-
base_adapter,
|
|
17
18
|
data_gen,
|
|
18
19
|
fine_tune,
|
|
19
|
-
langchain_adapters,
|
|
20
20
|
ml_model_list,
|
|
21
|
+
model_adapters,
|
|
21
22
|
prompt_builders,
|
|
22
23
|
repair,
|
|
23
24
|
)
|
|
24
25
|
|
|
25
26
|
__all__ = [
|
|
26
|
-
"
|
|
27
|
-
"
|
|
27
|
+
"model_adapters",
|
|
28
|
+
"data_gen",
|
|
29
|
+
"fine_tune",
|
|
28
30
|
"ml_model_list",
|
|
29
31
|
"prompt_builders",
|
|
30
32
|
"repair",
|
|
31
|
-
"data_gen",
|
|
32
|
-
"fine_tune",
|
|
33
33
|
]
|
|
@@ -1,17 +1,89 @@
|
|
|
1
|
+
from os import getenv
|
|
2
|
+
|
|
1
3
|
from kiln_ai import datamodel
|
|
2
|
-
from kiln_ai.adapters.
|
|
3
|
-
from kiln_ai.adapters.
|
|
4
|
+
from kiln_ai.adapters.ml_model_list import ModelProviderName
|
|
5
|
+
from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
|
|
6
|
+
from kiln_ai.adapters.model_adapters.langchain_adapters import LangchainAdapter
|
|
7
|
+
from kiln_ai.adapters.model_adapters.openai_model_adapter import (
|
|
8
|
+
OpenAICompatibleAdapter,
|
|
9
|
+
OpenAICompatibleConfig,
|
|
10
|
+
)
|
|
4
11
|
from kiln_ai.adapters.prompt_builders import BasePromptBuilder
|
|
12
|
+
from kiln_ai.adapters.provider_tools import core_provider, openai_compatible_config
|
|
13
|
+
from kiln_ai.utils.config import Config
|
|
14
|
+
from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
|
|
5
15
|
|
|
6
16
|
|
|
7
17
|
def adapter_for_task(
|
|
8
18
|
kiln_task: datamodel.Task,
|
|
9
|
-
model_name: str
|
|
10
|
-
provider:
|
|
19
|
+
model_name: str,
|
|
20
|
+
provider: ModelProviderName,
|
|
11
21
|
prompt_builder: BasePromptBuilder | None = None,
|
|
12
22
|
tags: list[str] | None = None,
|
|
13
23
|
) -> BaseAdapter:
|
|
14
|
-
#
|
|
24
|
+
# Get the provider to run. For things like the fine-tune provider, we want to run the underlying provider
|
|
25
|
+
core_provider_name = core_provider(model_name, provider)
|
|
26
|
+
|
|
27
|
+
match core_provider_name:
|
|
28
|
+
case ModelProviderName.openrouter:
|
|
29
|
+
return OpenAICompatibleAdapter(
|
|
30
|
+
kiln_task=kiln_task,
|
|
31
|
+
config=OpenAICompatibleConfig(
|
|
32
|
+
base_url=getenv("OPENROUTER_BASE_URL")
|
|
33
|
+
or "https://openrouter.ai/api/v1",
|
|
34
|
+
api_key=Config.shared().open_router_api_key,
|
|
35
|
+
model_name=model_name,
|
|
36
|
+
provider_name=provider,
|
|
37
|
+
openrouter_style_reasoning=True,
|
|
38
|
+
default_headers={
|
|
39
|
+
"HTTP-Referer": "https://getkiln.ai/openrouter",
|
|
40
|
+
"X-Title": "KilnAI",
|
|
41
|
+
},
|
|
42
|
+
),
|
|
43
|
+
prompt_builder=prompt_builder,
|
|
44
|
+
tags=tags,
|
|
45
|
+
)
|
|
46
|
+
case ModelProviderName.openai:
|
|
47
|
+
return OpenAICompatibleAdapter(
|
|
48
|
+
kiln_task=kiln_task,
|
|
49
|
+
config=OpenAICompatibleConfig(
|
|
50
|
+
api_key=Config.shared().open_ai_api_key,
|
|
51
|
+
model_name=model_name,
|
|
52
|
+
provider_name=provider,
|
|
53
|
+
),
|
|
54
|
+
prompt_builder=prompt_builder,
|
|
55
|
+
tags=tags,
|
|
56
|
+
)
|
|
57
|
+
case ModelProviderName.openai_compatible:
|
|
58
|
+
config = openai_compatible_config(model_name)
|
|
59
|
+
return OpenAICompatibleAdapter(
|
|
60
|
+
kiln_task=kiln_task,
|
|
61
|
+
config=config,
|
|
62
|
+
prompt_builder=prompt_builder,
|
|
63
|
+
tags=tags,
|
|
64
|
+
)
|
|
65
|
+
# Use LangchainAdapter for the rest
|
|
66
|
+
case ModelProviderName.groq:
|
|
67
|
+
pass
|
|
68
|
+
case ModelProviderName.amazon_bedrock:
|
|
69
|
+
pass
|
|
70
|
+
case ModelProviderName.ollama:
|
|
71
|
+
pass
|
|
72
|
+
case ModelProviderName.fireworks_ai:
|
|
73
|
+
pass
|
|
74
|
+
# These are virtual providers that should have mapped to an actual provider in core_provider
|
|
75
|
+
case ModelProviderName.kiln_fine_tune:
|
|
76
|
+
raise ValueError(
|
|
77
|
+
"Fine tune is not a supported core provider. It should map to an actual provider."
|
|
78
|
+
)
|
|
79
|
+
case ModelProviderName.kiln_custom_registry:
|
|
80
|
+
raise ValueError(
|
|
81
|
+
"Custom openai compatible provider is not a supported core provider. It should map to an actual provider."
|
|
82
|
+
)
|
|
83
|
+
case _:
|
|
84
|
+
raise_exhaustive_enum_error(core_provider_name)
|
|
85
|
+
|
|
86
|
+
# We use langchain for all others right now, but moving off it as we touch anything.
|
|
15
87
|
return LangchainAdapter(
|
|
16
88
|
kiln_task,
|
|
17
89
|
model_name=model_name,
|
|
@@ -55,7 +55,7 @@ class DataGenCategoriesTaskInput(BaseModel):
|
|
|
55
55
|
num_subtopics=num_subtopics,
|
|
56
56
|
human_guidance=human_guidance,
|
|
57
57
|
existing_topics=existing_topics,
|
|
58
|
-
system_prompt=prompt_builder.build_prompt(),
|
|
58
|
+
system_prompt=prompt_builder.build_prompt(include_json_instructions=False),
|
|
59
59
|
)
|
|
60
60
|
|
|
61
61
|
|
|
@@ -132,7 +132,7 @@ class DataGenSampleTaskInput(BaseModel):
|
|
|
132
132
|
topic=topic,
|
|
133
133
|
num_samples=num_samples,
|
|
134
134
|
human_guidance=human_guidance,
|
|
135
|
-
system_prompt=prompt_builder.build_prompt(),
|
|
135
|
+
system_prompt=prompt_builder.build_prompt(include_json_instructions=False),
|
|
136
136
|
)
|
|
137
137
|
|
|
138
138
|
|
|
@@ -163,7 +163,7 @@ def list_json_schema_for_task(task: Task) -> str:
|
|
|
163
163
|
"required": ["generated_samples"],
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
-
return json.dumps(top_level_schema)
|
|
166
|
+
return json.dumps(top_level_schema, ensure_ascii=False)
|
|
167
167
|
|
|
168
168
|
|
|
169
169
|
class DataGenSampleTask(Task, parent_of={}):
|
|
@@ -180,7 +180,7 @@ def test_data_gen_sample_task_initialization(base_task):
|
|
|
180
180
|
}
|
|
181
181
|
|
|
182
182
|
|
|
183
|
-
def
|
|
183
|
+
def test_list_json_schema_for_task_with_input_schema(base_task):
|
|
184
184
|
# Arrange
|
|
185
185
|
base_task.input_json_schema = json.dumps(
|
|
186
186
|
{
|
|
@@ -202,9 +202,29 @@ def test_list_json_schema_for_task_with_output_schema(base_task):
|
|
|
202
202
|
assert generated_samples_schema["items"]["properties"]["age"]["type"] == "integer"
|
|
203
203
|
|
|
204
204
|
|
|
205
|
-
def
|
|
205
|
+
def test_list_json_schema_for_task_with_input_schema_non_ascii(base_task):
|
|
206
206
|
# Arrange
|
|
207
|
-
base_task.
|
|
207
|
+
base_task.input_json_schema = json.dumps(
|
|
208
|
+
{
|
|
209
|
+
"type": "object",
|
|
210
|
+
"properties": {
|
|
211
|
+
"名字": {"type": "string"},
|
|
212
|
+
"年齢": {"type": "integer"},
|
|
213
|
+
},
|
|
214
|
+
}
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Act
|
|
218
|
+
schema = list_json_schema_for_task(base_task)
|
|
219
|
+
|
|
220
|
+
# Assert
|
|
221
|
+
assert "名字" in schema
|
|
222
|
+
assert "年齢" in schema
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def test_list_json_schema_for_task_without_input_schema(base_task):
|
|
226
|
+
# Arrange
|
|
227
|
+
base_task.input_json_schema = None
|
|
208
228
|
|
|
209
229
|
# Act
|
|
210
230
|
schema = list_json_schema_for_task(base_task)
|
|
@@ -4,7 +4,7 @@ from typing import Literal
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
from kiln_ai.adapters.ml_model_list import built_in_models
|
|
7
|
-
from kiln_ai.datamodel import DatasetSplit, FineTuneStatusType
|
|
7
|
+
from kiln_ai.datamodel import DatasetSplit, FinetuneDataStrategy, FineTuneStatusType
|
|
8
8
|
from kiln_ai.datamodel import Finetune as FinetuneModel
|
|
9
9
|
from kiln_ai.utils.name_generator import generate_memorable_name
|
|
10
10
|
|
|
@@ -56,6 +56,8 @@ class BaseFinetuneAdapter(ABC):
|
|
|
56
56
|
provider_base_model_id: str,
|
|
57
57
|
train_split_name: str,
|
|
58
58
|
system_message: str,
|
|
59
|
+
thinking_instructions: str | None,
|
|
60
|
+
data_strategy: FinetuneDataStrategy,
|
|
59
61
|
parameters: dict[str, str | int | float | bool] = {},
|
|
60
62
|
name: str | None = None,
|
|
61
63
|
description: str | None = None,
|
|
@@ -100,7 +102,9 @@ class BaseFinetuneAdapter(ABC):
|
|
|
100
102
|
validation_split_name=validation_split_name,
|
|
101
103
|
parameters=parameters,
|
|
102
104
|
system_message=system_message,
|
|
105
|
+
thinking_instructions=thinking_instructions,
|
|
103
106
|
parent=parent_task,
|
|
107
|
+
data_strategy=data_strategy,
|
|
104
108
|
)
|
|
105
109
|
|
|
106
110
|
adapter = cls(datamodel)
|