kiln-ai 0.8.0__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +7 -7
- kiln_ai/adapters/adapter_registry.py +77 -5
- kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
- kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
- kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
- kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
- kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
- kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +113 -21
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
- kiln_ai/adapters/ml_model_list.py +323 -94
- kiln_ai/adapters/model_adapters/__init__.py +18 -0
- kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
- kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
- kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
- kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
- kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
- kiln_ai/adapters/parsers/__init__.py +10 -0
- kiln_ai/adapters/parsers/base_parser.py +12 -0
- kiln_ai/adapters/parsers/json_parser.py +37 -0
- kiln_ai/adapters/parsers/parser_registry.py +19 -0
- kiln_ai/adapters/parsers/r1_parser.py +69 -0
- kiln_ai/adapters/parsers/test_json_parser.py +81 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
- kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
- kiln_ai/adapters/prompt_builders.py +126 -20
- kiln_ai/adapters/provider_tools.py +91 -36
- kiln_ai/adapters/repair/repair_task.py +17 -6
- kiln_ai/adapters/repair/test_repair_task.py +4 -4
- kiln_ai/adapters/run_output.py +8 -0
- kiln_ai/adapters/test_adapter_registry.py +177 -0
- kiln_ai/adapters/test_generate_docs.py +69 -0
- kiln_ai/adapters/test_prompt_adaptors.py +8 -4
- kiln_ai/adapters/test_prompt_builders.py +190 -29
- kiln_ai/adapters/test_provider_tools.py +268 -46
- kiln_ai/datamodel/__init__.py +199 -12
- kiln_ai/datamodel/basemodel.py +31 -11
- kiln_ai/datamodel/json_schema.py +8 -3
- kiln_ai/datamodel/model_cache.py +8 -3
- kiln_ai/datamodel/test_basemodel.py +81 -2
- kiln_ai/datamodel/test_dataset_split.py +100 -3
- kiln_ai/datamodel/test_example_models.py +25 -4
- kiln_ai/datamodel/test_model_cache.py +24 -0
- kiln_ai/datamodel/test_model_perf.py +125 -0
- kiln_ai/datamodel/test_models.py +129 -0
- kiln_ai/utils/exhaustive_error.py +6 -0
- {kiln_ai-0.8.0.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
- kiln_ai-0.11.1.dist-info/RECORD +76 -0
- kiln_ai-0.8.0.dist-info/RECORD +0 -58
- {kiln_ai-0.8.0.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.8.0.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/fine_tune/openai_finetune.py

@@ -10,7 +10,7 @@ from kiln_ai.adapters.fine_tune.base_finetune import (
     FineTuneStatusType,
 )
 from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
-from kiln_ai.datamodel import DatasetSplit, Task
+from kiln_ai.datamodel import DatasetSplit, StructuredOutputMode, Task
 from kiln_ai.utils.config import Config
 
 oai_client = openai.AsyncOpenAI(

@@ -124,13 +124,18 @@ class OpenAIFinetune(BaseFinetuneAdapter):
         if not task:
             raise ValueError("Task is required to start a fine-tune")
 
+        # Use chat format for unstructured output, and JSON for formatted output (was previously function calls)
+        format = DatasetFormat.OPENAI_CHAT_JSONL
+        if task.output_json_schema:
+            format = DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL
+            self.datamodel.structured_output_mode = StructuredOutputMode.json_schema
         train_file_id = await self.generate_and_upload_jsonl(
-            dataset, self.datamodel.train_split_name, task
+            dataset, self.datamodel.train_split_name, task, format
         )
         validation_file_id = None
         if self.datamodel.validation_split_name:
             validation_file_id = await self.generate_and_upload_jsonl(
-                dataset, self.datamodel.validation_split_name, task
+                dataset, self.datamodel.validation_split_name, task, format
             )
 
         # Filter to hyperparameters which are set via the hyperparameters field (some like seed are set via the API)

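The core of this hunk is a format decision that previously lived inside generate_and_upload_jsonl (see the next hunk): structured tasks are now exported as JSON-schema chat data instead of tool calls, and the schema branch also records StructuredOutputMode.json_schema on the fine-tune datamodel. A minimal sketch of that selection pulled out into a standalone helper; the helper itself is illustrative and not part of the package, but every name in it appears in the diff above:

    from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat
    from kiln_ai.datamodel import Task

    def select_dataset_format(task: Task) -> DatasetFormat:
        # Illustrative helper: plain chat JSONL for unstructured tasks,
        # JSON-schema chat JSONL for tasks that declare an output schema.
        if task.output_json_schema:
            return DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL
        return DatasetFormat.OPENAI_CHAT_JSONL
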
@@ -156,16 +161,12 @@ class OpenAIFinetune(BaseFinetuneAdapter):
         return None
 
     async def generate_and_upload_jsonl(
-        self, dataset: DatasetSplit, split_name: str, task: Task
+        self, dataset: DatasetSplit, split_name: str, task: Task, format: DatasetFormat
     ) -> str:
-        formatter = DatasetFormatter(
-
-        format = (
-            DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL
-            if task.output_json_schema
-            else DatasetFormat.OPENAI_CHAT_JSONL
+        formatter = DatasetFormatter(
+            dataset, self.datamodel.system_message, self.datamodel.thinking_instructions
         )
-        path = formatter.dump_to_file(split_name, format)
+        path = formatter.dump_to_file(split_name, format, self.datamodel.data_strategy)
 
         response = await oai_client.files.create(
             file=open(path, "rb"),

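With the format now passed in from the caller, generate_and_upload_jsonl is reduced to formatting and uploading, and the DatasetFormatter call picks up two fields from the fine-tune datamodel: thinking_instructions at construction time and data_strategy when dumping. A hedged sketch of using the formatter directly, where `split` and `ft` are placeholder names for an existing DatasetSplit and Finetune record:

    from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter

    # `split` and `ft` are assumed to exist; argument order follows the diff above.
    formatter = DatasetFormatter(split, ft.system_message, ft.thinking_instructions)
    path = formatter.dump_to_file("train", DatasetFormat.OPENAI_CHAT_JSONL, ft.data_strategy)
    # dump_to_file returns the path of the generated JSONL file, which the adapter
    # then uploads via oai_client.files.create(file=open(path, "rb"), ...).
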
kiln_ai/adapters/fine_tune/test_base_finetune.py

@@ -4,6 +4,7 @@ import pytest
 
 from kiln_ai.adapters.fine_tune.base_finetune import (
     BaseFinetuneAdapter,
+    FinetuneDataStrategy,
     FineTuneParameter,
     FineTuneStatus,
     FineTuneStatusType,

@@ -154,6 +155,8 @@ async def test_create_and_start_success(mock_dataset):
         train_split_name="train",
         parameters={"epochs": 10},  # Required parameter
         system_message="Test system message",
+        data_strategy=FinetuneDataStrategy.final_only,
+        thinking_instructions=None,
     )
 
     assert isinstance(adapter, MockFinetune)

@@ -166,6 +169,8 @@ async def test_create_and_start_success(mock_dataset):
     assert datamodel.parameters == {"epochs": 10}
     assert datamodel.system_message == "Test system message"
     assert datamodel.path.exists()
+    assert datamodel.data_strategy == FinetuneDataStrategy.final_only
+    assert datamodel.thinking_instructions is None
 
 
 async def test_create_and_start_with_all_params(mock_dataset):

@@ -180,6 +185,8 @@ async def test_create_and_start_with_all_params(mock_dataset):
         description="Custom Description",
         validation_split_name="test",
         system_message="Test system message",
+        data_strategy=FinetuneDataStrategy.final_and_intermediate,
+        thinking_instructions="Custom thinking instructions",
     )
 
     assert datamodel.name == "Custom Name"

@@ -188,6 +195,8 @@ async def test_create_and_start_with_all_params(mock_dataset):
     assert datamodel.parameters == {"epochs": 10, "learning_rate": 0.001}
     assert datamodel.system_message == "Test system message"
     assert adapter.datamodel == datamodel
+    assert datamodel.data_strategy == FinetuneDataStrategy.final_and_intermediate
+    assert datamodel.thinking_instructions == "Custom thinking instructions"
 
     # load the datamodel from the file, confirm it's saved
     loaded_datamodel = FinetuneModel.load_from_file(datamodel.path)

@@ -204,6 +213,8 @@ async def test_create_and_start_invalid_parameters(mock_dataset):
         train_split_name="train",
         parameters={"learning_rate": 0.001},  # Missing required 'epochs'
         system_message="Test system message",
+        thinking_instructions=None,
+        data_strategy=FinetuneDataStrategy.final_only,
     )
 
 
@@ -222,6 +233,8 @@ async def test_create_and_start_no_parent_task():
         train_split_name="train",
         parameters={"epochs": 10},
         system_message="Test system message",
+        data_strategy=FinetuneDataStrategy.final_only,
+        thinking_instructions=None,
     )
 
 
@@ -243,6 +256,8 @@ async def test_create_and_start_no_parent_task_path():
         train_split_name="train",
         parameters={"epochs": 10},
         system_message="Test system message",
+        data_strategy=FinetuneDataStrategy.final_only,
+        thinking_instructions=None,
     )
 
 
@@ -269,6 +284,8 @@ async def test_create_and_start_invalid_train_split(mock_dataset):
         train_split_name="invalid_train",  # Invalid train split
         parameters={"epochs": 10},
         system_message="Test system message",
+        data_strategy=FinetuneDataStrategy.final_only,
+        thinking_instructions=None,
     )
 
 
@@ -287,4 +304,6 @@ async def test_create_and_start_invalid_validation_split(mock_dataset):
         validation_split_name="invalid_test",  # Invalid validation split
         parameters={"epochs": 10},
         system_message="Test system message",
+        data_strategy=FinetuneDataStrategy.final_only,
+        thinking_instructions=None,
     )
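Every hunk in the test file makes the same two additions to the fine-tune creation call: a data_strategy and a thinking_instructions keyword. A condensed, hypothetical sketch of the call shape follows; the create_and_start target, the await, and the unpacked return value are inferred from the test names and assertions rather than shown verbatim in this diff:

    from kiln_ai.adapters.fine_tune.base_finetune import FinetuneDataStrategy

    # Hypothetical call shape; only the two new keywords and the assertions
    # below are taken directly from the diff.
    adapter, datamodel = await MockFinetune.create_and_start(
        # ... existing arguments: dataset, split names, parameters, system_message ...
        data_strategy=FinetuneDataStrategy.final_only,
        thinking_instructions=None,
    )
    assert datamodel.data_strategy == FinetuneDataStrategy.final_only
    assert datamodel.thinking_instructions is None

    # The "all params" test pairs the other strategy with an explicit prompt:
    #   data_strategy=FinetuneDataStrategy.final_and_intermediate,
    #   thinking_instructions="Custom thinking instructions",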