kiln-ai: kiln_ai-0.8.1-py3-none-any.whl → kiln_ai-0.11.1-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (57)
  1. kiln_ai/adapters/__init__.py +7 -7
  2. kiln_ai/adapters/adapter_registry.py +77 -5
  3. kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
  4. kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
  5. kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
  6. kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
  7. kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
  8. kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
  9. kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
  10. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
  11. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +113 -21
  12. kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
  13. kiln_ai/adapters/ml_model_list.py +323 -94
  14. kiln_ai/adapters/model_adapters/__init__.py +18 -0
  15. kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
  16. kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
  17. kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
  18. kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
  19. kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
  20. kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
  21. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
  22. kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
  23. kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
  24. kiln_ai/adapters/parsers/__init__.py +10 -0
  25. kiln_ai/adapters/parsers/base_parser.py +12 -0
  26. kiln_ai/adapters/parsers/json_parser.py +37 -0
  27. kiln_ai/adapters/parsers/parser_registry.py +19 -0
  28. kiln_ai/adapters/parsers/r1_parser.py +69 -0
  29. kiln_ai/adapters/parsers/test_json_parser.py +81 -0
  30. kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
  31. kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
  32. kiln_ai/adapters/prompt_builders.py +126 -20
  33. kiln_ai/adapters/provider_tools.py +91 -36
  34. kiln_ai/adapters/repair/repair_task.py +17 -6
  35. kiln_ai/adapters/repair/test_repair_task.py +4 -4
  36. kiln_ai/adapters/run_output.py +8 -0
  37. kiln_ai/adapters/test_adapter_registry.py +177 -0
  38. kiln_ai/adapters/test_generate_docs.py +69 -0
  39. kiln_ai/adapters/test_prompt_adaptors.py +8 -4
  40. kiln_ai/adapters/test_prompt_builders.py +190 -29
  41. kiln_ai/adapters/test_provider_tools.py +268 -46
  42. kiln_ai/datamodel/__init__.py +193 -12
  43. kiln_ai/datamodel/basemodel.py +31 -11
  44. kiln_ai/datamodel/json_schema.py +8 -3
  45. kiln_ai/datamodel/model_cache.py +8 -3
  46. kiln_ai/datamodel/test_basemodel.py +81 -2
  47. kiln_ai/datamodel/test_dataset_split.py +100 -3
  48. kiln_ai/datamodel/test_example_models.py +25 -4
  49. kiln_ai/datamodel/test_model_cache.py +24 -0
  50. kiln_ai/datamodel/test_model_perf.py +125 -0
  51. kiln_ai/datamodel/test_models.py +129 -0
  52. kiln_ai/utils/exhaustive_error.py +6 -0
  53. {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
  54. kiln_ai-0.11.1.dist-info/RECORD +76 -0
  55. kiln_ai-0.8.1.dist-info/RECORD +0 -58
  56. {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
  57. {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -10,7 +10,7 @@ from kiln_ai.adapters.fine_tune.base_finetune import (
10
10
  FineTuneStatusType,
11
11
  )
12
12
  from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
13
- from kiln_ai.datamodel import DatasetSplit, Task
13
+ from kiln_ai.datamodel import DatasetSplit, StructuredOutputMode, Task
14
14
  from kiln_ai.utils.config import Config
15
15
 
16
16
  oai_client = openai.AsyncOpenAI(
@@ -124,13 +124,18 @@ class OpenAIFinetune(BaseFinetuneAdapter):
124
124
  if not task:
125
125
  raise ValueError("Task is required to start a fine-tune")
126
126
 
127
+ # Use chat format for unstructured output, and JSON for formatted output (was previously function calls)
128
+ format = DatasetFormat.OPENAI_CHAT_JSONL
129
+ if task.output_json_schema:
130
+ format = DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL
131
+ self.datamodel.structured_output_mode = StructuredOutputMode.json_schema
127
132
  train_file_id = await self.generate_and_upload_jsonl(
128
- dataset, self.datamodel.train_split_name, task
133
+ dataset, self.datamodel.train_split_name, task, format
129
134
  )
130
135
  validation_file_id = None
131
136
  if self.datamodel.validation_split_name:
132
137
  validation_file_id = await self.generate_and_upload_jsonl(
133
- dataset, self.datamodel.validation_split_name, task
138
+ dataset, self.datamodel.validation_split_name, task, format
134
139
  )
135
140
 
136
141
  # Filter to hyperparameters which are set via the hyperparameters field (some like seed are set via the API)
@@ -156,16 +161,12 @@ class OpenAIFinetune(BaseFinetuneAdapter):
156
161
  return None
157
162
 
158
163
  async def generate_and_upload_jsonl(
159
- self, dataset: DatasetSplit, split_name: str, task: Task
164
+ self, dataset: DatasetSplit, split_name: str, task: Task, format: DatasetFormat
160
165
  ) -> str:
161
- formatter = DatasetFormatter(dataset, self.datamodel.system_message)
162
- # All OpenAI models support tool calls for structured outputs
163
- format = (
164
- DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL
165
- if task.output_json_schema
166
- else DatasetFormat.OPENAI_CHAT_JSONL
166
+ formatter = DatasetFormatter(
167
+ dataset, self.datamodel.system_message, self.datamodel.thinking_instructions
167
168
  )
168
- path = formatter.dump_to_file(split_name, format)
169
+ path = formatter.dump_to_file(split_name, format, self.datamodel.data_strategy)
169
170
 
170
171
  response = await oai_client.files.create(
171
172
  file=open(path, "rb"),
@@ -4,6 +4,7 @@ import pytest
4
4
 
5
5
  from kiln_ai.adapters.fine_tune.base_finetune import (
6
6
  BaseFinetuneAdapter,
7
+ FinetuneDataStrategy,
7
8
  FineTuneParameter,
8
9
  FineTuneStatus,
9
10
  FineTuneStatusType,
@@ -154,6 +155,8 @@ async def test_create_and_start_success(mock_dataset):
154
155
  train_split_name="train",
155
156
  parameters={"epochs": 10}, # Required parameter
156
157
  system_message="Test system message",
158
+ data_strategy=FinetuneDataStrategy.final_only,
159
+ thinking_instructions=None,
157
160
  )
158
161
 
159
162
  assert isinstance(adapter, MockFinetune)
@@ -166,6 +169,8 @@ async def test_create_and_start_success(mock_dataset):
166
169
  assert datamodel.parameters == {"epochs": 10}
167
170
  assert datamodel.system_message == "Test system message"
168
171
  assert datamodel.path.exists()
172
+ assert datamodel.data_strategy == FinetuneDataStrategy.final_only
173
+ assert datamodel.thinking_instructions is None
169
174
 
170
175
 
171
176
  async def test_create_and_start_with_all_params(mock_dataset):
@@ -180,6 +185,8 @@ async def test_create_and_start_with_all_params(mock_dataset):
180
185
  description="Custom Description",
181
186
  validation_split_name="test",
182
187
  system_message="Test system message",
188
+ data_strategy=FinetuneDataStrategy.final_and_intermediate,
189
+ thinking_instructions="Custom thinking instructions",
183
190
  )
184
191
 
185
192
  assert datamodel.name == "Custom Name"
@@ -188,6 +195,8 @@ async def test_create_and_start_with_all_params(mock_dataset):
188
195
  assert datamodel.parameters == {"epochs": 10, "learning_rate": 0.001}
189
196
  assert datamodel.system_message == "Test system message"
190
197
  assert adapter.datamodel == datamodel
198
+ assert datamodel.data_strategy == FinetuneDataStrategy.final_and_intermediate
199
+ assert datamodel.thinking_instructions == "Custom thinking instructions"
191
200
 
192
201
  # load the datamodel from the file, confirm it's saved
193
202
  loaded_datamodel = FinetuneModel.load_from_file(datamodel.path)
@@ -204,6 +213,8 @@ async def test_create_and_start_invalid_parameters(mock_dataset):
204
213
  train_split_name="train",
205
214
  parameters={"learning_rate": 0.001}, # Missing required 'epochs'
206
215
  system_message="Test system message",
216
+ thinking_instructions=None,
217
+ data_strategy=FinetuneDataStrategy.final_only,
207
218
  )
208
219
 
209
220
 
@@ -222,6 +233,8 @@ async def test_create_and_start_no_parent_task():
222
233
  train_split_name="train",
223
234
  parameters={"epochs": 10},
224
235
  system_message="Test system message",
236
+ data_strategy=FinetuneDataStrategy.final_only,
237
+ thinking_instructions=None,
225
238
  )
226
239
 
227
240
 
@@ -243,6 +256,8 @@ async def test_create_and_start_no_parent_task_path():
243
256
  train_split_name="train",
244
257
  parameters={"epochs": 10},
245
258
  system_message="Test system message",
259
+ data_strategy=FinetuneDataStrategy.final_only,
260
+ thinking_instructions=None,
246
261
  )
247
262
 
248
263
 
@@ -269,6 +284,8 @@ async def test_create_and_start_invalid_train_split(mock_dataset):
269
284
  train_split_name="invalid_train", # Invalid train split
270
285
  parameters={"epochs": 10},
271
286
  system_message="Test system message",
287
+ data_strategy=FinetuneDataStrategy.final_only,
288
+ thinking_instructions=None,
272
289
  )
273
290
 
274
291
 
@@ -287,4 +304,6 @@ async def test_create_and_start_invalid_validation_split(mock_dataset):
287
304
  validation_split_name="invalid_test", # Invalid validation split
288
305
  parameters={"epochs": 10},
289
306
  system_message="Test system message",
307
+ data_strategy=FinetuneDataStrategy.final_only,
308
+ thinking_instructions=None,
290
309
  )