kiln-ai 0.8.1__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +7 -7
- kiln_ai/adapters/adapter_registry.py +77 -5
- kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
- kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
- kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
- kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
- kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
- kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +113 -21
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
- kiln_ai/adapters/ml_model_list.py +323 -94
- kiln_ai/adapters/model_adapters/__init__.py +18 -0
- kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
- kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
- kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
- kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
- kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
- kiln_ai/adapters/parsers/__init__.py +10 -0
- kiln_ai/adapters/parsers/base_parser.py +12 -0
- kiln_ai/adapters/parsers/json_parser.py +37 -0
- kiln_ai/adapters/parsers/parser_registry.py +19 -0
- kiln_ai/adapters/parsers/r1_parser.py +69 -0
- kiln_ai/adapters/parsers/test_json_parser.py +81 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
- kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
- kiln_ai/adapters/prompt_builders.py +126 -20
- kiln_ai/adapters/provider_tools.py +91 -36
- kiln_ai/adapters/repair/repair_task.py +17 -6
- kiln_ai/adapters/repair/test_repair_task.py +4 -4
- kiln_ai/adapters/run_output.py +8 -0
- kiln_ai/adapters/test_adapter_registry.py +177 -0
- kiln_ai/adapters/test_generate_docs.py +69 -0
- kiln_ai/adapters/test_prompt_adaptors.py +8 -4
- kiln_ai/adapters/test_prompt_builders.py +190 -29
- kiln_ai/adapters/test_provider_tools.py +268 -46
- kiln_ai/datamodel/__init__.py +193 -12
- kiln_ai/datamodel/basemodel.py +31 -11
- kiln_ai/datamodel/json_schema.py +8 -3
- kiln_ai/datamodel/model_cache.py +8 -3
- kiln_ai/datamodel/test_basemodel.py +81 -2
- kiln_ai/datamodel/test_dataset_split.py +100 -3
- kiln_ai/datamodel/test_example_models.py +25 -4
- kiln_ai/datamodel/test_model_cache.py +24 -0
- kiln_ai/datamodel/test_model_perf.py +125 -0
- kiln_ai/datamodel/test_models.py +129 -0
- kiln_ai/utils/exhaustive_error.py +6 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
- kiln_ai-0.11.1.dist-info/RECORD +76 -0
- kiln_ai-0.8.1.dist-info/RECORD +0 -58
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0
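Before reading the hunks: the change these tests exercise repeatedly is that `generate_and_upload_jsonl` now takes an explicit `DatasetFormat` (and, further down, `dump_to_file` gains a `FinetuneDataStrategy`). A minimal sketch of the call-shape change, using a stub enum whose member values are placeholders rather than kiln's real ones:

```python
# Sketch only: a stub enum standing in for kiln_ai's DatasetFormat; the real
# type lives in kiln_ai.adapters.fine_tune.dataset_formatter (0.11.x), and the
# member values below are placeholders, not kiln's actual values.
from enum import Enum


class DatasetFormat(str, Enum):
    OPENAI_CHAT_JSONL = "openai_chat_jsonl"
    OPENAI_CHAT_JSON_SCHEMA_JSONL = "openai_chat_json_schema_jsonl"


# 0.8.1 call shape, as seen in the removed lines below:
#     await finetune.generate_and_upload_jsonl(dataset, "train", task)
#
# 0.11.1 call shape, as seen in the added lines below -- the caller now
# chooses the dataset format explicitly:
#     await finetune.generate_and_upload_jsonl(
#         dataset, "train", task, DatasetFormat.OPENAI_CHAT_JSONL
#     )
```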
kiln_ai/adapters/fine_tune/test_fireworks_finetune.py

```diff
@@ -13,6 +13,8 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
+    FinetuneDataStrategy,
+    StructuredOutputMode,
     Task,
     Train80Test20SplitDefinition,
 )
@@ -33,7 +35,6 @@ def fireworks_finetune(tmp_path):
             dataset_split_id="dataset-123",
             system_message="Test system message",
             path=tmp_file,
-            properties={"undeployed_model_id": "ftm-123"},
         ),
     )
     return finetune
@@ -228,8 +229,20 @@ def mock_task():
     )
 
 
+@pytest.mark.parametrize(
+    "data_strategy,thinking_instructions",
+    [
+        (FinetuneDataStrategy.final_and_intermediate, "thinking instructions"),
+        (FinetuneDataStrategy.final_only, None),
+    ],
+)
 async def test_generate_and_upload_jsonl_success(
-    fireworks_finetune, mock_dataset, mock_task, mock_api_key
+    mock_dataset,
+    mock_task,
+    mock_api_key,
+    data_strategy,
+    thinking_instructions,
+    tmp_path,
 ):
     mock_path = Path("mock_path.jsonl")
     mock_dataset_id = "dataset-123"
@@ -249,11 +262,27 @@ async def test_generate_and_upload_jsonl_success(
     status_response.status_code = 200
     status_response.json.return_value = {"state": "READY"}
 
+    # Set the data strategy on the finetune model
+    tmp_file = tmp_path / "test-finetune.kiln"
+    fireworks_finetune = FireworksFinetune(
+        datamodel=FinetuneModel(
+            name="test-finetune",
+            provider="fireworks",
+            provider_id="fw-123",
+            base_model_id="llama-v2-7b",
+            train_split_name="train",
+            dataset_split_id="dataset-123",
+            system_message="Test system message",
+            path=tmp_file,
+            data_strategy=data_strategy,
+            thinking_instructions=thinking_instructions,
+        ),
+    )
+
     with (
         patch(
             "kiln_ai.adapters.fine_tune.fireworks_finetune.DatasetFormatter",
-            return_value=mock_formatter,
-        ),
+        ) as mock_formatter_constructor,
         patch("httpx.AsyncClient") as mock_client_class,
         patch("builtins.open"),
         patch(
@@ -261,26 +290,58 @@ async def test_generate_and_upload_jsonl_success(
             return_value=mock_dataset_id,
         ),
     ):
+        mock_formatter_constructor.return_value = mock_formatter
         mock_client = AsyncMock()
         mock_client.post = AsyncMock(side_effect=[create_response, upload_response])
         mock_client.get = AsyncMock(return_value=status_response)
         mock_client_class.return_value.__aenter__.return_value = mock_client
 
         result = await fireworks_finetune.generate_and_upload_jsonl(
-            mock_dataset, "train", mock_task
+            mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
        )
 
         # Verify formatter was created with correct parameters
-
-
-
+        assert mock_formatter_constructor.call_count == 1
+        assert mock_formatter_constructor.call_args[1] == {
+            "dataset": mock_dataset,
+            "system_message": "Test system message",
+            "thinking_instructions": thinking_instructions,
+        }
+
+        # Verify the thinking instructions were set on the formatter
+        mock_formatter.method_calls[0][0] == "dump_to_file"
+        mock_formatter.method_calls[0][1] == {
+            "dataset": mock_dataset,
+            "thinking_instructions": thinking_instructions,
+        }
 
         assert result == mock_dataset_id
         assert mock_client.post.call_count == 2
         assert mock_client.get.call_count == 1
 
 
-async def test_start_success(fireworks_finetune, mock_dataset, mock_task, mock_api_key):
+@pytest.mark.parametrize(
+    "output_schema,expected_mode,expected_format",
+    [
+        (
+            '{"type": "object", "properties": {"key": {"type": "string"}}}',
+            StructuredOutputMode.json_mode,
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+        ),
+        (None, None, DatasetFormat.OPENAI_CHAT_JSONL),
+    ],
+)
+async def test_start_success(
+    fireworks_finetune,
+    mock_dataset,
+    mock_task,
+    mock_api_key,
+    output_schema,
+    expected_mode,
+    expected_format,
+):
+    mock_task.output_json_schema = output_schema
+
     fireworks_finetune.datamodel.parent = mock_task
     mock_dataset_id = "dataset-123"
     mock_model_id = "ft-model-123"
```
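A note on the pattern the rewritten success test above adopts: the `DatasetFormatter` patch now keeps the constructor handle (`as mock_formatter_constructor`) and asserts on its keyword arguments via `call_args[1]`. A self-contained sketch of that `unittest.mock` pattern, with a hypothetical `DatasetFormatter` stub standing in for kiln's class:

```python
# Standalone illustration of the patch-the-constructor pattern used above;
# this DatasetFormatter stub is hypothetical, not kiln_ai's implementation.
from unittest.mock import MagicMock, patch


class DatasetFormatter:
    def __init__(self, dataset, system_message, thinking_instructions=None):
        self.dataset = dataset


def build(dataset):
    return DatasetFormatter(
        dataset=dataset,
        system_message="Test system message",
        thinking_instructions=None,
    )


with patch(f"{__name__}.DatasetFormatter") as mock_ctor:
    mock_ctor.return_value = MagicMock()
    build("mock-dataset")
    # call_args[1] is the kwargs dict, exactly as the test above asserts it
    assert mock_ctor.call_count == 1
    assert mock_ctor.call_args[1] == {
        "dataset": "mock-dataset",
        "system_message": "Test system message",
        "thinking_instructions": None,
    }
```

Asserting on the kwargs dict rather than a positional tuple is what lets the parametrized test check `thinking_instructions` for both data strategies in one place.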
```diff
@@ -306,11 +367,16 @@ async def test_start_success(fireworks_finetune, mock_dataset, mock_api_key):
 
     # Verify dataset was uploaded
     fireworks_finetune.generate_and_upload_jsonl.assert_called_once_with(
-        mock_dataset, fireworks_finetune.datamodel.train_split_name, mock_task
+        mock_dataset,
+        fireworks_finetune.datamodel.train_split_name,
+        mock_task,
+        expected_format,
     )
 
     # Verify model ID was updated
     assert fireworks_finetune.datamodel.provider_id == mock_model_id
+    assert fireworks_finetune.datamodel.structured_output_mode == expected_mode
+    assert fireworks_finetune.datamodel.properties["endpoint_version"] == "v2"
 
 
 async def test_start_api_error(
@@ -369,7 +435,15 @@ async def test_deploy_success(fireworks_finetune, mock_api_key):
     success_response.status_code = 200
     assert fireworks_finetune.datamodel.fine_tune_model_id is None
 
-    with patch("httpx.AsyncClient") as mock_client_class:
+    status_response = (
+        FineTuneStatus(status=FineTuneStatusType.completed, message=""),
+        "ftm-123",
+    )
+
+    with (
+        patch("httpx.AsyncClient") as mock_client_class,
+        patch.object(fireworks_finetune, "_status", return_value=status_response),
+    ):
         mock_client = AsyncMock()
         mock_client.post.return_value = success_response
         mock_client_class.return_value.__aenter__.return_value = mock_client
@@ -388,13 +462,22 @@ async def test_deploy_already_deployed(fireworks_finetune, mock_api_key):
         "message": "Model already deployed",
     }
 
-    with patch("httpx.AsyncClient") as mock_client_class:
+    status_response = (
+        FineTuneStatus(status=FineTuneStatusType.completed, message=""),
+        "ftm-123",
+    )
+
+    with (
+        patch("httpx.AsyncClient") as mock_client_class,
+        patch.object(fireworks_finetune, "_status", return_value=status_response),
+    ):
         mock_client = AsyncMock()
         mock_client.post.return_value = already_deployed_response
         mock_client_class.return_value.__aenter__.return_value = mock_client
 
         result = await fireworks_finetune._deploy()
         assert result is True
+        assert fireworks_finetune.datamodel.fine_tune_model_id == "ftm-123"
 
 
 async def test_deploy_failure(fireworks_finetune, mock_api_key):
@@ -423,22 +506,31 @@ async def test_deploy_missing_credentials(fireworks_finetune):
 
 
 async def test_deploy_missing_model_id(fireworks_finetune, mock_api_key):
-    #
-
-
-
-
+    # Mock _status to return no model ID
+    status_response = (
+        FineTuneStatus(
+            status=FineTuneStatusType.completed, message="Fine-tuning job completed"
+        ),
+        None,
+    )
+    with (
+        patch.object(fireworks_finetune, "_status", return_value=status_response),
+    ):
+        response = await fireworks_finetune._deploy()
+        assert response is False
 
 
 async def test_status_with_deploy(fireworks_finetune, mock_api_key):
     # Mock _status to return completed
-    status = FineTuneStatus(
-        status=FineTuneStatusType.completed, message="Fine-tuning job completed"
+    status_response = (
+        FineTuneStatus(
+            status=FineTuneStatusType.completed, message="Fine-tuning job completed"
+        ),
+        "ftm-123",
     )
-
     with (
         patch.object(
-            fireworks_finetune, "_status", return_value=status
+            fireworks_finetune, "_status", return_value=status_response
        ) as mock_status,
         patch.object(fireworks_finetune, "_deploy", return_value=False) as mock_deploy,
     ):
```
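The deploy tests above all mock `_status` to return a `(FineTuneStatus, model_id)` pair, and `_deploy` is expected to record the model id only when the job completed. A minimal sketch of that contract under stated assumptions (stub types and a synchronous `_deploy`; kiln's real method is async and calls the Fireworks API):

```python
# Stub sketch of the (status, model_id) tuple contract the mocked tests rely
# on; FineTuneStatus and _deploy here are simplified stand-ins, not kiln code.
from dataclasses import dataclass
from enum import Enum
from unittest.mock import patch


class FineTuneStatusType(str, Enum):
    completed = "completed"
    running = "running"


@dataclass
class FineTuneStatus:
    status: FineTuneStatusType
    message: str


class Finetune:
    fine_tune_model_id = None

    def _status(self):  # patched in tests, as with patch.object(..., "_status")
        raise NotImplementedError

    def _deploy(self) -> bool:
        status, model_id = self._status()
        if status.status != FineTuneStatusType.completed or model_id is None:
            return False  # mirrors test_deploy_missing_model_id
        self.fine_tune_model_id = model_id  # mirrors test_deploy_already_deployed
        return True


ft = Finetune()
ok_status = (FineTuneStatus(FineTuneStatusType.completed, ""), "ftm-123")
with patch.object(ft, "_status", return_value=ok_status):
    assert ft._deploy() is True
assert ft.fine_tune_model_id == "ftm-123"
```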
kiln_ai/adapters/fine_tune/test_openai_finetune.py

```diff
@@ -10,7 +10,13 @@ from openai.types.fine_tuning import FineTuningJob
 from kiln_ai.adapters.fine_tune.base_finetune import FineTuneStatusType
 from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
 from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
-from kiln_ai.datamodel import DatasetSplit, Task, Train80Test20SplitDefinition
+from kiln_ai.datamodel import (
+    DatasetSplit,
+    FinetuneDataStrategy,
+    StructuredOutputMode,
+    Task,
+    Train80Test20SplitDefinition,
+)
 from kiln_ai.datamodel import Finetune as FinetuneModel
 from kiln_ai.utils.config import Config
@@ -29,6 +35,7 @@ def openai_finetune(tmp_path):
             system_message="Test system message",
             fine_tune_model_id="ft-123",
             path=tmp_file,
+            data_strategy=FinetuneDataStrategy.final_only,
         ),
     )
     return finetune
@@ -225,17 +232,22 @@ async def test_generate_and_upload_jsonl_success(
         patch("builtins.open") as mock_open,
     ):
         result = await openai_finetune.generate_and_upload_jsonl(
-            mock_dataset, "train", mock_task
+            mock_dataset,
+            "train",
+            mock_task,
+            DatasetFormat.OPENAI_CHAT_JSONL,
         )
 
         # Verify formatter was created with correct parameters
         mock_formatter_class.assert_called_once_with(
-            mock_dataset, openai_finetune.datamodel.system_message
+            mock_dataset, openai_finetune.datamodel.system_message, None
         )
 
         # Verify correct format was used
         mock_formatter.dump_to_file.assert_called_once_with(
-            "train", DatasetFormat.OPENAI_CHAT_JSONL
+            "train",
+            DatasetFormat.OPENAI_CHAT_JSONL,
+            FinetuneDataStrategy.final_only,
         )
 
         # Verify file was opened and uploaded
@@ -245,7 +257,7 @@ async def test_generate_and_upload_jsonl_success(
         assert result == mock_file_id
 
 
-async def test_generate_and_upload_jsonl_toolcall_success(
+async def test_generate_and_upload_jsonl_schema_success(
     openai_finetune, mock_dataset, mock_task
 ):
     mock_path = Path("mock_path.jsonl")
@@ -272,17 +284,22 @@ async def test_generate_and_upload_jsonl_toolcall_success(
         patch("builtins.open") as mock_open,
     ):
         result = await openai_finetune.generate_and_upload_jsonl(
-            mock_dataset, "train", mock_task
+            mock_dataset,
+            "train",
+            mock_task,
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
         )
 
         # Verify formatter was created with correct parameters
         mock_formatter_class.assert_called_once_with(
-            mock_dataset, openai_finetune.datamodel.system_message
+            mock_dataset, openai_finetune.datamodel.system_message, None
         )
 
         # Verify correct format was used
         mock_formatter.dump_to_file.assert_called_once_with(
-            "train", DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL
+            "train",
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+            FinetuneDataStrategy.final_only,
         )
 
         # Verify file was opened and uploaded
@@ -317,7 +334,7 @@ async def test_generate_and_upload_jsonl_upload_failure(
     ):
         with pytest.raises(ValueError, match="Failed to upload file to OpenAI"):
             await openai_finetune.generate_and_upload_jsonl(
-                mock_dataset, "train", mock_task
+                mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
             )
 
 
@@ -344,13 +361,33 @@ async def test_generate_and_upload_jsonl_api_error(
     ):
         with pytest.raises(openai.APIError):
             await openai_finetune.generate_and_upload_jsonl(
-                mock_dataset, "train", mock_task
+                mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
             )
 
 
-async def test_start_success(openai_finetune, mock_dataset, mock_task):
+@pytest.mark.parametrize(
+    "output_schema,expected_mode,expected_format",
+    [
+        (
+            '{"type": "object", "properties": {"key": {"type": "string"}}}',
+            StructuredOutputMode.json_schema,
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+        ),
+        (None, None, DatasetFormat.OPENAI_CHAT_JSONL),
+    ],
+)
+async def test_start_success(
+    openai_finetune,
+    mock_dataset,
+    mock_task,
+    output_schema,
+    expected_mode,
+    expected_format,
+):
     openai_finetune.datamodel.parent = mock_task
 
+    mock_task.output_json_schema = output_schema
+
     # Mock parameters
     openai_finetune.datamodel.parameters = {
         "n_epochs": 3,
@@ -381,7 +418,10 @@ async def test_start_success(openai_finetune, mock_dataset, mock_task):
     # Verify file uploads
     assert mock_upload.call_count == 1  # Only training file
     mock_upload.assert_called_with(
-        mock_dataset, openai_finetune.datamodel.train_split_name, mock_task
+        mock_dataset,
+        openai_finetune.datamodel.train_split_name,
+        mock_task,
+        expected_format,
     )
 
     # Verify fine-tune creation
@@ -401,6 +441,7 @@ async def test_start_success(openai_finetune, mock_dataset, mock_task):
     # Verify model updates
     assert openai_finetune.datamodel.provider_id == "ft-123"
     assert openai_finetune.datamodel.base_model_id == "gpt-4o-mini-2024-07-18"
+    assert openai_finetune.datamodel.structured_output_mode == expected_mode
 
 
 async def test_start_with_validation(openai_finetune, mock_dataset, mock_task):
@@ -430,9 +471,17 @@ async def test_start_with_validation(openai_finetune, mock_dataset, mock_task):
     mock_upload.assert_has_calls(
         [
             mock.call(
-                mock_dataset, openai_finetune.datamodel.train_split_name, mock_task
+                mock_dataset,
+                openai_finetune.datamodel.train_split_name,
+                mock_task,
+                DatasetFormat.OPENAI_CHAT_JSONL,
+            ),
+            mock.call(
+                mock_dataset,
+                "validation",
+                mock_task,
+                DatasetFormat.OPENAI_CHAT_JSONL,
             ),
-            mock.call(mock_dataset, "validation", mock_task),
         ]
     )
 
@@ -501,3 +550,65 @@ async def test_status_updates_latest_status(openai_finetune, mock_response):
 
     # Verify file was saved
     assert openai_finetune.datamodel.path.exists()
+
+
+@pytest.mark.parametrize(
+    "data_strategy,thinking_instructions",
+    [
+        (FinetuneDataStrategy.final_and_intermediate, "Custom thinking instructions"),
+        (FinetuneDataStrategy.final_only, None),
+    ],
+)
+async def test_generate_and_upload_jsonl_with_data_strategy(
+    mock_dataset, mock_task, data_strategy, thinking_instructions, tmp_path
+):
+    mock_path = Path("mock_path.jsonl")
+    mock_file_id = "file-123"
+
+    openai_finetune = OpenAIFinetune(
+        datamodel=FinetuneModel(
+            name="test-finetune",
+            provider="openai",
+            provider_id="openai-123",
+            base_model_id="gpt-4o",
+            train_split_name="train",
+            dataset_split_id="dataset-123",
+            system_message="Test system message",
+            fine_tune_model_id="ft-123",
+            path=tmp_path / "test-finetune.kiln",
+            data_strategy=data_strategy,
+            thinking_instructions=thinking_instructions,
+        ),
+    )
+
+    # Mock the formatter
+    mock_formatter = MagicMock(spec=DatasetFormatter)
+    mock_formatter.dump_to_file.return_value = mock_path
+
+    # Mock the file response
+    mock_file_response = MagicMock()
+    mock_file_response.id = mock_file_id
+
+    with (
+        patch(
+            "kiln_ai.adapters.fine_tune.openai_finetune.DatasetFormatter",
+            return_value=mock_formatter,
+        ),
+        patch(
+            "kiln_ai.adapters.fine_tune.openai_finetune.oai_client.files.create",
+            return_value=mock_file_response,
+        ),
+        patch("builtins.open"),
+    ):
+        result = await openai_finetune.generate_and_upload_jsonl(
+            mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
+        )
+
+        # Verify formatter was created with correct parameters
+        mock_formatter.dump_to_file.assert_called_once_with(
+            "train",
+            DatasetFormat.OPENAI_CHAT_JSONL,
+            data_strategy,  # Verify data_strategy is passed through
+        )
+
+        assert result == mock_file_id
```