kiln-ai 0.8.1__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kiln-ai might be problematic.

Files changed (57)
  1. kiln_ai/adapters/__init__.py +7 -7
  2. kiln_ai/adapters/adapter_registry.py +77 -5
  3. kiln_ai/adapters/data_gen/data_gen_task.py +3 -3
  4. kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
  5. kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
  6. kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
  7. kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
  8. kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
  9. kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
  10. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +469 -129
  11. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +113 -21
  12. kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
  13. kiln_ai/adapters/ml_model_list.py +323 -94
  14. kiln_ai/adapters/model_adapters/__init__.py +18 -0
  15. kiln_ai/adapters/{base_adapter.py → model_adapters/base_adapter.py} +81 -37
  16. kiln_ai/adapters/{langchain_adapters.py → model_adapters/langchain_adapters.py} +130 -84
  17. kiln_ai/adapters/model_adapters/openai_compatible_config.py +11 -0
  18. kiln_ai/adapters/model_adapters/openai_model_adapter.py +246 -0
  19. kiln_ai/adapters/model_adapters/test_base_adapter.py +190 -0
  20. kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +103 -88
  21. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +225 -0
  22. kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +43 -15
  23. kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +93 -20
  24. kiln_ai/adapters/parsers/__init__.py +10 -0
  25. kiln_ai/adapters/parsers/base_parser.py +12 -0
  26. kiln_ai/adapters/parsers/json_parser.py +37 -0
  27. kiln_ai/adapters/parsers/parser_registry.py +19 -0
  28. kiln_ai/adapters/parsers/r1_parser.py +69 -0
  29. kiln_ai/adapters/parsers/test_json_parser.py +81 -0
  30. kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
  31. kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
  32. kiln_ai/adapters/prompt_builders.py +126 -20
  33. kiln_ai/adapters/provider_tools.py +91 -36
  34. kiln_ai/adapters/repair/repair_task.py +17 -6
  35. kiln_ai/adapters/repair/test_repair_task.py +4 -4
  36. kiln_ai/adapters/run_output.py +8 -0
  37. kiln_ai/adapters/test_adapter_registry.py +177 -0
  38. kiln_ai/adapters/test_generate_docs.py +69 -0
  39. kiln_ai/adapters/test_prompt_adaptors.py +8 -4
  40. kiln_ai/adapters/test_prompt_builders.py +190 -29
  41. kiln_ai/adapters/test_provider_tools.py +268 -46
  42. kiln_ai/datamodel/__init__.py +193 -12
  43. kiln_ai/datamodel/basemodel.py +31 -11
  44. kiln_ai/datamodel/json_schema.py +8 -3
  45. kiln_ai/datamodel/model_cache.py +8 -3
  46. kiln_ai/datamodel/test_basemodel.py +81 -2
  47. kiln_ai/datamodel/test_dataset_split.py +100 -3
  48. kiln_ai/datamodel/test_example_models.py +25 -4
  49. kiln_ai/datamodel/test_model_cache.py +24 -0
  50. kiln_ai/datamodel/test_model_perf.py +125 -0
  51. kiln_ai/datamodel/test_models.py +129 -0
  52. kiln_ai/utils/exhaustive_error.py +6 -0
  53. {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/METADATA +9 -7
  54. kiln_ai-0.11.1.dist-info/RECORD +76 -0
  55. kiln_ai-0.8.1.dist-info/RECORD +0 -58
  56. {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/WHEEL +0 -0
  57. {kiln_ai-0.8.1.dist-info → kiln_ai-0.11.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/fine_tune/test_fireworks_finetune.py

@@ -13,6 +13,8 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune
 from kiln_ai.datamodel import (
     DatasetSplit,
+    FinetuneDataStrategy,
+    StructuredOutputMode,
     Task,
     Train80Test20SplitDefinition,
 )
@@ -33,7 +35,6 @@ def fireworks_finetune(tmp_path):
             dataset_split_id="dataset-123",
             system_message="Test system message",
             path=tmp_file,
-            properties={"undeployed_model_id": "ftm-123"},
         ),
     )
     return finetune
@@ -228,8 +229,20 @@ def mock_task():
     )


+@pytest.mark.parametrize(
+    "data_strategy,thinking_instructions",
+    [
+        (FinetuneDataStrategy.final_and_intermediate, "thinking instructions"),
+        (FinetuneDataStrategy.final_only, None),
+    ],
+)
 async def test_generate_and_upload_jsonl_success(
-    fireworks_finetune, mock_dataset, mock_task, mock_api_key
+    mock_dataset,
+    mock_task,
+    mock_api_key,
+    data_strategy,
+    thinking_instructions,
+    tmp_path,
 ):
     mock_path = Path("mock_path.jsonl")
     mock_dataset_id = "dataset-123"
@@ -249,11 +262,27 @@ async def test_generate_and_upload_jsonl_success(
     status_response.status_code = 200
     status_response.json.return_value = {"state": "READY"}

+    # Set the data strategy on the finetune model
+    tmp_file = tmp_path / "test-finetune.kiln"
+    fireworks_finetune = FireworksFinetune(
+        datamodel=FinetuneModel(
+            name="test-finetune",
+            provider="fireworks",
+            provider_id="fw-123",
+            base_model_id="llama-v2-7b",
+            train_split_name="train",
+            dataset_split_id="dataset-123",
+            system_message="Test system message",
+            path=tmp_file,
+            data_strategy=data_strategy,
+            thinking_instructions=thinking_instructions,
+        ),
+    )
+
     with (
         patch(
             "kiln_ai.adapters.fine_tune.fireworks_finetune.DatasetFormatter",
-            return_value=mock_formatter,
-        ),
+        ) as mock_formatter_constructor,
         patch("httpx.AsyncClient") as mock_client_class,
         patch("builtins.open"),
         patch(
@@ -261,26 +290,58 @@ async def test_generate_and_upload_jsonl_success(
             return_value=mock_dataset_id,
         ),
     ):
+        mock_formatter_constructor.return_value = mock_formatter
         mock_client = AsyncMock()
         mock_client.post = AsyncMock(side_effect=[create_response, upload_response])
         mock_client.get = AsyncMock(return_value=status_response)
         mock_client_class.return_value.__aenter__.return_value = mock_client

         result = await fireworks_finetune.generate_and_upload_jsonl(
-            mock_dataset, "train", mock_task
+            mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
         )

         # Verify formatter was created with correct parameters
-        mock_formatter.dump_to_file.assert_called_once_with(
-            "train", DatasetFormat.OPENAI_CHAT_JSONL
-        )
+        assert mock_formatter_constructor.call_count == 1
+        assert mock_formatter_constructor.call_args[1] == {
+            "dataset": mock_dataset,
+            "system_message": "Test system message",
+            "thinking_instructions": thinking_instructions,
+        }
+
+        # Verify the thinking instructions were set on the formatter
+        mock_formatter.method_calls[0][0] == "dump_to_file"
+        mock_formatter.method_calls[0][1] == {
+            "dataset": mock_dataset,
+            "thinking_instructions": thinking_instructions,
+        }

         assert result == mock_dataset_id
         assert mock_client.post.call_count == 2
         assert mock_client.get.call_count == 1


-async def test_start_success(fireworks_finetune, mock_dataset, mock_task, mock_api_key):
+@pytest.mark.parametrize(
+    "output_schema,expected_mode,expected_format",
+    [
+        (
+            '{"type": "object", "properties": {"key": {"type": "string"}}}',
+            StructuredOutputMode.json_mode,
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+        ),
+        (None, None, DatasetFormat.OPENAI_CHAT_JSONL),
+    ],
+)
+async def test_start_success(
+    fireworks_finetune,
+    mock_dataset,
+    mock_task,
+    mock_api_key,
+    output_schema,
+    expected_mode,
+    expected_format,
+):
+    mock_task.output_json_schema = output_schema
+
     fireworks_finetune.datamodel.parent = mock_task
     mock_dataset_id = "dataset-123"
     mock_model_id = "ft-model-123"
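Taken together, the hunks above pin down the 0.11.x formatter contract: DatasetFormatter is now constructed with keyword arguments, including the new thinking_instructions field, and the dataset format plus data strategy travel with the dump call. A minimal sketch of that call pattern, inferred from these test assertions rather than from kiln_ai documentation; the helper function itself is hypothetical:

# Hypothetical helper illustrating the call pattern asserted above; the
# real kiln_ai signatures may differ in detail.
from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
from kiln_ai.datamodel import DatasetSplit, FinetuneDataStrategy


def dump_training_file(dataset: DatasetSplit, thinking_instructions: str | None):
    # Keyword-argument construction, per the call_args[1] assertion.
    formatter = DatasetFormatter(
        dataset=dataset,
        system_message="Test system message",
        thinking_instructions=thinking_instructions,  # None for final_only runs
    )
    # The parametrize table pairs intermediate data with thinking instructions.
    strategy = (
        FinetuneDataStrategy.final_and_intermediate
        if thinking_instructions is not None
        else FinetuneDataStrategy.final_only
    )
    # dump_to_file now takes the data strategy as a third argument.
    return formatter.dump_to_file("train", DatasetFormat.OPENAI_CHAT_JSONL, strategy)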
@@ -306,11 +367,16 @@ async def test_start_success(fireworks_finetune, mock_dataset, mock_task, mock_a

     # Verify dataset was uploaded
     fireworks_finetune.generate_and_upload_jsonl.assert_called_once_with(
-        mock_dataset, fireworks_finetune.datamodel.train_split_name, mock_task
+        mock_dataset,
+        fireworks_finetune.datamodel.train_split_name,
+        mock_task,
+        expected_format,
     )

     # Verify model ID was updated
     assert fireworks_finetune.datamodel.provider_id == mock_model_id
+    assert fireworks_finetune.datamodel.structured_output_mode == expected_mode
+    assert fireworks_finetune.datamodel.properties["endpoint_version"] == "v2"


 async def test_start_api_error(
@@ -369,7 +435,15 @@ async def test_deploy_success(fireworks_finetune, mock_api_key):
     success_response.status_code = 200
     assert fireworks_finetune.datamodel.fine_tune_model_id is None

-    with patch("httpx.AsyncClient") as mock_client_class:
+    status_response = (
+        FineTuneStatus(status=FineTuneStatusType.completed, message=""),
+        "ftm-123",
+    )
+
+    with (
+        patch("httpx.AsyncClient") as mock_client_class,
+        patch.object(fireworks_finetune, "_status", return_value=status_response),
+    ):
         mock_client = AsyncMock()
         mock_client.post.return_value = success_response
         mock_client_class.return_value.__aenter__.return_value = mock_client
@@ -388,13 +462,22 @@ async def test_deploy_already_deployed(fireworks_finetune, mock_api_key):
         "message": "Model already deployed",
     }

-    with patch("httpx.AsyncClient") as mock_client_class:
+    status_response = (
+        FineTuneStatus(status=FineTuneStatusType.completed, message=""),
+        "ftm-123",
+    )
+
+    with (
+        patch("httpx.AsyncClient") as mock_client_class,
+        patch.object(fireworks_finetune, "_status", return_value=status_response),
+    ):
         mock_client = AsyncMock()
         mock_client.post.return_value = already_deployed_response
         mock_client_class.return_value.__aenter__.return_value = mock_client

         result = await fireworks_finetune._deploy()
         assert result is True
+        assert fireworks_finetune.datamodel.fine_tune_model_id == "ftm-123"


 async def test_deploy_failure(fireworks_finetune, mock_api_key):
@@ -423,22 +506,31 @@ async def test_deploy_missing_credentials(fireworks_finetune):


 async def test_deploy_missing_model_id(fireworks_finetune, mock_api_key):
-    # Test missing model ID
-    fireworks_finetune.datamodel.properties["undeployed_model_id"] = None
-
-    response = await fireworks_finetune._deploy()
-    assert response is False
+    # Mock _status to return no model ID
+    status_response = (
+        FineTuneStatus(
+            status=FineTuneStatusType.completed, message="Fine-tuning job completed"
+        ),
+        None,
+    )
+    with (
+        patch.object(fireworks_finetune, "_status", return_value=status_response),
+    ):
+        response = await fireworks_finetune._deploy()
+        assert response is False


 async def test_status_with_deploy(fireworks_finetune, mock_api_key):
     # Mock _status to return completed
-    mock_status_response = FineTuneStatus(
-        status=FineTuneStatusType.completed, message="Fine-tuning job completed"
+    status_response = (
+        FineTuneStatus(
+            status=FineTuneStatusType.completed, message="Fine-tuning job completed"
+        ),
+        "ftm-123",
     )
-
     with (
         patch.object(
-            fireworks_finetune, "_status", return_value=mock_status_response
+            fireworks_finetune, "_status", return_value=status_response
         ) as mock_status,
         patch.object(fireworks_finetune, "_deploy", return_value=False) as mock_deploy,
     ):
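These deploy tests all share one shape change: _status no longer yields a bare FineTuneStatus but a (FineTuneStatus, model_id) tuple, and _deploy resolves the model from that tuple instead of the removed "undeployed_model_id" property. A hedged sketch of a caller consuming that tuple; the wrapper function is hypothetical, and _status is assumed awaitable, as the async tests suggest:

# Hypothetical wrapper around the private API exercised above; not kiln_ai code.
from kiln_ai.adapters.fine_tune.base_finetune import FineTuneStatusType
from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune


async def deploy_when_ready(finetune: FireworksFinetune) -> bool:
    # 0.11.x: _status returns (FineTuneStatus, model-id-or-None).
    status, model_id = await finetune._status()
    if status.status != FineTuneStatusType.completed or model_id is None:
        # Mirrors test_deploy_missing_model_id: no model id, no deploy.
        return False
    # On success (including "already deployed"), the tests expect
    # datamodel.fine_tune_model_id to be populated, e.g. "ftm-123".
    return await finetune._deploy()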
kiln_ai/adapters/fine_tune/test_openai_finetune.py

@@ -10,7 +10,13 @@ from openai.types.fine_tuning import FineTuningJob
 from kiln_ai.adapters.fine_tune.base_finetune import FineTuneStatusType
 from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
 from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
-from kiln_ai.datamodel import DatasetSplit, Task, Train80Test20SplitDefinition
+from kiln_ai.datamodel import (
+    DatasetSplit,
+    FinetuneDataStrategy,
+    StructuredOutputMode,
+    Task,
+    Train80Test20SplitDefinition,
+)
 from kiln_ai.datamodel import Finetune as FinetuneModel
 from kiln_ai.utils.config import Config

@@ -29,6 +35,7 @@ def openai_finetune(tmp_path):
             system_message="Test system message",
             fine_tune_model_id="ft-123",
             path=tmp_file,
+            data_strategy=FinetuneDataStrategy.final_only,
         ),
     )
     return finetune
@@ -225,17 +232,22 @@ async def test_generate_and_upload_jsonl_success(
         patch("builtins.open") as mock_open,
     ):
         result = await openai_finetune.generate_and_upload_jsonl(
-            mock_dataset, "train", mock_task
+            mock_dataset,
+            "train",
+            mock_task,
+            DatasetFormat.OPENAI_CHAT_JSONL,
         )

         # Verify formatter was created with correct parameters
         mock_formatter_class.assert_called_once_with(
-            mock_dataset, openai_finetune.datamodel.system_message
+            mock_dataset, openai_finetune.datamodel.system_message, None
         )

         # Verify correct format was used
         mock_formatter.dump_to_file.assert_called_once_with(
-            "train", DatasetFormat.OPENAI_CHAT_JSONL
+            "train",
+            DatasetFormat.OPENAI_CHAT_JSONL,
+            FinetuneDataStrategy.final_only,
         )

         # Verify file was opened and uploaded
@@ -245,7 +257,7 @@ async def test_generate_and_upload_jsonl_success(
         assert result == mock_file_id


-async def test_generate_and_upload_jsonl_toolcall_success(
+async def test_generate_and_upload_jsonl_schema_success(
     openai_finetune, mock_dataset, mock_task
 ):
     mock_path = Path("mock_path.jsonl")
@@ -272,17 +284,22 @@ async def test_generate_and_upload_jsonl_toolcall_success(
         patch("builtins.open") as mock_open,
     ):
         result = await openai_finetune.generate_and_upload_jsonl(
-            mock_dataset, "train", mock_task
+            mock_dataset,
+            "train",
+            mock_task,
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
         )

         # Verify formatter was created with correct parameters
         mock_formatter_class.assert_called_once_with(
-            mock_dataset, openai_finetune.datamodel.system_message
+            mock_dataset, openai_finetune.datamodel.system_message, None
         )

         # Verify correct format was used
         mock_formatter.dump_to_file.assert_called_once_with(
-            "train", DatasetFormat.OPENAI_CHAT_TOOLCALL_JSONL
+            "train",
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+            FinetuneDataStrategy.final_only,
         )

         # Verify file was opened and uploaded
@@ -317,7 +334,7 @@ async def test_generate_and_upload_jsonl_upload_failure(
     ):
         with pytest.raises(ValueError, match="Failed to upload file to OpenAI"):
             await openai_finetune.generate_and_upload_jsonl(
-                mock_dataset, "train", mock_task
+                mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
             )

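Every call site in this file now passes the dataset format explicitly, so generate_and_upload_jsonl appears to have grown a required fourth parameter in 0.11.x. A usage sketch under that assumption; the wrapper function is hypothetical:

# Hypothetical wrapper; the four-argument call shape and the string file-id
# return value are taken from the test call sites and assertions above.
from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat
from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
from kiln_ai.datamodel import DatasetSplit, Task


async def upload_train_split(ft: OpenAIFinetune, dataset: DatasetSplit, task: Task) -> str:
    # 0.8.x passed three arguments; 0.11.x callers choose the format up front.
    return await ft.generate_and_upload_jsonl(
        dataset, "train", task, DatasetFormat.OPENAI_CHAT_JSONL
    )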
@@ -344,13 +361,33 @@ async def test_generate_and_upload_jsonl_api_error(
     ):
         with pytest.raises(openai.APIError):
             await openai_finetune.generate_and_upload_jsonl(
-                mock_dataset, "train", mock_task
+                mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
             )


-async def test_start_success(openai_finetune, mock_dataset, mock_task):
+@pytest.mark.parametrize(
+    "output_schema,expected_mode,expected_format",
+    [
+        (
+            '{"type": "object", "properties": {"key": {"type": "string"}}}',
+            StructuredOutputMode.json_schema,
+            DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL,
+        ),
+        (None, None, DatasetFormat.OPENAI_CHAT_JSONL),
+    ],
+)
+async def test_start_success(
+    openai_finetune,
+    mock_dataset,
+    mock_task,
+    output_schema,
+    expected_mode,
+    expected_format,
+):
     openai_finetune.datamodel.parent = mock_task

+    mock_task.output_json_schema = output_schema
+
     # Mock parameters
     openai_finetune.datamodel.parameters = {
         "n_epochs": 3,
@@ -381,7 +418,10 @@ async def test_start_success(openai_finetune, mock_dataset, mock_task):
     # Verify file uploads
     assert mock_upload.call_count == 1  # Only training file
     mock_upload.assert_called_with(
-        mock_dataset, openai_finetune.datamodel.train_split_name, mock_task
+        mock_dataset,
+        openai_finetune.datamodel.train_split_name,
+        mock_task,
+        expected_format,
     )

     # Verify fine-tune creation
@@ -401,6 +441,7 @@ async def test_start_success(openai_finetune, mock_dataset, mock_task):
     # Verify model updates
     assert openai_finetune.datamodel.provider_id == "ft-123"
     assert openai_finetune.datamodel.base_model_id == "gpt-4o-mini-2024-07-18"
+    assert openai_finetune.datamodel.structured_output_mode == expected_mode


 async def test_start_with_validation(openai_finetune, mock_dataset, mock_task):
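The two test_start_success tables (Fireworks earlier, OpenAI here) imply the same selection rule for structured tasks, differing only in which StructuredOutputMode gets recorded: Fireworks stores json_mode, OpenAI stores json_schema, and unstructured tasks fall back to plain chat JSONL with no mode. A hypothetical helper reproducing that mapping; this is not a function in kiln_ai:

# Reproduces the parametrize tables above; not actual kiln_ai code.
from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat
from kiln_ai.datamodel import StructuredOutputMode


def select_mode_and_format(output_json_schema: str | None, provider: str):
    if output_json_schema is None:
        # Unstructured tasks: plain chat format, no structured output mode.
        return None, DatasetFormat.OPENAI_CHAT_JSONL
    # Fireworks records json_mode; OpenAI records json_schema.
    mode = (
        StructuredOutputMode.json_mode
        if provider == "fireworks"
        else StructuredOutputMode.json_schema
    )
    return mode, DatasetFormat.OPENAI_CHAT_JSON_SCHEMA_JSONL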
@@ -430,9 +471,17 @@ async def test_start_with_validation(openai_finetune, mock_dataset, mock_task):
     mock_upload.assert_has_calls(
         [
             mock.call(
-                mock_dataset, openai_finetune.datamodel.train_split_name, mock_task
+                mock_dataset,
+                openai_finetune.datamodel.train_split_name,
+                mock_task,
+                DatasetFormat.OPENAI_CHAT_JSONL,
+            ),
+            mock.call(
+                mock_dataset,
+                "validation",
+                mock_task,
+                DatasetFormat.OPENAI_CHAT_JSONL,
             ),
-            mock.call(mock_dataset, "validation", mock_task),
         ]
     )

@@ -501,3 +550,65 @@ async def test_status_updates_latest_status(openai_finetune, mock_response):

     # Verify file was saved
     assert openai_finetune.datamodel.path.exists()
+
+
+@pytest.mark.parametrize(
+    "data_strategy,thinking_instructions",
+    [
+        (FinetuneDataStrategy.final_and_intermediate, "Custom thinking instructions"),
+        (FinetuneDataStrategy.final_only, None),
+    ],
+)
+async def test_generate_and_upload_jsonl_with_data_strategy(
+    mock_dataset, mock_task, data_strategy, thinking_instructions, tmp_path
+):
+    mock_path = Path("mock_path.jsonl")
+    mock_file_id = "file-123"
+
+    openai_finetune = OpenAIFinetune(
+        datamodel=FinetuneModel(
+            name="test-finetune",
+            provider="openai",
+            provider_id="openai-123",
+            base_model_id="gpt-4o",
+            train_split_name="train",
+            dataset_split_id="dataset-123",
+            system_message="Test system message",
+            fine_tune_model_id="ft-123",
+            path=tmp_path / "test-finetune.kiln",
+            data_strategy=data_strategy,
+            thinking_instructions=thinking_instructions,
+        ),
+    )
+
+    # Mock the formatter
+    mock_formatter = MagicMock(spec=DatasetFormatter)
+    mock_formatter.dump_to_file.return_value = mock_path
+
+    # Mock the file response
+    mock_file_response = MagicMock()
+    mock_file_response.id = mock_file_id
+
+    with (
+        patch(
+            "kiln_ai.adapters.fine_tune.openai_finetune.DatasetFormatter",
+            return_value=mock_formatter,
+        ),
+        patch(
+            "kiln_ai.adapters.fine_tune.openai_finetune.oai_client.files.create",
+            return_value=mock_file_response,
+        ),
+        patch("builtins.open"),
+    ):
+        result = await openai_finetune.generate_and_upload_jsonl(
+            mock_dataset, "train", mock_task, DatasetFormat.OPENAI_CHAT_JSONL
+        )
+
+    # Verify formatter was created with correct parameters
+    mock_formatter.dump_to_file.assert_called_once_with(
+        "train",
+        DatasetFormat.OPENAI_CHAT_JSONL,
+        data_strategy,  # Verify data_strategy is passed through
+    )
+
+    assert result == mock_file_id