kiln-ai 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/eval/eval_runner.py +5 -64
- kiln_ai/adapters/eval/g_eval.py +3 -3
- kiln_ai/adapters/fine_tune/dataset_formatter.py +124 -34
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +264 -7
- kiln_ai/adapters/ml_model_list.py +478 -4
- kiln_ai/adapters/model_adapters/base_adapter.py +26 -8
- kiln_ai/adapters/model_adapters/litellm_adapter.py +41 -7
- kiln_ai/adapters/model_adapters/test_base_adapter.py +74 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +65 -1
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +3 -2
- kiln_ai/adapters/model_adapters/test_structured_output.py +4 -6
- kiln_ai/adapters/parsers/base_parser.py +0 -3
- kiln_ai/adapters/parsers/parser_registry.py +5 -3
- kiln_ai/adapters/parsers/r1_parser.py +17 -2
- kiln_ai/adapters/parsers/request_formatters.py +40 -0
- kiln_ai/adapters/parsers/test_parser_registry.py +2 -2
- kiln_ai/adapters/parsers/test_r1_parser.py +44 -1
- kiln_ai/adapters/parsers/test_request_formatters.py +76 -0
- kiln_ai/adapters/prompt_builders.py +14 -1
- kiln_ai/adapters/provider_tools.py +18 -1
- kiln_ai/adapters/repair/test_repair_task.py +3 -2
- kiln_ai/adapters/test_prompt_builders.py +24 -3
- kiln_ai/adapters/test_provider_tools.py +70 -1
- kiln_ai/datamodel/__init__.py +2 -0
- kiln_ai/datamodel/datamodel_enums.py +14 -0
- kiln_ai/datamodel/dataset_filters.py +69 -1
- kiln_ai/datamodel/dataset_split.py +4 -0
- kiln_ai/datamodel/eval.py +8 -0
- kiln_ai/datamodel/finetune.py +1 -0
- kiln_ai/datamodel/prompt_id.py +1 -0
- kiln_ai/datamodel/task_output.py +1 -1
- kiln_ai/datamodel/task_run.py +39 -7
- kiln_ai/datamodel/test_basemodel.py +3 -7
- kiln_ai/datamodel/test_dataset_filters.py +82 -0
- kiln_ai/datamodel/test_dataset_split.py +2 -0
- kiln_ai/datamodel/test_example_models.py +54 -0
- kiln_ai/datamodel/test_models.py +50 -2
- kiln_ai/utils/async_job_runner.py +106 -0
- kiln_ai/utils/dataset_import.py +80 -18
- kiln_ai/utils/test_async_job_runner.py +199 -0
- kiln_ai/utils/test_dataset_import.py +242 -10
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/METADATA +1 -1
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/RECORD +45 -41
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.15.0.dist-info → kiln_ai-0.16.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import re
|
|
3
4
|
import tempfile
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from unittest.mock import Mock
|
|
@@ -16,6 +17,7 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import (
|
|
|
16
17
|
generate_huggingface_chat_template,
|
|
17
18
|
generate_huggingface_chat_template_toolcall,
|
|
18
19
|
generate_vertex_gemini,
|
|
20
|
+
serialize_r1_style_message,
|
|
19
21
|
)
|
|
20
22
|
from kiln_ai.adapters.model_adapters.base_adapter import COT_FINAL_ANSWER_PROMPT
|
|
21
23
|
from kiln_ai.datamodel import (
|
|
@@ -42,6 +44,7 @@ def mock_task():
|
|
|
42
44
|
"input": '{"test": "input 你好"}',
|
|
43
45
|
"repaired_output": None,
|
|
44
46
|
"intermediate_outputs": {},
|
|
47
|
+
"thinking_training_data": Mock(return_value=None),
|
|
45
48
|
"input_source": Mock(
|
|
46
49
|
spec=DataSource,
|
|
47
50
|
**{
|
|
@@ -83,6 +86,7 @@ def mock_task():
|
|
|
83
86
|
def mock_intermediate_outputs(mock_task):
|
|
84
87
|
for run in mock_task.runs():
|
|
85
88
|
run.intermediate_outputs = {"reasoning": "thinking output"}
|
|
89
|
+
run.thinking_training_data.return_value = "thinking output"
|
|
86
90
|
mock_task.thinking_instruction = "thinking instructions"
|
|
87
91
|
return mock_task
|
|
88
92
|
|
|
@@ -138,6 +142,31 @@ def test_generate_chat_message_response_thinking():
|
|
|
138
142
|
}
|
|
139
143
|
|
|
140
144
|
|
|
145
|
+
def test_generate_chat_message_response_thinking_r1_style():
|
|
146
|
+
thinking_data = ModelTrainingData(
|
|
147
|
+
input="test input",
|
|
148
|
+
system_message="system message",
|
|
149
|
+
final_output="test output",
|
|
150
|
+
thinking="thinking output",
|
|
151
|
+
thinking_instructions=None,
|
|
152
|
+
thinking_final_answer_prompt=None,
|
|
153
|
+
thinking_r1_style=True,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
result = generate_chat_message_response(thinking_data)
|
|
157
|
+
|
|
158
|
+
assert result == {
|
|
159
|
+
"messages": [
|
|
160
|
+
{"role": "system", "content": "system message"},
|
|
161
|
+
{"role": "user", "content": "test input"},
|
|
162
|
+
{
|
|
163
|
+
"role": "assistant",
|
|
164
|
+
"content": "<think>\nthinking output\n</think>\n\ntest output",
|
|
165
|
+
},
|
|
166
|
+
]
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
|
|
141
170
|
def test_generate_chat_message_toolcall():
|
|
142
171
|
training_data = ModelTrainingData(
|
|
143
172
|
input="test input 你好",
|
|
@@ -206,6 +235,24 @@ def test_generate_chat_message_toolcall_thinking():
|
|
|
206
235
|
}
|
|
207
236
|
|
|
208
237
|
|
|
238
|
+
def test_generate_chat_message_toolcall_thinking_r1_style():
|
|
239
|
+
training_data = ModelTrainingData(
|
|
240
|
+
input="test input",
|
|
241
|
+
system_message="system message",
|
|
242
|
+
final_output='{"key": "value"}',
|
|
243
|
+
thinking="thinking output",
|
|
244
|
+
thinking_instructions=None,
|
|
245
|
+
thinking_final_answer_prompt=None,
|
|
246
|
+
thinking_r1_style=True,
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
with pytest.raises(
|
|
250
|
+
ValueError,
|
|
251
|
+
match="R1 style thinking is not supported for tool call downloads",
|
|
252
|
+
):
|
|
253
|
+
generate_chat_message_toolcall(training_data)
|
|
254
|
+
|
|
255
|
+
|
|
209
256
|
def test_generate_chat_message_toolcall_invalid_json():
|
|
210
257
|
training_data = ModelTrainingData(
|
|
211
258
|
input="test input",
|
|
@@ -368,6 +415,37 @@ def test_dataset_formatter_dump_with_intermediate_data(
|
|
|
368
415
|
assert "thinking instructions" in line
|
|
369
416
|
|
|
370
417
|
|
|
418
|
+
def test_dataset_formatter_dump_with_intermediate_data_r1_style(
|
|
419
|
+
mock_dataset, mock_intermediate_outputs
|
|
420
|
+
):
|
|
421
|
+
formatter = DatasetFormatter(
|
|
422
|
+
mock_dataset,
|
|
423
|
+
"system message 你好",
|
|
424
|
+
thinking_instructions=None,
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
result_path = formatter.dump_to_file(
|
|
428
|
+
"train",
|
|
429
|
+
DatasetFormat.OPENAI_CHAT_JSONL,
|
|
430
|
+
data_strategy=FinetuneDataStrategy.final_and_intermediate_r1_compatible,
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
assert result_path.exists()
|
|
434
|
+
assert result_path.parent == Path(tempfile.gettempdir())
|
|
435
|
+
# Test our nice naming, with cot
|
|
436
|
+
assert (
|
|
437
|
+
result_path.name
|
|
438
|
+
== "test_dataset -- split-train -- format-openai_chat_jsonl -- cot.jsonl"
|
|
439
|
+
)
|
|
440
|
+
# Verify file contents
|
|
441
|
+
with open(result_path) as f:
|
|
442
|
+
lines = f.readlines()
|
|
443
|
+
assert len(lines) == 2
|
|
444
|
+
for line in lines:
|
|
445
|
+
assert "<think>" in line
|
|
446
|
+
assert "</think>" in line
|
|
447
|
+
|
|
448
|
+
|
|
371
449
|
def test_dataset_formatter_dump_with_intermediate_data_custom_instructions(
|
|
372
450
|
mock_dataset, mock_intermediate_outputs
|
|
373
451
|
):
|
|
@@ -440,6 +518,31 @@ def test_generate_huggingface_chat_template_thinking():
|
|
|
440
518
|
}
|
|
441
519
|
|
|
442
520
|
|
|
521
|
+
def test_generate_huggingface_chat_template_thinking_r1_style():
|
|
522
|
+
training_data = ModelTrainingData(
|
|
523
|
+
input="test input",
|
|
524
|
+
system_message="system message",
|
|
525
|
+
final_output="test output",
|
|
526
|
+
thinking="thinking output",
|
|
527
|
+
thinking_instructions=None,
|
|
528
|
+
thinking_final_answer_prompt=None,
|
|
529
|
+
thinking_r1_style=True,
|
|
530
|
+
)
|
|
531
|
+
|
|
532
|
+
result = generate_huggingface_chat_template(training_data)
|
|
533
|
+
|
|
534
|
+
assert result == {
|
|
535
|
+
"conversations": [
|
|
536
|
+
{"role": "system", "content": "system message"},
|
|
537
|
+
{"role": "user", "content": "test input"},
|
|
538
|
+
{
|
|
539
|
+
"role": "assistant",
|
|
540
|
+
"content": "<think>\nthinking output\n</think>\n\ntest output",
|
|
541
|
+
},
|
|
542
|
+
]
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
|
|
443
546
|
def test_generate_vertex_template():
|
|
444
547
|
training_data = ModelTrainingData(
|
|
445
548
|
input="test input",
|
|
@@ -477,8 +580,6 @@ def test_generate_vertex_template_thinking():
|
|
|
477
580
|
|
|
478
581
|
result = generate_vertex_gemini(training_data)
|
|
479
582
|
|
|
480
|
-
logger.info(result)
|
|
481
|
-
|
|
482
583
|
assert result == {
|
|
483
584
|
"systemInstruction": {
|
|
484
585
|
"role": "system",
|
|
@@ -498,6 +599,23 @@ def test_generate_vertex_template_thinking():
|
|
|
498
599
|
}
|
|
499
600
|
|
|
500
601
|
|
|
602
|
+
def test_generate_vertex_template_thinking_r1_style():
|
|
603
|
+
training_data = ModelTrainingData(
|
|
604
|
+
input="test input",
|
|
605
|
+
system_message="system message",
|
|
606
|
+
final_output="test output",
|
|
607
|
+
thinking="thinking output",
|
|
608
|
+
thinking_instructions=None,
|
|
609
|
+
thinking_final_answer_prompt=None,
|
|
610
|
+
thinking_r1_style=True,
|
|
611
|
+
)
|
|
612
|
+
|
|
613
|
+
with pytest.raises(
|
|
614
|
+
ValueError, match="R1 style thinking is not supported for Vertex Gemini"
|
|
615
|
+
):
|
|
616
|
+
generate_vertex_gemini(training_data)
|
|
617
|
+
|
|
618
|
+
|
|
501
619
|
def test_generate_huggingface_chat_template_toolcall():
|
|
502
620
|
training_data = ModelTrainingData(
|
|
503
621
|
input="test input",
|
|
@@ -558,6 +676,24 @@ def test_generate_huggingface_chat_template_toolcall_thinking():
|
|
|
558
676
|
assert tool_call["function"]["arguments"] == {"key": "value"}
|
|
559
677
|
|
|
560
678
|
|
|
679
|
+
def test_generate_huggingface_chat_template_toolcall_thinking_r1_style():
|
|
680
|
+
training_data = ModelTrainingData(
|
|
681
|
+
input="test input",
|
|
682
|
+
system_message="system message",
|
|
683
|
+
final_output='{"key": "value"}',
|
|
684
|
+
thinking="thinking output",
|
|
685
|
+
thinking_instructions=None,
|
|
686
|
+
thinking_final_answer_prompt=None,
|
|
687
|
+
thinking_r1_style=True,
|
|
688
|
+
)
|
|
689
|
+
|
|
690
|
+
with pytest.raises(
|
|
691
|
+
ValueError,
|
|
692
|
+
match="R1 style thinking is not supported for tool call downloads",
|
|
693
|
+
):
|
|
694
|
+
generate_huggingface_chat_template_toolcall(training_data)
|
|
695
|
+
|
|
696
|
+
|
|
561
697
|
def test_generate_huggingface_chat_template_toolcall_invalid_json():
|
|
562
698
|
training_data = ModelTrainingData(
|
|
563
699
|
input="test input",
|
|
@@ -572,7 +708,11 @@ def test_generate_huggingface_chat_template_toolcall_invalid_json():
|
|
|
572
708
|
def test_build_training_data(mock_task):
|
|
573
709
|
# Non repaired should use original output
|
|
574
710
|
mock_task_run = mock_task.runs()[0]
|
|
575
|
-
training_data_output = build_training_data(
|
|
711
|
+
training_data_output = build_training_data(
|
|
712
|
+
mock_task_run,
|
|
713
|
+
"system message",
|
|
714
|
+
data_strategy=FinetuneDataStrategy.final_only,
|
|
715
|
+
)
|
|
576
716
|
assert training_data_output.final_output == '{"test": "output 你好"}'
|
|
577
717
|
assert training_data_output.thinking is None
|
|
578
718
|
assert training_data_output.thinking_instructions is None
|
|
@@ -587,11 +727,12 @@ def test_build_training_data_with_COT(mock_task):
|
|
|
587
727
|
mock_task_run = mock_task.runs()[0]
|
|
588
728
|
assert mock_task_run.parent_task() == mock_task
|
|
589
729
|
mock_task_run.intermediate_outputs = {"chain_of_thought": "cot output"}
|
|
730
|
+
mock_task_run.thinking_training_data.return_value = "cot output"
|
|
590
731
|
|
|
591
732
|
training_data_output = build_training_data(
|
|
592
733
|
mock_task_run,
|
|
593
734
|
"system message",
|
|
594
|
-
|
|
735
|
+
data_strategy=FinetuneDataStrategy.final_and_intermediate,
|
|
595
736
|
thinking_instructions="thinking instructions",
|
|
596
737
|
)
|
|
597
738
|
assert training_data_output.final_output == '{"test": "output 你好"}'
|
|
@@ -600,9 +741,59 @@ def test_build_training_data_with_COT(mock_task):
|
|
|
600
741
|
assert training_data_output.thinking_final_answer_prompt == COT_FINAL_ANSWER_PROMPT
|
|
601
742
|
assert training_data_output.input == '{"test": "input 你好"}'
|
|
602
743
|
assert training_data_output.system_message == "system message"
|
|
744
|
+
assert training_data_output.thinking_r1_style == False
|
|
603
745
|
assert training_data_output.supports_cot()
|
|
604
746
|
|
|
605
747
|
|
|
748
|
+
def test_model_training_data_supports_cot(mock_task):
|
|
749
|
+
training_data = ModelTrainingData(
|
|
750
|
+
input="test input",
|
|
751
|
+
system_message="system message",
|
|
752
|
+
final_output="test output",
|
|
753
|
+
thinking="thinking output",
|
|
754
|
+
thinking_instructions="thinking instructions",
|
|
755
|
+
thinking_final_answer_prompt=COT_FINAL_ANSWER_PROMPT,
|
|
756
|
+
thinking_r1_style=False,
|
|
757
|
+
)
|
|
758
|
+
assert training_data.supports_cot() == True
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def test_model_training_data_supports_cot_r1_style(mock_task):
|
|
762
|
+
training_data = ModelTrainingData(
|
|
763
|
+
input="test input",
|
|
764
|
+
system_message="system message",
|
|
765
|
+
final_output="test output",
|
|
766
|
+
thinking="thinking output",
|
|
767
|
+
thinking_instructions="thinking instructions",
|
|
768
|
+
thinking_r1_style=True,
|
|
769
|
+
)
|
|
770
|
+
|
|
771
|
+
with pytest.raises(ValueError, match="R1 style does not support COT"):
|
|
772
|
+
training_data.supports_cot()
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
def test_build_training_data_with_COT_r1_style(mock_task):
|
|
776
|
+
# Setup with needed fields for thinking
|
|
777
|
+
mock_task_run = mock_task.runs()[0]
|
|
778
|
+
assert mock_task_run.parent_task() == mock_task
|
|
779
|
+
mock_task_run.intermediate_outputs = {"chain_of_thought": "cot output"}
|
|
780
|
+
mock_task_run.thinking_training_data.return_value = "cot output"
|
|
781
|
+
|
|
782
|
+
training_data_output = build_training_data(
|
|
783
|
+
mock_task_run,
|
|
784
|
+
"system message",
|
|
785
|
+
data_strategy=FinetuneDataStrategy.final_and_intermediate_r1_compatible,
|
|
786
|
+
thinking_instructions=None,
|
|
787
|
+
)
|
|
788
|
+
assert training_data_output.final_output == '{"test": "output 你好"}'
|
|
789
|
+
assert training_data_output.thinking == "cot output"
|
|
790
|
+
assert training_data_output.thinking_instructions == None
|
|
791
|
+
assert training_data_output.thinking_final_answer_prompt == None
|
|
792
|
+
assert training_data_output.input == '{"test": "input 你好"}'
|
|
793
|
+
assert training_data_output.system_message == "system message"
|
|
794
|
+
assert training_data_output.thinking_r1_style == True
|
|
795
|
+
|
|
796
|
+
|
|
606
797
|
def test_build_training_data_with_thinking(mock_task):
|
|
607
798
|
# Setup with needed fields for thinking
|
|
608
799
|
mock_task_run = mock_task.runs()[0]
|
|
@@ -612,13 +803,14 @@ def test_build_training_data_with_thinking(mock_task):
|
|
|
612
803
|
"reasoning": "thinking output",
|
|
613
804
|
"chain_of_thought": "cot output",
|
|
614
805
|
}
|
|
806
|
+
mock_task_run.thinking_training_data.return_value = "thinking output"
|
|
615
807
|
mock_task.thinking_instruction = "thinking instructions"
|
|
616
808
|
assert mock_task.thinking_instruction == "thinking instructions"
|
|
617
809
|
|
|
618
810
|
training_data_output = build_training_data(
|
|
619
811
|
mock_task_run,
|
|
620
812
|
"system message",
|
|
621
|
-
|
|
813
|
+
FinetuneDataStrategy.final_and_intermediate,
|
|
622
814
|
thinking_instructions="thinking instructions",
|
|
623
815
|
)
|
|
624
816
|
assert training_data_output.final_output == '{"test": "output 你好"}'
|
|
@@ -627,7 +819,36 @@ def test_build_training_data_with_thinking(mock_task):
|
|
|
627
819
|
assert training_data_output.thinking_final_answer_prompt == COT_FINAL_ANSWER_PROMPT
|
|
628
820
|
assert training_data_output.input == '{"test": "input 你好"}'
|
|
629
821
|
assert training_data_output.system_message == "system message"
|
|
630
|
-
assert training_data_output.
|
|
822
|
+
assert training_data_output.thinking_r1_style == False
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def test_build_training_data_with_thinking_r1_style(mock_task):
|
|
826
|
+
# Setup with needed fields for thinking
|
|
827
|
+
mock_task_run = mock_task.runs()[0]
|
|
828
|
+
assert mock_task_run.parent_task() == mock_task
|
|
829
|
+
# It should just use the reasoning output if both thinking and chain_of_thought are present
|
|
830
|
+
mock_task_run.intermediate_outputs = {
|
|
831
|
+
"reasoning": "thinking output",
|
|
832
|
+
"chain_of_thought": "cot output",
|
|
833
|
+
}
|
|
834
|
+
mock_task_run.thinking_training_data.return_value = "thinking output"
|
|
835
|
+
mock_task.thinking_instruction = "thinking instructions"
|
|
836
|
+
|
|
837
|
+
assert mock_task.thinking_instruction == "thinking instructions"
|
|
838
|
+
|
|
839
|
+
training_data_output = build_training_data(
|
|
840
|
+
mock_task_run,
|
|
841
|
+
"system message",
|
|
842
|
+
FinetuneDataStrategy.final_and_intermediate_r1_compatible,
|
|
843
|
+
thinking_instructions=None,
|
|
844
|
+
)
|
|
845
|
+
assert training_data_output.final_output == '{"test": "output 你好"}'
|
|
846
|
+
assert training_data_output.thinking == "thinking output"
|
|
847
|
+
assert training_data_output.thinking_instructions == None
|
|
848
|
+
assert training_data_output.thinking_final_answer_prompt == None
|
|
849
|
+
assert training_data_output.input == '{"test": "input 你好"}'
|
|
850
|
+
assert training_data_output.system_message == "system message"
|
|
851
|
+
assert training_data_output.thinking_r1_style == True
|
|
631
852
|
|
|
632
853
|
|
|
633
854
|
def test_build_training_data_with_repaired_output(mock_task):
|
|
@@ -642,7 +863,11 @@ def test_build_training_data_with_repaired_output(mock_task):
|
|
|
642
863
|
),
|
|
643
864
|
)
|
|
644
865
|
|
|
645
|
-
training_data_output = build_training_data(
|
|
866
|
+
training_data_output = build_training_data(
|
|
867
|
+
mock_task_run,
|
|
868
|
+
"system message",
|
|
869
|
+
data_strategy=FinetuneDataStrategy.final_only,
|
|
870
|
+
)
|
|
646
871
|
assert training_data_output.final_output == '{"test": "repaired output"}'
|
|
647
872
|
assert training_data_output.thinking is None
|
|
648
873
|
assert training_data_output.thinking_instructions is None
|
|
@@ -683,3 +908,35 @@ def test_dataset_formatter_dump_to_file_json_schema_format(mock_dataset, tmp_pat
|
|
|
683
908
|
assert assistant_msg["content"] == '{"test": "output 你好"}'
|
|
684
909
|
json_content = json.loads(assistant_msg["content"])
|
|
685
910
|
assert json_content == {"test": "output 你好"}
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
@pytest.mark.parametrize(
|
|
914
|
+
"thinking,final_output,expected_output",
|
|
915
|
+
[
|
|
916
|
+
("thinking", "final output", "<think>\nthinking\n</think>\n\nfinal output"),
|
|
917
|
+
("thinking", '{"name":"joe"}', '<think>\nthinking\n</think>\n\n{"name":"joe"}'),
|
|
918
|
+
],
|
|
919
|
+
)
|
|
920
|
+
def test_serialize_r1_style_message(thinking, final_output, expected_output):
|
|
921
|
+
assert (
|
|
922
|
+
serialize_r1_style_message(thinking=thinking, final_output=final_output)
|
|
923
|
+
== expected_output
|
|
924
|
+
)
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
@pytest.mark.parametrize(
|
|
928
|
+
"thinking,final_output",
|
|
929
|
+
[
|
|
930
|
+
(None, "final output"),
|
|
931
|
+
("", "final output"),
|
|
932
|
+
(" ", "final output"),
|
|
933
|
+
],
|
|
934
|
+
)
|
|
935
|
+
def test_serialize_r1_style_message_missing_thinking(thinking, final_output):
|
|
936
|
+
with pytest.raises(
|
|
937
|
+
ValueError,
|
|
938
|
+
match=re.escape(
|
|
939
|
+
"Thinking data is required when fine-tuning thinking models (R1, QwQ, etc). Please ensure your fine-tuning dataset contains reasoning or chain of thought output for every entry."
|
|
940
|
+
),
|
|
941
|
+
):
|
|
942
|
+
serialize_r1_style_message(thinking=thinking, final_output=final_output)
|