kiln-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +22 -44
- kiln_ai/adapters/chat/__init__.py +8 -0
- kiln_ai/adapters/chat/chat_formatter.py +234 -0
- kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +19 -6
- kiln_ai/adapters/eval/base_eval.py +8 -6
- kiln_ai/adapters/eval/eval_runner.py +4 -1
- kiln_ai/adapters/eval/g_eval.py +23 -5
- kiln_ai/adapters/eval/test_base_eval.py +166 -15
- kiln_ai/adapters/eval/test_eval_runner.py +3 -0
- kiln_ai/adapters/eval/test_g_eval.py +1 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +2 -2
- kiln_ai/adapters/fine_tune/dataset_formatter.py +138 -272
- kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +3 -3
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +4 -4
- kiln_ai/adapters/fine_tune/together_finetune.py +12 -1
- kiln_ai/adapters/ml_model_list.py +80 -43
- kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
- kiln_ai/adapters/model_adapters/litellm_adapter.py +79 -97
- kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
- kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -60
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +56 -21
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
- kiln_ai/adapters/prompt_builders.py +0 -16
- kiln_ai/adapters/provider_tools.py +27 -9
- kiln_ai/adapters/repair/test_repair_task.py +24 -3
- kiln_ai/adapters/test_adapter_registry.py +88 -28
- kiln_ai/adapters/test_ml_model_list.py +158 -0
- kiln_ai/adapters/test_prompt_adaptors.py +17 -3
- kiln_ai/adapters/test_prompt_builders.py +3 -16
- kiln_ai/adapters/test_provider_tools.py +69 -20
- kiln_ai/datamodel/__init__.py +0 -2
- kiln_ai/datamodel/datamodel_enums.py +38 -13
- kiln_ai/datamodel/finetune.py +12 -7
- kiln_ai/datamodel/task.py +68 -7
- kiln_ai/datamodel/test_basemodel.py +2 -1
- kiln_ai/datamodel/test_dataset_split.py +0 -8
- kiln_ai/datamodel/test_models.py +33 -10
- kiln_ai/datamodel/test_task.py +168 -2
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/dataset_import.py +1 -1
- kiln_ai/utils/logging.py +165 -0
- kiln_ai/utils/test_config.py +23 -0
- kiln_ai/utils/test_dataset_import.py +30 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/METADATA +1 -1
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/RECORD +54 -49
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.16.0.dist-info → kiln_ai-0.17.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -18,7 +18,7 @@ from kiln_ai.adapters.provider_tools import (
|
|
|
18
18
|
finetune_provider_model,
|
|
19
19
|
get_model_and_provider,
|
|
20
20
|
kiln_model_provider_from,
|
|
21
|
-
|
|
21
|
+
lite_llm_config_for_openai_compatible,
|
|
22
22
|
lite_llm_provider_model,
|
|
23
23
|
parse_custom_model_id,
|
|
24
24
|
provider_enabled,
|
|
@@ -27,10 +27,11 @@ from kiln_ai.adapters.provider_tools import (
|
|
|
27
27
|
)
|
|
28
28
|
from kiln_ai.datamodel import (
|
|
29
29
|
Finetune,
|
|
30
|
-
FinetuneDataStrategy,
|
|
31
30
|
StructuredOutputMode,
|
|
32
31
|
Task,
|
|
33
32
|
)
|
|
33
|
+
from kiln_ai.datamodel.datamodel_enums import ChatStrategy
|
|
34
|
+
from kiln_ai.datamodel.task import RunConfigProperties
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
@pytest.fixture(autouse=True)
|
|
@@ -71,7 +72,7 @@ def mock_finetune():
|
|
|
71
72
|
finetune.provider = ModelProviderName.openai
|
|
72
73
|
finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
|
|
73
74
|
finetune.structured_output_mode = StructuredOutputMode.json_schema
|
|
74
|
-
finetune.data_strategy =
|
|
75
|
+
finetune.data_strategy = ChatStrategy.single_turn
|
|
75
76
|
mock.return_value = finetune
|
|
76
77
|
yield mock
|
|
77
78
|
|
|
@@ -83,7 +84,7 @@ def mock_finetune_final_and_intermediate():
|
|
|
83
84
|
finetune.provider = ModelProviderName.openai
|
|
84
85
|
finetune.fine_tune_model_id = "ft:gpt-3.5-turbo:custom:model-123"
|
|
85
86
|
finetune.structured_output_mode = StructuredOutputMode.json_schema
|
|
86
|
-
finetune.data_strategy =
|
|
87
|
+
finetune.data_strategy = ChatStrategy.two_message_cot
|
|
87
88
|
mock.return_value = finetune
|
|
88
89
|
yield mock
|
|
89
90
|
|
|
@@ -95,9 +96,7 @@ def mock_finetune_r1_compatible():
|
|
|
95
96
|
finetune.provider = ModelProviderName.ollama
|
|
96
97
|
finetune.fine_tune_model_id = "ft:deepseek-r1:671b:custom:model-123"
|
|
97
98
|
finetune.structured_output_mode = StructuredOutputMode.json_schema
|
|
98
|
-
finetune.data_strategy =
|
|
99
|
-
FinetuneDataStrategy.final_and_intermediate_r1_compatible
|
|
100
|
-
)
|
|
99
|
+
finetune.data_strategy = ChatStrategy.single_turn_r1_thinking
|
|
101
100
|
mock.return_value = finetune
|
|
102
101
|
yield mock
|
|
103
102
|
|
|
@@ -357,6 +356,7 @@ async def test_kiln_model_provider_from_custom_model_valid(mock_config):
|
|
|
357
356
|
assert provider.supports_data_gen is False
|
|
358
357
|
assert provider.untested_model is True
|
|
359
358
|
assert provider.model_id == "custom_model"
|
|
359
|
+
assert provider.structured_output_mode == StructuredOutputMode.json_instructions
|
|
360
360
|
|
|
361
361
|
|
|
362
362
|
@pytest.mark.asyncio
|
|
@@ -374,6 +374,7 @@ async def test_kiln_model_provider_from_custom_registry(mock_config):
|
|
|
374
374
|
assert provider.supports_data_gen is False
|
|
375
375
|
assert provider.untested_model is True
|
|
376
376
|
assert provider.model_id == "gpt-4-turbo"
|
|
377
|
+
assert provider.structured_output_mode == StructuredOutputMode.json_instructions
|
|
377
378
|
|
|
378
379
|
|
|
379
380
|
@pytest.mark.asyncio
|
|
@@ -474,7 +475,7 @@ def test_finetune_provider_model_success_final_and_intermediate(
|
|
|
474
475
|
assert provider.name == ModelProviderName.openai
|
|
475
476
|
assert provider.model_id == "ft:gpt-3.5-turbo:custom:model-123"
|
|
476
477
|
assert provider.structured_output_mode == StructuredOutputMode.json_schema
|
|
477
|
-
assert provider.reasoning_capable is
|
|
478
|
+
assert provider.reasoning_capable is False
|
|
478
479
|
assert provider.parser == None
|
|
479
480
|
|
|
480
481
|
|
|
@@ -580,7 +581,7 @@ def test_finetune_provider_model_structured_mode(
|
|
|
580
581
|
finetune.provider = provider_name
|
|
581
582
|
finetune.fine_tune_model_id = "fireworks-model-123"
|
|
582
583
|
finetune.structured_output_mode = structured_output_mode
|
|
583
|
-
finetune.data_strategy =
|
|
584
|
+
finetune.data_strategy = ChatStrategy.single_turn
|
|
584
585
|
mock_finetune.return_value = finetune
|
|
585
586
|
|
|
586
587
|
provider = finetune_provider_model("project-123::task-456::finetune-789")
|
|
@@ -596,10 +597,20 @@ def test_openai_compatible_provider_config(mock_shared_config):
|
|
|
596
597
|
"""Test successful creation of an OpenAI compatible provider"""
|
|
597
598
|
model_id = "test_provider::gpt-4"
|
|
598
599
|
|
|
599
|
-
config =
|
|
600
|
+
config = lite_llm_config_for_openai_compatible(
|
|
601
|
+
RunConfigProperties(
|
|
602
|
+
model_name=model_id,
|
|
603
|
+
model_provider_name=ModelProviderName.openai_compatible,
|
|
604
|
+
prompt_id="simple_prompt_builder",
|
|
605
|
+
structured_output_mode="json_schema",
|
|
606
|
+
)
|
|
607
|
+
)
|
|
600
608
|
|
|
601
|
-
assert
|
|
602
|
-
|
|
609
|
+
assert (
|
|
610
|
+
config.run_config_properties.model_provider_name
|
|
611
|
+
== ModelProviderName.openai_compatible
|
|
612
|
+
)
|
|
613
|
+
assert config.run_config_properties.model_name == "gpt-4"
|
|
603
614
|
assert config.additional_body_options == {"api_key": "test-key"}
|
|
604
615
|
assert config.base_url == "https://api.test.com"
|
|
605
616
|
|
|
@@ -621,10 +632,20 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
|
|
|
621
632
|
"""Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
|
|
622
633
|
model_id = "no_key_provider::gpt-4"
|
|
623
634
|
|
|
624
|
-
config =
|
|
635
|
+
config = lite_llm_config_for_openai_compatible(
|
|
636
|
+
RunConfigProperties(
|
|
637
|
+
model_name=model_id,
|
|
638
|
+
model_provider_name=ModelProviderName.openai,
|
|
639
|
+
prompt_id="simple_prompt_builder",
|
|
640
|
+
structured_output_mode="json_schema",
|
|
641
|
+
)
|
|
642
|
+
)
|
|
625
643
|
|
|
626
|
-
assert
|
|
627
|
-
|
|
644
|
+
assert (
|
|
645
|
+
config.run_config_properties.model_provider_name
|
|
646
|
+
== ModelProviderName.openai_compatible
|
|
647
|
+
)
|
|
648
|
+
assert config.run_config_properties.model_name == "gpt-4"
|
|
628
649
|
assert config.additional_body_options == {"api_key": "NA"}
|
|
629
650
|
assert config.base_url == "https://api.nokey.com"
|
|
630
651
|
|
|
@@ -632,7 +653,14 @@ def test_lite_llm_config_no_api_key(mock_shared_config):
|
|
|
632
653
|
def test_lite_llm_config_invalid_id():
|
|
633
654
|
"""Test handling of invalid model ID format"""
|
|
634
655
|
with pytest.raises(ValueError) as exc_info:
|
|
635
|
-
|
|
656
|
+
lite_llm_config_for_openai_compatible(
|
|
657
|
+
RunConfigProperties(
|
|
658
|
+
model_name="invalid-id-format",
|
|
659
|
+
model_provider_name=ModelProviderName.openai_compatible,
|
|
660
|
+
prompt_id="simple_prompt_builder",
|
|
661
|
+
structured_output_mode="json_schema",
|
|
662
|
+
)
|
|
663
|
+
)
|
|
636
664
|
assert (
|
|
637
665
|
str(exc_info.value) == "Invalid openai compatible model ID: invalid-id-format"
|
|
638
666
|
)
|
|
@@ -643,14 +671,28 @@ def test_lite_llm_config_no_providers(mock_shared_config):
|
|
|
643
671
|
mock_shared_config.return_value.openai_compatible_providers = None
|
|
644
672
|
|
|
645
673
|
with pytest.raises(ValueError) as exc_info:
|
|
646
|
-
|
|
674
|
+
lite_llm_config_for_openai_compatible(
|
|
675
|
+
RunConfigProperties(
|
|
676
|
+
model_name="test_provider::gpt-4",
|
|
677
|
+
model_provider_name=ModelProviderName.openai_compatible,
|
|
678
|
+
prompt_id="simple_prompt_builder",
|
|
679
|
+
structured_output_mode="json_schema",
|
|
680
|
+
)
|
|
681
|
+
)
|
|
647
682
|
assert str(exc_info.value) == "OpenAI compatible provider test_provider not found"
|
|
648
683
|
|
|
649
684
|
|
|
650
685
|
def test_lite_llm_config_provider_not_found(mock_shared_config):
|
|
651
686
|
"""Test handling of non-existent provider"""
|
|
652
687
|
with pytest.raises(ValueError) as exc_info:
|
|
653
|
-
|
|
688
|
+
lite_llm_config_for_openai_compatible(
|
|
689
|
+
RunConfigProperties(
|
|
690
|
+
model_name="unknown_provider::gpt-4",
|
|
691
|
+
model_provider_name=ModelProviderName.openai_compatible,
|
|
692
|
+
prompt_id="simple_prompt_builder",
|
|
693
|
+
structured_output_mode="json_schema",
|
|
694
|
+
)
|
|
695
|
+
)
|
|
654
696
|
assert (
|
|
655
697
|
str(exc_info.value) == "OpenAI compatible provider unknown_provider not found"
|
|
656
698
|
)
|
|
@@ -666,7 +708,14 @@ def test_lite_llm_config_no_base_url(mock_shared_config):
|
|
|
666
708
|
]
|
|
667
709
|
|
|
668
710
|
with pytest.raises(ValueError) as exc_info:
|
|
669
|
-
|
|
711
|
+
lite_llm_config_for_openai_compatible(
|
|
712
|
+
RunConfigProperties(
|
|
713
|
+
model_name="test_provider::gpt-4",
|
|
714
|
+
model_provider_name=ModelProviderName.openai_compatible,
|
|
715
|
+
prompt_id="simple_prompt_builder",
|
|
716
|
+
structured_output_mode="json_schema",
|
|
717
|
+
)
|
|
718
|
+
)
|
|
670
719
|
assert (
|
|
671
720
|
str(exc_info.value)
|
|
672
721
|
== "OpenAI compatible provider test_provider has no base URL"
|
|
@@ -867,7 +916,7 @@ def test_finetune_provider_model_vertex_ai(mock_project, mock_task, mock_finetun
|
|
|
867
916
|
finetune.provider = ModelProviderName.vertex
|
|
868
917
|
finetune.fine_tune_model_id = "projects/123/locations/us-central1/endpoints/456"
|
|
869
918
|
finetune.structured_output_mode = StructuredOutputMode.json_mode
|
|
870
|
-
finetune.data_strategy =
|
|
919
|
+
finetune.data_strategy = ChatStrategy.single_turn
|
|
871
920
|
mock_finetune.return_value = finetune
|
|
872
921
|
|
|
873
922
|
provider = finetune_provider_model("project-123::task-456::finetune-789")
|
kiln_ai/datamodel/__init__.py
CHANGED
|
@@ -13,7 +13,6 @@ from __future__ import annotations
|
|
|
13
13
|
|
|
14
14
|
from kiln_ai.datamodel import dataset_split, eval, strict_mode
|
|
15
15
|
from kiln_ai.datamodel.datamodel_enums import (
|
|
16
|
-
FinetuneDataStrategy,
|
|
17
16
|
FineTuneStatusType,
|
|
18
17
|
Priority,
|
|
19
18
|
StructuredOutputMode,
|
|
@@ -71,7 +70,6 @@ __all__ = [
|
|
|
71
70
|
"Prompt",
|
|
72
71
|
"TaskOutputRating",
|
|
73
72
|
"StructuredOutputMode",
|
|
74
|
-
"FinetuneDataStrategy",
|
|
75
73
|
"PromptId",
|
|
76
74
|
"PromptGenerators",
|
|
77
75
|
"prompt_generator_values",
|
|
@@ -24,13 +24,14 @@ class StructuredOutputMode(str, Enum):
|
|
|
24
24
|
"""
|
|
25
25
|
Enumeration of supported structured output modes.
|
|
26
26
|
|
|
27
|
-
- default: let the adapter decide
|
|
28
27
|
- json_schema: request json using API capabilities for json_schema
|
|
29
28
|
- function_calling: request json using API capabilities for function calling
|
|
30
29
|
- json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
|
|
31
30
|
- json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
|
|
32
31
|
- json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
|
|
33
32
|
- json_custom_instructions: The model should output JSON, but custom instructions are already included in the system prompt. Don't append additional JSON instructions.
|
|
33
|
+
- default: let the adapter decide (legacy, do not use for new use cases)
|
|
34
|
+
- unknown: used for cases where the structured output mode is not known (on old models where it wasn't saved). Should lookup best option at runtime.
|
|
34
35
|
"""
|
|
35
36
|
|
|
36
37
|
default = "default"
|
|
@@ -41,6 +42,7 @@ class StructuredOutputMode(str, Enum):
|
|
|
41
42
|
json_instructions = "json_instructions"
|
|
42
43
|
json_instruction_and_object = "json_instruction_and_object"
|
|
43
44
|
json_custom_instructions = "json_custom_instructions"
|
|
45
|
+
unknown = "unknown"
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
class FineTuneStatusType(str, Enum):
|
|
@@ -55,20 +57,43 @@ class FineTuneStatusType(str, Enum):
|
|
|
55
57
|
failed = "failed"
|
|
56
58
|
|
|
57
59
|
|
|
58
|
-
class
|
|
59
|
-
"""Strategy for
|
|
60
|
+
class ChatStrategy(str, Enum):
|
|
61
|
+
"""Strategy for how a chat is structured."""
|
|
60
62
|
|
|
61
|
-
#
|
|
62
|
-
|
|
63
|
+
# Single turn, immediately return the answer
|
|
64
|
+
single_turn = "final_only"
|
|
65
|
+
# Two turn, first turn is the thinking, second turn is the answer. Legacy format - used for old fine tunes but not new trains.
|
|
66
|
+
two_message_cot_legacy = "final_and_intermediate"
|
|
67
|
+
# Two turn, first turn is the thinking, second turn is the answer. New format - used for new trains.
|
|
68
|
+
two_message_cot = "two_message_cot"
|
|
69
|
+
# Single turn, with both the thinking and the answer in the same message, using R1-style thinking format in <think> tags
|
|
70
|
+
single_turn_r1_thinking = "final_and_intermediate_r1_compatible"
|
|
63
71
|
|
|
64
|
-
# Train on both the final response and any intermediate steps/chain of thought
|
|
65
|
-
final_and_intermediate = "final_and_intermediate"
|
|
66
72
|
|
|
67
|
-
|
|
68
|
-
|
|
73
|
+
THINKING_DATA_STRATEGIES: list[ChatStrategy] = [
|
|
74
|
+
ChatStrategy.two_message_cot_legacy,
|
|
75
|
+
ChatStrategy.single_turn_r1_thinking,
|
|
76
|
+
ChatStrategy.two_message_cot,
|
|
77
|
+
]
|
|
69
78
|
|
|
70
79
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
80
|
+
class ModelProviderName(str, Enum):
|
|
81
|
+
"""
|
|
82
|
+
Enumeration of supported AI model providers.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
openai = "openai"
|
|
86
|
+
groq = "groq"
|
|
87
|
+
amazon_bedrock = "amazon_bedrock"
|
|
88
|
+
ollama = "ollama"
|
|
89
|
+
openrouter = "openrouter"
|
|
90
|
+
fireworks_ai = "fireworks_ai"
|
|
91
|
+
kiln_fine_tune = "kiln_fine_tune"
|
|
92
|
+
kiln_custom_registry = "kiln_custom_registry"
|
|
93
|
+
openai_compatible = "openai_compatible"
|
|
94
|
+
anthropic = "anthropic"
|
|
95
|
+
gemini_api = "gemini_api"
|
|
96
|
+
azure_openai = "azure_openai"
|
|
97
|
+
huggingface = "huggingface"
|
|
98
|
+
vertex = "vertex"
|
|
99
|
+
together_ai = "together_ai"
|
kiln_ai/datamodel/finetune.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing_extensions import Self
|
|
|
6
6
|
from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
|
|
7
7
|
from kiln_ai.datamodel.datamodel_enums import (
|
|
8
8
|
THINKING_DATA_STRATEGIES,
|
|
9
|
-
|
|
9
|
+
ChatStrategy,
|
|
10
10
|
FineTuneStatusType,
|
|
11
11
|
StructuredOutputMode,
|
|
12
12
|
)
|
|
@@ -14,6 +14,11 @@ from kiln_ai.datamodel.datamodel_enums import (
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from kiln_ai.datamodel.task import Task
|
|
16
16
|
|
|
17
|
+
DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS = [
|
|
18
|
+
ChatStrategy.two_message_cot_legacy,
|
|
19
|
+
ChatStrategy.two_message_cot,
|
|
20
|
+
]
|
|
21
|
+
|
|
17
22
|
|
|
18
23
|
class Finetune(KilnParentedModel):
|
|
19
24
|
"""
|
|
@@ -76,8 +81,8 @@ class Finetune(KilnParentedModel):
|
|
|
76
81
|
default={},
|
|
77
82
|
description="Properties of the fine-tune. Different providers may use different properties.",
|
|
78
83
|
)
|
|
79
|
-
data_strategy:
|
|
80
|
-
default=
|
|
84
|
+
data_strategy: ChatStrategy = Field(
|
|
85
|
+
default=ChatStrategy.single_turn,
|
|
81
86
|
description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
|
|
82
87
|
)
|
|
83
88
|
|
|
@@ -91,16 +96,16 @@ class Finetune(KilnParentedModel):
|
|
|
91
96
|
def validate_thinking_instructions(self) -> Self:
|
|
92
97
|
if (
|
|
93
98
|
self.thinking_instructions is not None
|
|
94
|
-
and self.data_strategy
|
|
99
|
+
and self.data_strategy not in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
|
|
95
100
|
):
|
|
96
101
|
raise ValueError(
|
|
97
|
-
"Thinking instructions can only be used when data_strategy is
|
|
102
|
+
f"Thinking instructions can only be used when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
|
|
98
103
|
)
|
|
99
104
|
if (
|
|
100
105
|
self.thinking_instructions is None
|
|
101
|
-
and self.data_strategy
|
|
106
|
+
and self.data_strategy in DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS
|
|
102
107
|
):
|
|
103
108
|
raise ValueError(
|
|
104
|
-
"Thinking instructions are required when data_strategy is
|
|
109
|
+
f"Thinking instructions are required when data_strategy is one of the following: {DATA_STRATIGIES_REQUIRED_THINKING_INSTRUCTIONS}"
|
|
105
110
|
)
|
|
106
111
|
return self
|
kiln_ai/datamodel/task.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING, Dict, List, Union
|
|
2
2
|
|
|
3
|
-
from pydantic import BaseModel, Field
|
|
3
|
+
from pydantic import BaseModel, Field, ValidationInfo, model_validator
|
|
4
|
+
from typing_extensions import Self
|
|
4
5
|
|
|
5
6
|
from kiln_ai.datamodel import Finetune
|
|
6
7
|
from kiln_ai.datamodel.basemodel import (
|
|
@@ -11,7 +12,12 @@ from kiln_ai.datamodel.basemodel import (
|
|
|
11
12
|
KilnParentedModel,
|
|
12
13
|
KilnParentModel,
|
|
13
14
|
)
|
|
14
|
-
from kiln_ai.datamodel.datamodel_enums import
|
|
15
|
+
from kiln_ai.datamodel.datamodel_enums import (
|
|
16
|
+
ModelProviderName,
|
|
17
|
+
Priority,
|
|
18
|
+
StructuredOutputMode,
|
|
19
|
+
TaskOutputRatingType,
|
|
20
|
+
)
|
|
15
21
|
from kiln_ai.datamodel.dataset_split import DatasetSplit
|
|
16
22
|
from kiln_ai.datamodel.eval import Eval
|
|
17
23
|
from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
|
|
@@ -47,12 +53,33 @@ class RunConfigProperties(BaseModel):
|
|
|
47
53
|
"""
|
|
48
54
|
|
|
49
55
|
model_name: str = Field(description="The model to use for this run config.")
|
|
50
|
-
model_provider_name:
|
|
56
|
+
model_provider_name: ModelProviderName = Field(
|
|
51
57
|
description="The provider to use for this run config."
|
|
52
58
|
)
|
|
53
59
|
prompt_id: PromptId = Field(
|
|
54
60
|
description="The prompt to use for this run config. Defaults to building a simple prompt from the task if not provided.",
|
|
55
61
|
)
|
|
62
|
+
top_p: float = Field(
|
|
63
|
+
default=1.0,
|
|
64
|
+
description="The top-p value to use for this run config. Defaults to 1.0.",
|
|
65
|
+
)
|
|
66
|
+
temperature: float = Field(
|
|
67
|
+
default=1.0,
|
|
68
|
+
description="The temperature to use for this run config. Defaults to 1.0.",
|
|
69
|
+
)
|
|
70
|
+
structured_output_mode: StructuredOutputMode = Field(
|
|
71
|
+
description="The structured output mode to use for this run config.",
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
@model_validator(mode="after")
|
|
75
|
+
def validate_required_fields(self) -> Self:
|
|
76
|
+
if not (0 <= self.top_p <= 1):
|
|
77
|
+
raise ValueError("top_p must be between 0 and 1")
|
|
78
|
+
|
|
79
|
+
elif self.temperature < 0 or self.temperature > 2:
|
|
80
|
+
raise ValueError("temperature must be between 0 and 2")
|
|
81
|
+
|
|
82
|
+
return self
|
|
56
83
|
|
|
57
84
|
|
|
58
85
|
class RunConfig(RunConfigProperties):
|
|
@@ -101,12 +128,46 @@ class TaskRunConfig(KilnParentedModel):
|
|
|
101
128
|
parent_task = self.parent_task()
|
|
102
129
|
if parent_task is None:
|
|
103
130
|
raise ValueError("Run config must be parented to a task")
|
|
104
|
-
return
|
|
131
|
+
return run_config_from_run_config_properties(
|
|
105
132
|
task=parent_task,
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
133
|
+
run_config_properties=self.run_config_properties,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# Previously we didn't store structured_output_mode in the run_config_properties. Upgrade old models when loading from file.
|
|
137
|
+
@model_validator(mode="before")
|
|
138
|
+
def upgrade_old_entries(cls, data: dict, info: ValidationInfo) -> dict:
|
|
139
|
+
if not info.context or not info.context.get("loading_from_file", False):
|
|
140
|
+
# Not loading from file, so no need to upgrade
|
|
141
|
+
return data
|
|
142
|
+
|
|
143
|
+
if not isinstance(data, dict):
|
|
144
|
+
return data
|
|
145
|
+
|
|
146
|
+
structured_output_mode = data.get("run_config_properties", {}).get(
|
|
147
|
+
"structured_output_mode", None
|
|
109
148
|
)
|
|
149
|
+
if structured_output_mode is None and "run_config_properties" in data:
|
|
150
|
+
# Default to unknown. Adapter will have to guess at runtime.
|
|
151
|
+
data["run_config_properties"]["structured_output_mode"] = (
|
|
152
|
+
StructuredOutputMode.unknown
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
return data
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def run_config_from_run_config_properties(
|
|
159
|
+
task: "Task",
|
|
160
|
+
run_config_properties: RunConfigProperties,
|
|
161
|
+
) -> RunConfig:
|
|
162
|
+
return RunConfig(
|
|
163
|
+
task=task,
|
|
164
|
+
model_name=run_config_properties.model_name,
|
|
165
|
+
model_provider_name=run_config_properties.model_provider_name,
|
|
166
|
+
prompt_id=run_config_properties.prompt_id,
|
|
167
|
+
top_p=run_config_properties.top_p,
|
|
168
|
+
temperature=run_config_properties.temperature,
|
|
169
|
+
structured_output_mode=run_config_properties.structured_output_mode,
|
|
170
|
+
)
|
|
110
171
|
|
|
111
172
|
|
|
112
173
|
class Task(
|
|
@@ -500,8 +500,9 @@ def adapter(base_task):
|
|
|
500
500
|
run_config=RunConfig(
|
|
501
501
|
task=base_task,
|
|
502
502
|
model_name="test_model",
|
|
503
|
-
model_provider_name="
|
|
503
|
+
model_provider_name="openai",
|
|
504
504
|
prompt_id="simple_prompt_builder",
|
|
505
|
+
structured_output_mode="json_schema",
|
|
505
506
|
),
|
|
506
507
|
)
|
|
507
508
|
|
|
@@ -72,14 +72,6 @@ def sample_task_runs(sample_task):
|
|
|
72
72
|
return task_runs
|
|
73
73
|
|
|
74
74
|
|
|
75
|
-
@pytest.fixture
|
|
76
|
-
def standard_splitstandard_splitss():
|
|
77
|
-
return [
|
|
78
|
-
DatasetSplitDefinition(name="train", percentage=0.8),
|
|
79
|
-
DatasetSplitDefinition(name="test", percentage=0.2),
|
|
80
|
-
]
|
|
81
|
-
|
|
82
|
-
|
|
83
75
|
@pytest.fixture
|
|
84
76
|
def task_run():
|
|
85
77
|
return TaskRun(
|
kiln_ai/datamodel/test_models.py
CHANGED
|
@@ -9,13 +9,13 @@ from kiln_ai.datamodel import (
|
|
|
9
9
|
DataSource,
|
|
10
10
|
DataSourceType,
|
|
11
11
|
Finetune,
|
|
12
|
-
FinetuneDataStrategy,
|
|
13
12
|
Project,
|
|
14
13
|
Prompt,
|
|
15
14
|
Task,
|
|
16
15
|
TaskOutput,
|
|
17
16
|
TaskRun,
|
|
18
17
|
)
|
|
18
|
+
from kiln_ai.datamodel.datamodel_enums import ChatStrategy
|
|
19
19
|
from kiln_ai.datamodel.test_json_schema import json_joke_schema
|
|
20
20
|
|
|
21
21
|
|
|
@@ -536,44 +536,58 @@ def test_prompt_parent_task():
|
|
|
536
536
|
# Test 1: Valid case - no thinking instructions with final_only
|
|
537
537
|
(
|
|
538
538
|
None,
|
|
539
|
-
|
|
539
|
+
ChatStrategy.single_turn,
|
|
540
540
|
False,
|
|
541
541
|
None,
|
|
542
542
|
),
|
|
543
543
|
# Test 2: Valid case - thinking instructions with final_and_intermediate
|
|
544
544
|
(
|
|
545
545
|
"Think step by step",
|
|
546
|
-
|
|
546
|
+
ChatStrategy.two_message_cot_legacy,
|
|
547
547
|
False,
|
|
548
548
|
None,
|
|
549
549
|
),
|
|
550
550
|
# Test 3: Valid case - no thinking instructions with final_and_intermediate_r1_compatible
|
|
551
551
|
(
|
|
552
552
|
None,
|
|
553
|
-
|
|
553
|
+
ChatStrategy.single_turn_r1_thinking,
|
|
554
554
|
False,
|
|
555
555
|
None,
|
|
556
556
|
),
|
|
557
557
|
# Test 4: Invalid case - thinking instructions with final_only
|
|
558
558
|
(
|
|
559
559
|
"Think step by step",
|
|
560
|
-
|
|
560
|
+
ChatStrategy.single_turn,
|
|
561
561
|
True,
|
|
562
|
-
"Thinking instructions can only be used when data_strategy is
|
|
562
|
+
"Thinking instructions can only be used when data_strategy is",
|
|
563
563
|
),
|
|
564
564
|
# Test 5: Invalid case - no thinking instructions with final_and_intermediate
|
|
565
565
|
(
|
|
566
566
|
None,
|
|
567
|
-
|
|
567
|
+
ChatStrategy.two_message_cot_legacy,
|
|
568
568
|
True,
|
|
569
|
-
"Thinking instructions are required when data_strategy is
|
|
569
|
+
"Thinking instructions are required when data_strategy is",
|
|
570
570
|
),
|
|
571
571
|
# Test 6: Invalid case - thinking instructions with final_and_intermediate_r1_compatible
|
|
572
572
|
(
|
|
573
573
|
"Think step by step",
|
|
574
|
-
|
|
574
|
+
ChatStrategy.single_turn_r1_thinking,
|
|
575
575
|
True,
|
|
576
|
-
"Thinking instructions can only be used when data_strategy is
|
|
576
|
+
"Thinking instructions can only be used when data_strategy is",
|
|
577
|
+
),
|
|
578
|
+
# Test 7: Valid case - thinking instructions with two_message_cot (new COT format)
|
|
579
|
+
(
|
|
580
|
+
"Think step by step",
|
|
581
|
+
ChatStrategy.two_message_cot,
|
|
582
|
+
False,
|
|
583
|
+
None,
|
|
584
|
+
),
|
|
585
|
+
# Test 8: Invalid case - no thinking instructions with two_message_cot (new COT format)
|
|
586
|
+
(
|
|
587
|
+
None,
|
|
588
|
+
ChatStrategy.two_message_cot,
|
|
589
|
+
True,
|
|
590
|
+
"Thinking instructions are required when data_strategy is",
|
|
577
591
|
),
|
|
578
592
|
],
|
|
579
593
|
)
|
|
@@ -665,3 +679,12 @@ def test_task_run_thinking_training_data(intermediate_outputs, expected):
|
|
|
665
679
|
intermediate_outputs=intermediate_outputs,
|
|
666
680
|
)
|
|
667
681
|
assert task_run.thinking_training_data() == expected
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def test_chat_strategy_enum():
|
|
685
|
+
# This has to align to the old FinetuneDataStrategy enum
|
|
686
|
+
assert ChatStrategy.single_turn == "final_only"
|
|
687
|
+
assert ChatStrategy.two_message_cot_legacy == "final_and_intermediate"
|
|
688
|
+
assert (
|
|
689
|
+
ChatStrategy.single_turn_r1_thinking == "final_and_intermediate_r1_compatible"
|
|
690
|
+
)
|