kiln-ai 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kiln_ai/adapters/__init__.py +4 -0
- kiln_ai/adapters/adapter_registry.py +163 -39
- kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
- kiln_ai/adapters/eval/__init__.py +28 -0
- kiln_ai/adapters/eval/base_eval.py +164 -0
- kiln_ai/adapters/eval/eval_runner.py +270 -0
- kiln_ai/adapters/eval/g_eval.py +368 -0
- kiln_ai/adapters/eval/registry.py +16 -0
- kiln_ai/adapters/eval/test_base_eval.py +325 -0
- kiln_ai/adapters/eval/test_eval_runner.py +641 -0
- kiln_ai/adapters/eval/test_g_eval.py +498 -0
- kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
- kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
- kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
- kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
- kiln_ai/adapters/ml_model_list.py +758 -163
- kiln_ai/adapters/model_adapters/__init__.py +2 -4
- kiln_ai/adapters/model_adapters/base_adapter.py +61 -43
- kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
- kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
- kiln_ai/adapters/model_adapters/test_structured_output.py +59 -35
- kiln_ai/adapters/ollama_tools.py +3 -3
- kiln_ai/adapters/parsers/r1_parser.py +19 -14
- kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
- kiln_ai/adapters/prompt_builders.py +80 -42
- kiln_ai/adapters/provider_tools.py +50 -58
- kiln_ai/adapters/repair/repair_task.py +9 -21
- kiln_ai/adapters/repair/test_repair_task.py +6 -6
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +26 -29
- kiln_ai/adapters/test_generate_docs.py +4 -4
- kiln_ai/adapters/test_ollama_tools.py +0 -1
- kiln_ai/adapters/test_prompt_adaptors.py +47 -33
- kiln_ai/adapters/test_prompt_builders.py +91 -31
- kiln_ai/adapters/test_provider_tools.py +26 -81
- kiln_ai/datamodel/__init__.py +50 -952
- kiln_ai/datamodel/basemodel.py +2 -0
- kiln_ai/datamodel/datamodel_enums.py +60 -0
- kiln_ai/datamodel/dataset_filters.py +114 -0
- kiln_ai/datamodel/dataset_split.py +170 -0
- kiln_ai/datamodel/eval.py +298 -0
- kiln_ai/datamodel/finetune.py +105 -0
- kiln_ai/datamodel/json_schema.py +7 -1
- kiln_ai/datamodel/project.py +23 -0
- kiln_ai/datamodel/prompt.py +37 -0
- kiln_ai/datamodel/prompt_id.py +83 -0
- kiln_ai/datamodel/strict_mode.py +24 -0
- kiln_ai/datamodel/task.py +181 -0
- kiln_ai/datamodel/task_output.py +328 -0
- kiln_ai/datamodel/task_run.py +164 -0
- kiln_ai/datamodel/test_basemodel.py +19 -11
- kiln_ai/datamodel/test_dataset_filters.py +71 -0
- kiln_ai/datamodel/test_dataset_split.py +32 -8
- kiln_ai/datamodel/test_datasource.py +22 -2
- kiln_ai/datamodel/test_eval_model.py +635 -0
- kiln_ai/datamodel/test_example_models.py +9 -13
- kiln_ai/datamodel/test_json_schema.py +23 -0
- kiln_ai/datamodel/test_models.py +2 -2
- kiln_ai/datamodel/test_prompt_id.py +129 -0
- kiln_ai/datamodel/test_task.py +159 -0
- kiln_ai/utils/config.py +43 -1
- kiln_ai/utils/dataset_import.py +232 -0
- kiln_ai/utils/test_dataset_import.py +596 -0
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +86 -6
- kiln_ai-0.13.0.dist-info/RECORD +103 -0
- kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -302
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -11
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -246
- kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -350
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -225
- kiln_ai-0.11.1.dist-info/RECORD +0 -76
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
 from kiln_ai.adapters.prompt_builders import (
     BasePromptBuilder,
     SavedPromptBuilder,
-
+    prompt_builder_from_id,
 )
 from kiln_ai.datamodel import Priority, Project, Task, TaskRequirement, TaskRun

@@ -49,28 +49,16 @@ feedback describing what should be improved. Your job is to understand the evalu
         if run.output.source is None or run.output.source.properties is None:
             raise ValueError("No source properties found")

-        #
-        prompt_id = run.output.source.properties.get(
+        # Get the prompt builder id. Need the second check because we used to store this in a prompt_builder_name field, so loading legacy runs will need this.
+        prompt_id = run.output.source.properties.get(
+            "prompt_id"
+        ) or run.output.source.properties.get("prompt_builder_name", None)
         if prompt_id is not None and isinstance(prompt_id, str):
-
-
+            prompt_builder = prompt_builder_from_id(prompt_id, task)
+            if isinstance(prompt_builder, BasePromptBuilder):
+                return prompt_builder.build_prompt(include_json_instructions=False)

-
-        prompt_builder_name = run.output.source.properties.get(
-            "prompt_builder_name", None
-        )
-        if prompt_builder_name is not None and isinstance(prompt_builder_name, str):
-            prompt_builder_class = prompt_builder_registry.get(
-                prompt_builder_name, None
-            )
-            if prompt_builder_class is None:
-                raise ValueError(f"No prompt builder found for name: {prompt_builder_name}")
-            prompt_builder = prompt_builder_class(task=task)
-            if not isinstance(prompt_builder, BasePromptBuilder):
-                raise ValueError(
-                    f"Prompt builder {prompt_builder_name} is not a valid prompt builder"
-                )
-            return prompt_builder.build_prompt(include_json_instructions=False)
+        raise ValueError(f"Prompt builder '{prompt_id}' is not a valid prompt builder")

     @classmethod
     def build_repair_task_input(
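Note: a minimal sketch (not part of the diff) of the legacy-key fallback introduced above, using a hypothetical properties dict. Newer runs store the prompt under "prompt_id"; older runs stored it under "prompt_builder_name":

# Hypothetical source properties from a legacy run that stored "prompt_builder_name".
source_properties = {"prompt_builder_name": "simple_prompt_builder"}
prompt_id = source_properties.get("prompt_id") or source_properties.get(
    "prompt_builder_name", None
)
assert prompt_id == "simple_prompt_builder"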
@@ -7,7 +7,7 @@ from pydantic import ValidationError

 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.model_adapters.base_adapter import RunOutput
-from kiln_ai.adapters.model_adapters.
+from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
 from kiln_ai.adapters.repair.repair_task import (
     RepairTaskInput,
     RepairTaskRun,
@@ -95,7 +95,7 @@ def sample_task_run(sample_task):
                     "model_name": "gpt_4o",
                     "model_provider": "openai",
                     "adapter_name": "langchain_adapter",
-                    "
+                    "prompt_id": "simple_prompt_builder",
                 },
             ),
         ),
@@ -201,7 +201,7 @@ async def test_live_run(sample_task, sample_task_run, sample_repair_data):
         "adapter_name": "kiln_langchain_adapter",
         "model_name": "llama_3_1_8b",
         "model_provider": "groq",
-        "
+        "prompt_id": "simple_prompt_builder",
     }


@@ -217,7 +217,7 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
        "rating": 8,
    }

-    with patch.object(
+    with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
        mock_run.return_value = RunOutput(
            output=mocked_output, intermediate_outputs=None
        )
@@ -235,10 +235,10 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
    parsed_output = json.loads(run.output.output)
    assert parsed_output == mocked_output
    assert run.output.source.properties == {
-        "adapter_name": "
+        "adapter_name": "kiln_openai_compatible_adapter",
        "model_name": "llama_3_1_8b",
        "model_provider": "ollama",
-        "
+        "prompt_id": "simple_prompt_builder",
    }
    assert run.input_source.type == DataSourceType.human
    assert "created_by" in run.input_source.properties
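Note: the repair-task hunks above show the new shape of run.output.source.properties in this release: "prompt_builder_name" is replaced by "prompt_id", and the adapter name reflects the LiteLLM-based adapter. A sketch of the expected dict, using the example values from the mocked test above:

expected_source_properties = {
    "adapter_name": "kiln_openai_compatible_adapter",
    "model_name": "llama_3_1_8b",
    "model_provider": "ollama",
    "prompt_id": "simple_prompt_builder",  # previously stored as "prompt_builder_name"
}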
kiln_ai/adapters/run_output.py
CHANGED
@@ -1,8 +1,11 @@
 from dataclasses import dataclass
 from typing import Dict

+from litellm.types.utils import ChoiceLogprobs
+

 @dataclass
 class RunOutput:
     output: Dict | str
     intermediate_outputs: Dict[str, str] | None
+    output_logprobs: ChoiceLogprobs | None = None
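Note: a minimal construction sketch (not part of the diff) for the extended dataclass; the field values below are illustrative. Because output_logprobs defaults to None, pre-0.13 RunOutput call sites keep working unchanged:

from kiln_ai.adapters.run_output import RunOutput

run_output = RunOutput(
    output="64",
    intermediate_outputs={"chain_of_thought": "8 * 8 = 64"},  # illustrative keys/values
)
assert run_output.output_logprobs is None  # typed as litellm ChoiceLogprobs | None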
@@ -5,8 +5,8 @@ import pytest
 from kiln_ai import datamodel
 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.ml_model_list import ModelProviderName
-from kiln_ai.adapters.model_adapters.
-from kiln_ai.adapters.model_adapters.
+from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig
+from kiln_ai.adapters.model_adapters.litellm_adapter import LiteLlmAdapter
 from kiln_ai.adapters.prompt_builders import BasePromptBuilder
 from kiln_ai.adapters.provider_tools import kiln_model_provider_from

@@ -43,9 +43,9 @@ def test_openai_adapter_creation(mock_config, basic_task):
        kiln_task=basic_task, model_name="gpt-4", provider=ModelProviderName.openai
    )

-    assert isinstance(adapter,
+    assert isinstance(adapter, LiteLlmAdapter)
    assert adapter.config.model_name == "gpt-4"
-    assert adapter.config.
+    assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
    assert adapter.config.provider_name == ModelProviderName.openai
    assert adapter.config.base_url is None  # OpenAI url is default
    assert adapter.config.default_headers is None
@@ -58,11 +58,10 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
        provider=ModelProviderName.openrouter,
    )

-    assert isinstance(adapter,
+    assert isinstance(adapter, LiteLlmAdapter)
    assert adapter.config.model_name == "anthropic/claude-3-opus"
-    assert adapter.config.
+    assert adapter.config.additional_body_options == {"api_key": "test-openrouter-key"}
    assert adapter.config.provider_name == ModelProviderName.openrouter
-    assert adapter.config.base_url == "https://openrouter.ai/api/v1"
    assert adapter.config.default_headers == {
        "HTTP-Referer": "https://getkiln.ai/openrouter",
        "X-Title": "KilnAI",
@@ -78,30 +77,25 @@ def test_openrouter_adapter_creation(mock_config, basic_task):
        ModelProviderName.fireworks_ai,
    ],
 )
-def
+def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
    adapter = adapter_for_task(
        kiln_task=basic_task, model_name="test-model", provider=provider
    )

-    assert isinstance(adapter,
-    assert adapter.model_name == "test-model"
+    assert isinstance(adapter, LiteLlmAdapter)
+    assert adapter.run_config.model_name == "test-model"


 # TODO should run for all cases
 def test_custom_prompt_builder(mock_config, basic_task):
-    class TestPromptBuilder(BasePromptBuilder):
-        def build_base_prompt(self, kiln_task) -> str:
-            return "test-prompt"
-
-    prompt_builder = TestPromptBuilder(basic_task)
    adapter = adapter_for_task(
        kiln_task=basic_task,
        model_name="gpt-4",
        provider=ModelProviderName.openai,
-
+        prompt_id="simple_chain_of_thought_prompt_builder",
    )

-    assert adapter.
+    assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"


 # TODO should run for all cases
@@ -111,10 +105,12 @@ def test_tags_passed_through(mock_config, basic_task):
        kiln_task=basic_task,
        model_name="gpt-4",
        provider=ModelProviderName.openai,
-
+        base_adapter_config=AdapterConfig(
+            default_tags=tags,
+        ),
    )

-    assert adapter.default_tags == tags
+    assert adapter.base_adapter_config.default_tags == tags


 def test_invalid_provider(mock_config, basic_task):
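Note: taken together, the registry tests above exercise the new adapter_for_task surface in this release: it returns a LiteLlmAdapter, the prompt is selected with prompt_id, and run-level options move into AdapterConfig. A call sketch based on those tests (the task variable and the tag value are illustrative):

from kiln_ai.adapters.adapter_registry import adapter_for_task
from kiln_ai.adapters.ml_model_list import ModelProviderName
from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig

adapter = adapter_for_task(
    kiln_task=task,  # an existing kiln_ai.datamodel.Task
    model_name="gpt-4",
    provider=ModelProviderName.openai,
    prompt_id="simple_chain_of_thought_prompt_builder",
    base_adapter_config=AdapterConfig(default_tags=["example-tag"]),
)
assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"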
@@ -124,11 +120,14 @@ def test_invalid_provider(mock_config, basic_task):
    )


-@patch("kiln_ai.adapters.adapter_registry.
+@patch("kiln_ai.adapters.adapter_registry.lite_llm_config")
 def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_task):
    mock_compatible_config.return_value.model_name = "test-model"
-    mock_compatible_config.return_value.
+    mock_compatible_config.return_value.additional_body_options = {
+        "api_key": "test-key"
+    }
    mock_compatible_config.return_value.base_url = "https://test.com/v1"
+    mock_compatible_config.return_value.provider_name = "CustomProvider99"

    adapter = adapter_for_task(
        kiln_task=basic_task,
@@ -136,11 +135,9 @@ def test_openai_compatible_adapter(mock_compatible_config, mock_config, basic_ta
        provider=ModelProviderName.openai_compatible,
    )

-    assert isinstance(adapter,
+    assert isinstance(adapter, LiteLlmAdapter)
    mock_compatible_config.assert_called_once_with("provider::test-model")
-    assert adapter.config
-    assert adapter.config.api_key == "test-key"
-    assert adapter.config.base_url == "https://test.com/v1"
+    assert adapter.config == mock_compatible_config.return_value


 def test_custom_openai_compatible_provider(mock_config, basic_task):
@@ -150,9 +147,9 @@ def test_custom_openai_compatible_provider(mock_config, basic_task):
        provider=ModelProviderName.kiln_custom_registry,
    )

-    assert isinstance(adapter,
+    assert isinstance(adapter, LiteLlmAdapter)
    assert adapter.config.model_name == "openai::test-model"
-    assert adapter.config.
+    assert adapter.config.additional_body_options == {"api_key": "test-openai-key"}
    assert adapter.config.base_url is None  # openai is none
    assert adapter.config.provider_name == ModelProviderName.kiln_custom_registry

@@ -165,7 +162,7 @@ async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id
    )

    mock_finetune_from_id.assert_called_once_with("proj::task::tune")
-    assert isinstance(adapter,
+    assert isinstance(adapter, LiteLlmAdapter)
    assert adapter.config.provider_name == ModelProviderName.kiln_fine_tune
    # Kiln model name here, but the underlying openai model id below
    assert adapter.config.model_name == "proj::task::tune"
@@ -174,4 +171,4 @@ async def test_fine_tune_provider(mock_config, basic_task, mock_finetune_from_id
        "proj::task::tune", provider_name=ModelProviderName.kiln_fine_tune
    )
    # The actual model name from the fine tune object
-    assert provider.
+    assert provider.model_id == "test-model"
@@ -1,13 +1,13 @@
+import logging
 from typing import List

 import pytest

-from libs.core.kiln_ai.adapters.ml_model_list import
-    KilnModelProvider,
-    built_in_models,
-)
+from libs.core.kiln_ai.adapters.ml_model_list import KilnModelProvider, built_in_models
 from libs.core.kiln_ai.adapters.provider_tools import provider_name_from_id

+logger = logging.getLogger(__name__)
+

 def _all_providers_support(providers: List[KilnModelProvider], attribute: str) -> bool:
     """Check if all providers support a given feature"""
@@ -10,7 +10,6 @@ from kiln_ai.adapters.ollama_tools import (
 def test_parse_ollama_tags_no_models():
     json_response = '{"models":[{"name":"scosman_net","model":"scosman_net:latest"},{"name":"phi3.5:latest","model":"phi3.5:latest","modified_at":"2024-10-02T12:04:35.191519822-04:00","size":2176178843,"digest":"61819fb370a3c1a9be6694869331e5f85f867a079e9271d66cb223acb81d04ba","details":{"parent_model":"","format":"gguf","family":"phi3","families":["phi3"],"parameter_size":"3.8B","quantization_level":"Q4_0"}},{"name":"gemma2:2b","model":"gemma2:2b","modified_at":"2024-09-09T16:46:38.64348929-04:00","size":1629518495,"digest":"8ccf136fdd5298f3ffe2d69862750ea7fb56555fa4d5b18c04e3fa4d82ee09d7","details":{"parent_model":"","format":"gguf","family":"gemma2","families":["gemma2"],"parameter_size":"2.6B","quantization_level":"Q4_0"}},{"name":"llama3.1:latest","model":"llama3.1:latest","modified_at":"2024-09-01T17:19:43.481523695-04:00","size":4661230720,"digest":"f66fc8dc39ea206e03ff6764fcc696b1b4dfb693f0b6ef751731dd4e6269046e","details":{"parent_model":"","format":"gguf","family":"llama","families":["llama"],"parameter_size":"8.0B","quantization_level":"Q4_0"}}]}'
     tags = json.loads(json_response)
-    print(json.dumps(tags, indent=2))
     conn = parse_ollama_tags(tags)
     assert "phi3.5:latest" in conn.supported_models
     assert "gemma2:2b" in conn.supported_models
@@ -1,24 +1,32 @@
 import os
 from pathlib import Path
+from unittest.mock import patch

 import pytest
-from
+from litellm.utils import ModelResponse

 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.ml_model_list import built_in_models
-from kiln_ai.adapters.model_adapters.
+from kiln_ai.adapters.model_adapters.litellm_adapter import (
+    LiteLlmAdapter,
+    LiteLlmConfig,
+)
 from kiln_ai.adapters.ollama_tools import ollama_online
 from kiln_ai.adapters.prompt_builders import (
     BasePromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
 )
+from kiln_ai.datamodel import PromptId


 def get_all_models_and_providers():
     model_provider_pairs = []
     for model in built_in_models:
         for provider in model.providers:
+            if not provider.model_id:
+                # it's possible for models to not have an ID (fine-tune only model)
+                continue
             model_provider_pairs.append((model.name, provider.name))
     return model_provider_pairs

@@ -105,23 +113,27 @@ async def test_amazon_bedrock(tmp_path):
    await run_simple_test(tmp_path, "llama_3_1_8b", "amazon_bedrock")


-async def test_mock(tmp_path):
-    task = build_test_task(tmp_path)
-    mockChatModel = FakeListChatModel(responses=["mock response"])
-    adapter = LangchainAdapter(
-        task,
-        custom_model=mockChatModel,
-        provider="ollama",
-    )
-    run = await adapter.invoke("You are a mock, send me the response!")
-    assert "mock response" in run.output.output
-
-
 async def test_mock_returning_run(tmp_path):
    task = build_test_task(tmp_path)
-
-
-
+    with patch("litellm.acompletion") as mock_acompletion:
+        # Configure the mock to return a properly structured response
+        mock_acompletion.return_value = ModelResponse(
+            model="custom_model",
+            choices=[{"message": {"content": "mock response"}}],
+        )
+
+        adapter = LiteLlmAdapter(
+            config=LiteLlmConfig(
+                model_name="custom_model",
+                provider_name="ollama",
+                base_url="http://localhost:11434",
+                additional_body_options={"api_key": "test_key"},
+            ),
+            kiln_task=task,
+        )
+
+        run = await adapter.invoke("You are a mock, send me the response!")
+
    assert run.output.output == "mock response"
    assert run is not None
    assert run.id is not None
@@ -129,10 +141,10 @@ async def test_mock_returning_run(tmp_path):
    assert run.output.output == "mock response"
    assert "created_by" in run.input_source.properties
    assert run.output.source.properties == {
-        "adapter_name": "
-        "model_name": "
+        "adapter_name": "kiln_openai_compatible_adapter",
+        "model_name": "custom_model",
        "model_provider": "ollama",
-        "
+        "prompt_id": "simple_prompt_builder",
    }


@@ -149,8 +161,9 @@ async def test_all_models_providers_plaintext(tmp_path, model_name, provider_nam
 @pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
 async def test_cot_prompt_builder(tmp_path, model_name, provider_name):
    task = build_test_task(tmp_path)
-
-
+    await run_simple_task(
+        task, model_name, provider_name, "simple_chain_of_thought_prompt_builder"
+    )


 def build_test_task(tmp_path: Path):
@@ -186,20 +199,20 @@ async def run_simple_test(
    tmp_path: Path,
    model_name: str,
    provider: str | None = None,
-
+    prompt_id: PromptId | None = None,
 ):
    task = build_test_task(tmp_path)
-    return await run_simple_task(task, model_name, provider,
+    return await run_simple_task(task, model_name, provider, prompt_id)


 async def run_simple_task(
    task: datamodel.Task,
    model_name: str,
    provider: str,
-
+    prompt_id: PromptId | None = None,
 ) -> datamodel.TaskRun:
    adapter = adapter_for_task(
-        task, model_name=model_name, provider=provider,
+        task, model_name=model_name, provider=provider, prompt_id=prompt_id
    )

    run = await adapter.invoke(
@@ -212,13 +225,14 @@ async def run_simple_task(
    )
    assert "64" in run.output.output
    source_props = run.output.source.properties
-    assert source_props["adapter_name"]
+    assert source_props["adapter_name"] in [
+        "kiln_langchain_adapter",
+        "kiln_openai_compatible_adapter",
+    ]
    assert source_props["model_name"] == model_name
    assert source_props["model_provider"] == provider
-
-
-
-
-    )
-    assert source_props["prompt_builder_name"] == expected_prompt_builder_name
+    if prompt_id is None:
+        assert source_props["prompt_id"] == "simple_prompt_builder"
+    else:
+        assert source_props["prompt_id"] == prompt_id
    return run
@@ -1,8 +1,9 @@
 import json
+import logging

 import pytest

-from kiln_ai.adapters.model_adapters.base_adapter import
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
 from kiln_ai.adapters.model_adapters.test_structured_output import (
     build_structured_output_test_task,
 )
@@ -16,8 +17,9 @@ from kiln_ai.adapters.prompt_builders import (
     SavedPromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
+    TaskRunConfigPromptBuilder,
     chain_of_thought_prompt,
-
+    prompt_builder_from_id,
 )
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
 from kiln_ai.datamodel import (
@@ -32,6 +34,9 @@ from kiln_ai.datamodel import (
     TaskOutputRating,
     TaskRun,
 )
+from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig
+
+logger = logging.getLogger(__name__)


 def test_simple_prompt_builder(tmp_path):
@@ -57,12 +62,8 @@ class MockAdapter(BaseAdapter):
     def _run(self, input: str) -> str:
         return "mock response"

-    def
-        return
-            adapter_name="mock_adapter",
-            model_name="mock_model",
-            model_provider="mock_provider",
-        )
+    def adapter_name(self) -> str:
+        return "mock_adapter"


 def test_simple_prompt_builder_structured_output(tmp_path):
@@ -269,7 +270,6 @@ def test_few_shot_prompt_builder(tmp_path):
                rating=TaskOutputRating(value=4 + (i % 2), reason="Good joke"),
            ),
        )
-        print("RATING", "Joke Initial Output ", i + 1, " - RATED:", 4 + (i % 2), "\n")
        if i < 2:
            run = run.model_copy(
                update={
@@ -290,7 +290,7 @@
    prompt = prompt_builder.build_prompt(include_json_instructions=False)
    assert prompt.count("## Example") == 4

-
+    logger.info("PROMPT: %s", prompt)
    # Verify the order of examples (2 repaired, then 2 highest-rated)
    assert "Repaired Joke 1" in prompt
    assert "Repaired Joke 2" in prompt
@@ -314,54 +314,53 @@ def check_example_outputs(task: Task, count: int):
    assert f"## Example {count}" in prompt


-def
-    assert SimplePromptBuilder.prompt_builder_name() == "simple_prompt_builder"
-    assert MultiShotPromptBuilder.prompt_builder_name() == "multi_shot_prompt_builder"
-    assert RepairsPromptBuilder.prompt_builder_name() == "repairs_prompt_builder"
-
-
-def test_prompt_builder_from_ui_name(task_with_examples):
+def test_prompt_builder_from_id(task_with_examples):
    task = task_with_examples
-    assert isinstance(prompt_builder_from_ui_name("basic", task), SimplePromptBuilder)
    assert isinstance(
-
+        prompt_builder_from_id("simple_prompt_builder", task), SimplePromptBuilder
    )
    assert isinstance(
-
+        prompt_builder_from_id("few_shot_prompt_builder", task),
+        FewShotPromptBuilder,
    )
    assert isinstance(
-
+        prompt_builder_from_id("multi_shot_prompt_builder", task),
+        MultiShotPromptBuilder,
    )
    assert isinstance(
-
+        prompt_builder_from_id("repairs_prompt_builder", task),
+        RepairsPromptBuilder,
+    )
+    assert isinstance(
+        prompt_builder_from_id("simple_chain_of_thought_prompt_builder", task),
        SimpleChainOfThoughtPromptBuilder,
    )
    assert isinstance(
-
+        prompt_builder_from_id("few_shot_chain_of_thought_prompt_builder", task),
        FewShotChainOfThoughtPromptBuilder,
    )
    assert isinstance(
-
+        prompt_builder_from_id("multi_shot_chain_of_thought_prompt_builder", task),
        MultiShotChainOfThoughtPromptBuilder,
    )

-    with pytest.raises(ValueError, match="Unknown prompt
-
+    with pytest.raises(ValueError, match="Unknown prompt generator: invalid_name"):
+        prompt_builder_from_id("invalid_name", task)

    with pytest.raises(ValueError, match="Prompt ID not found: 123"):
-
+        prompt_builder_from_id("id::123", task)

    with pytest.raises(
        ValueError,
        match="Invalid fine-tune ID format. Expected 'project_id::task_id::fine_tune_id'",
    ):
-
+        prompt_builder_from_id("fine_tune_prompt::123", task)

    with pytest.raises(
        ValueError,
        match="Fine-tune ID not found",
    ):
-
+        prompt_builder_from_id("fine_tune_prompt::123::456::789", task)

    prompt = Prompt(
        name="test_prompt_name",
@@ -370,7 +369,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
        parent=task,
    )
    prompt.save_to_file()
-    pb =
+    pb = prompt_builder_from_id("id::" + prompt.id, task)
    assert isinstance(pb, SavedPromptBuilder)
    assert pb.prompt_id() == prompt.id
    assert pb.build_prompt(include_json_instructions=False) == "test_prompt"
@@ -390,7 +389,7 @@
    nested_fine_tune_id = (
        task_with_examples.parent.id + "::" + task_with_examples.id + "::" + finetune.id
    )
-    pb =
+    pb = prompt_builder_from_id(
        "fine_tune_prompt::" + nested_fine_tune_id,
        task_with_examples,
    )
@@ -587,3 +586,64 @@ def test_build_prompt_with_json_instructions(tmp_path):
    assert task.instruction in prompt_with_json
    for requirement in task.requirements:
        assert requirement.instruction in prompt_with_json
+
+
+def test_task_run_config_prompt_builder(tmp_path):
+    task = build_test_task(tmp_path)
+
+    run_config = TaskRunConfig(
+        name="test_run_config",
+        parent=task,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id="simple_prompt_builder",
+        ),
+        prompt=Prompt(
+            name="test prompt name",
+            prompt="test prompt content",
+            chain_of_thought_instructions="test step by step",
+        ),
+    )
+    run_config.save_to_file()
+
+    # Construct the eval prompt ID
+    run_config_prompt_id = (
+        f"task_run_config::{task.parent.id}::{task.id}::{run_config.id}"
+    )
+
+    # Test successful creation 2 ways: constructor and ID creation
+    builders = [
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id=run_config_prompt_id
+        ),
+        prompt_builder_from_id(run_config_prompt_id, task),
+    ]
+
+    for builder in builders:
+        assert (
+            builder.build_prompt(include_json_instructions=False)
+            == "test prompt content"
+        )
+        assert builder.chain_of_thought_prompt() == "test step by step"
+        assert builder.prompt_id() == run_config_prompt_id
+
+
+def test_task_run_config_prompt_builder_validation_errors(tmp_path):
+    task = build_test_task(tmp_path)
+
+    # Test invalid format
+    with pytest.raises(ValueError, match="Invalid task run config prompt ID"):
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id="task_run_config::wrong::format"
+        )
+
+    # Test task ID mismatch
+    wrong_task_id = f"task_run_config::{task.parent.id}::wrong_task_id::config_id"
+    with pytest.raises(ValueError, match="Task ID mismatch"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=wrong_task_id)
+
+    # Test eval not found
+    nonexistent_eval = f"task_run_config::{task.parent.id}::{task.id}::nonexistent_id"
+    with pytest.raises(ValueError, match="Task run config ID not found"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=nonexistent_eval)