kiln-ai 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kiln-ai has been flagged as possibly problematic. See the registry's advisory page for details.
- kiln_ai/adapters/__init__.py +4 -0
- kiln_ai/adapters/adapter_registry.py +163 -39
- kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
- kiln_ai/adapters/eval/__init__.py +28 -0
- kiln_ai/adapters/eval/base_eval.py +164 -0
- kiln_ai/adapters/eval/eval_runner.py +270 -0
- kiln_ai/adapters/eval/g_eval.py +368 -0
- kiln_ai/adapters/eval/registry.py +16 -0
- kiln_ai/adapters/eval/test_base_eval.py +325 -0
- kiln_ai/adapters/eval/test_eval_runner.py +641 -0
- kiln_ai/adapters/eval/test_g_eval.py +498 -0
- kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
- kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +1 -1
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
- kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
- kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
- kiln_ai/adapters/ml_model_list.py +758 -163
- kiln_ai/adapters/model_adapters/__init__.py +2 -4
- kiln_ai/adapters/model_adapters/base_adapter.py +61 -43
- kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
- kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
- kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
- kiln_ai/adapters/model_adapters/test_structured_output.py +59 -35
- kiln_ai/adapters/ollama_tools.py +3 -3
- kiln_ai/adapters/parsers/r1_parser.py +19 -14
- kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
- kiln_ai/adapters/prompt_builders.py +80 -42
- kiln_ai/adapters/provider_tools.py +50 -58
- kiln_ai/adapters/repair/repair_task.py +9 -21
- kiln_ai/adapters/repair/test_repair_task.py +6 -6
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +26 -29
- kiln_ai/adapters/test_generate_docs.py +4 -4
- kiln_ai/adapters/test_ollama_tools.py +0 -1
- kiln_ai/adapters/test_prompt_adaptors.py +47 -33
- kiln_ai/adapters/test_prompt_builders.py +91 -31
- kiln_ai/adapters/test_provider_tools.py +26 -81
- kiln_ai/datamodel/__init__.py +50 -952
- kiln_ai/datamodel/basemodel.py +2 -0
- kiln_ai/datamodel/datamodel_enums.py +60 -0
- kiln_ai/datamodel/dataset_filters.py +114 -0
- kiln_ai/datamodel/dataset_split.py +170 -0
- kiln_ai/datamodel/eval.py +298 -0
- kiln_ai/datamodel/finetune.py +105 -0
- kiln_ai/datamodel/json_schema.py +7 -1
- kiln_ai/datamodel/project.py +23 -0
- kiln_ai/datamodel/prompt.py +37 -0
- kiln_ai/datamodel/prompt_id.py +83 -0
- kiln_ai/datamodel/strict_mode.py +24 -0
- kiln_ai/datamodel/task.py +181 -0
- kiln_ai/datamodel/task_output.py +328 -0
- kiln_ai/datamodel/task_run.py +164 -0
- kiln_ai/datamodel/test_basemodel.py +19 -11
- kiln_ai/datamodel/test_dataset_filters.py +71 -0
- kiln_ai/datamodel/test_dataset_split.py +32 -8
- kiln_ai/datamodel/test_datasource.py +22 -2
- kiln_ai/datamodel/test_eval_model.py +635 -0
- kiln_ai/datamodel/test_example_models.py +9 -13
- kiln_ai/datamodel/test_json_schema.py +23 -0
- kiln_ai/datamodel/test_models.py +2 -2
- kiln_ai/datamodel/test_prompt_id.py +129 -0
- kiln_ai/datamodel/test_task.py +159 -0
- kiln_ai/utils/config.py +43 -1
- kiln_ai/utils/dataset_import.py +232 -0
- kiln_ai/utils/test_dataset_import.py +596 -0
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +86 -6
- kiln_ai-0.13.0.dist-info/RECORD +103 -0
- kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -302
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -11
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -246
- kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -350
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -225
- kiln_ai-0.11.1.dist-info/RECORD +0 -76
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -3,24 +3,28 @@ from pydantic import ValidationError
|
|
|
3
3
|
|
|
4
4
|
# import datamodel first or we get circular import errors
|
|
5
5
|
from kiln_ai.datamodel import (
|
|
6
|
-
AllDatasetFilter,
|
|
7
|
-
AllSplitDefinition,
|
|
8
|
-
DatasetFilterType,
|
|
9
6
|
DatasetSplit,
|
|
10
7
|
DatasetSplitDefinition,
|
|
11
8
|
DataSource,
|
|
12
9
|
DataSourceType,
|
|
13
|
-
HighRatingDatasetFilter,
|
|
14
10
|
Task,
|
|
15
11
|
TaskOutput,
|
|
16
12
|
TaskOutputRating,
|
|
17
13
|
TaskOutputRatingType,
|
|
18
14
|
TaskRun,
|
|
19
|
-
|
|
20
|
-
|
|
15
|
+
)
|
|
16
|
+
from kiln_ai.datamodel.dataset_split import (
|
|
17
|
+
AllSplitDefinition,
|
|
21
18
|
Train60Test20Val20SplitDefinition,
|
|
22
19
|
Train80Test20SplitDefinition,
|
|
23
20
|
)
|
|
21
|
+
from kiln_ai.datamodel.test_dataset_filters import (
|
|
22
|
+
AllDatasetFilter,
|
|
23
|
+
HighRatingDatasetFilter,
|
|
24
|
+
TagFilter,
|
|
25
|
+
ThinkingModelDatasetFilter,
|
|
26
|
+
ThinkingModelHighRatedFilter,
|
|
27
|
+
)
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
@pytest.fixture
|
|
@@ -42,6 +46,7 @@ def sample_task_runs(sample_task):
|
|
|
42
46
|
task_runs = []
|
|
43
47
|
for i in range(10):
|
|
44
48
|
rating = 5 if i < 6 else 1 # 6 high, 4 low ratings
|
|
49
|
+
tags = ["tag1"] if i < 6 else []
|
|
45
50
|
task_run = TaskRun(
|
|
46
51
|
parent=sample_task,
|
|
47
52
|
input=f"input_{i}",
|
|
@@ -59,6 +64,7 @@ def sample_task_runs(sample_task):
|
|
|
59
64
|
value=rating, type=TaskOutputRatingType.five_star
|
|
60
65
|
),
|
|
61
66
|
),
|
|
67
|
+
tags=tags,
|
|
62
68
|
)
|
|
63
69
|
task_run.save_to_file()
|
|
64
70
|
task_runs.append(task_run)
|
|
@@ -199,10 +205,10 @@ def test_dataset_split_with_high_rating_filter(sample_task, sample_task_runs):
|
|
|
199
205
|
"Split Name",
|
|
200
206
|
sample_task,
|
|
201
207
|
Train80Test20SplitDefinition,
|
|
202
|
-
|
|
208
|
+
filter_id="high_rating",
|
|
203
209
|
)
|
|
204
210
|
|
|
205
|
-
assert dataset.filter == HighRatingDatasetFilter
|
|
211
|
+
assert dataset.filter == "high_rating"
|
|
206
212
|
|
|
207
213
|
# Check that only high-rated task runs are included
|
|
208
214
|
all_ids = []
|
|
@@ -329,3 +335,21 @@ def test_thinking_model_dataset_filter_high_rated(
|
|
|
329
335
|
)
|
|
330
336
|
|
|
331
337
|
assert ThinkingModelHighRatedFilter(task_run) is expected_result
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def test_tag_dataset_filter(sample_task_runs):
|
|
341
|
+
num_tagged = 0
|
|
342
|
+
num_untagged = 0
|
|
343
|
+
filter = TagFilter("tag1")
|
|
344
|
+
for task_run in sample_task_runs:
|
|
345
|
+
if "tag1" in task_run.tags:
|
|
346
|
+
num_tagged += 1
|
|
347
|
+
assert "tag1" in task_run.tags
|
|
348
|
+
assert filter(task_run) is True
|
|
349
|
+
else:
|
|
350
|
+
num_untagged += 1
|
|
351
|
+
assert "tag1" not in task_run.tags
|
|
352
|
+
assert filter(task_run) is False
|
|
353
|
+
|
|
354
|
+
assert num_tagged == 6
|
|
355
|
+
assert num_untagged == 4
|
|
@@ -18,22 +18,41 @@ def test_valid_synthetic_data_source():
|
|
|
18
18
|
properties={
|
|
19
19
|
"model_name": "GPT-4",
|
|
20
20
|
"model_provider": "OpenAI",
|
|
21
|
-
"prompt_builder_name": "simple_prompt_builder",
|
|
21
|
+
"prompt_id": "simple_prompt_builder",
|
|
22
22
|
"adapter_name": "langchain",
|
|
23
23
|
},
|
|
24
24
|
)
|
|
25
25
|
assert data_source.type == DataSourceType.synthetic
|
|
26
26
|
assert data_source.properties["model_name"] == "GPT-4"
|
|
27
27
|
assert data_source.properties["model_provider"] == "OpenAI"
|
|
28
|
-
assert data_source.properties["prompt_builder_name"] == "simple_prompt_builder"
|
|
28
|
+
assert data_source.properties["prompt_id"] == "simple_prompt_builder"
|
|
29
29
|
assert data_source.properties["adapter_name"] == "langchain"
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
def test_valid_file_import_data_source():
|
|
33
|
+
data_source = DataSource(
|
|
34
|
+
type=DataSourceType.file_import,
|
|
35
|
+
properties={"file_name": "test.txt"},
|
|
36
|
+
)
|
|
37
|
+
assert data_source.type == DataSourceType.file_import
|
|
38
|
+
assert data_source.properties["file_name"] == "test.txt"
|
|
39
|
+
|
|
40
|
+
|
|
32
41
|
def test_missing_required_property():
|
|
33
42
|
with pytest.raises(ValidationError, match="'created_by' is required for"):
|
|
34
43
|
DataSource(type=DataSourceType.human)
|
|
35
44
|
|
|
36
45
|
|
|
46
|
+
def test_missing_required_property_file_import():
|
|
47
|
+
with pytest.raises(ValidationError, match="'file_name' is required for"):
|
|
48
|
+
DataSource(type=DataSourceType.file_import)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_not_allowed_property_file_import():
|
|
52
|
+
with pytest.raises(ValidationError, match="'model_name' is not allowed for"):
|
|
53
|
+
DataSource(type=DataSourceType.file_import, properties={"model_name": "GPT-4"})
|
|
54
|
+
|
|
55
|
+
|
|
37
56
|
def test_wrong_property_type():
|
|
38
57
|
with pytest.raises(
|
|
39
58
|
ValidationError,
|
|
@@ -85,6 +104,7 @@ def test_prompt_type_optional_for_synthetic():
|
|
|
85
104
|
},
|
|
86
105
|
)
|
|
87
106
|
assert "prompt_builder_name" not in data_source.properties
|
|
107
|
+
assert "prompt_id" not in data_source.properties
|
|
88
108
|
|
|
89
109
|
|
|
90
110
|
def test_private_data_source_properties_not_serialized():
|