kiln-ai 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of kiln-ai might be problematic.

Files changed (80)
  1. kiln_ai/adapters/__init__.py +4 -0
  2. kiln_ai/adapters/adapter_registry.py +163 -39
  3. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  4. kiln_ai/adapters/eval/__init__.py +28 -0
  5. kiln_ai/adapters/eval/base_eval.py +164 -0
  6. kiln_ai/adapters/eval/eval_runner.py +270 -0
  7. kiln_ai/adapters/eval/g_eval.py +368 -0
  8. kiln_ai/adapters/eval/registry.py +16 -0
  9. kiln_ai/adapters/eval/test_base_eval.py +325 -0
  10. kiln_ai/adapters/eval/test_eval_runner.py +641 -0
  11. kiln_ai/adapters/eval/test_g_eval.py +498 -0
  12. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
  14. kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
  15. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  16. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
  17. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  18. kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
  19. kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
  20. kiln_ai/adapters/ml_model_list.py +758 -163
  21. kiln_ai/adapters/model_adapters/__init__.py +2 -4
  22. kiln_ai/adapters/model_adapters/base_adapter.py +61 -43
  23. kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
  24. kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
  25. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  26. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
  27. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  28. kiln_ai/adapters/model_adapters/test_structured_output.py +59 -35
  29. kiln_ai/adapters/ollama_tools.py +3 -3
  30. kiln_ai/adapters/parsers/r1_parser.py +19 -14
  31. kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
  32. kiln_ai/adapters/prompt_builders.py +80 -42
  33. kiln_ai/adapters/provider_tools.py +50 -58
  34. kiln_ai/adapters/repair/repair_task.py +9 -21
  35. kiln_ai/adapters/repair/test_repair_task.py +6 -6
  36. kiln_ai/adapters/run_output.py +3 -0
  37. kiln_ai/adapters/test_adapter_registry.py +26 -29
  38. kiln_ai/adapters/test_generate_docs.py +4 -4
  39. kiln_ai/adapters/test_ollama_tools.py +0 -1
  40. kiln_ai/adapters/test_prompt_adaptors.py +47 -33
  41. kiln_ai/adapters/test_prompt_builders.py +91 -31
  42. kiln_ai/adapters/test_provider_tools.py +26 -81
  43. kiln_ai/datamodel/__init__.py +50 -952
  44. kiln_ai/datamodel/basemodel.py +2 -0
  45. kiln_ai/datamodel/datamodel_enums.py +60 -0
  46. kiln_ai/datamodel/dataset_filters.py +114 -0
  47. kiln_ai/datamodel/dataset_split.py +170 -0
  48. kiln_ai/datamodel/eval.py +298 -0
  49. kiln_ai/datamodel/finetune.py +105 -0
  50. kiln_ai/datamodel/json_schema.py +7 -1
  51. kiln_ai/datamodel/project.py +23 -0
  52. kiln_ai/datamodel/prompt.py +37 -0
  53. kiln_ai/datamodel/prompt_id.py +83 -0
  54. kiln_ai/datamodel/strict_mode.py +24 -0
  55. kiln_ai/datamodel/task.py +181 -0
  56. kiln_ai/datamodel/task_output.py +328 -0
  57. kiln_ai/datamodel/task_run.py +164 -0
  58. kiln_ai/datamodel/test_basemodel.py +19 -11
  59. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  60. kiln_ai/datamodel/test_dataset_split.py +32 -8
  61. kiln_ai/datamodel/test_datasource.py +22 -2
  62. kiln_ai/datamodel/test_eval_model.py +635 -0
  63. kiln_ai/datamodel/test_example_models.py +9 -13
  64. kiln_ai/datamodel/test_json_schema.py +23 -0
  65. kiln_ai/datamodel/test_models.py +2 -2
  66. kiln_ai/datamodel/test_prompt_id.py +129 -0
  67. kiln_ai/datamodel/test_task.py +159 -0
  68. kiln_ai/utils/config.py +43 -1
  69. kiln_ai/utils/dataset_import.py +232 -0
  70. kiln_ai/utils/test_dataset_import.py +596 -0
  71. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +86 -6
  72. kiln_ai-0.13.0.dist-info/RECORD +103 -0
  73. kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -302
  74. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -11
  75. kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -246
  76. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -350
  77. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -225
  78. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  79. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
  80. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/test_dataset_split.py

@@ -3,24 +3,28 @@ from pydantic import ValidationError
 
 # import datamodel first or we get circular import errors
 from kiln_ai.datamodel import (
-    AllDatasetFilter,
-    AllSplitDefinition,
-    DatasetFilterType,
     DatasetSplit,
     DatasetSplitDefinition,
     DataSource,
     DataSourceType,
-    HighRatingDatasetFilter,
     Task,
     TaskOutput,
     TaskOutputRating,
     TaskOutputRatingType,
     TaskRun,
-    ThinkingModelDatasetFilter,
-    ThinkingModelHighRatedFilter,
+)
+from kiln_ai.datamodel.dataset_split import (
+    AllSplitDefinition,
     Train60Test20Val20SplitDefinition,
     Train80Test20SplitDefinition,
 )
+from kiln_ai.datamodel.test_dataset_filters import (
+    AllDatasetFilter,
+    HighRatingDatasetFilter,
+    TagFilter,
+    ThinkingModelDatasetFilter,
+    ThinkingModelHighRatedFilter,
+)
 
 
 @pytest.fixture
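The import reshuffle above tracks the breakup of the monolithic kiln_ai.datamodel package (note the +50 -952 change to kiln_ai/datamodel/__init__.py in the file list): split definitions and dataset filters now live in dedicated submodules. A minimal sketch of the resulting import surface, as exercised by this hunk:

# Core datamodel types are still re-exported from the package root:
from kiln_ai.datamodel import DatasetSplit, Task, TaskRun

# Split definitions moved into a submodule (module path as shown in the
# diff above):
from kiln_ai.datamodel.dataset_split import AllSplitDefinition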
@@ -42,6 +46,7 @@ def sample_task_runs(sample_task):
     task_runs = []
     for i in range(10):
         rating = 5 if i < 6 else 1  # 6 high, 4 low ratings
+        tags = ["tag1"] if i < 6 else []
         task_run = TaskRun(
             parent=sample_task,
             input=f"input_{i}",
@@ -59,6 +64,7 @@ def sample_task_runs(sample_task):
                     value=rating, type=TaskOutputRatingType.five_star
                 ),
             ),
+            tags=tags,
         )
         task_run.save_to_file()
         task_runs.append(task_run)
@@ -199,10 +205,10 @@ def test_dataset_split_with_high_rating_filter(sample_task, sample_task_runs):
         "Split Name",
         sample_task,
         Train80Test20SplitDefinition,
-        filter_type=DatasetFilterType.HIGH_RATING,
+        filter_id="high_rating",
     )
 
-    assert dataset.filter == DatasetFilterType.HIGH_RATING
+    assert dataset.filter == "high_rating"
 
     # Check that only high-rated task runs are included
     all_ids = []
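This hunk replaces the old DatasetFilterType enum with plain string filter ids, matching the new kiln_ai/datamodel/dataset_filters.py module in the file list. A minimal sketch of how a string-keyed registry could back such an API; all names below are hypothetical, not kiln's actual implementation:

from typing import Any, Callable

# A dataset filter is a predicate over task runs: keep or drop.
DatasetFilter = Callable[[Any], bool]

def high_rating_filter(task_run: Any) -> bool:
    # Assumes a five-star rating model where >= 4 counts as "high",
    # as in the fixture above (ratings of 5 and 1).
    rating = task_run.output.rating
    return rating is not None and rating.value is not None and rating.value >= 4

FILTERS: dict[str, DatasetFilter] = {
    "all": lambda _: True,
    "high_rating": high_rating_filter,
}

def filter_from_id(filter_id: str) -> DatasetFilter:
    if filter_id not in FILTERS:
        raise ValueError(f"unknown dataset filter id: {filter_id!r}")
    return FILTERS[filter_id]

A string id serializes cleanly into the DatasetSplit model (assert dataset.filter == "high_rating" above) where an enum member would need custom encoding.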
@@ -329,3 +335,21 @@ def test_thinking_model_dataset_filter_high_rated(
     )
 
     assert ThinkingModelHighRatedFilter(task_run) is expected_result
+
+
+def test_tag_dataset_filter(sample_task_runs):
+    num_tagged = 0
+    num_untagged = 0
+    filter = TagFilter("tag1")
+    for task_run in sample_task_runs:
+        if "tag1" in task_run.tags:
+            num_tagged += 1
+            assert "tag1" in task_run.tags
+            assert filter(task_run) is True
+        else:
+            num_untagged += 1
+            assert "tag1" not in task_run.tags
+            assert filter(task_run) is False
+
+    assert num_tagged == 6
+    assert num_untagged == 4
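The new test constructs TagFilter("tag1") and then calls the instance directly on each task run, so the filter is evidently a callable object parameterized by a tag. A sketch consistent with that contract (the shipped implementation may differ):

from typing import Any

class TagFilter:
    """Keeps task runs that carry a given tag."""

    def __init__(self, tag: str):
        self.tag = tag

    def __call__(self, task_run: Any) -> bool:
        # TaskRun.tags is assumed to be a list of strings, as in the
        # fixture above (tags = ["tag1"] if i < 6 else []).
        return self.tag in task_run.tags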
kiln_ai/datamodel/test_datasource.py

@@ -18,22 +18,41 @@ def test_valid_synthetic_data_source():
         properties={
             "model_name": "GPT-4",
             "model_provider": "OpenAI",
-            "prompt_builder_name": "completion",
+            "prompt_id": "simple_prompt_builder",
             "adapter_name": "langchain",
         },
     )
     assert data_source.type == DataSourceType.synthetic
     assert data_source.properties["model_name"] == "GPT-4"
     assert data_source.properties["model_provider"] == "OpenAI"
-    assert data_source.properties["prompt_builder_name"] == "completion"
+    assert data_source.properties["prompt_id"] == "simple_prompt_builder"
     assert data_source.properties["adapter_name"] == "langchain"
 
 
+def test_valid_file_import_data_source():
+    data_source = DataSource(
+        type=DataSourceType.file_import,
+        properties={"file_name": "test.txt"},
+    )
+    assert data_source.type == DataSourceType.file_import
+    assert data_source.properties["file_name"] == "test.txt"
+
+
 def test_missing_required_property():
     with pytest.raises(ValidationError, match="'created_by' is required for"):
         DataSource(type=DataSourceType.human)
 
 
+def test_missing_required_property_file_import():
+    with pytest.raises(ValidationError, match="'file_name' is required for"):
+        DataSource(type=DataSourceType.file_import)
+
+
+def test_not_allowed_property_file_import():
+    with pytest.raises(ValidationError, match="'model_name' is not allowed for"):
+        DataSource(type=DataSourceType.file_import, properties={"model_name": "GPT-4"})
+
+
 def test_wrong_property_type():
     with pytest.raises(
         ValidationError,
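The three new file_import tests pin down a per-type property contract: each DataSourceType declares required and disallowed property keys, and violations surface as pydantic ValidationErrors whose messages the tests match on. A hedged sketch of validation logic that would satisfy these tests; illustrative only, not the package's actual code:

from enum import Enum
from pydantic import BaseModel, model_validator

class DataSourceType(str, Enum):
    human = "human"
    synthetic = "synthetic"
    file_import = "file_import"

# Required / disallowed keys per type are assumptions inferred from the tests.
REQUIRED = {
    DataSourceType.human: ["created_by"],
    DataSourceType.file_import: ["file_name"],
}
NOT_ALLOWED = {
    DataSourceType.file_import: ["model_name", "model_provider", "prompt_id"],
}

class DataSource(BaseModel):
    type: DataSourceType
    properties: dict = {}

    @model_validator(mode="after")
    def check_properties(self):
        for key in REQUIRED.get(self.type, []):
            if key not in self.properties:
                raise ValueError(f"'{key}' is required for {self.type.value} data sources")
        for key in NOT_ALLOWED.get(self.type, []):
            if key in self.properties:
                raise ValueError(f"'{key}' is not allowed for {self.type.value} data sources")
        return self

A ValueError raised inside a model validator is wrapped by pydantic into a ValidationError, which is exactly what pytest.raises(ValidationError, match=...) asserts above.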
@@ -85,6 +104,7 @@ def test_prompt_type_optional_for_synthetic():
         },
     )
     assert "prompt_builder_name" not in data_source.properties
+    assert "prompt_id" not in data_source.properties
 
 
 def test_private_data_source_properties_not_serialized():
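Both test files also swap the free-form prompt_builder_name string (e.g. "completion") for a prompt_id (e.g. "simple_prompt_builder"), in line with the new kiln_ai/datamodel/prompt_id.py module in the file list. A hypothetical sketch of what id validation might look like; the real set of accepted ids and formats is defined by that module, not here:

# Illustrative only: the generator names and the "id::" form are assumptions.
BUILTIN_PROMPT_GENERATORS = {
    "simple_prompt_builder",
    "few_shot_prompt_builder",
}

def validate_prompt_id(prompt_id: str) -> str:
    if prompt_id in BUILTIN_PROMPT_GENERATORS:
        return prompt_id
    # Saved prompts might be referenced by an "id::<prompt_id>" form.
    if prompt_id.startswith("id::") and prompt_id != "id::":
        return prompt_id
    raise ValueError(f"invalid prompt id: {prompt_id!r}")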