kiln-ai 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (63)
  1. kiln_ai/adapters/adapter_registry.py +12 -13
  2. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  3. kiln_ai/adapters/eval/base_eval.py +164 -0
  4. kiln_ai/adapters/eval/eval_runner.py +267 -0
  5. kiln_ai/adapters/eval/g_eval.py +367 -0
  6. kiln_ai/adapters/eval/registry.py +16 -0
  7. kiln_ai/adapters/eval/test_base_eval.py +324 -0
  8. kiln_ai/adapters/eval/test_eval_runner.py +640 -0
  9. kiln_ai/adapters/eval/test_g_eval.py +497 -0
  10. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  11. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  12. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +1 -1
  13. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  14. kiln_ai/adapters/ml_model_list.py +141 -29
  15. kiln_ai/adapters/model_adapters/base_adapter.py +50 -35
  16. kiln_ai/adapters/model_adapters/langchain_adapters.py +27 -20
  17. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -1
  18. kiln_ai/adapters/model_adapters/openai_model_adapter.py +93 -50
  19. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  20. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +7 -14
  21. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +55 -64
  22. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  23. kiln_ai/adapters/model_adapters/test_structured_output.py +36 -30
  24. kiln_ai/adapters/ollama_tools.py +0 -1
  25. kiln_ai/adapters/prompt_builders.py +80 -42
  26. kiln_ai/adapters/repair/repair_task.py +9 -21
  27. kiln_ai/adapters/repair/test_repair_task.py +3 -3
  28. kiln_ai/adapters/run_output.py +3 -0
  29. kiln_ai/adapters/test_adapter_registry.py +10 -10
  30. kiln_ai/adapters/test_generate_docs.py +6 -6
  31. kiln_ai/adapters/test_ollama_tools.py +0 -1
  32. kiln_ai/adapters/test_prompt_adaptors.py +17 -14
  33. kiln_ai/adapters/test_prompt_builders.py +91 -31
  34. kiln_ai/datamodel/__init__.py +50 -952
  35. kiln_ai/datamodel/datamodel_enums.py +58 -0
  36. kiln_ai/datamodel/dataset_filters.py +114 -0
  37. kiln_ai/datamodel/dataset_split.py +170 -0
  38. kiln_ai/datamodel/eval.py +298 -0
  39. kiln_ai/datamodel/finetune.py +105 -0
  40. kiln_ai/datamodel/json_schema.py +6 -0
  41. kiln_ai/datamodel/project.py +23 -0
  42. kiln_ai/datamodel/prompt.py +37 -0
  43. kiln_ai/datamodel/prompt_id.py +83 -0
  44. kiln_ai/datamodel/strict_mode.py +24 -0
  45. kiln_ai/datamodel/task.py +181 -0
  46. kiln_ai/datamodel/task_output.py +321 -0
  47. kiln_ai/datamodel/task_run.py +164 -0
  48. kiln_ai/datamodel/test_basemodel.py +10 -11
  49. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  50. kiln_ai/datamodel/test_dataset_split.py +32 -8
  51. kiln_ai/datamodel/test_datasource.py +3 -2
  52. kiln_ai/datamodel/test_eval_model.py +635 -0
  53. kiln_ai/datamodel/test_example_models.py +9 -13
  54. kiln_ai/datamodel/test_json_schema.py +23 -0
  55. kiln_ai/datamodel/test_models.py +2 -2
  56. kiln_ai/datamodel/test_prompt_id.py +129 -0
  57. kiln_ai/datamodel/test_task.py +159 -0
  58. kiln_ai/utils/config.py +6 -1
  59. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +37 -1
  60. kiln_ai-0.12.0.dist-info/RECORD +100 -0
  61. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  62. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
  63. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -11,7 +11,6 @@ from kiln_ai.datamodel import (
     Finetune,
     Project,
     Task,
-    TaskDeterminism,
     TaskOutput,
     TaskOutputRating,
     TaskOutputRatingType,
@@ -125,7 +124,6 @@ def test_structured_output_workflow(tmp_path):
         name="Structured Output Task",
         parent=project,
         instruction="Generate a JSON object with name and age",
-        determinism=TaskDeterminism.semantic_match,
         output_json_schema=json.dumps(
             {
                 "type": "object",
@@ -142,7 +140,7 @@ def test_structured_output_workflow(tmp_path):

     # Create runs
     runs = []
-    for source in DataSourceType:
+    for source in [DataSourceType.human, DataSourceType.synthetic]:
         for _ in range(2):
             task_run = TaskRun(
                 input="Generate info for John Doe",
@@ -157,7 +155,7 @@ def test_structured_output_workflow(tmp_path):
                         "adapter_name": "TestAdapter",
                         "model_name": "GPT-4",
                         "model_provider": "OpenAI",
-                        "prompt_builder_name": "TestPromptBuilder",
+                        "prompt_id": "simple_prompt_builder",
                     },
                 ),
                 parent=task,
@@ -216,9 +214,9 @@ def test_structured_output_workflow(tmp_path):

     assert loaded_task.name == "Structured Output Task"
     assert len(loaded_task.requirements) == 2
-    assert len(loaded_task.runs()) == 5
-
     loaded_runs = loaded_task.runs()
+    assert len(loaded_runs) == 5
+
     for task_run in loaded_runs:
         output = task_run.output
         assert output.rating is not None
@@ -472,7 +470,7 @@ def test_valid_synthetic_task_output():
                 "adapter_name": "TestAdapter",
                 "model_name": "GPT-4",
                 "model_provider": "OpenAI",
-                "prompt_builder_name": "TestPromptBuilder",
+                "prompt_id": "simple_prompt_builder",
             },
         ),
     )
@@ -480,7 +478,7 @@ def test_valid_synthetic_task_output():
     assert output.source.properties["adapter_name"] == "TestAdapter"
     assert output.source.properties["model_name"] == "GPT-4"
     assert output.source.properties["model_provider"] == "OpenAI"
-    assert output.source.properties["prompt_builder_name"] == "TestPromptBuilder"
+    assert output.source.properties["prompt_id"] == "simple_prompt_builder"


 def test_invalid_synthetic_task_output_missing_keys():
@@ -509,23 +507,21 @@ def test_invalid_synthetic_task_output_empty_values():
                 "adapter_name": "TestAdapter",
                 "model_name": "",
                 "model_provider": "OpenAI",
-                "prompt_builder_name": "TestPromptBuilder",
+                "prompt_id": "simple_prompt_builder",
             },
         ),
     )


 def test_invalid_synthetic_task_output_non_string_values():
-    with pytest.raises(
-        ValidationError, match="'prompt_builder_name' must be of type str"
-    ):
+    with pytest.raises(ValidationError, match="'prompt_id' must be of type str"):
         DataSource(
             type=DataSourceType.synthetic,
             properties={
                 "adapter_name": "TestAdapter",
                 "model_name": "GPT-4",
                 "model_provider": "OpenAI",
-                "prompt_builder_name": 123,
+                "prompt_id": 123,
             },
         )

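The rename running through the hunks above, the synthetic DataSource property "prompt_builder_name" becoming "prompt_id", is one of the datamodel migrations in this release. A minimal sketch of constructing a synthetic DataSource under the new property name, mirroring the test fixtures above:

```python
from kiln_ai.datamodel import DataSource, DataSourceType

# Synthetic data sources now record which prompt was used via "prompt_id"
# (previously "prompt_builder_name"); generator-style IDs such as
# "simple_prompt_builder" are plain strings.
source = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "adapter_name": "TestAdapter",
        "model_name": "GPT-4",
        "model_provider": "OpenAI",
        "prompt_id": "simple_prompt_builder",
    },
)
```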
@@ -4,6 +4,7 @@ from pydantic import BaseModel
 from kiln_ai.datamodel.json_schema import (
     JsonObjectSchema,
     schema_from_json_str,
+    string_to_json_key,
     validate_schema,
 )

@@ -123,3 +124,25 @@ def test_triangle_schema():
     validate_schema({"a": 1, "b": 2, "c": 3}, json_triangle_schema)
     with pytest.raises(Exception):
         validate_schema({"a": 1, "b": 2, "c": "3"}, json_triangle_schema)
+
+
+@pytest.mark.parametrize(
+    "input_str,expected",
+    [
+        ("hello world", "hello_world"),
+        ("Hello World", "hello_world"),
+        ("hello_world", "hello_world"),
+        ("HELLO WORLD", "hello_world"),
+        ("hello123", "hello123"),
+        ("hello-world", "helloworld"),
+        ("hello!@#$%^&*()world", "helloworld"),
+        (" hello world ", "hello__world"),
+        ("hello__world", "hello__world"),
+        ("", ""),
+        ("!@#$%", ""),
+        ("snake_case_string", "snake_case_string"),
+        ("camelCaseString", "camelcasestring"),
+    ],
+)
+def test_string_to_json_key(input_str: str, expected: str):
+    assert string_to_json_key(input_str) == expected
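For reference, a minimal sketch of a string_to_json_key implementation consistent with the parametrized cases above (an assumption inferred from the tests, not the package's actual code; the "hello__world" expectations suggest the original inputs contained doubled whitespace that this page has collapsed):

```python
import re


def string_to_json_key(s: str) -> str:
    # Trim surrounding whitespace, lowercase, map spaces to underscores,
    # then drop anything that isn't a lowercase letter, digit, or underscore.
    s = s.strip().lower().replace(" ", "_")
    return re.sub(r"[^a-z0-9_]", "", s)
```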
@@ -385,7 +385,7 @@ def test_task_run_input_source_validation(tmp_path):
     assert task_run.input_source is not None

     # Test 3: Creating without input_source should fail when strict mode is on
-    with patch("kiln_ai.datamodel.strict_mode", return_value=True):
+    with patch("kiln_ai.datamodel.task_run.strict_mode", return_value=True):
         with pytest.raises(ValueError) as exc_info:
             task_run = TaskRun(
                 input="test input 3",
@@ -442,7 +442,7 @@ def test_task_output_source_validation(tmp_path):
     assert task_output.source is not None

     # Test 3: Creating without source should fail when strict mode is on
-    with patch("kiln_ai.datamodel.strict_mode", return_value=True):
+    with patch("kiln_ai.datamodel.task_output.strict_mode", return_value=True):
         with pytest.raises(ValueError) as exc_info:
             task_output = TaskOutput(
                 output="test output 3",
@@ -0,0 +1,129 @@
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from kiln_ai.datamodel import (
+    PromptGenerators,
+    PromptId,
+)
+from kiln_ai.datamodel.prompt_id import is_frozen_prompt
+
+
+# Test model to validate the PromptId type
+class ModelTester(BaseModel):
+    prompt_id: PromptId
+
+
+def test_valid_prompt_generator_names():
+    """Test that valid prompt generator names are accepted"""
+    for generator in PromptGenerators:
+        model = ModelTester(prompt_id=generator.value)
+        assert model.prompt_id == generator.value
+
+
+def test_valid_saved_prompt_id():
+    """Test that valid saved prompt IDs are accepted"""
+    valid_id = "id::prompt_789"
+    model = ModelTester(prompt_id=valid_id)
+    assert model.prompt_id == valid_id
+
+
+def test_valid_fine_tune_prompt_id():
+    """Test that valid fine-tune prompt IDs are accepted"""
+    valid_id = "fine_tune_prompt::ft_123456"
+    model = ModelTester(prompt_id=valid_id)
+    assert model.prompt_id == valid_id
+
+
+@pytest.mark.parametrize(
+    "invalid_id",
+    [
+        pytest.param("id::project_123::task_456", id="missing_prompt_id"),
+        pytest.param("id::task_456::prompt_789", id="too_many_parts"),
+        pytest.param("id::", id="empty_parts"),
+    ],
+)
+def test_invalid_saved_prompt_id_format(invalid_id):
+    """Test that invalid saved prompt ID formats are rejected"""
+    with pytest.raises(ValidationError, match="Invalid saved prompt ID"):
+        ModelTester(prompt_id=invalid_id)
+
+
+@pytest.mark.parametrize(
+    "invalid_id,expected_error",
+    [
+        ("fine_tune_prompt::", "Invalid fine-tune prompt ID: fine_tune_prompt::"),
+        ("fine_tune_prompt", "Invalid prompt ID: fine_tune_prompt"),
+    ],
+)
+def test_invalid_fine_tune_prompt_id_format(invalid_id, expected_error):
+    """Test that invalid fine-tune prompt ID formats are rejected"""
+    with pytest.raises(ValidationError, match=expected_error):
+        ModelTester(prompt_id=invalid_id)
+
+
+def test_completely_invalid_formats():
+    """Test that completely invalid formats are rejected"""
+    invalid_ids = [
+        "",  # Empty string
+        "invalid_format",  # Random string
+        "id:wrong_format",  # Almost correct but wrong separator
+        "fine_tune:wrong_format",  # Almost correct but wrong prefix
+        ":::",  # Just separators
+    ]
+
+    for invalid_id in invalid_ids:
+        with pytest.raises(ValidationError, match="Invalid prompt ID"):
+            ModelTester(prompt_id=invalid_id)
+
+
+def test_prompt_generator_case_sensitivity():
+    """Test that prompt generator names are case sensitive"""
+    # Take first generator and modify its case
+    first_generator = next(iter(PromptGenerators)).value
+    wrong_case = first_generator.upper()
+    if wrong_case == first_generator:
+        wrong_case = first_generator.lower()
+
+    with pytest.raises(ValidationError):
+        ModelTester(prompt_id=wrong_case)
+
+
+@pytest.mark.parametrize(
+    "valid_id",
+    [
+        "task_run_config::project_123::task_456::config_123",  # Valid task run config prompt ID
+    ],
+)
+def test_valid_task_run_config_prompt_id(valid_id):
+    """Test that valid eval prompt IDs are accepted"""
+    model = ModelTester(prompt_id=valid_id)
+    assert model.prompt_id == valid_id
+
+
+@pytest.mark.parametrize(
+    "invalid_id,expected_error",
+    [
+        ("task_run_config::", "Invalid task run config prompt ID"),
+        ("task_run_config::p1", "Invalid task run config prompt ID"),
+        ("task_run_config::p1::t1", "Invalid task run config prompt ID"),
+        ("task_run_config::p1::t1::c1::extra", "Invalid task run config prompt ID"),
+    ],
+)
+def test_invalid_eval_prompt_id_format(invalid_id, expected_error):
+    """Test that invalid eval prompt ID formats are rejected"""
+    with pytest.raises(ValidationError, match=expected_error):
+        ModelTester(prompt_id=invalid_id)
+
+
+@pytest.mark.parametrize(
+    "id,should_be_frozen",
+    [
+        ("simple_prompt_builder", False),
+        ("id::prompt_123", True),
+        ("task_run_config::p1::t1", True),
+        ("fine_tune_prompt::ft_123", True),
+    ],
+)
+def test_is_frozen_prompt(id, should_be_frozen):
+    """Test that the is_frozen_prompt function works"""
+    assert is_frozen_prompt(id) == should_be_frozen
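Together these cases pin down the PromptId grammar: bare generator names (e.g. simple_prompt_builder), saved prompts ("id::<prompt_id>"), fine-tune prompts ("fine_tune_prompt::<ft_id>"), and task run config prompts ("task_run_config::<project>::<task>::<config>"). A minimal sketch of is_frozen_prompt consistent with the final parametrized test (assumed, not necessarily the package's implementation):

```python
def is_frozen_prompt(prompt_id: str) -> bool:
    # IDs that reference a stored prompt are "frozen" (immutable);
    # bare generator names are not.
    return prompt_id.startswith(("id::", "task_run_config::", "fine_tune_prompt::"))
```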
@@ -0,0 +1,159 @@
+import pytest
+from pydantic import ValidationError
+
+from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType
+from kiln_ai.datamodel.prompt_id import PromptGenerators
+from kiln_ai.datamodel.task import RunConfig, RunConfigProperties, Task, TaskRunConfig
+from kiln_ai.datamodel.task_output import normalize_rating
+
+
+def test_runconfig_valid_creation():
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    config = RunConfig(
+        task=task,
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE,
+    )
+
+    assert config.task == task
+    assert config.model_name == "gpt-4"
+    assert config.model_provider_name == "openai"
+    assert config.prompt_id == PromptGenerators.SIMPLE  # Check default value
+
+
+def test_runconfig_missing_required_fields():
+    with pytest.raises(ValidationError) as exc_info:
+        RunConfig()
+
+    errors = exc_info.value.errors()
+    assert (
+        len(errors) == 4
+    )  # task, model_name, model_provider_name, and prompt_id are required
+    assert any(error["loc"][0] == "task" for error in errors)
+    assert any(error["loc"][0] == "model_name" for error in errors)
+    assert any(error["loc"][0] == "model_provider_name" for error in errors)
+    assert any(error["loc"][0] == "prompt_id" for error in errors)
+
+
+def test_runconfig_custom_prompt_id():
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    config = RunConfig(
+        task=task,
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE_CHAIN_OF_THOUGHT,
+    )
+
+    assert config.prompt_id == PromptGenerators.SIMPLE_CHAIN_OF_THOUGHT
+
+
+@pytest.fixture
+def sample_task():
+    return Task(name="Test Task", instruction="Test instruction")
+
+
+@pytest.fixture
+def sample_run_config_props(sample_task):
+    return RunConfigProperties(
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE,
+    )
+
+
+def test_task_run_config_valid_creation(sample_task, sample_run_config_props):
+    config = TaskRunConfig(
+        name="Test Config",
+        description="Test description",
+        run_config_properties=sample_run_config_props,
+        parent=sample_task,
+    )
+
+    assert config.name == "Test Config"
+    assert config.description == "Test description"
+    assert config.run_config_properties == sample_run_config_props
+    assert config.parent_task() == sample_task
+
+
+def test_task_run_config_minimal_creation(sample_task, sample_run_config_props):
+    # Test creation with only required fields
+    config = TaskRunConfig(
+        name="Test Config",
+        run_config_properties=sample_run_config_props,
+        parent=sample_task,
+    )
+
+    assert config.name == "Test Config"
+    assert config.description is None
+    assert config.run_config_properties == sample_run_config_props
+
+
+def test_task_run_config_missing_required_fields(sample_task):
+    # Test missing name
+    with pytest.raises(ValidationError) as exc_info:
+        TaskRunConfig(
+            run_config_properties=RunConfigProperties(
+                task=sample_task, model_name="gpt-4", model_provider_name="openai"
+            ),
+            parent=sample_task,
+        )
+    assert "Field required" in str(exc_info.value)
+
+    # Test missing run_config
+    with pytest.raises(ValidationError) as exc_info:
+        TaskRunConfig(name="Test Config", parent=sample_task)
+    assert "Field required" in str(exc_info.value)
+
+
+def test_task_run_config_missing_task_in_run_config(sample_task):
+    with pytest.raises(
+        ValidationError, match="Input should be a valid dictionary or instance of Task"
+    ):
+        # Create a run config without a task
+        RunConfig(
+            model_name="gpt-4",
+            model_provider_name="openai",
+            task=None,  # type: ignore
+        )
+
+
+@pytest.mark.parametrize(
+    "rating_type,rating,expected",
+    [
+        (TaskOutputRatingType.five_star, 1, 0),
+        (TaskOutputRatingType.five_star, 2, 0.25),
+        (TaskOutputRatingType.five_star, 3, 0.5),
+        (TaskOutputRatingType.five_star, 4, 0.75),
+        (TaskOutputRatingType.five_star, 5, 1),
+        (TaskOutputRatingType.pass_fail, 0, 0),
+        (TaskOutputRatingType.pass_fail, 1, 1),
+        (TaskOutputRatingType.pass_fail, 0.5, 0.5),
+        (TaskOutputRatingType.pass_fail_critical, -1, 0),
+        (TaskOutputRatingType.pass_fail_critical, 0, 0.5),
+        (TaskOutputRatingType.pass_fail_critical, 1, 1),
+        (TaskOutputRatingType.pass_fail_critical, 0.5, 0.75),
+    ],
+)
+def test_normalize_rating(rating_type, rating, expected):
+    assert normalize_rating(rating, rating_type) == expected
+
+
+@pytest.mark.parametrize(
+    "rating_type,rating",
+    [
+        (TaskOutputRatingType.five_star, 0),
+        (TaskOutputRatingType.five_star, 6),
+        (TaskOutputRatingType.pass_fail, -0.5),
+        (TaskOutputRatingType.pass_fail, 1.5),
+        (TaskOutputRatingType.pass_fail_critical, -1.5),
+        (TaskOutputRatingType.pass_fail_critical, 1.5),
+        (TaskOutputRatingType.custom, 0),
+        (TaskOutputRatingType.custom, 99),
+    ],
+)
+def test_normalize_rating_errors(rating_type, rating):
+    with pytest.raises(ValueError):
+        normalize_rating(rating, rating_type)
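The normalize_rating table above fully determines the mapping onto [0, 1] for each rating type; a minimal sketch consistent with it (an assumption based on the tests, not the library's actual code):

```python
from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType


def normalize_rating(rating: float, rating_type: TaskOutputRatingType) -> float:
    # Linearly rescale each rating type's range onto [0, 1]; out-of-range
    # values and custom ratings raise, matching test_normalize_rating_errors.
    if rating_type == TaskOutputRatingType.five_star:
        if not 1 <= rating <= 5:
            raise ValueError("five_star ratings must be in [1, 5]")
        return (rating - 1) / 4
    if rating_type == TaskOutputRatingType.pass_fail:
        if not 0 <= rating <= 1:
            raise ValueError("pass_fail ratings must be in [0, 1]")
        return rating
    if rating_type == TaskOutputRatingType.pass_fail_critical:
        if not -1 <= rating <= 1:
            raise ValueError("pass_fail_critical ratings must be in [-1, 1]")
        return (rating + 1) / 2
    raise ValueError(f"Cannot normalize rating type: {rating_type}")
```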
kiln_ai/utils/config.py CHANGED
@@ -142,10 +142,15 @@ class Config:
         raise AttributeError(f"Config has no attribute '{name}'")

     @classmethod
-    def settings_path(cls, create=True):
+    def settings_dir(cls, create=True):
         settings_dir = os.path.join(Path.home(), ".kiln_ai")
         if create and not os.path.exists(settings_dir):
             os.makedirs(settings_dir)
+        return settings_dir
+
+    @classmethod
+    def settings_path(cls, create=True):
+        settings_dir = cls.settings_dir(create)
         return os.path.join(settings_dir, "settings.yaml")

     @classmethod
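The refactor above splits directory resolution out of settings_path, so callers can locate the config directory without also computing the settings file path. Usage, per the diff:

```python
from kiln_ai.utils.config import Config

config_dir = Config.settings_dir()  # ~/.kiln_ai, created if missing
settings_file = Config.settings_path(create=False)  # ~/.kiln_ai/settings.yaml, no mkdir
```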
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.11.1
+Version: 0.12.0
 Summary: Kiln AI
 Project-URL: Homepage, https://getkiln.ai
 Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -71,6 +71,7 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
 - [Load an Existing Dataset into a Kiln Task Dataset](#load-an-existing-dataset-into-a-kiln-task-dataset)
 - [Using your Kiln Dataset in a Notebook or Project](#using-your-kiln-dataset-in-a-notebook-or-project)
 - [Using Kiln Dataset in Pandas](#using-kiln-dataset-in-pandas)
+- [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
 - [Full API Reference](#full-api-reference)

 ## Installation
@@ -232,6 +233,41 @@ final_df = pd.concat(dfs, ignore_index=True)
 print(final_df)
 ```

+### Building and Running a Kiln Task from Code
+
+```python
+# Step 1: Create or Load a Task -- choose one of 1.A or 1.B below
+
+# Step 1.A: Optionally load an existing task from disk
+# task = datamodel.Task.load_from_file("path/to/task.kiln")
+
+# Step 1.B: Create a new task in code, without saving to disk.
+task = datamodel.Task(
+    name="test task",
+    instruction="Tell a joke, given a subject.",
+)
+# Replace with a valid JSON schema (https://json-schema.org) for your task (a JSON string, not a Python dict),
+# or delete this line to use plaintext output.
+task.output_json_schema = json_joke_schema
+
+# Step 2: Create an Adapter to run the task, with a specific model and provider
+adapter = adapter_for_task(task, model_name="llama_3_1_8b", provider="groq")
+
+# Step 3: Invoke the Adapter to run the task
+task_input = "cows"
+response = await adapter.invoke(task_input)
+print(f"Output: {response.output.output}")
+
+# Step 4 (optional): Load the task from disk and print the results.
+# This only works if the task was loaded from disk, or if you called task.save_to_file() before invoking the adapter (ephemeral tasks don't save their results to disk).
+task = datamodel.Task.load_from_file(tmp_path / "test_task.kiln")
+for run in task.runs():
+    print(f"Run: {run.id}")
+    print(f"Input: {run.input}")
+    print(f"Output: {run.output}")
+
+```
+
 ## Full API Reference

 The library can do a lot more than the examples we've shown here.
@@ -0,0 +1,100 @@
+kiln_ai/__init__.py,sha256=Sc4z8LRVFMwJUoc_DPVUriSXTZ6PO9MaJ80PhRbKyB8,34
+kiln_ai/adapters/__init__.py,sha256=4qEnFkkRSHPKDU7AvYNkqXECjZO_K7PzDCK3HbsY7o4,902
+kiln_ai/adapters/adapter_registry.py,sha256=o7JhzL627W3WMvpztsI_D0pqLPXP-IgIf3e-o7DAVxE,3720
+kiln_ai/adapters/ml_model_list.py,sha256=IzxswO2zORd7bsovswvRpZMwIs1BZcMtPcnhLZ7xzkk,40969
+kiln_ai/adapters/ollama_tools.py,sha256=ZkiGCaocKTMsb1JoySupv9a1OQuE72CCuKsNpyp6jNU,3551
+kiln_ai/adapters/prompt_builders.py,sha256=LYHTIaisQMBFtWDRIGo1QJgOsmQ-NBpQ8fI4eImHxaQ,15269
+kiln_ai/adapters/provider_tools.py,sha256=CGNLW0xhFyj93HFwznCoDrrbyQbQAaS2mJuOKaMB6gU,14435
+kiln_ai/adapters/run_output.py,sha256=_WVNqJ9cQehgEJR3Jy5_Pp29QnurpmEf-S6UU6WAegE,271
+kiln_ai/adapters/test_adapter_registry.py,sha256=opowxLBWm0lZTPL9S4qEaOS8HB82dTpZeNuhpE_cNHU,6379
+kiln_ai/adapters/test_generate_docs.py,sha256=RQ5flkg4fbosj_fB3RiRZyXSqD_UtOHNBYJsne6UkzU,2782
+kiln_ai/adapters/test_ollama_tools.py,sha256=xAUzL0IVmmXadVehJu1WjqbhpKEYGAgGt3pWx7hrubc,2514
+kiln_ai/adapters/test_prompt_adaptors.py,sha256=CKyToDKS4v-tQO9pq4tiq_ypnY4ePhY9yrRAtG-58p0,7516
+kiln_ai/adapters/test_prompt_builders.py,sha256=5Xvfr-oQg_LLrle6UqfpRHWcPUYa8ywG3aL1rM7q1Jw,22054
+kiln_ai/adapters/test_provider_tools.py,sha256=DtnC6oFuiBvvbhD-kdCcWzEYqXZfMBM_DexuQdyAVR8,28664
+kiln_ai/adapters/data_gen/__init__.py,sha256=QTZWaf7kq5BorhPvexJfwDEKmjRmIbhwW9ei8LW2SIs,276
+kiln_ai/adapters/data_gen/data_gen_prompts.py,sha256=kudjHnAz7L3q0k_NLyTlaIV7M0uRFrxXNcfcnjOE2uc,5810
+kiln_ai/adapters/data_gen/data_gen_task.py,sha256=0PuYCcj09BtpgNj23mKj_L45mKZBdV5VreUeZ-Tj_xM,6642
+kiln_ai/adapters/data_gen/test_data_gen_task.py,sha256=cRKUKMvC0uVompbmPTKwbnQ_N3c0cQDm4J_9H4Y5U18,10129
+kiln_ai/adapters/eval/base_eval.py,sha256=jVXMiVBC07ZnLEuZVAjUAYewsnuV99put39n_GZcG1M,7261
+kiln_ai/adapters/eval/eval_runner.py,sha256=A8GhVEt4J_p2-EZlN592blVxY0anKiMiLgac155pcfQ,10688
+kiln_ai/adapters/eval/g_eval.py,sha256=VXYZi-5WG8Go4E2shaOL4D3V2cL_c2zmEno2N1b2WPM,14295
+kiln_ai/adapters/eval/registry.py,sha256=gZ_s0VgEx79Fswkgi1tS4yOl7lzpkvUBJZ62RldhM_w,626
+kiln_ai/adapters/eval/test_base_eval.py,sha256=AGq09bziZm9zh_37PP59AzpkWW7hQU1o7vHSiDsObhY,10723
+kiln_ai/adapters/eval/test_eval_runner.py,sha256=58jRE_2qHZmsDlMj41DrbgE8w9PKC9wwAT-vbi_R4Ec,18567
+kiln_ai/adapters/eval/test_g_eval.py,sha256=wcR56b3vSKn88JeCCQz92p8TMgZlgSP1ged_XlFrYlg,16162
+kiln_ai/adapters/eval/test_g_eval_data.py,sha256=8caiZfLWnXVX8alrBPrH7L7gqqSS9vO7u6PzcHurQcA,27769
+kiln_ai/adapters/fine_tune/__init__.py,sha256=DxdTR60chwgck1aEoVYWyfWi6Ed2ZkdJj0lar-SEAj4,257
+kiln_ai/adapters/fine_tune/base_finetune.py,sha256=n3mfE_3bhhzmN_MQxO5qNezN-qpl4WFamZ3ih41dx4o,6069
+kiln_ai/adapters/fine_tune/dataset_formatter.py,sha256=qRhSSkMhTWn13OMb6LKPVwAU7uY4bB49GDiVSuhDkNg,14449
+kiln_ai/adapters/fine_tune/finetune_registry.py,sha256=H1B-opCTlIyd9JlIFTKsY_ctxUX9ziEc49_gnmg1SZg,483
+kiln_ai/adapters/fine_tune/fireworks_finetune.py,sha256=6IfTDn_8tg6PR0OFudRx6V7Wjvf4P7t0fm_xyRwII68,13978
+kiln_ai/adapters/fine_tune/openai_finetune.py,sha256=Dz9E_0BWfrIkvv8ArZe-RKPwbIKPZ3v8rfbc3JELyTY,8571
+kiln_ai/adapters/fine_tune/test_base_finetune.py,sha256=0zWxFYrDGVuoQNQmi9vVUEkBc4mstfHnsUjQmiJA-sE,10864
+kiln_ai/adapters/fine_tune/test_dataset_formatter.py,sha256=T3jbFZooLVBaGCE0LUVxwPxzM3l8IY41zUj3jPk-Zi8,24027
+kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py,sha256=e88z5-KtU9Y7frXqCVy6r6iE6S-tInn5oMOqmSnbR2I,18144
+kiln_ai/adapters/fine_tune/test_openai_finetune.py,sha256=H63Xk2PNHbt5Ev5IQpdR9JZ4uz-Huo2gfuC4mHHqe0w,20011
+kiln_ai/adapters/model_adapters/__init__.py,sha256=FyNq-twr1zQR55Hd_sDlTcVZ8JsZ9jrIBBURtZNjlss,272
+kiln_ai/adapters/model_adapters/base_adapter.py,sha256=tY67FJlWzYY-Ha1FyBMes3KacklFfTSqvU6-crILQsc,9597
+kiln_ai/adapters/model_adapters/langchain_adapters.py,sha256=LVggQGeg_fIol1uYo375JHmjh7pwDhCACVe1K7lh28Y,12996
+kiln_ai/adapters/model_adapters/openai_compatible_config.py,sha256=oxiUUESM9f5EVMePDSJI7s6YaMmmBkFMCEdxA50mwgw,241
+kiln_ai/adapters/model_adapters/openai_model_adapter.py,sha256=KuCPFuNZ5aZwymLL8k87PNjUVOs9JyR91W7VKtw9yN8,11438
+kiln_ai/adapters/model_adapters/test_base_adapter.py,sha256=uQyKrHLN3Jha6R-6SWkEME6brQecVFdPTSXogo-xpt0,6556
+kiln_ai/adapters/model_adapters/test_langchain_adapter.py,sha256=PArWTKytzUoM9Lc9Q0bEOcvZDNTF2SzfR9ln8sf0Hzg,11884
+kiln_ai/adapters/model_adapters/test_openai_model_adapter.py,sha256=CV6a3Sf3f3szpk1MLBoAjcuL5B5-4n86j8tMlEA-Bhg,6844
+kiln_ai/adapters/model_adapters/test_saving_adapter_results.py,sha256=1XFQQxdSIbqSoQEdxHOYJcY0cMb59qpTDPOmL9bW4B8,7870
+kiln_ai/adapters/model_adapters/test_structured_output.py,sha256=7N5xniBWXDxwb4gvV8k0bbrlTir2kWBE6Q_z2azBJvs,11865
+kiln_ai/adapters/parsers/__init__.py,sha256=TGJS_8JhjUwg5Bnq4cDmwt5eIRo4vowmcL2A72L1Hzk,202
+kiln_ai/adapters/parsers/base_parser.py,sha256=DaoZVEOOuFTMZd5ZTpl_as6-xc9NPWGP2fAmP12J58M,389
+kiln_ai/adapters/parsers/json_parser.py,sha256=IszrBrhIFrrVr76UZsuejkBdqpZG27mU72264HVgVzE,1274
+kiln_ai/adapters/parsers/parser_registry.py,sha256=G9bAZrnWrR0a82JAQHsSqA2o7-CjrZUBANZljY_6ZxE,623
+kiln_ai/adapters/parsers/r1_parser.py,sha256=9nMEWDAbRSTFuu_--0HMVfVg9IYSoUNQHHw9OxETlRw,2558
+kiln_ai/adapters/parsers/test_json_parser.py,sha256=9kdWe_vRC5wjP8A1Ym6Zu6enDIz4ARCNiRpcZr7_3ak,1971
+kiln_ai/adapters/parsers/test_parser_registry.py,sha256=S4MdX7cnhCbmeKq8tZwMwRdGWr-019Z-fw5zey9Wm08,1043
+kiln_ai/adapters/parsers/test_r1_parser.py,sha256=Ys1ICRNVgt54rf8IEKNav5sz9zHYvvcVAUuoSwwftg8,4517
+kiln_ai/adapters/repair/__init__.py,sha256=dOO9MEpEhjiwzDVFg3MNfA2bKMPlax9iekDatpTkX8E,217
+kiln_ai/adapters/repair/repair_task.py,sha256=iW0bHWQq6Tir6ULTATWFS0zpwNji8Tbwhm2lZu52RsM,3342
+kiln_ai/adapters/repair/test_repair_task.py,sha256=iY7h-o-hnB0zwlkX--WuQlCsd5sKbhksS0hIIPCxt7E,7944
+kiln_ai/datamodel/__init__.py,sha256=GbRfDrdSq9d_-HxzmFIicTmLO3qz-O3XGvSwDPh3XCk,1957
+kiln_ai/datamodel/basemodel.py,sha256=TwMBfNFJ7-5bp2QOoTQUl_YVrF0pkDAk5Rdk6EWEXxI,22143
+kiln_ai/datamodel/datamodel_enums.py,sha256=w8aJeuLWdrH6ZCZ0y2-o0IOmXcl7qXpOMHyrHHoJmkA,2040
+kiln_ai/datamodel/dataset_filters.py,sha256=hWKxGJ-mSl4y0igyNcpmRoRYCiGrf0_uN4MMU9Fe_ng,3180
+kiln_ai/datamodel/dataset_split.py,sha256=q4l4SlUvjLV547bzk7Z-fbmj_o26GDcYOZ2rA5RPh3c,5612
+kiln_ai/datamodel/eval.py,sha256=kio2LqQ87MsP75DJTiIVdVfopTZXH4xjGN9g11V1mUU,13826
+kiln_ai/datamodel/finetune.py,sha256=TYoNVRAfbjqvrY-1YmHwG6xSoDljiJWuuVcTbvQAJL4,4569
+kiln_ai/datamodel/json_schema.py,sha256=sjc2LkbWWFhlqX5QOvLeWrovkmoX_tn3iQquxKDA8Pk,2990
+kiln_ai/datamodel/model_cache.py,sha256=9X4aAigbkFdytckgw8InCMh86uBna0ME_1HJSeMPEn0,4495
+kiln_ai/datamodel/project.py,sha256=uVH2_3TDFtsG_tpts81A-zbd9uPDFxAwMCKZt_km3IE,727
+kiln_ai/datamodel/prompt.py,sha256=70JPYHfgyX18cHW_DXoMzIOA28Jbaz6gyabElmpycyc,1161
+kiln_ai/datamodel/prompt_id.py,sha256=eU2TV0RZapn-BgnZ4sOSNOOVEQ3aPaLzW4YSYCd3OBo,2531
+kiln_ai/datamodel/registry.py,sha256=XwGFXJFKZtOpR1Z9ven6SftggfADdZRm8TFxCEVtfUQ,957
+kiln_ai/datamodel/strict_mode.py,sha256=sm4Xka8mnJHCShtbh6MMU5dDQv-cLj8lHgHkmFKpsl0,849
+kiln_ai/datamodel/task.py,sha256=r-_zgrQCIiIkN8gvBISdU449Z9oKp7E1XL0lkik_rVI,7036
+kiln_ai/datamodel/task_output.py,sha256=0h4QvzV-hksE_AGHWsUHbYf5F95Zn7uU7WFbeIbAEck,12507
+kiln_ai/datamodel/task_run.py,sha256=yquE0jyr_9WzcvrMsEmZfXUnn8zZDEZIXZhVcVBMrT8,7038
+kiln_ai/datamodel/test_basemodel.py,sha256=KJLJf0stuQq4ksOtoPM_w1VQrGz2FGdOT6cdrMkib9s,17750
+kiln_ai/datamodel/test_dataset_filters.py,sha256=v88QPkIsq4diUmoUF3-qj5KAW2rLRp0KDAm_pexbFy4,1894
+kiln_ai/datamodel/test_dataset_split.py,sha256=5CHO1Lq4xQBB72tV2SPER7OZODJNvj15qxi_cYBV2Rs,11157
+kiln_ai/datamodel/test_datasource.py,sha256=Pzh1l20__xObgdBaIMNJarG-jwmEsujRFkRmUBLuK0g,3220
+kiln_ai/datamodel/test_eval_model.py,sha256=J7MqwWBgPpeXGqh3IacVUUHdZFJSZ2MgTsUNu-hNOJw,19528
+kiln_ai/datamodel/test_example_models.py,sha256=fpqh0u7zFhWHcRHgtxCjX8RD2oKHYOP_mJJymaUhEZU,20944
+kiln_ai/datamodel/test_json_schema.py,sha256=UgKwAFcdrJTq2byh7Yf-HoSAtiHiGAsNZxfkIvoMxIg,3915
+kiln_ai/datamodel/test_model_cache.py,sha256=Fy-ucYNzS5JEG-8SFY4nVHA8iRbXXxai20f8_oGl97o,8184
+kiln_ai/datamodel/test_model_perf.py,sha256=NdD7L8XraGkunaEKGPsfYwdcbIgdjhFanOO3G6hU158,3235
+kiln_ai/datamodel/test_models.py,sha256=hmV7sTbOamWJCwOY96w-g4PQRv4Uai-XaHtg0QKH-ak,19295
+kiln_ai/datamodel/test_nested_save.py,sha256=xciCddqvPyKyoyjC5Lx_3Kh1t4LJv1xYRAPazR3SRcs,5588
+kiln_ai/datamodel/test_output_rating.py,sha256=zvPIp2shAgCs2RQBgwYoL09fRA3krHvgAqUa91RlWR0,15125
+kiln_ai/datamodel/test_prompt_id.py,sha256=ihyXVPQi0dSLGnBM7rTXRnVaiWXhh7HJmSy4nZZKmso,4225
+kiln_ai/datamodel/test_registry.py,sha256=PhS4anLi5Bf_023obuTlO5DALhtPB8WIc_bX12Yg6Po,2705
+kiln_ai/datamodel/test_task.py,sha256=FYyoEqJXQIy8rcBsLTdki4-1z9COnZQk1-aoS3ZoNuU,5307
+kiln_ai/utils/__init__.py,sha256=PTD0MwBCKAMIOGsTAwsFaJOusTJJoRFTfOGqRvCaU-E,142
+kiln_ai/utils/config.py,sha256=9navMS2ooSviz74Bq8raf5-01DFfDf9SSVfpasIIPlo,6993
+kiln_ai/utils/exhaustive_error.py,sha256=TkkRixIAR3CPEKHeAJzyv0mtxp6BxUBKMvobA3vzQug,262
+kiln_ai/utils/formatting.py,sha256=VtB9oag0lOGv17dwT7OPX_3HzBfaU9GsLH-iLete0yM,97
+kiln_ai/utils/name_generator.py,sha256=v26TgpCwQbhQFcZvzgjZvURinjrOyyFhxpsI6NQrHKc,1914
+kiln_ai/utils/test_config.py,sha256=Jw3nMFeIgZUsZDRJJY2HpB-2EkR2NoZ-rDe_o9oA7ws,9174
+kiln_ai/utils/test_name_geneator.py,sha256=9-hSTBshyakqlPbFnNcggwLrL7lcPTitauBYHg9jFWI,1513
+kiln_ai-0.12.0.dist-info/METADATA,sha256=CnOHwkyknO0XqFIRL65XVEQl_375cHnvMneIy8J3PWE,10656
+kiln_ai-0.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kiln_ai-0.12.0.dist-info/licenses/LICENSE.txt,sha256=_NA5pnTYgRRr4qH6lE3X-TuZJ8iRcMUi5ASoGr-lEx8,1209
+kiln_ai-0.12.0.dist-info/RECORD,,