kiln-ai 0.0.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (33)
  1. kiln_ai/adapters/base_adapter.py +168 -0
  2. kiln_ai/adapters/langchain_adapters.py +113 -0
  3. kiln_ai/adapters/ml_model_list.py +436 -0
  4. kiln_ai/adapters/prompt_builders.py +122 -0
  5. kiln_ai/adapters/repair/repair_task.py +71 -0
  6. kiln_ai/adapters/repair/test_repair_task.py +248 -0
  7. kiln_ai/adapters/test_langchain_adapter.py +50 -0
  8. kiln_ai/adapters/test_ml_model_list.py +99 -0
  9. kiln_ai/adapters/test_prompt_adaptors.py +167 -0
  10. kiln_ai/adapters/test_prompt_builders.py +315 -0
  11. kiln_ai/adapters/test_saving_adapter_results.py +168 -0
  12. kiln_ai/adapters/test_structured_output.py +218 -0
  13. kiln_ai/datamodel/__init__.py +362 -2
  14. kiln_ai/datamodel/basemodel.py +372 -0
  15. kiln_ai/datamodel/json_schema.py +45 -0
  16. kiln_ai/datamodel/test_basemodel.py +277 -0
  17. kiln_ai/datamodel/test_datasource.py +107 -0
  18. kiln_ai/datamodel/test_example_models.py +644 -0
  19. kiln_ai/datamodel/test_json_schema.py +124 -0
  20. kiln_ai/datamodel/test_models.py +190 -0
  21. kiln_ai/datamodel/test_nested_save.py +205 -0
  22. kiln_ai/datamodel/test_output_rating.py +88 -0
  23. kiln_ai/utils/config.py +170 -0
  24. kiln_ai/utils/formatting.py +5 -0
  25. kiln_ai/utils/test_config.py +245 -0
  26. {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.0.dist-info}/METADATA +20 -1
  27. kiln_ai-0.5.0.dist-info/RECORD +29 -0
  28. kiln_ai/__init.__.py +0 -3
  29. kiln_ai/coreadd.py +0 -3
  30. kiln_ai/datamodel/project.py +0 -15
  31. kiln_ai-0.0.4.dist-info/RECORD +0 -8
  32. {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.0.dist-info}/LICENSE.txt +0 -0
  33. {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.0.dist-info}/WHEEL +0 -0
@@ -1,3 +1,363 @@
- from .project import KilnProject
+ from __future__ import annotations
 
- __all__ = ["KilnProject"]
+ import json
+ from enum import Enum, IntEnum
+ from typing import TYPE_CHECKING, Dict, List, Self, Type, Union
+
+ import jsonschema
+ import jsonschema.exceptions
+ from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
+ from pydantic import BaseModel, Field, model_validator
+
+ from .basemodel import (
+     ID_FIELD,
+     ID_TYPE,
+     KilnBaseModel,
+     KilnParentedModel,
+     KilnParentModel,
+ )
+ from .json_schema import validate_schema
+
+ if TYPE_CHECKING:
+     from . import Task
+
+ # Conventions:
+ # 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
+ # 2) Descriptions are for Kiln users to describe/understand the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.
+
+ # Filename compatible names
+ NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
+ NAME_FIELD = Field(min_length=1, max_length=120, pattern=NAME_REGEX)
+ SHORT_NAME_FIELD = Field(min_length=1, max_length=20, pattern=NAME_REGEX)
+
+
+ class Priority(IntEnum):
+     p0 = 0
+     p1 = 1
+     p2 = 2
+     p3 = 3
+
+
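As context for the name fields above, NAME_REGEX restricts names to filename-safe characters. A quick illustrative sketch (hypothetical strings, using plain re rather than the pydantic Field validation that actually enforces it):

    import re

    NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
    assert re.match(NAME_REGEX, "My Project-2")      # letters, digits, space, _, - allowed
    assert not re.match(NAME_REGEX, "bad/name")      # path separators rejected
    assert not re.match(NAME_REGEX, "trailing.dot")  # dots rejected, keeping names filename safe
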
+ # Only one rating type for now, but this allows for extensibility if we want to add more in the future
+ class TaskOutputRatingType(str, Enum):
+     five_star = "five_star"
+     custom = "custom"
+
+
+ class TaskOutputRating(KilnBaseModel):
+     """
+     A rating for a task output, including an overall rating and ratings for each requirement.
+
+     Only supports five star ratings for now, but extensible for custom values.
+     """
+
+     type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
+     value: float | None = Field(
+         description="The overall rating value (typically 1-5 stars).",
+         default=None,
+     )
+     requirement_ratings: Dict[ID_TYPE, float] = Field(
+         default={},
+         description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
+     )
+
+     # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
+     def is_high_quality(self) -> bool:
+         if self.type == TaskOutputRatingType.five_star:
+             return self.value is not None and self.value >= 4
+         return False
+
+     @model_validator(mode="after")
+     def validate_rating(self) -> Self:
+         if self.type not in TaskOutputRatingType:
+             raise ValueError(f"Invalid rating type: {self.type}")
+
+         if self.type == TaskOutputRatingType.five_star:
+             if self.value is not None:
+                 self._validate_five_star(self.value, "overall rating")
+             for req_id, req_rating in self.requirement_ratings.items():
+                 self._validate_five_star(req_rating, f"requirement rating for {req_id}")
+
+         return self
+
+     def _validate_five_star(self, rating: float, rating_name: str) -> None:
+         if not isinstance(rating, float) or not rating.is_integer():
+             raise ValueError(
+                 f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
+             )
+         if rating < 1 or rating > 5:
+             raise ValueError(
+                 f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
+             )
+
+     def validate_requirement_rating_keys(self, task: Task) -> Self:
+         if len(self.requirement_ratings) == 0:
+             return self
+
+         valid_requirement_ids = {req.id for req in task.requirements}
+         for key in self.requirement_ratings.keys():
+             if key not in valid_requirement_ids:
+                 raise ValueError(
+                     f"Requirement ID '{key}' is not a valid requirement ID for this task"
+                 )
+         return self
+
+
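A minimal usage sketch of the rating rules above (the requirement ID is hypothetical, and this assumes KilnBaseModel's own fields, which this diff does not show, all have defaults):

    from kiln_ai.datamodel import TaskOutputRating

    # Star ratings are stored as floats, but _validate_five_star requires
    # whole-number values between 1.0 and 5.0.
    rating = TaskOutputRating(value=5.0, requirement_ratings={"req_001": 4.0})
    assert rating.is_high_quality()  # five_star with value >= 4

    # A fractional value such as 4.5 fails validate_rating; pydantic surfaces
    # the ValueError as a ValidationError.
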
+ class TaskOutput(KilnBaseModel):
+     """
+     An output for a specific task run.
+     """
+
+     output: str = Field(
+         description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
+     )
+     source: DataSource = Field(
+         description="The source of the output: human or synthetic."
+     )
+     rating: TaskOutputRating | None = Field(
+         default=None, description="The rating of the output"
+     )
+
+     def validate_output_format(self, task: Task) -> Self:
+         # validate output
+         if task.output_json_schema is not None:
+             try:
+                 validate_schema(json.loads(self.output), task.output_json_schema)
+             except json.JSONDecodeError:
+                 raise ValueError("Output is not a valid JSON object")
+             except jsonschema.exceptions.ValidationError as e:
+                 raise ValueError(f"Output does not match task output schema: {e}")
+         return self
+
+
+ class DataSourceType(str, Enum):
+     """
+     The source of a piece of data.
+     """
+
+     human = "human"
+     synthetic = "synthetic"
+
+
+ class DataSourceProperty(BaseModel):
+     name: str
+     type: Type[Union[str, int, float]]
+     required_for: List[DataSourceType] = []
+     not_allowed_for: List[DataSourceType] = []
+
+
+ class DataSource(BaseModel):
+     type: DataSourceType
+     properties: Dict[str, str | int | float] = Field(
+         default={},
+         description="Properties describing the data source. For synthetic sources, things like the model name. For human sources, the human's name.",
+     )
+
+     _data_source_properties = [
+         DataSourceProperty(
+             name="created_by",
+             type=str,
+             required_for=[DataSourceType.human],
+             not_allowed_for=[DataSourceType.synthetic],
+         ),
+         DataSourceProperty(
+             name="model_name",
+             type=str,
+             required_for=[DataSourceType.synthetic],
+             not_allowed_for=[DataSourceType.human],
+         ),
+         DataSourceProperty(
+             name="model_provider",
+             type=str,
+             required_for=[DataSourceType.synthetic],
+             not_allowed_for=[DataSourceType.human],
+         ),
+         DataSourceProperty(
+             name="adapter_name",
+             type=str,
+             required_for=[DataSourceType.synthetic],
+             not_allowed_for=[DataSourceType.human],
+         ),
+         DataSourceProperty(
+             name="prompt_builder_name",
+             type=str,
+             not_allowed_for=[DataSourceType.human],
+         ),
+     ]
+
+     @model_validator(mode="after")
+     def validate_type(self) -> "DataSource":
+         if self.type not in DataSourceType:
+             raise ValueError(f"Invalid data source type: {self.type}")
+         return self
+
+     @model_validator(mode="after")
+     def validate_properties(self) -> "DataSource":
+         for prop in self._data_source_properties:
+             # Check the property type is correct
+             if prop.name in self.properties:
+                 if not isinstance(self.properties[prop.name], prop.type):
+                     raise ValueError(
+                         f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
+                     )
+             # Check the property is required for the data source type
+             if self.type in prop.required_for:
+                 if prop.name not in self.properties:
+                     raise ValueError(
+                         f"'{prop.name}' is required for {self.type} data source"
+                     )
+             # Check the property is not allowed for the data source type
+             elif self.type in prop.not_allowed_for and prop.name in self.properties:
+                 raise ValueError(
+                     f"'{prop.name}' is not allowed for {self.type} data source"
+                 )
+         return self
+
+     @model_validator(mode="after")
+     def validate_no_empty_properties(self) -> Self:
+         for prop, value in self.properties.items():
+             if isinstance(value, str) and value == "":
+                 raise ValueError(
+                     f"Property '{prop}' must be a non-empty string for {self.type} data source"
+                 )
+         return self
+
+
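To make the property rules concrete, a sketch with hypothetical property values. Per the table above, a synthetic source must supply model_name, model_provider, and adapter_name, while created_by is required for (and only valid on) human sources:

    from kiln_ai.datamodel import DataSource, DataSourceType
    from pydantic import ValidationError

    synthetic = DataSource(
        type=DataSourceType.synthetic,
        properties={
            "model_name": "llama_3_1_8b",        # hypothetical values
            "model_provider": "groq",
            "adapter_name": "langchain_adapter",
        },
    )

    human = DataSource(type=DataSourceType.human, properties={"created_by": "Jane Doe"})

    try:
        # model_name is in not_allowed_for for human sources, so this raises.
        DataSource(
            type=DataSourceType.human,
            properties={"created_by": "Jane Doe", "model_name": "x"},
        )
    except ValidationError:
        pass
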
+ class TaskRun(KilnParentedModel):
+     """
+     A run of a specific Task, including the input and output.
+     """
+
+     input: str = Field(
+         description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
+     )
+     input_source: DataSource = Field(
+         description="The source of the input: human or synthetic."
+     )
+
+     output: TaskOutput = Field(description="The output of the task run.")
+     repair_instructions: str | None = Field(
+         default=None,
+         description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
+     )
+     repaired_output: TaskOutput | None = Field(
+         default=None,
+         description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
+     )
+
+     def parent_task(self) -> Task | None:
+         if not isinstance(self.parent, Task):
+             return None
+         return self.parent
+
+     @model_validator(mode="after")
+     def validate_input_format(self) -> Self:
+         task = self.parent_task()
+         if task is None:
+             # don't validate this relationship until we have a path or parent. Give the caller time to build it (it will be caught before saving)
+             return self
+
+         # validate input
+         if task.input_json_schema is not None:
+             try:
+                 validate_schema(json.loads(self.input), task.input_json_schema)
+             except json.JSONDecodeError:
+                 raise ValueError("Input is not a valid JSON object")
+             except jsonschema.exceptions.ValidationError as e:
+                 raise ValueError(f"Input does not match task input schema: {e}")
+         return self
+
+     @model_validator(mode="after")
+     def validate_output_format(self) -> Self:
+         task = self.parent_task()
+         if task is None:
+             return self
+
+         self.output.validate_output_format(task)
+         return self
+
+     @model_validator(mode="after")
+     def validate_requirement_ratings(self) -> Self:
+         task = self.parent_task()
+         if task is None:
+             return self
+
+         if self.output.rating is not None:
+             self.output.rating.validate_requirement_rating_keys(task)
+         if self.repaired_output is not None and self.repaired_output.rating is not None:
+             self.repaired_output.rating.validate_requirement_rating_keys(task)
+
+         return self
+
+     @model_validator(mode="after")
+     def validate_repaired_output(self) -> Self:
+         if self.repaired_output is not None:
+             if self.repaired_output.rating is not None:
+                 raise ValueError(
+                     "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
+                 )
+         if self.repair_instructions is None and self.repaired_output is not None:
+             raise ValueError(
+                 "Repair instructions are required if providing a repaired output."
+             )
+         if self.repair_instructions is not None and self.repaired_output is None:
+             raise ValueError(
+                 "A repaired output is required if providing repair instructions."
+             )
+         return self
+
+
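The repair fields travel as a pair, as validate_repaired_output enforces. A sketch (hypothetical strings; the TaskRun has no parent Task here, so the schema validators above are skipped until saving):

    from kiln_ai.datamodel import DataSource, DataSourceType, TaskOutput, TaskRun
    from pydantic import ValidationError

    source = DataSource(type=DataSourceType.human, properties={"created_by": "Jane Doe"})
    output = TaskOutput(output="4", source=source)

    run = TaskRun(input="What is 2 + 2?", input_source=source, output=output)

    try:
        # repair_instructions without a repaired_output is rejected.
        TaskRun(
            input="What is 2 + 2?",
            input_source=source,
            output=output,
            repair_instructions="Answer with a full sentence.",
        )
    except ValidationError:
        pass

A repaired_output must also arrive with rating=None: repaired outputs are treated as having a perfect rating by definition.
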
+ class TaskRequirement(BaseModel):
+     id: ID_TYPE = ID_FIELD
+     name: str = SHORT_NAME_FIELD
+     description: str | None = Field(default=None)
+     instruction: str = Field(min_length=1)
+     priority: Priority = Field(default=Priority.p2)
+
+
+ class TaskDeterminism(str, Enum):
+     deterministic = "deterministic"  # Expect exact match
+     semantic_match = "semantic_match"  # Expect same meaning, but flexible on expression of the meaning
+     flexible = "flexible"  # Flexible on semantic output. Eval should be custom based on parsing requirements.
+
+
+ class Task(
+     KilnParentedModel,
+     KilnParentModel,
+     parent_of={"runs": TaskRun},
+ ):
+     name: str = NAME_FIELD
+     description: str = Field(default="")
+     priority: Priority = Field(default=Priority.p2)
+     determinism: TaskDeterminism = Field(default=TaskDeterminism.flexible)
+     instruction: str = Field(min_length=1)
+     requirements: List[TaskRequirement] = Field(default=[])
+     # TODO: make this required, or formalize the default message output schema
+     output_json_schema: JsonObjectSchema | None = None
+     input_json_schema: JsonObjectSchema | None = None
+
+     def output_schema(self) -> Dict | None:
+         if self.output_json_schema is None:
+             return None
+         return schema_from_json_str(self.output_json_schema)
+
+     def input_schema(self) -> Dict | None:
+         if self.input_json_schema is None:
+             return None
+         return schema_from_json_str(self.input_json_schema)
+
+     # Needed for typechecking. TODO P2: fix this in KilnParentModel
+     def runs(self) -> list[TaskRun]:
+         return super().runs()  # type: ignore
+
+
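Because Task stores its schemas as JSON strings (JsonObjectSchema), output_schema() and input_schema() parse them on demand. A short sketch (hypothetical schema; assumes schema_from_json_str returns the parsed dict, which this diff does not show):

    import json

    from kiln_ai.datamodel import Task

    schema = json.dumps({
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    })
    task = Task(name="QA", instruction="Answer the question.", output_json_schema=schema)
    assert task.output_schema() == json.loads(schema)  # assumption: returns the parsed dict
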
+ class Project(KilnParentModel, parent_of={"tasks": Task}):
+     name: str = NAME_FIELD
+     description: str | None = Field(
+         default=None,
+         description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
+     )
+
+     # Needed for typechecking. TODO P2: fix this in KilnParentModel
+     def tasks(self) -> list[Task]:
+         return super().tasks()  # type: ignore
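Putting the hierarchy together, a hedged end-to-end sketch. The names are hypothetical, and the parent= keyword and persistence behavior live in basemodel.py, which this diff does not include, so treat both as assumptions:

    from kiln_ai.datamodel import Project, Task, TaskRequirement

    project = Project(name="Demo Project")
    task = Task(
        name="Summarize",
        instruction="Summarize the input in one sentence.",
        requirements=[
            TaskRequirement(name="brevity", instruction="Keep it under 20 words."),
        ],
        parent=project,  # assumption: KilnParentedModel accepts a parent kwarg
    )

    # Task.runs() and Project.tasks(), generated from KilnParentModel's
    # parent_of mapping, list persisted children once the models are saved.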