kiln-ai 0.0.4__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/base_adapter.py +168 -0
- kiln_ai/adapters/langchain_adapters.py +113 -0
- kiln_ai/adapters/ml_model_list.py +436 -0
- kiln_ai/adapters/prompt_builders.py +122 -0
- kiln_ai/adapters/repair/repair_task.py +71 -0
- kiln_ai/adapters/repair/test_repair_task.py +248 -0
- kiln_ai/adapters/test_langchain_adapter.py +50 -0
- kiln_ai/adapters/test_ml_model_list.py +99 -0
- kiln_ai/adapters/test_prompt_adaptors.py +167 -0
- kiln_ai/adapters/test_prompt_builders.py +315 -0
- kiln_ai/adapters/test_saving_adapter_results.py +168 -0
- kiln_ai/adapters/test_structured_output.py +218 -0
- kiln_ai/datamodel/__init__.py +362 -2
- kiln_ai/datamodel/basemodel.py +372 -0
- kiln_ai/datamodel/json_schema.py +45 -0
- kiln_ai/datamodel/test_basemodel.py +277 -0
- kiln_ai/datamodel/test_datasource.py +107 -0
- kiln_ai/datamodel/test_example_models.py +644 -0
- kiln_ai/datamodel/test_json_schema.py +124 -0
- kiln_ai/datamodel/test_models.py +190 -0
- kiln_ai/datamodel/test_nested_save.py +205 -0
- kiln_ai/datamodel/test_output_rating.py +88 -0
- kiln_ai/utils/config.py +170 -0
- kiln_ai/utils/formatting.py +5 -0
- kiln_ai/utils/test_config.py +245 -0
- {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.1.dist-info}/METADATA +22 -1
- kiln_ai-0.5.1.dist-info/RECORD +29 -0
- kiln_ai/__init.__.py +0 -3
- kiln_ai/coreadd.py +0 -3
- kiln_ai/datamodel/project.py +0 -15
- kiln_ai-0.0.4.dist-info/RECORD +0 -8
- {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.1.dist-info}/LICENSE.txt +0 -0
- {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from abc import ABCMeta
|
|
4
|
+
from builtins import classmethod
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import (
|
|
8
|
+
TYPE_CHECKING,
|
|
9
|
+
Any,
|
|
10
|
+
Dict,
|
|
11
|
+
List,
|
|
12
|
+
Optional,
|
|
13
|
+
Self,
|
|
14
|
+
Type,
|
|
15
|
+
TypeVar,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from kiln_ai.utils.config import Config
|
|
19
|
+
from kiln_ai.utils.formatting import snake_case
|
|
20
|
+
from pydantic import (
|
|
21
|
+
BaseModel,
|
|
22
|
+
ConfigDict,
|
|
23
|
+
Field,
|
|
24
|
+
ValidationError,
|
|
25
|
+
computed_field,
|
|
26
|
+
model_validator,
|
|
27
|
+
)
|
|
28
|
+
from pydantic_core import ErrorDetails
|
|
29
|
+
|
|
30
|
+
# ID is a 12 digit random integer string: the first 12 characters of a UUID4's decimal representation.
# Should be unique per item, at least inside the context of a parent/child relationship.
# Use integers to make it easier to type for a search function.
# Allow None, even though we generate it, because we clear it in the REST API if the object is ephemeral (not persisted to disk).
ID_FIELD = Field(default_factory=lambda: str(uuid.uuid4().int)[:12])
ID_TYPE = Optional[str]
# Type variables used to annotate `cls` in the classmethod loaders below, so a
# loader called on a subclass is typed as returning that subclass.
T = TypeVar("T", bound="KilnBaseModel")
PT = TypeVar("PT", bound="KilnParentedModel")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class KilnBaseModel(BaseModel):
    # Base for Kiln data models: a pydantic model that round-trips through a
    # single JSON file on disk. Documented with comments instead of docstrings
    # where pydantic would copy a docstring into the generated JSON schema.
    model_config = ConfigDict(validate_assignment=True)

    v: int = Field(default=1)  # schema_version
    id: ID_TYPE = ID_FIELD
    # Location of the backing file, if any; excluded from the serialized JSON.
    path: Optional[Path] = Field(default=None)
    created_at: datetime = Field(default_factory=datetime.now)
    created_by: str = Field(default_factory=lambda: Config.shared().user_id)

    # Written into every saved file; load_from_file compares it to type_name().
    @computed_field()
    def model_type(self) -> str:
        return self.type_name()

    # if changing the model name, should keep the original name here for parsing old files
    @classmethod
    def type_name(cls) -> str:
        """Return the snake_case form of the class name, used as the serialized type tag."""
        return snake_case(cls.__name__)

    # used as /obj_folder/base_filename.kiln
    @classmethod
    def base_filename(cls) -> str:
        """Return the default file name for this model type: ``<type_name>.kiln``."""
        return cls.type_name() + ".kiln"

    @classmethod
    def load_from_folder(cls: Type[T], folderPath: Path) -> T:
        """Load the instance stored under its default file name inside ``folderPath``."""
        path = folderPath / cls.base_filename()
        return cls.load_from_file(path)

    @classmethod
    def load_from_file(cls: Type[T], path: Path) -> T:
        """Load and strictly validate an instance from the JSON file at ``path``.

        The returned instance has its ``path`` set to the file it came from.

        Raises:
            ValueError: if the file is not valid JSON, its schema version is
                newer than ``max_schema_version()``, or its ``model_type`` is
                missing or does not match ``type_name()``.
            pydantic.ValidationError: if the content fails strict validation.
        """
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding (which breaks non-ASCII content on some systems).
        with open(path, "r", encoding="utf-8") as file:
            file_data = file.read()
        # TODO P2 perf: parsing the JSON twice here.
        # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
        parsed_json = json.loads(file_data)
        m = cls.model_validate_json(file_data, strict=True)
        if not isinstance(m, cls):
            raise ValueError(f"Loaded model is not of type {cls.__name__}")
        m.path = path
        if m.v > m.max_schema_version():
            raise ValueError(
                f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        # Use .get so a file with no "model_type" key produces the same
        # descriptive ValueError as a mismatching one, rather than a KeyError.
        file_model_type = parsed_json.get("model_type")
        if file_model_type != cls.type_name():
            raise ValueError(
                f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {file_model_type}. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        return m

    def save_to_file(self) -> None:
        """Serialize this model as indented JSON to ``build_path()``.

        Parent directories are created as needed, and the resolved path is
        stored on the instance afterwards.

        Raises:
            ValueError: if no path can be determined.
        """
        path = self.build_path()
        if path is None:
            raise ValueError(
                f"Cannot save to file because 'path' is not set. Class: {self.__class__.__name__}, "
                f"id: {getattr(self, 'id', None)}, path: {path}"
            )
        path.parent.mkdir(parents=True, exist_ok=True)
        json_data = self.model_dump_json(indent=2, exclude={"path"})
        # Written as UTF-8 to match load_from_file.
        with open(path, "w", encoding="utf-8") as file:
            file.write(json_data)
        # save the path so even if something like name changes, the file doesn't move
        self.path = path

    def build_path(self) -> Path | None:
        """Return the path to save to; for the base model this is just ``self.path`` (may be None)."""
        return self.path

    # increment for breaking changes
    def max_schema_version(self) -> int:
        """Return the highest schema version (``v``) this class can load."""
        return 1
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
    # A model stored underneath a parent model on disk. The default location is
    # {parent folder}/{relationship_name}/{id[ - name]}/{base_filename}.
    _parent: KilnBaseModel | None = None

    # workaround to tell typechecker that we support the parent property, even though it's not a stock property
    if TYPE_CHECKING:
        parent: KilnBaseModel  # type: ignore

    def __init__(self, **data):
        super().__init__(**data)
        # "parent" is not a pydantic field, so route it through the property setter
        if "parent" in data:
            self.parent = data["parent"]

    @property
    def parent(self) -> Optional[KilnBaseModel]:
        """Return the parent model, lazily loading it from disk when needed.

        Returns None when no parent was set and this model has no path to
        derive the parent's location from.
        """
        if self._parent is not None:
            return self._parent
        # lazy load parent from path
        if self.path is None:
            return None
        # TODO: this only works with base_filename. If we ever support custom names, we need to change this.
        # self.path is {parent folder}/{relationship}/{child dir}/{file}, so the
        # parent folder is three levels up.
        parent_path = (
            self.path.parent.parent.parent
            / self.__class__.parent_type().base_filename()
        )
        self._parent = self.__class__.parent_type().load_from_file(parent_path)
        return self._parent

    @parent.setter
    def parent(self, value: Optional[KilnBaseModel]):
        if value is not None:
            expected_parent_type = self.__class__.parent_type()
            if not isinstance(value, expected_parent_type):
                raise ValueError(
                    f"Parent must be of type {expected_parent_type}, but was {type(value)}"
                )
        self._parent = value

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def relationship_name(cls) -> str:
        raise NotImplementedError("Relationship name must be implemented")

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def parent_type(cls) -> Type[KilnBaseModel]:
        raise NotImplementedError("Parent type must be implemented")

    @model_validator(mode="after")
    def check_parent_type(self) -> Self:
        """Reject an already-set parent that is not an instance of ``parent_type()``."""
        if self._parent is not None:
            expected_parent_type = self.__class__.parent_type()
            if not isinstance(self._parent, expected_parent_type):
                raise ValueError(
                    f"Parent must be of type {expected_parent_type}, but was {type(self._parent)}"
                )
        return self

    def build_child_dirname(self) -> Path:
        """Return this model's folder name: ``{id}`` or ``{id} - {first 32 chars of name}``.

        Raises:
            ValueError: if the id has been cleared.
        """
        # Default implementation for readable folder names.
        # {id} - {name}/{type}.kiln
        if self.id is None:
            # consider generating an ID here. But if it's been cleared, we've already used this without one so raise for now.
            raise ValueError("ID is not set - can not save or build path")
        path = self.id
        name = getattr(self, "name", None)
        if name is not None:
            path = f"{path} - {name[:32]}"
        return Path(path)

    def build_path(self) -> Path | None:
        """Return the file path for this model, or None if it cannot be determined.

        An explicitly set ``path`` always wins; otherwise the path is derived
        from the parent's path.
        """
        # if specifically loaded from an existing path, keep that no matter what
        # this ensures the file structure is easy to use with git/version control
        # and that changes to things like name (which impacts default path) don't leave dangling files
        if self.path is not None:
            return self.path
        # Build a path under parent_folder/relationship/file.kiln
        if self.parent is None:
            return None
        parent_path = self.parent.build_path()
        if parent_path is None:
            return None
        return (
            parent_path.parent
            / self.__class__.relationship_name()
            / self.build_child_dirname()
            / self.__class__.base_filename()
        )

    @classmethod
    def all_children_of_parent_path(
        cls: Type[PT], parent_path: Path | None
    ) -> list[PT]:
        """Load every child of this type stored under the given parent.

        Args:
            parent_path: the parent's file, or the folder containing the
                parent's default-named file. None means the parent was never
                saved, so it has no children.

        Raises:
            FileNotFoundError: if the parent file does not exist.
            ValueError / pydantic.ValidationError: if the parent or a child
                file fails to load.
        """
        if parent_path is None:
            # children are disk based. If not saved, they don't exist
            return []

        # Determine the parent folder, and the parent file to validate against.
        # When given a folder, the parent is expected under its default file
        # name (previously the folder itself was opened as a file, which
        # always failed).
        if parent_path.is_file():
            parent_folder = parent_path.parent
            parent_file = parent_path
        else:
            parent_folder = parent_path
            parent_file = parent_path / cls.parent_type().base_filename()

        # Loading the parent verifies it exists and is of the expected type;
        # load_from_file raises on failure, so the result itself is not needed.
        cls.parent_type().load_from_file(parent_file)

        # Ignore type error: this is abstract base class, but children must implement relationship_name
        relationship_folder = parent_folder / Path(cls.relationship_name())  # type: ignore

        # is_dir() is also False when the folder does not exist
        if not relationship_folder.is_dir():
            return []

        # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
        return [
            cls.load_from_file(child_file)
            for child_file in relationship_folder.glob(f"**/{cls.base_filename()}")
        ]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Parent create methods for all child relationships
|
|
245
|
+
# You must pass in parent_of in the subclass definition, defining the child relationships
|
|
246
|
+
class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
    # Subclasses declare their child relationships with the `parent_of` class
    # keyword: a dict mapping relationship name -> child model class. For each
    # entry an accessor method named after the relationship is added to the
    # parent, and `parent_type` / `relationship_name` are injected into the child.

    @classmethod
    def _create_child_method(
        cls, relationship_name: str, child_class: Type[KilnParentedModel]
    ):
        """Add an instance method ``<relationship_name>()`` that loads the children from disk."""

        def child_method(self) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path)

        child_method.__name__ = relationship_name
        child_method.__annotations__ = {"return": List[child_class]}
        setattr(cls, relationship_name, child_method)

    @classmethod
    def _create_parent_methods(
        cls, targetCls: Type[KilnParentedModel], relationship_name: str
    ):
        """Inject ``parent_type()`` and ``relationship_name()`` into the child class."""

        def parent_class_method() -> Type[KilnParentModel]:
            return cls

        parent_class_method.__name__ = "parent_type"
        parent_class_method.__annotations__ = {"return": Type[KilnParentModel]}
        # Installed as staticmethods: the functions take no arguments, so as
        # plain functions they only worked when called on the class and raised
        # TypeError when called on an instance.
        setattr(targetCls, "parent_type", staticmethod(parent_class_method))

        def relationship_name_method() -> str:
            return relationship_name

        relationship_name_method.__name__ = "relationship_name"
        relationship_name_method.__annotations__ = {"return": str}
        setattr(targetCls, "relationship_name", staticmethod(relationship_name_method))

    @classmethod
    def __init_subclass__(cls, parent_of: Dict[str, Type[KilnParentedModel]], **kwargs):
        super().__init_subclass__(**kwargs)
        cls._parent_of = parent_of
        for relationship_name, child_class in parent_of.items():
            cls._create_child_method(relationship_name, child_class)
            cls._create_parent_methods(child_class, relationship_name)

    @classmethod
    def validate_and_save_with_subrelations(
        cls,
        data: Dict[str, Any],
        path: Path | None = None,
        parent: KilnBaseModel | None = None,
    ):
        """Validate ``data`` (including nested child lists) and then save everything.

        Args:
            data: dict for this model; keys matching a declared relationship
                hold lists of child dicts.
            path: optional explicit file path for the top-level instance.
            parent: optional parent to attach to the top-level instance.

        Returns:
            The saved top-level instance.

        Raises:
            pydantic.ValidationError: aggregated errors for the whole tree.
        """
        # Validate first, then save. Don't want error half way through, and partly persisted
        # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
        cls._validate_nested(data, save=False, path=path, parent=parent)
        instance = cls._validate_nested(data, save=True, path=path, parent=parent)
        return instance

    @classmethod
    def _validate_nested(
        cls,
        data: Dict[str, Any],
        save: bool = False,
        parent: KilnBaseModel | None = None,
        path: Path | None = None,
    ):
        """Validate this model and, recursively, its declared children.

        When ``save`` is true each validated instance is written to disk as it
        is processed. Returns the top-level instance; raises a single
        ValidationError collecting every error found.
        """
        # Collect all validation errors so we can report them all at once
        validation_errors = []

        try:
            instance = cls.model_validate(data, strict=True)
            if path is not None:
                instance.path = path
            if parent is not None and isinstance(instance, KilnParentedModel):
                instance.parent = parent
            if save:
                instance.save_to_file()
        except ValidationError as e:
            # keep going without an instance so child errors are still reported
            instance = None
            validation_errors.extend(e.errors())

        for key, value_list in data.items():
            if key not in cls._parent_of:
                continue
            child_type = cls._parent_of[key]
            if not isinstance(value_list, list):
                raise ValueError(
                    f"Expected a list for {key}, but got {type(value_list)}"
                )
            for value_index, value in enumerate(value_list):
                try:
                    if issubclass(child_type, KilnParentModel):
                        # Child has children of its own: recurse
                        kwargs = {"data": value, "save": save}
                        if instance is not None:
                            kwargs["parent"] = instance
                        child_type._validate_nested(**kwargs)
                    elif issubclass(child_type, KilnParentedModel):
                        # Leaf node: a child with no children of its own
                        subinstance = child_type.model_validate(value, strict=True)
                        if instance is not None:
                            subinstance.parent = instance
                        if save:
                            subinstance.save_to_file()
                    else:
                        raise ValueError(
                            f"Invalid type {child_type}. Should be KilnBaseModel based."
                        )
                except ValidationError as e:
                    for suberror in e.errors():
                        # prefix the error location with where the child sits in `data`
                        cls._append_loc(suberror, key, value_index)
                        validation_errors.append(suberror)

        if validation_errors:
            raise ValidationError.from_exception_data(
                title=f"Validation failed for {cls.__name__}",
                line_errors=validation_errors,
                input_type="json",
            )

        return instance

    @classmethod
    def _append_loc(
        cls, error: ErrorDetails, current_loc: str, value_index: int | None = None
    ):
        """Prefix ``error["loc"]`` in place with ``current_loc`` and, if given, ``value_index``."""
        orig_loc = error.get("loc")
        new_loc: list[str | int] = [current_loc]
        if value_index is not None:
            new_loc.append(value_index)
        if isinstance(orig_loc, (tuple, list)):
            new_loc.extend(orig_loc)
        error["loc"] = tuple(new_loc)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Annotated, Dict
|
|
3
|
+
|
|
4
|
+
import jsonschema
|
|
5
|
+
import jsonschema.exceptions
|
|
6
|
+
import jsonschema.validators
|
|
7
|
+
from pydantic import AfterValidator
|
|
8
|
+
|
|
9
|
+
# String type for fields holding a JSON schema document. After pydantic's own
# `str` validation the value is passed through _check_json_schema.
# The lambda defers the name lookup: _check_json_schema is defined further down
# in this module, after this assignment has already run.
JsonObjectSchema = Annotated[
    str,
    AfterValidator(lambda v: _check_json_schema(v)),
]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _check_json_schema(v: str) -> str:
    """Validator hook: ensure ``v`` parses as an acceptable schema, then return it unchanged."""
    # Only the side effect matters here: schema_from_json_str raises
    # ValueError on bad input. The parsed schema itself is discarded.
    _ = schema_from_json_str(v)
    return v
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def validate_schema(instance: Dict, schema_str: str) -> None:
    """Check ``instance`` against the JSON schema given as a string.

    The schema string is first parsed and checked by schema_from_json_str,
    then ``instance`` is validated with a Draft 2020-12 validator; any error
    raised by that validator propagates to the caller.
    """
    validator = jsonschema.Draft202012Validator(schema_from_json_str(schema_str))
    return validator.validate(instance)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def schema_from_json_str(v: str) -> Dict:
    """Parse ``v`` as JSON and verify it is an object-typed JSON schema.

    Args:
        v: JSON text of the schema.

    Returns:
        The parsed schema dict.

    Raises:
        ValueError: if ``v`` is not valid JSON, is not a valid Draft 2020-12
            schema, is not a dict, or is not ``"type": "object"`` with a
            ``"properties"`` key. Every failure is reported as ValueError.
    """
    # Each step has its own narrow try so the specific ValueErrors raised
    # below are not re-wrapped as "Unexpected error" by a catch-all handler.
    try:
        parsed = json.loads(v)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {v}\n {e}") from e
    except Exception as e:
        # e.g. TypeError when v is not a str/bytes
        raise ValueError(f"Unexpected error parsing JSON schema: {v}\n {e}") from e

    try:
        jsonschema.Draft202012Validator.check_schema(parsed)
    except jsonschema.exceptions.SchemaError as e:
        raise ValueError(f"Invalid JSON schema: {v} \n{e}") from e
    except Exception as e:
        raise ValueError(f"Unexpected error parsing JSON schema: {v}\n {e}") from e

    if not isinstance(parsed, dict):
        raise ValueError(f"JSON schema must be a dict, not {type(parsed)}")
    if parsed.get("type") != "object" or "properties" not in parsed:
        raise ValueError(f"JSON schema must be an object with properties: {v}")
    return parsed
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
from kiln_ai.datamodel.basemodel import KilnBaseModel, KilnParentedModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def test_base_file(tmp_path) -> Path:
    """Write a minimal serialized ``kiln_base_model`` into the temp dir and return its path."""
    target = tmp_path / "test_model.json"
    payload = json.dumps({"v": 1, "model_type": "kiln_base_model"}, indent=4)
    target.write_text(payload)
    return target
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.fixture
def test_base_parented_file(tmp_path) -> Path:
    """Write a minimal serialized ``base_parent_example`` into the temp dir and return its path."""
    target = tmp_path / "test_model.json"
    payload = json.dumps({"v": 1, "model_type": "base_parent_example"}, indent=4)
    target.write_text(payload)
    return target
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.fixture
def test_newer_file(tmp_path) -> Path:
    """Write a file whose schema version (99) is above what the base model accepts."""
    target = tmp_path / "test_model_sv.json"
    payload = json.dumps({"v": 99}, indent=4)
    target.write_text(payload)
    return target
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_load_from_file(test_base_file):
    """A loaded model reports its stored version and remembers the file it came from."""
    loaded = KilnBaseModel.load_from_file(test_base_file)
    assert loaded.v == 1
    assert loaded.path == test_base_file
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_save_to_file(test_base_file):
    """Saving writes JSON containing the schema version and the model type tag."""
    KilnBaseModel(path=test_base_file).save_to_file()

    saved = json.loads(test_base_file.read_text())

    assert saved["v"] == 1
    assert saved["model_type"] == "kiln_base_model"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_save_to_file_without_path():
    """Saving a model that has no path must fail with ValueError."""
    pathless = KilnBaseModel()
    with pytest.raises(ValueError):
        pathless.save_to_file()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_max_schema_version(test_newer_file):
    """Loading a file with a schema version above the supported maximum raises ValueError."""
    loader = KilnBaseModel.load_from_file
    with pytest.raises(ValueError):
        loader(test_newer_file)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_type_name():
    """The model_type of the base model is the snake_case class name."""
    assert KilnBaseModel().model_type == "kiln_base_model"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_created_atby():
    """A new model gets a creation time close to now and a non-empty creator."""
    fresh = KilnBaseModel()

    # created_at: present, and within 2 seconds of the current time
    assert fresh.created_at is not None
    offset = (fresh.created_at - datetime.datetime.now()).total_seconds()
    assert abs(offset) < 2

    # created_by: some non-empty user id
    assert len(fresh.created_by) > 0
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Instance of the parented model for abstract methods
|
|
89
|
+
class NamedParentedModel(KilnParentedModel):
    # Concrete KilnParentedModel for the tests: children of a plain
    # KilnBaseModel, stored under the "tests" relationship folder.

    @classmethod
    def parent_type(cls):
        return KilnBaseModel

    @classmethod
    def relationship_name(cls) -> str:
        return "tests"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_parented_model_path_gen(tmp_path):
    """A child's generated path is {parent folder}/tests/{child id}/named_parented_model.kiln."""
    parent = KilnBaseModel(path=tmp_path)
    assert parent.id is not None

    child = NamedParentedModel(parent=parent)
    generated = child.build_path()
    child_dir = generated.parent
    relationship_dir = child_dir.parent

    assert generated.name == "named_parented_model.kiln"
    assert child_dir.name == child.id
    assert relationship_dir.name == "tests"
    assert relationship_dir.parent == tmp_path.parent
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class BaseParentExample(KilnBaseModel):
    # Minimal concrete parent model for these tests; DefaultParentedModel
    # returns it from parent_type().
    name: Optional[str] = None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Instance of the parented model for abstract methods, with default name builder
|
|
115
|
+
class DefaultParentedModel(KilnParentedModel):
    # Concrete KilnParentedModel with an optional name, parented to
    # BaseParentExample and stored under the "children" relationship folder.
    name: Optional[str] = None

    @classmethod
    def relationship_name(cls):
        # first parameter of a classmethod is the class: named `cls`, not `self`
        return "children"

    @classmethod
    def parent_type(cls):
        return BaseParentExample
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_build_default_child_filename(tmp_path):
    """The child folder is named by id alone, or by "id - name" when a name is set."""
    parent = BaseParentExample(path=tmp_path)

    # without a name: the folder is just the id
    unnamed = DefaultParentedModel(parent=parent)
    unnamed_file = unnamed.build_path()
    unnamed_dir = unnamed_file.parent
    assert unnamed_file.name == "default_parented_model.kiln"
    assert unnamed_dir.name == unnamed.id
    assert unnamed_dir.parent.name == "children"
    assert unnamed_dir.parent.parent == tmp_path.parent

    # with a name: the folder is "{id} - {name}"
    named = DefaultParentedModel(parent=parent, name="Name")
    named_file = named.build_path()
    named_dir = named_file.parent
    assert named_file.name == "default_parented_model.kiln"
    assert named_dir.name == named.id + " - Name"
    assert named_dir.parent.name == "children"
    assert named_dir.parent.parent == tmp_path.parent
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_serialize_child(tmp_path):
    """Saving a child writes it to its generated path, and that path survives a rename."""
    parent = BaseParentExample(path=tmp_path)
    child = DefaultParentedModel(parent=parent, name="Name")

    expected_path = child.build_path()
    assert child.path is None
    child.save_to_file()

    # the path recorded on the instance is exactly the generated one
    assert child.path is not None
    assert child.path == expected_path

    # the directory was created and the file holds the serialized child
    on_disk = json.loads(child.path.read_text())

    assert on_disk["v"] == 1
    assert on_disk["name"] == "Name"
    assert on_disk["model_type"] == "default_parented_model"
    assert len(on_disk["id"]) == 12
    child_dir = child.path.parent
    assert child_dir.name == child.id + " - Name"
    assert child_dir.parent.name == "children"
    assert child_dir.parent.parent == tmp_path.parent

    # renaming changes the serialized content, but not where the file lives
    child.name = "Name2"
    child.save_to_file()
    assert child.path == expected_path
    on_disk = json.loads(child.path.read_text())
    assert on_disk["name"] == "Name2"
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def test_save_to_set_location(tmp_path):
    """An explicit path beats the parent-derived one; without it the derived path is used."""
    parent = BaseParentExample(path=tmp_path)

    # both path and parent set: the explicit path is kept
    explicit_path = tmp_path.parent / "child.kiln"
    pinned = DefaultParentedModel(path=explicit_path, parent=parent, name="Name")
    assert pinned.build_path() == explicit_path

    # the file is created at the explicit path, not at the generated one
    assert not explicit_path.exists()
    pinned.save_to_file()
    assert explicit_path.exists()

    # only the parent set: the path is generated under the parent's folder
    derived = DefaultParentedModel(parent=parent, name="Name2")
    derived_dir = derived.build_path().parent
    assert derived_dir.name == derived.id + " - Name2"
    assert derived_dir.parent.name == "children"
    assert derived_dir.parent.parent == tmp_path.parent
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def test_parent_without_path():
    """With no path on either the child or its parent, saving raises ValueError."""
    pathless_parent = BaseParentExample()
    orphan = DefaultParentedModel(parent=pathless_parent, name="Name")
    with pytest.raises(ValueError):
        orphan.save_to_file()
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def test_parent_wrong_type():
    """Constructing a child with a parent of the wrong class raises ValueError."""
    # DefaultParentedModel expects a BaseParentExample parent; a bare KilnBaseModel is not one
    wrong_parent = KilnBaseModel()
    with pytest.raises(ValueError):
        DefaultParentedModel(parent=wrong_parent, name="Name")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def test_load_children(test_base_parented_file):
    """All saved children are found again when loading by the parent's path."""
    parent = BaseParentExample.load_from_file(test_base_parented_file)

    # create three children under the parent and persist each one
    expected_names = ("Child1", "Child2", "Child3")
    created = [DefaultParentedModel(parent=parent, name=n) for n in expected_names]
    for model in created:
        model.save_to_file()

    # load them back via the parent's file path
    children = DefaultParentedModel.all_children_of_parent_path(test_base_parented_file)

    assert len(children) == 3
    loaded_names = [child.name for child in children]
    for expected in expected_names:
        assert expected in loaded_names
    assert all(child.model_type == "default_parented_model" for child in children)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_base_filename():
    """base_filename is "<type_name>.kiln", reachable from an instance or from the class."""
    default_model = DefaultParentedModel(name="Test")
    assert default_model.base_filename() == "default_parented_model.kiln"

    named_model = NamedParentedModel(name="Test")
    assert named_model.base_filename() == "named_parented_model.kiln"
    assert NamedParentedModel.base_filename() == "named_parented_model.kiln"
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def test_load_from_folder(test_base_parented_file):
    """A saved child can be loaded back from the folder that contains its file."""
    parent = BaseParentExample.load_from_file(test_base_parented_file)
    saved_child = DefaultParentedModel(parent=parent, name="Child1")
    saved_child.save_to_file()

    child_folder = saved_child.path.parent
    reloaded = DefaultParentedModel.load_from_folder(child_folder)
    assert reloaded.name == "Child1"
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def test_lazy_load_parent(tmp_path):
    """A child loaded from disk resolves its parent on first access and caches it."""
    # persist a parent under its default file name
    parent_file = tmp_path / BaseParentExample.base_filename()
    parent = BaseParentExample(name="Parent", path=parent_file)
    parent.save_to_file()

    # persist a child below it
    child = DefaultParentedModel(parent=parent, name="Child")
    child.save_to_file()

    # reload the child from its file only; no parent is handed in
    reloaded_child = DefaultParentedModel.load_from_file(child.path)

    # first access to .parent triggers the lazy load
    resolved_parent = reloaded_child.parent

    assert resolved_parent is not None
    assert resolved_parent.name == "Parent"
    assert resolved_parent.path == parent.path

    # the loaded parent is now cached on the child
    assert hasattr(reloaded_child, "_parent")
    assert reloaded_child._parent is resolved_parent
|