kiln-ai 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/adapter_registry.py +2 -0
- kiln_ai/adapters/base_adapter.py +6 -1
- kiln_ai/adapters/langchain_adapters.py +5 -1
- kiln_ai/adapters/ml_model_list.py +43 -12
- kiln_ai/adapters/ollama_tools.py +4 -3
- kiln_ai/adapters/provider_tools.py +63 -2
- kiln_ai/adapters/repair/repair_task.py +4 -2
- kiln_ai/adapters/test_langchain_adapter.py +183 -0
- kiln_ai/adapters/test_provider_tools.py +315 -1
- kiln_ai/datamodel/__init__.py +162 -19
- kiln_ai/datamodel/basemodel.py +90 -42
- kiln_ai/datamodel/model_cache.py +116 -0
- kiln_ai/datamodel/test_basemodel.py +138 -3
- kiln_ai/datamodel/test_dataset_split.py +1 -1
- kiln_ai/datamodel/test_model_cache.py +244 -0
- kiln_ai/datamodel/test_models.py +173 -0
- kiln_ai/datamodel/test_output_rating.py +377 -10
- kiln_ai/utils/config.py +33 -10
- kiln_ai/utils/test_config.py +48 -0
- kiln_ai-0.8.0.dist-info/METADATA +237 -0
- {kiln_ai-0.7.0.dist-info → kiln_ai-0.8.0.dist-info}/RECORD +23 -21
- {kiln_ai-0.7.0.dist-info → kiln_ai-0.8.0.dist-info}/WHEEL +1 -1
- kiln_ai-0.7.0.dist-info/METADATA +0 -90
- {kiln_ai-0.7.0.dist-info → kiln_ai-0.8.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/basemodel.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+import os
 import re
 import shutil
 import uuid
@@ -7,7 +8,6 @@ from builtins import classmethod
 from datetime import datetime
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
     List,
@@ -21,12 +21,14 @@ from pydantic import (
     ConfigDict,
     Field,
     ValidationError,
+    ValidationInfo,
     computed_field,
     model_validator,
 )
 from pydantic_core import ErrorDetails
 from typing_extensions import Self

+from kiln_ai.datamodel.model_cache import ModelCache
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.formatting import snake_case

@@ -39,6 +41,7 @@ ID_TYPE = Optional[str]
 T = TypeVar("T", bound="KilnBaseModel")
 PT = TypeVar("PT", bound="KilnParentedModel")

+
 # Naming conventions:
 # 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
 # 2) Descrptions are for Kiln users to describe/understanding the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.
@@ -87,6 +90,8 @@ class KilnBaseModel(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
     created_by: str = Field(default_factory=lambda: Config.shared().user_id)

+    _loaded_from_file: bool = False
+
     @computed_field()
     def model_type(self) -> str:
         return self.type_name()
@@ -115,7 +120,7 @@ class KilnBaseModel(BaseModel):
         return cls.load_from_file(path)

     @classmethod
-    def load_from_file(cls: Type[T], path: Path) -> T:
+    def load_from_file(cls: Type[T], path: Path | str) -> T:
         """Load a model instance from a specific file path.

         Args:
@@ -128,14 +133,20 @@ class KilnBaseModel(BaseModel):
             ValueError: If the loaded model is not of the expected type or version
             FileNotFoundError: If the file does not exist
         """
+        if isinstance(path, str):
+            path = Path(path)
+        cached_model = ModelCache.shared().get_model(path, cls)
+        if cached_model is not None:
+            return cached_model
         with open(path, "r") as file:
+            # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
+            mtime_ns = os.fstat(file.fileno()).st_mtime_ns
             file_data = file.read()
-            # TODO P2 perf: parsing the JSON twice here.
-            # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
             parsed_json = json.loads(file_data)
-            m = cls.
+            m = cls.model_validate(parsed_json, context={"loading_from_file": True})
             if not isinstance(m, cls):
                 raise ValueError(f"Loaded model is not of type {cls.__name__}")
+            m._loaded_from_file = True
             file_data = None
         m.path = path
         if m.v > m.max_schema_version():
@@ -150,8 +161,21 @@
                 f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                 f"version: {m.v}, max version: {m.max_schema_version()}"
             )
+        ModelCache.shared().set_model(path, m, mtime_ns)
         return m

+    def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
+        # Two methods of indicated it's loaded from file:
+        # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
+        # 2) self._loaded_from_file -> After loading, set by the loader
+        if (
+            info is not None
+            and info.context is not None
+            and info.context.get("loading_from_file", False)
+        ):
+            return True
+        return self._loaded_from_file
+
     def save_to_file(self) -> None:
         """Save the model instance to a file.

@@ -170,6 +194,9 @@
             file.write(json_data)
         # save the path so even if something like name changes, the file doesn't move
         self.path = path
+        # We could save, but invalidating will trigger load on next use.
+        # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk
+        ModelCache.shared().invalidate(path)

     def delete(self) -> None:
         if self.path is None:
@@ -178,6 +205,7 @@
         if dir_path is None:
             raise ValueError("Cannot delete model because path is not set")
         shutil.rmtree(dir_path)
+        ModelCache.shared().invalidate(self.path)
         self.path = None

     def build_path(self) -> Path | None:
@@ -197,51 +225,44 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
     including parent reference handling and file system organization.

     Attributes:
-
+        parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
     """

-
+    # Parent is an in memory only reference to parent. If it's set we use that. If not we'll try to load it from disk based on the path.
+    # We don't persist the parent reference to disk. See the accessors below for how we make it a clean api (parent accessor will lazy load from disk)
+    parent: Optional[KilnBaseModel] = Field(default=None, exclude=True)

-
-
-
+    def __getattribute__(self, name: str) -> Any:
+        if name == "parent":
+            return self.load_parent()
+        return super().__getattribute__(name)

-    def
-
-        if "parent" in data:
-            self.parent = data["parent"]
+    def cached_parent(self) -> Optional[KilnBaseModel]:
+        return object.__getattribute__(self, "parent")

-
-    def parent(self) -> Optional[KilnBaseModel]:
+    def load_parent(self) -> Optional[KilnBaseModel]:
         """Get the parent model instance, loading it from disk if necessary.

         Returns:
             Optional[KilnBaseModel]: The parent model instance or None if not set
         """
-
-
+        cached_parent = self.cached_parent()
+        if cached_parent is not None:
+            return cached_parent
+
         # lazy load parent from path
         if self.path is None:
             return None
-        #
+        # Note: this only works with base_filename. If we every support custom names, we need to change this.
         parent_path = (
             self.path.parent.parent.parent
             / self.__class__.parent_type().base_filename()
         )
         if parent_path is None:
             return None
-
-
-
-    @parent.setter
-    def parent(self, value: Optional[KilnBaseModel]):
-        if value is not None:
-            expected_parent_type = self.__class__.parent_type()
-            if not isinstance(value, expected_parent_type):
-                raise ValueError(
-                    f"Parent must be of type {expected_parent_type}, but was {type(value)}"
-                )
-        self._parent = value
+        loaded_parent = self.__class__.parent_type().load_from_file(parent_path)
+        self.parent = loaded_parent
+        return loaded_parent

     # Dynamically implemented by KilnParentModel method injection
     @classmethod
@@ -255,11 +276,12 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):

     @model_validator(mode="after")
     def check_parent_type(self) -> Self:
-
+        cached_parent = self.cached_parent()
+        if cached_parent is not None:
             expected_parent_type = self.__class__.parent_type()
-            if not isinstance(
+            if not isinstance(cached_parent, expected_parent_type):
                 raise ValueError(
-                    f"Parent must be of type {expected_parent_type}, but was {type(
+                    f"Parent must be of type {expected_parent_type}, but was {type(cached_parent)}"
                 )
         return self

@@ -298,9 +320,7 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
         )

     @classmethod
-    def
-        cls: Type[PT], parent_path: Path | None
-    ) -> list[PT]:
+    def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
         if parent_path is None:
             # children are disk based. If not saved, they don't exist
             return []
@@ -322,13 +342,41 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
             return []

         # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
-        children = []
         for child_file in relationship_folder.glob(f"**/{cls.base_filename()}"):
-
-            children.append(child)
+            yield child_file

+    @classmethod
+    def all_children_of_parent_path(
+        cls: Type[PT], parent_path: Path | None
+    ) -> list[PT]:
+        children = []
+        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
+            children.append(cls.load_from_file(child_path))
         return children

+    @classmethod
+    def from_id_and_parent_path(
+        cls: Type[PT], id: str, parent_path: Path | None
+    ) -> PT | None:
+        """
+        Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.
+
+        Uses cache so still slow on first load.
+        """
+        if parent_path is None:
+            return None
+
+        # Note: we're using the in-file ID. We could make this faster using the path-ID if this becomes perf bottleneck, but it's better to have 1 source of truth.
+        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
+            child_id = ModelCache.shared().get_model_id(child_path, cls)
+            if child_id == id:
+                return cls.load_from_file(child_path)
+            if child_id is None:
+                child = cls.load_from_file(child_path)
+                if child.id == id:
+                    return child
+        return None
+

     # Parent create methods for all child relationships
     # You must pass in parent_of in the subclass definition, defining the child relationships
@@ -417,7 +465,7 @@ class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
         validation_errors = []

         try:
-            instance = cls.model_validate(data
+            instance = cls.model_validate(data)
             if path is not None:
                 instance.path = path
             if parent is not None and isinstance(instance, KilnParentedModel):
@@ -445,7 +493,7 @@ class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
             parent_type._validate_nested(**kwargs)
         elif issubclass(parent_type, KilnParentedModel):
             # Root node
-            subinstance = parent_type.model_validate(value
+            subinstance = parent_type.model_validate(value)
             if instance is not None:
                 subinstance.parent = instance
             if save:
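Taken together, the basemodel.py changes route load_from_file through a shared ModelCache keyed by path and invalidated by file mtime, and the method now accepts plain strings as well as Path objects. The following is a minimal caller-side sketch of that behavior, assuming a hypothetical project file that already exists on disk (the path is illustrative, not from the package):

    from kiln_ai.datamodel.basemodel import KilnBaseModel

    # Hypothetical path, used only for illustration.
    project_file = "/tmp/example_project/project.kiln"

    # First load reads and validates the JSON, then populates the shared ModelCache.
    model = KilnBaseModel.load_from_file(project_file)  # str paths are now accepted

    # While the file's mtime is unchanged, a repeat load returns a deep copy from the
    # cache and skips file I/O and pydantic validation entirely.
    model_again = KilnBaseModel.load_from_file(project_file)

    # Saving writes to disk and invalidates the cache entry, so the next load re-reads the file.
    model.save_to_file()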
kiln_ai/datamodel/model_cache.py
ADDED
@@ -0,0 +1,116 @@
+"""
+A simple cache for our datamodel.
+
+Works at the file level, caching the pydantic model based on the file path.
+
+Keeping this really simple. Our goal is to really be "disk-backed" data model, so using disk primitives.
+
+ - Use disk mtime to determine if the cached model is stale.
+ - Still using glob for iterating over projects, just caching at the file level
+ - Use path as the cache key
+ - Cache always populated from a disk read, so we know it refects what's on disk. Even if we had a memory-constructed version, we don't cache that.
+ - Cache the parsed model, not the raw file contents. Parsing and validating is what's expensive. >99% speedup when measured.
+"""
+
+import os
+import sys
+import warnings
+from pathlib import Path
+from typing import Dict, Optional, Tuple, Type, TypeVar
+
+from pydantic import BaseModel
+
+T = TypeVar("T", bound=BaseModel)
+
+
+class ModelCache:
+    _shared_instance = None
+
+    def __init__(self):
+        # Store both the model and the modified time of the cached file contents
+        self.model_cache: Dict[Path, Tuple[BaseModel, int]] = {}
+        self._enabled = self._check_timestamp_granularity()
+        if not self._enabled:
+            warnings.warn(
+                "File system does not support fine-grained timestamps. "
+                "Model caching has been disabled to ensure consistency."
+            )
+
+    @classmethod
+    def shared(cls):
+        if cls._shared_instance is None:
+            cls._shared_instance = cls()
+        return cls._shared_instance
+
+    def _is_cache_valid(self, path: Path, cached_mtime_ns: int) -> bool:
+        try:
+            current_mtime_ns = path.stat().st_mtime_ns
+        except Exception:
+            return False
+        return cached_mtime_ns == current_mtime_ns
+
+    def _get_model(self, path: Path, model_type: Type[T]) -> Optional[T]:
+        if path not in self.model_cache:
+            return None
+        model, cached_mtime_ns = self.model_cache[path]
+        if not self._is_cache_valid(path, cached_mtime_ns):
+            self.invalidate(path)
+            return None
+
+        if not isinstance(model, model_type):
+            self.invalidate(path)
+            raise ValueError(f"Model at {path} is not of type {model_type.__name__}")
+        return model
+
+    def get_model(self, path: Path, model_type: Type[T]) -> Optional[T]:
+        # We return a copy so in-memory edits don't impact the cache until they are saved
+        # Benchmark shows about 2x slower, but much more foolproof
+        model = self._get_model(path, model_type)
+        if model:
+            return model.model_copy(deep=True)
+        return None
+
+    def get_model_id(self, path: Path, model_type: Type[T]) -> Optional[str]:
+        model = self._get_model(path, model_type)
+        if model and hasattr(model, "id"):
+            id = model.id  # type: ignore
+            if isinstance(id, str):
+                return id
+        return None
+
+    def set_model(self, path: Path, model: BaseModel, mtime_ns: int):
+        # disable caching if the filesystem doesn't support fine-grained timestamps
+        if not self._enabled:
+            return
+        self.model_cache[path] = (model, mtime_ns)
+
+    def invalidate(self, path: Path):
+        if path in self.model_cache:
+            del self.model_cache[path]
+
+    def clear(self):
+        self.model_cache.clear()
+
+    def _check_timestamp_granularity(self) -> bool:
+        """Check if filesystem supports fine-grained timestamps (microseconds or better)."""
+
+        # MacOS and Windows support fine-grained timestamps
+        if sys.platform in ["darwin", "win32"]:
+            return True
+
+        # Linux supports fine-grained timestamps SOMETIMES. ext4 should work.
+        try:
+            # Get filesystem stats for the current directory
+            stats = os.statvfs(Path(__file__).parent)
+
+            # f_timespec was added in Linux 5.6 (2020)
+            # Returns nanoseconds precision as a power of 10
+            # e.g., 1 = decisecond, 2 = centisecond, 3 = millisecond, etc.
+            timespec = getattr(stats, "f_timespec", 0)
+
+            # Consider microsecond precision (6) or better as "fine-grained"
+            return timespec >= 6
+        except (AttributeError, OSError):
+            # If f_timespec isn't available or other errors occur,
+            # assume poor granularity to be safe
+            return False
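ModelCache stores (model, mtime_ns) tuples keyed by Path and hands out deep copies so in-memory edits never reach the cache. Below is a minimal sketch of the intended interaction, assuming an illustrative pydantic model and file path that are not part of kiln_ai:

    from pathlib import Path
    from pydantic import BaseModel
    from kiln_ai.datamodel.model_cache import ModelCache

    class Example(BaseModel):  # illustrative model, not part of kiln_ai
        id: str = "abc123"

    path = Path("/tmp/example.kiln")  # illustrative file, assumed to exist on disk
    cache = ModelCache.shared()

    # Record a model along with the file's modification time in nanoseconds.
    cache.set_model(path, Example(), path.stat().st_mtime_ns)

    # Returns a deep copy while the stored mtime still matches the file on disk.
    cached = cache.get_model(path, Example)

    # A later write to the file changes st_mtime_ns, so the next get_model evicts the
    # stale entry and returns None; invalidate(path) drops the entry explicitly.
    cache.invalidate(path)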
kiln_ai/datamodel/test_basemodel.py
CHANGED
@@ -2,6 +2,7 @@ import datetime
 import json
 from pathlib import Path
 from typing import Optional
+from unittest.mock import MagicMock, patch

 import pytest

@@ -10,6 +11,7 @@ from kiln_ai.datamodel.basemodel import (
     KilnParentedModel,
     string_to_valid_name,
 )
+from kiln_ai.datamodel.model_cache import ModelCache


 @pytest.fixture
@@ -45,6 +47,17 @@ def test_newer_file(tmp_path) -> Path:
     return test_file_path


+@pytest.fixture
+def tmp_model_cache():
+    temp_cache = ModelCache()
+    # We're testing integration, not cache functions, in this file
+    temp_cache._enabled = True
+    with (
+        patch("kiln_ai.datamodel.basemodel.ModelCache.shared", return_value=temp_cache),
+    ):
+        yield temp_cache
+
+
 def test_load_from_file(test_base_file):
     model = KilnBaseModel.load_from_file(test_base_file)
     assert model.v == 1
@@ -277,9 +290,8 @@ def test_lazy_load_parent(tmp_path):
     assert loaded_parent.name == "Parent"
     assert loaded_parent.path == parent.path

-    # Verify that the
-    assert
-    assert loaded_child._parent is loaded_parent
+    # Verify that the parent is cached
+    assert loaded_child.cached_parent() is loaded_parent


 def test_delete(tmp_path):
@@ -334,3 +346,126 @@ def test_string_to_valid_name():
     # Test empty string and whitespace
     assert string_to_valid_name("") == ""
     assert string_to_valid_name(" ") == ""
+
+
+def test_load_from_file_with_cache(test_base_file, tmp_model_cache):
+    tmp_model_cache.get_model = MagicMock(return_value=None)
+    tmp_model_cache.set_model = MagicMock()
+
+    # Load the model
+    model = KilnBaseModel.load_from_file(test_base_file)
+
+    # Check that the cache was checked and set
+    tmp_model_cache.get_model.assert_called_once_with(test_base_file, KilnBaseModel)
+    tmp_model_cache.set_model.assert_called_once()
+
+    # Ensure the model is correctly loaded
+    assert model.v == 1
+    assert model.path == test_base_file
+
+
+def test_save_to_file_invalidates_cache(test_base_file, tmp_model_cache):
+    # Create and save the model
+    model = KilnBaseModel(path=test_base_file)
+
+    # Set mock after to ignore any previous calls, we want to see save calls it
+    tmp_model_cache.invalidate = MagicMock()
+    model.save_to_file()
+
+    # Check that the cache was invalidated. Might be called multiple times for setting props like path. but must be called at least once.
+    tmp_model_cache.invalidate.assert_called_with(test_base_file)
+
+
+def test_delete_invalidates_cache(tmp_path, tmp_model_cache):
+    # Create and save the model
+    file_path = tmp_path / "test.kiln"
+    model = KilnBaseModel(path=file_path)
+    model.save_to_file()
+
+    # populate and check cache
+    model = KilnBaseModel.load_from_file(file_path)
+    cached_model = tmp_model_cache.get_model(file_path, KilnBaseModel)
+    assert cached_model.id == model.id
+
+    tmp_model_cache.invalidate = MagicMock()
+
+    # Delete the model
+    model.delete()
+
+    # Check that the cache was invalidated
+    tmp_model_cache.invalidate.assert_called_with(file_path)
+    assert tmp_model_cache.get_model(file_path, KilnBaseModel) is None
+
+
+def test_load_from_file_with_cached_model(test_base_file, tmp_model_cache):
+    # Set up the mock to return a cached model
+    cached_model = KilnBaseModel(v=1, path=test_base_file)
+    tmp_model_cache.get_model = MagicMock(return_value=cached_model)
+
+    with patch("builtins.open", create=True) as mock_open:
+        # Load the model
+        model = KilnBaseModel.load_from_file(test_base_file)
+
+    # Check that the cache was checked and the cached model was returned
+    tmp_model_cache.get_model.assert_called_once_with(test_base_file, KilnBaseModel)
+    assert model is cached_model
+
+    # Assert that open was not called (we used the cached model, not file)
+    mock_open.assert_not_called()
+
+
+def test_from_id_and_parent_path(test_base_parented_file, tmp_model_cache):
+    # Set up parent and children models
+    parent = BaseParentExample.load_from_file(test_base_parented_file)
+
+    child1 = DefaultParentedModel(parent=parent, name="Child1")
+    child2 = DefaultParentedModel(parent=parent, name="Child2")
+    child3 = DefaultParentedModel(parent=parent, name="Child3")
+
+    # Save all children
+    child1.save_to_file()
+    child2.save_to_file()
+    child3.save_to_file()
+
+    # Test finding existing child by ID
+    found_child = DefaultParentedModel.from_id_and_parent_path(
+        child2.id, test_base_parented_file
+    )
+    assert found_child is not None
+    assert found_child.id == child2.id
+    assert found_child.name == "Child2"
+    assert found_child is not child2  # not same instance (deep copy)
+
+    # Test non-existent ID returns None
+    not_found = DefaultParentedModel.from_id_and_parent_path(
+        "nonexistent", test_base_parented_file
+    )
+    assert not_found is None
+
+
+def test_from_id_and_parent_path_with_cache(test_base_parented_file, tmp_model_cache):
+    # Set up parent and child
+    parent = BaseParentExample.load_from_file(test_base_parented_file)
+    child = DefaultParentedModel(parent=parent, name="Child")
+    child.save_to_file()
+
+    # First load to populate cache
+    _ = DefaultParentedModel.from_id_and_parent_path(child.id, test_base_parented_file)
+
+    # Mock cache to verify it's used
+    tmp_model_cache.get_model_id = MagicMock(return_value=child.id)
+
+    # Load again - should use cache
+    found_child = DefaultParentedModel.from_id_and_parent_path(
+        child.id, test_base_parented_file
+    )
+
+    assert found_child is not None
+    assert found_child.id == child.id
+    tmp_model_cache.get_model_id.assert_called()
+
+
+def test_from_id_and_parent_path_without_parent():
+    # Test with None parent_path
+    not_found = DefaultParentedModel.from_id_and_parent_path("any-id", None)
+    assert not_found is None