kiln-ai 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -1,3 +1,7 @@
+"""
+See our docs for details about our datamodel: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
+"""
+
 from __future__ import annotations
 
 import json
@@ -8,7 +12,12 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Type, Union
 
 import jsonschema
 import jsonschema.exceptions
-from pydantic import BaseModel, Field, model_validator
+from pydantic import (
+    BaseModel,
+    Field,
+    ValidationInfo,
+    model_validator,
+)
 from typing_extensions import Self
 
 from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
@@ -43,9 +52,25 @@ __all__ = [
     "TaskOutputRatingType",
     "TaskRequirement",
     "TaskDeterminism",
+    "strict_mode",
+    "set_strict_mode",
 ]
 
 
+# We want to be hard on ourselves for data completeness generated by the Kiln App, but don't want to make it hard for users to use the datamodel/library.
+# Strict mode enables extra validations that we want to enforce in Kiln App (and any other client that wants best practices), but not in the library (unless they opt in)
+_strict_mode: bool = False
+
+
+def strict_mode() -> bool:
+    return _strict_mode
+
+
+def set_strict_mode(value: bool) -> None:
+    global _strict_mode
+    _strict_mode = value
+
+
 class Priority(IntEnum):
     """Defines priority levels for tasks and requirements, where P0 is highest priority."""
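
The strict-mode switch added above is a plain module-level flag: strict_mode() reads it and set_strict_mode() flips it for the whole process, so the Kiln App can opt into the extra completeness checks at startup while library users keep the permissive default. A minimal usage sketch, assuming the functions are importable from kiln_ai.datamodel as the __all__ entries suggest:

    from kiln_ai.datamodel import set_strict_mode, strict_mode

    # Library default: permissive, so partially-specified data still validates.
    assert strict_mode() is False

    # An app that wants best-practice completeness checks opts in once at startup.
    set_strict_mode(True)
    assert strict_mode() is True

    # ...and can opt back out, for example in a test teardown.
    set_strict_mode(False)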
 
@@ -121,8 +146,9 @@ class TaskOutput(KilnBaseModel):
     output: str = Field(
         description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
     )
-    source: DataSource = Field(
-        description="The source of the output: human or synthetic."
+    source: DataSource | None = Field(
+        description="The source of the output: human or synthetic.",
+        default=None,
     )
     rating: TaskOutputRating | None = Field(
         default=None, description="The rating of the output"
@@ -139,6 +165,18 @@ class TaskOutput(KilnBaseModel):
             raise ValueError(f"Output does not match task output schema: {e}")
         return self
 
+    @model_validator(mode="after")
+    def validate_output_source(self, info: ValidationInfo) -> Self:
+        # On strict mode and not loaded from file, we validate output_source is not None.
+        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
+        if not strict_mode():
+            return self
+        if self.loaded_from_file(info):
+            return self
+        if self.source is None:
+            raise ValueError("Output source is required when strict mode is enabled")
+        return self
+
 
 class FineTuneStatusType(str, Enum):
     """
@@ -326,8 +364,8 @@ class TaskRun(KilnParentedModel):
     input: str = Field(
         description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
     )
-    input_source: DataSource = Field(
-        description="The source of the input: human or synthetic."
+    input_source: DataSource | None = Field(
+        default=None, description="The source of the input: human or synthetic."
     )
 
     output: TaskOutput = Field(description="The output of the task run.")
@@ -392,6 +430,18 @@ class TaskRun(KilnParentedModel):
         )
         return self
 
+    @model_validator(mode="after")
+    def validate_input_source(self, info: ValidationInfo) -> Self:
+        # On strict mode and not loaded from file, we validate input_source is not None.
+        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
+        if not strict_mode():
+            return self
+        if self.loaded_from_file(info):
+            return self
+        if self.input_source is None:
+            raise ValueError("input_source is required when strict mode is enabled")
+        return self
+
 
 # Define the type alias for clarity
 DatasetFilter = Callable[[TaskRun], bool]
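
Both validate_output_source and validate_input_source use the same gate: do nothing unless strict mode is on, do nothing when the object is being loaded from disk (so imperfect historical files still load), and otherwise require a source. A self-contained sketch of that pattern with a generic pydantic model (not Kiln's actual classes), showing how the validation-context flag drives the exemption:

    from typing import Optional

    from pydantic import BaseModel, ValidationInfo, model_validator

    STRICT = True  # stand-in for the module-level strict_mode() flag


    class Record(BaseModel):
        output: str
        source: Optional[str] = None

        @model_validator(mode="after")
        def require_source_when_strict(self, info: ValidationInfo) -> "Record":
            # Loading code passes context={"loading_from_file": True}, which exempts old data.
            loading = bool(info.context and info.context.get("loading_from_file"))
            if STRICT and not loading and self.source is None:
                raise ValueError("source is required when strict mode is enabled")
            return self


    # New in-memory data must be complete when strict:
    # Record(output="hi")  -> raises ValueError
    Record(output="hi", source="human")

    # Loading from disk passes the context flag, so old files without a source still parse.
    Record.model_validate_json('{"output": "hi"}', context={"loading_from_file": True})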
@@ -1,4 +1,5 @@
 import json
+import os
 import re
 import shutil
 import uuid
@@ -7,7 +8,6 @@ from builtins import classmethod
 from datetime import datetime
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
     List,
@@ -21,12 +21,14 @@ from pydantic import (
     ConfigDict,
     Field,
     ValidationError,
+    ValidationInfo,
     computed_field,
     model_validator,
 )
 from pydantic_core import ErrorDetails
 from typing_extensions import Self
 
+from kiln_ai.datamodel.model_cache import ModelCache
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.formatting import snake_case
 
@@ -39,6 +41,7 @@ ID_TYPE = Optional[str]
 T = TypeVar("T", bound="KilnBaseModel")
 PT = TypeVar("PT", bound="KilnParentedModel")
 
+
 # Naming conventions:
 # 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
 # 2) Descrptions are for Kiln users to describe/understanding the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.
@@ -87,6 +90,8 @@ class KilnBaseModel(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
     created_by: str = Field(default_factory=lambda: Config.shared().user_id)
 
+    _loaded_from_file: bool = False
+
     @computed_field()
     def model_type(self) -> str:
         return self.type_name()
@@ -115,7 +120,7 @@ class KilnBaseModel(BaseModel):
         return cls.load_from_file(path)
 
     @classmethod
-    def load_from_file(cls: Type[T], path: Path) -> T:
+    def load_from_file(cls: Type[T], path: Path | str) -> T:
         """Load a model instance from a specific file path.
 
         Args:
@@ -128,14 +133,26 @@ class KilnBaseModel(BaseModel):
             ValueError: If the loaded model is not of the expected type or version
             FileNotFoundError: If the file does not exist
         """
+        if isinstance(path, str):
+            path = Path(path)
+        cached_model = ModelCache.shared().get_model(path, cls)
+        if cached_model is not None:
+            return cached_model
         with open(path, "r") as file:
+            # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
+            mtime_ns = os.fstat(file.fileno()).st_mtime_ns
             file_data = file.read()
             # TODO P2 perf: parsing the JSON twice here.
            # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
             parsed_json = json.loads(file_data)
-            m = cls.model_validate_json(file_data, strict=True)
+            m = cls.model_validate_json(
+                file_data,
+                strict=True,
+                context={"loading_from_file": True},
+            )
             if not isinstance(m, cls):
                 raise ValueError(f"Loaded model is not of type {cls.__name__}")
+            m._loaded_from_file = True
             file_data = None
             m.path = path
             if m.v > m.max_schema_version():
@@ -150,8 +167,21 @@ class KilnBaseModel(BaseModel):
                     f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                     f"version: {m.v}, max version: {m.max_schema_version()}"
                 )
+            ModelCache.shared().set_model(path, m, mtime_ns)
             return m
 
+    def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
+        # Two methods of indicated it's loaded from file:
+        # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
+        # 2) self._loaded_from_file -> After loading, set by the loader
+        if (
+            info is not None
+            and info.context is not None
+            and info.context.get("loading_from_file", False)
+        ):
+            return True
+        return self._loaded_from_file
+
     def save_to_file(self) -> None:
         """Save the model instance to a file.
 
@@ -170,6 +200,9 @@ class KilnBaseModel(BaseModel):
             file.write(json_data)
         # save the path so even if something like name changes, the file doesn't move
         self.path = path
+        # We could save, but invalidating will trigger load on next use.
+        # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk
+        ModelCache.shared().invalidate(path)
 
     def delete(self) -> None:
@@ -178,6 +211,7 @@ class KilnBaseModel(BaseModel):
         if dir_path is None:
             raise ValueError("Cannot delete model because path is not set")
         shutil.rmtree(dir_path)
+        ModelCache.shared().invalidate(self.path)
         self.path = None
 
     def build_path(self) -> Path | None:
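
With the load_from_file changes above, the cache lifecycle is deliberately simple: reads populate it, saves and deletes invalidate it, and the next read repopulates it from disk, so cached entries always mirror what is on disk. A rough sketch of that cycle using only calls that appear in this diff (the file location is hypothetical and its directory is assumed to exist):

    from pathlib import Path

    from kiln_ai.datamodel.basemodel import KilnBaseModel
    from kiln_ai.datamodel.model_cache import ModelCache

    file_path = Path("some_existing_dir/example.kiln")  # hypothetical location

    model = KilnBaseModel(path=file_path)
    model.save_to_file()                              # save invalidates any cached entry for this path

    loaded = KilnBaseModel.load_from_file(file_path)  # a read (re)populates the cache from disk
    cached = ModelCache.shared().get_model(file_path, KilnBaseModel)
    # `cached` is a deep copy of the on-disk model, or None if caching is
    # disabled because the filesystem lacks fine-grained timestamps.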
@@ -197,51 +231,44 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
     including parent reference handling and file system organization.
 
     Attributes:
-        _parent (KilnBaseModel): Reference to the parent model instance
+        parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
     """
 
-    _parent: KilnBaseModel | None = None
+    # Parent is an in memory only reference to parent. If it's set we use that. If not we'll try to load it from disk based on the path.
+    # We don't persist the parent reference to disk. See the accessors below for how we make it a clean api (parent accessor will lazy load from disk)
+    parent: Optional[KilnBaseModel] = Field(default=None, exclude=True)
 
-    # workaround to tell typechecker that we support the parent property, even though it's not a stock property
-    if TYPE_CHECKING:
-        parent: KilnBaseModel  # type: ignore
+    def __getattribute__(self, name: str) -> Any:
+        if name == "parent":
+            return self.load_parent()
+        return super().__getattribute__(name)
 
-    def __init__(self, **data):
-        super().__init__(**data)
-        if "parent" in data:
-            self.parent = data["parent"]
+    def cached_parent(self) -> Optional[KilnBaseModel]:
+        return object.__getattribute__(self, "parent")
 
-    @property
-    def parent(self) -> Optional[KilnBaseModel]:
+    def load_parent(self) -> Optional[KilnBaseModel]:
         """Get the parent model instance, loading it from disk if necessary.
 
         Returns:
             Optional[KilnBaseModel]: The parent model instance or None if not set
         """
-        if self._parent is not None:
-            return self._parent
+        cached_parent = self.cached_parent()
+        if cached_parent is not None:
+            return cached_parent
+
         # lazy load parent from path
         if self.path is None:
             return None
-        # TODO: this only works with base_filename. If we every support custom names, we need to change this.
+        # Note: this only works with base_filename. If we every support custom names, we need to change this.
         parent_path = (
             self.path.parent.parent.parent
             / self.__class__.parent_type().base_filename()
         )
         if parent_path is None:
             return None
-        self._parent = self.__class__.parent_type().load_from_file(parent_path)
-        return self._parent
-
-    @parent.setter
-    def parent(self, value: Optional[KilnBaseModel]):
-        if value is not None:
-            expected_parent_type = self.__class__.parent_type()
-            if not isinstance(value, expected_parent_type):
-                raise ValueError(
-                    f"Parent must be of type {expected_parent_type}, but was {type(value)}"
-                )
-        self._parent = value
+        loaded_parent = self.__class__.parent_type().load_from_file(parent_path)
+        self.parent = loaded_parent
+        return loaded_parent
 
     # Dynamically implemented by KilnParentModel method injection
     @classmethod
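
The old _parent private attribute plus property/setter pair is replaced by a real pydantic field (excluded from serialization) and a __getattribute__ hook: reading .parent transparently lazy-loads the parent from disk, while cached_parent() peeks at the raw field without triggering a load. A stripped-down sketch of the same pattern outside pydantic, to make the control flow explicit:

    from typing import Any, Optional


    class LazyParent:
        def __init__(self) -> None:
            self.parent: Optional[str] = None  # stand-in for the excluded pydantic `parent` field

        def __getattribute__(self, name: str) -> Any:
            # Reading `.parent` is rerouted through the lazy loader; everything else is untouched.
            if name == "parent":
                return self.load_parent()
            return super().__getattribute__(name)

        def cached_parent(self) -> Optional[str]:
            # Peek at the raw field without triggering a load.
            return object.__getattribute__(self, "parent")

        def load_parent(self) -> Optional[str]:
            cached = self.cached_parent()
            if cached is not None:
                return cached
            loaded = "parent loaded from disk"  # placeholder for parent_type().load_from_file(...)
            self.parent = loaded  # plain attribute writes are not intercepted
            return loaded


    child = LazyParent()
    assert child.cached_parent() is None               # nothing loaded yet
    assert child.parent == "parent loaded from disk"   # first read lazy-loads
    assert child.cached_parent() == child.parent       # now cached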
@@ -255,11 +282,12 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
 
     @model_validator(mode="after")
     def check_parent_type(self) -> Self:
-        if self._parent is not None:
+        cached_parent = self.cached_parent()
+        if cached_parent is not None:
             expected_parent_type = self.__class__.parent_type()
-            if not isinstance(self._parent, expected_parent_type):
+            if not isinstance(cached_parent, expected_parent_type):
                 raise ValueError(
-                    f"Parent must be of type {expected_parent_type}, but was {type(self._parent)}"
+                    f"Parent must be of type {expected_parent_type}, but was {type(cached_parent)}"
                 )
         return self
 
@@ -298,9 +326,7 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
         )
 
     @classmethod
-    def all_children_of_parent_path(
-        cls: Type[PT], parent_path: Path | None
-    ) -> list[PT]:
+    def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
         if parent_path is None:
             # children are disk based. If not saved, they don't exist
             return []
@@ -322,13 +348,41 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
             return []
 
         # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
-        children = []
         for child_file in relationship_folder.glob(f"**/{cls.base_filename()}"):
-            child = cls.load_from_file(child_file)
-            children.append(child)
+            yield child_file
 
+    @classmethod
+    def all_children_of_parent_path(
+        cls: Type[PT], parent_path: Path | None
+    ) -> list[PT]:
+        children = []
+        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
+            children.append(cls.load_from_file(child_path))
         return children
 
+    @classmethod
+    def from_id_and_parent_path(
+        cls: Type[PT], id: str, parent_path: Path | None
+    ) -> PT | None:
+        """
+        Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.
+
+        Uses cache so still slow on first load.
+        """
+        if parent_path is None:
+            return None
+
+        # Note: we're using the in-file ID. We could make this faster using the path-ID if this becomes perf bottleneck, but it's better to have 1 source of truth.
+        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
+            child_id = ModelCache.shared().get_model_id(child_path, cls)
+            if child_id == id:
+                return cls.load_from_file(child_path)
+            if child_id is None:
+                child = cls.load_from_file(child_path)
+                if child.id == id:
+                    return child
+        return None
+
 
 # Parent create methods for all child relationships
 # You must pass in parent_of in the subclass definition, defining the child relationships
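
from_id_and_parent_path avoids loading and deep-copying every child just to find one: it iterates child paths, asks the cache for each file's ID via get_model_id, and only fully loads the match (or any file the cache has not seen yet). A hedged usage sketch; the Task/TaskRun pairing and attribute names are assumptions beyond what this diff shows:

    from kiln_ai.datamodel import Task, TaskRun  # assumed public exports


    def find_run(task: Task, run_id: str) -> TaskRun | None:
        # Only the matching run is fully loaded; other children are matched
        # by ID straight from the cache (get_model_id) without a deep copy.
        return TaskRun.from_id_and_parent_path(run_id, task.path)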
@@ -0,0 +1,116 @@
+"""
+A simple cache for our datamodel.
+
+Works at the file level, caching the pydantic model based on the file path.
+
+Keeping this really simple. Our goal is to really be "disk-backed" data model, so using disk primitives.
+
+- Use disk mtime to determine if the cached model is stale.
+- Still using glob for iterating over projects, just caching at the file level
+- Use path as the cache key
+- Cache always populated from a disk read, so we know it refects what's on disk. Even if we had a memory-constructed version, we don't cache that.
+- Cache the parsed model, not the raw file contents. Parsing and validating is what's expensive. >99% speedup when measured.
+"""
+
+import os
+import sys
+import warnings
+from pathlib import Path
+from typing import Dict, Optional, Tuple, Type, TypeVar
+
+from pydantic import BaseModel
+
+T = TypeVar("T", bound=BaseModel)
+
+
+class ModelCache:
+    _shared_instance = None
+
+    def __init__(self):
+        # Store both the model and the modified time of the cached file contents
+        self.model_cache: Dict[Path, Tuple[BaseModel, int]] = {}
+        self._enabled = self._check_timestamp_granularity()
+        if not self._enabled:
+            warnings.warn(
+                "File system does not support fine-grained timestamps. "
+                "Model caching has been disabled to ensure consistency."
+            )
+
+    @classmethod
+    def shared(cls):
+        if cls._shared_instance is None:
+            cls._shared_instance = cls()
+        return cls._shared_instance
+
+    def _is_cache_valid(self, path: Path, cached_mtime_ns: int) -> bool:
+        try:
+            current_mtime_ns = path.stat().st_mtime_ns
+        except Exception:
+            return False
+        return cached_mtime_ns == current_mtime_ns
+
+    def _get_model(self, path: Path, model_type: Type[T]) -> Optional[T]:
+        if path not in self.model_cache:
+            return None
+        model, cached_mtime_ns = self.model_cache[path]
+        if not self._is_cache_valid(path, cached_mtime_ns):
+            self.invalidate(path)
+            return None
+
+        if not isinstance(model, model_type):
+            self.invalidate(path)
+            raise ValueError(f"Model at {path} is not of type {model_type.__name__}")
+        return model
+
+    def get_model(self, path: Path, model_type: Type[T]) -> Optional[T]:
+        # We return a copy so in-memory edits don't impact the cache until they are saved
+        # Benchmark shows about 2x slower, but much more foolproof
+        model = self._get_model(path, model_type)
+        if model:
+            return model.model_copy(deep=True)
+        return None
+
+    def get_model_id(self, path: Path, model_type: Type[T]) -> Optional[str]:
+        model = self._get_model(path, model_type)
+        if model and hasattr(model, "id"):
+            id = model.id  # type: ignore
+            if isinstance(id, str):
+                return id
+        return None
+
+    def set_model(self, path: Path, model: BaseModel, mtime_ns: int):
+        # disable caching if the filesystem doesn't support fine-grained timestamps
+        if not self._enabled:
+            return
+        self.model_cache[path] = (model, mtime_ns)
+
+    def invalidate(self, path: Path):
+        if path in self.model_cache:
+            del self.model_cache[path]
+
+    def clear(self):
+        self.model_cache.clear()
+
+    def _check_timestamp_granularity(self) -> bool:
+        """Check if filesystem supports fine-grained timestamps (microseconds or better)."""
+
+        # MacOS and Windows support fine-grained timestamps
+        if sys.platform in ["darwin", "win32"]:
+            return True
+
+        # Linux supports fine-grained timestamps SOMETIMES. ext4 should work.
+        try:
+            # Get filesystem stats for the current directory
+            stats = os.statvfs(Path(__file__).parent)
+
+            # f_timespec was added in Linux 5.6 (2020)
+            # Returns nanoseconds precision as a power of 10
+            # e.g., 1 = decisecond, 2 = centisecond, 3 = millisecond, etc.
+            timespec = getattr(stats, "f_timespec", 0)
+
+            # Consider microsecond precision (6) or better as "fine-grained"
+            return timespec >= 6
+        except (AttributeError, OSError):
+            # If f_timespec isn't available or other errors occur,
+            # assume poor granularity to be safe
+            return False
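
The cache key is the file path and validity is an exact st_mtime_ns match, which is why load_from_file records the mtime from the open file descriptor and why caching is disabled on filesystems with coarse timestamps (a rewrite landing in the same timestamp tick would otherwise be served stale). A small sketch of that contract using only the methods defined above; the model class and file are illustrative, not part of Kiln:

    import os
    from pathlib import Path

    from pydantic import BaseModel

    from kiln_ai.datamodel.model_cache import ModelCache


    class Doc(BaseModel):  # illustrative model for the sketch
        name: str


    path = Path("doc.json")  # hypothetical file
    path.write_text('{"name": "a"}')

    # Mirror what load_from_file does: read, record mtime_ns from the open fd, then cache.
    with open(path, "r") as f:
        mtime_ns = os.fstat(f.fileno()).st_mtime_ns
        model = Doc.model_validate_json(f.read())
    ModelCache.shared().set_model(path, model, mtime_ns)

    hit = ModelCache.shared().get_model(path, Doc)
    # `hit` is a deep copy of `model` (or None if caching is disabled on this filesystem)

    path.write_text('{"name": "b"}')  # bumps st_mtime_ns
    stale = ModelCache.shared().get_model(path, Doc)
    # `stale` is None: the mtime no longer matches, so the entry was invalidated on lookup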
@@ -2,6 +2,7 @@ import datetime
 import json
 from pathlib import Path
 from typing import Optional
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -10,6 +11,7 @@ from kiln_ai.datamodel.basemodel import (
     KilnParentedModel,
     string_to_valid_name,
 )
+from kiln_ai.datamodel.model_cache import ModelCache
 
 
 @pytest.fixture
@@ -45,6 +47,17 @@ def test_newer_file(tmp_path) -> Path:
     return test_file_path
 
 
+@pytest.fixture
+def tmp_model_cache():
+    temp_cache = ModelCache()
+    # We're testing integration, not cache functions, in this file
+    temp_cache._enabled = True
+    with (
+        patch("kiln_ai.datamodel.basemodel.ModelCache.shared", return_value=temp_cache),
+    ):
+        yield temp_cache
+
+
 def test_load_from_file(test_base_file):
     model = KilnBaseModel.load_from_file(test_base_file)
     assert model.v == 1
@@ -277,9 +290,8 @@ def test_lazy_load_parent(tmp_path):
     assert loaded_parent.name == "Parent"
     assert loaded_parent.path == parent.path
 
-    # Verify that the _parent attribute is now set
-    assert hasattr(loaded_child, "_parent")
-    assert loaded_child._parent is loaded_parent
+    # Verify that the parent is cached
+    assert loaded_child.cached_parent() is loaded_parent
 
 
 def test_delete(tmp_path):
@@ -334,3 +346,126 @@ def test_string_to_valid_name():
     # Test empty string and whitespace
     assert string_to_valid_name("") == ""
     assert string_to_valid_name(" ") == ""
+
+
+def test_load_from_file_with_cache(test_base_file, tmp_model_cache):
+    tmp_model_cache.get_model = MagicMock(return_value=None)
+    tmp_model_cache.set_model = MagicMock()
+
+    # Load the model
+    model = KilnBaseModel.load_from_file(test_base_file)
+
+    # Check that the cache was checked and set
+    tmp_model_cache.get_model.assert_called_once_with(test_base_file, KilnBaseModel)
+    tmp_model_cache.set_model.assert_called_once()
+
+    # Ensure the model is correctly loaded
+    assert model.v == 1
+    assert model.path == test_base_file
+
+
+def test_save_to_file_invalidates_cache(test_base_file, tmp_model_cache):
+    # Create and save the model
+    model = KilnBaseModel(path=test_base_file)
+
+    # Set mock after to ignore any previous calls, we want to see save calls it
+    tmp_model_cache.invalidate = MagicMock()
+    model.save_to_file()
+
+    # Check that the cache was invalidated. Might be called multiple times for setting props like path. but must be called at least once.
+    tmp_model_cache.invalidate.assert_called_with(test_base_file)
+
+
+def test_delete_invalidates_cache(tmp_path, tmp_model_cache):
+    # Create and save the model
+    file_path = tmp_path / "test.kiln"
+    model = KilnBaseModel(path=file_path)
+    model.save_to_file()
+
+    # populate and check cache
+    model = KilnBaseModel.load_from_file(file_path)
+    cached_model = tmp_model_cache.get_model(file_path, KilnBaseModel)
+    assert cached_model.id == model.id
+
+    tmp_model_cache.invalidate = MagicMock()
+
+    # Delete the model
+    model.delete()
+
+    # Check that the cache was invalidated
+    tmp_model_cache.invalidate.assert_called_with(file_path)
+    assert tmp_model_cache.get_model(file_path, KilnBaseModel) is None
+
+
+def test_load_from_file_with_cached_model(test_base_file, tmp_model_cache):
+    # Set up the mock to return a cached model
+    cached_model = KilnBaseModel(v=1, path=test_base_file)
+    tmp_model_cache.get_model = MagicMock(return_value=cached_model)
+
+    with patch("builtins.open", create=True) as mock_open:
+        # Load the model
+        model = KilnBaseModel.load_from_file(test_base_file)
+
+        # Check that the cache was checked and the cached model was returned
+        tmp_model_cache.get_model.assert_called_once_with(test_base_file, KilnBaseModel)
+        assert model is cached_model
+
+        # Assert that open was not called (we used the cached model, not file)
+        mock_open.assert_not_called()
+
+
+def test_from_id_and_parent_path(test_base_parented_file, tmp_model_cache):
+    # Set up parent and children models
+    parent = BaseParentExample.load_from_file(test_base_parented_file)
+
+    child1 = DefaultParentedModel(parent=parent, name="Child1")
+    child2 = DefaultParentedModel(parent=parent, name="Child2")
+    child3 = DefaultParentedModel(parent=parent, name="Child3")
+
+    # Save all children
+    child1.save_to_file()
+    child2.save_to_file()
+    child3.save_to_file()
+
+    # Test finding existing child by ID
+    found_child = DefaultParentedModel.from_id_and_parent_path(
+        child2.id, test_base_parented_file
+    )
+    assert found_child is not None
+    assert found_child.id == child2.id
+    assert found_child.name == "Child2"
+    assert found_child is not child2  # not same instance (deep copy)
+
+    # Test non-existent ID returns None
+    not_found = DefaultParentedModel.from_id_and_parent_path(
+        "nonexistent", test_base_parented_file
+    )
+    assert not_found is None
+
+
+def test_from_id_and_parent_path_with_cache(test_base_parented_file, tmp_model_cache):
+    # Set up parent and child
+    parent = BaseParentExample.load_from_file(test_base_parented_file)
+    child = DefaultParentedModel(parent=parent, name="Child")
+    child.save_to_file()
+
+    # First load to populate cache
+    _ = DefaultParentedModel.from_id_and_parent_path(child.id, test_base_parented_file)
+
+    # Mock cache to verify it's used
+    tmp_model_cache.get_model_id = MagicMock(return_value=child.id)
+
+    # Load again - should use cache
+    found_child = DefaultParentedModel.from_id_and_parent_path(
+        child.id, test_base_parented_file
+    )
+
+    assert found_child is not None
+    assert found_child.id == child.id
+    tmp_model_cache.get_model_id.assert_called()
+
+
+def test_from_id_and_parent_path_without_parent():
+    # Test with None parent_path
+    not_found = DefaultParentedModel.from_id_and_parent_path("any-id", None)
+    assert not_found is None