kiln-ai 0.6.1__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +2 -0
- kiln_ai/adapters/adapter_registry.py +19 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +29 -21
- kiln_ai/adapters/fine_tune/__init__.py +14 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +186 -0
- kiln_ai/adapters/fine_tune/dataset_formatter.py +187 -0
- kiln_ai/adapters/fine_tune/finetune_registry.py +11 -0
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +308 -0
- kiln_ai/adapters/fine_tune/openai_finetune.py +205 -0
- kiln_ai/adapters/fine_tune/test_base_finetune.py +290 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +342 -0
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +455 -0
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +503 -0
- kiln_ai/adapters/langchain_adapters.py +103 -13
- kiln_ai/adapters/ml_model_list.py +239 -303
- kiln_ai/adapters/ollama_tools.py +115 -0
- kiln_ai/adapters/provider_tools.py +308 -0
- kiln_ai/adapters/repair/repair_task.py +4 -2
- kiln_ai/adapters/repair/test_repair_task.py +6 -11
- kiln_ai/adapters/test_langchain_adapter.py +229 -18
- kiln_ai/adapters/test_ollama_tools.py +42 -0
- kiln_ai/adapters/test_prompt_adaptors.py +7 -5
- kiln_ai/adapters/test_provider_tools.py +531 -0
- kiln_ai/adapters/test_structured_output.py +22 -43
- kiln_ai/datamodel/__init__.py +287 -24
- kiln_ai/datamodel/basemodel.py +122 -38
- kiln_ai/datamodel/model_cache.py +116 -0
- kiln_ai/datamodel/registry.py +31 -0
- kiln_ai/datamodel/test_basemodel.py +167 -4
- kiln_ai/datamodel/test_dataset_split.py +234 -0
- kiln_ai/datamodel/test_example_models.py +12 -0
- kiln_ai/datamodel/test_model_cache.py +244 -0
- kiln_ai/datamodel/test_models.py +215 -1
- kiln_ai/datamodel/test_registry.py +96 -0
- kiln_ai/utils/config.py +14 -1
- kiln_ai/utils/name_generator.py +125 -0
- kiln_ai/utils/test_name_generator.py +47 -0
- kiln_ai-0.7.1.dist-info/METADATA +237 -0
- kiln_ai-0.7.1.dist-info/RECORD +58 -0
- {kiln_ai-0.6.1.dist-info → kiln_ai-0.7.1.dist-info}/WHEEL +1 -1
- kiln_ai/adapters/test_ml_model_list.py +0 -181
- kiln_ai-0.6.1.dist-info/METADATA +0 -88
- kiln_ai-0.6.1.dist-info/RECORD +0 -37
- {kiln_ai-0.6.1.dist-info → kiln_ai-0.7.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/__init__.py
CHANGED
@@ -1,12 +1,23 @@
+"""
+See our docs for details about our datamodel: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
+"""
+
 from __future__ import annotations
 
 import json
+import math
+import random
 from enum import Enum, IntEnum
-from typing import TYPE_CHECKING, Dict, List, Type, Union
+from typing import TYPE_CHECKING, Callable, Dict, List, Type, Union
 
 import jsonschema
 import jsonschema.exceptions
-from pydantic import BaseModel, Field, model_validator
+from pydantic import (
+    BaseModel,
+    Field,
+    ValidationInfo,
+    model_validator,
+)
 from typing_extensions import Self
 
 from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
@@ -14,6 +25,8 @@ from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
 from .basemodel import (
     ID_FIELD,
     ID_TYPE,
+    NAME_FIELD,
+    SHORT_NAME_FIELD,
     KilnBaseModel,
     KilnParentedModel,
     KilnParentModel,
@@ -39,27 +52,23 @@ __all__ = [
     "TaskOutputRatingType",
     "TaskRequirement",
     "TaskDeterminism",
+    "strict_mode",
+    "set_strict_mode",
 ]
 
 
-#
-#
-
+# We want to be hard on ourselves for data completeness generated by the Kiln App, but don't want to make it hard for users to use the datamodel/library.
+# Strict mode enables extra validations that we want to enforce in the Kiln App (and any other client that wants best practices), but not in the library (unless they opt in).
+_strict_mode: bool = False
 
-# Filename compatible names
-NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
-NAME_FIELD = Field(
-    min_length=1,
-    max_length=120,
-    pattern=NAME_REGEX,
-    description="A name for this entity.",
-)
-SHORT_NAME_FIELD = Field(
-    min_length=1,
-    max_length=32,
-    pattern=NAME_REGEX,
-    description="A name for this entity",
-)
+
+def strict_mode() -> bool:
+    return _strict_mode
+
+
+def set_strict_mode(value: bool) -> None:
+    global _strict_mode
+    _strict_mode = value
 
 
 class Priority(IntEnum):
@@ -137,8 +146,9 @@ class TaskOutput(KilnBaseModel):
     output: str = Field(
         description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
     )
-    source: DataSource = Field(
-        description="The source of the output: human or synthetic."
+    source: DataSource | None = Field(
+        description="The source of the output: human or synthetic.",
+        default=None,
     )
     rating: TaskOutputRating | None = Field(
         default=None, description="The rating of the output"
@@ -155,6 +165,83 @@ class TaskOutput(KilnBaseModel):
             raise ValueError(f"Output does not match task output schema: {e}")
         return self
 
+    @model_validator(mode="after")
+    def validate_output_source(self, info: ValidationInfo) -> Self:
+        # In strict mode, and when not loaded from file, we validate that the output source is not None.
+        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
+        if not strict_mode():
+            return self
+        if self.loaded_from_file(info):
+            return self
+        if self.source is None:
+            raise ValueError("Output source is required when strict mode is enabled")
+        return self
+
+
+class FineTuneStatusType(str, Enum):
+    """
+    The status type of a fine-tune (running, completed, failed, etc).
+    """
+
+    unknown = "unknown"  # server error
+    pending = "pending"
+    running = "running"
+    completed = "completed"
+    failed = "failed"
+
+
+class Finetune(KilnParentedModel):
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the fine-tune for you and your team. Not used in training.",
+    )
+    provider: str = Field(
+        description="The provider to use for the fine-tune (e.g. 'openai')."
+    )
+    base_model_id: str = Field(
+        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
+    )
+    provider_id: str | None = Field(
+        default=None,
+        description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
+    )
+    fine_tune_model_id: str | None = Field(
+        default=None,
+        description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
+    )
+    dataset_split_id: str = Field(
+        description="The ID of the dataset split to use for this fine-tune.",
+    )
+    train_split_name: str = Field(
+        default="train",
+        description="The name of the training split to use for this fine-tune.",
+    )
+    validation_split_name: str | None = Field(
+        default=None,
+        description="The name of the validation split to use for this fine-tune. Optional.",
+    )
+    parameters: dict[str, str | int | float | bool] = Field(
+        default={},
+        description="The parameters to use for this fine-tune. These are provider-specific.",
+    )
+    system_message: str = Field(
+        description="The system message to use for this fine-tune.",
+    )
+    latest_status: FineTuneStatusType = Field(
+        default=FineTuneStatusType.unknown,
+        description="The latest known status of this fine-tune. Not updated in real time.",
+    )
+    properties: Dict[str, str | int | float] = Field(
+        default={},
+        description="Properties of the fine-tune. Different providers may use different properties.",
+    )
+
+    def parent_task(self) -> Task | None:
+        if not isinstance(self.parent, Task):
+            return None
+        return self.parent
+
 
 class DataSourceType(str, Enum):
     """
@@ -277,8 +364,8 @@ class TaskRun(KilnParentedModel):
     input: str = Field(
         description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
     )
-    input_source: DataSource = Field(
-        description="The source of the input: human or synthetic."
+    input_source: DataSource | None = Field(
+        default=None, description="The source of the input: human or synthetic."
     )
 
     output: TaskOutput = Field(description="The output of the task run.")
@@ -343,6 +430,172 @@ class TaskRun(KilnParentedModel):
         )
         return self
 
+    @model_validator(mode="after")
+    def validate_input_source(self, info: ValidationInfo) -> Self:
+        # In strict mode, and when not loaded from file, we validate that input_source is not None.
+        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
+        if not strict_mode():
+            return self
+        if self.loaded_from_file(info):
+            return self
+        if self.input_source is None:
+            raise ValueError("input_source is required when strict mode is enabled")
+        return self
+
+
+# Define the type alias for clarity
+DatasetFilter = Callable[[TaskRun], bool]
+
+
+def AllDatasetFilter(_: TaskRun) -> bool:
+    return True
+
+
+def HighRatingDatasetFilter(task_run: TaskRun) -> bool:
+    if task_run.output is None or task_run.output.rating is None:
+        return False
+    return task_run.output.rating.is_high_quality()
+
+
+class DatasetSplitDefinition(BaseModel):
+    """
+    A definition of a split in a dataset.
+
+    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the dataset for you and your team. Not used in training.",
+    )
+    percentage: float = Field(
+        ge=0.0,
+        le=1.0,
+        description="The percentage of the dataset that this split represents (between 0 and 1).",
+    )
+
+
+AllSplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="all", percentage=1.0)
+]
+Train80Test20SplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="train", percentage=0.8),
+    DatasetSplitDefinition(name="test", percentage=0.2),
+]
+Train60Test20Val20SplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="train", percentage=0.6),
+    DatasetSplitDefinition(name="test", percentage=0.2),
+    DatasetSplitDefinition(name="val", percentage=0.2),
+]
+
+
+class DatasetSplit(KilnParentedModel):
+    """
+    A collection of task runs, with optional splits (train, test, validation).
+
+    Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.
+
+    Maintains a list of IDs for each split, to avoid data duplication.
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the dataset for you and your team. Not used in training.",
+    )
+    splits: list[DatasetSplitDefinition] = Field(
+        default_factory=list,
+        description="The splits in the dataset.",
+    )
+    split_contents: dict[str, list[str]] = Field(
+        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
+    )
+
+    @model_validator(mode="after")
+    def validate_split_percentages(self) -> "DatasetSplit":
+        total = sum(split.percentage for split in self.splits)
+        if not math.isclose(total, 1.0, rel_tol=1e-9):
+            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
+        return self
+
+    @classmethod
+    def from_task(
+        cls,
+        name: str,
+        task: "Task",
+        splits: list[DatasetSplitDefinition],
+        filter: DatasetFilter = AllDatasetFilter,
+        description: str | None = None,
+    ):
+        """
+        Build a dataset split from a task.
+        """
+        split_contents = cls.build_split_contents(task, splits, filter)
+        return cls(
+            parent=task,
+            name=name,
+            description=description,
+            splits=splits,
+            split_contents=split_contents,
+        )
+
+    @classmethod
+    def build_split_contents(
+        cls,
+        task: "Task",
+        splits: list[DatasetSplitDefinition],
+        filter: DatasetFilter,
+    ) -> dict[str, list[str]]:
+        valid_ids = []
+        for task_run in task.runs():
+            if filter(task_run):
+                valid_ids.append(task_run.id)
+
+        # Shuffle and split by split percentage
+        random.shuffle(valid_ids)
+        split_contents = {}
+        start_idx = 0
+        remaining_items = len(valid_ids)
+
+        # Handle all splits except the last one
+        for split in splits[:-1]:
+            split_size = round(len(valid_ids) * split.percentage)
+            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
+            start_idx += split_size
+            remaining_items -= split_size
+
+        # Last split gets all remaining items (for rounding)
+        if splits:
+            split_contents[splits[-1].name] = valid_ids[start_idx:]
+
+        return split_contents
+
+    def parent_task(self) -> "Task | None":
+        # inline import to avoid circular import
+        from kiln_ai.datamodel import Task
+
+        if not isinstance(self.parent, Task):
+            return None
+        return self.parent
+
+    def missing_count(self) -> int:
+        """
+        Returns:
+            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
+        """
+        parent = self.parent_task()
+        if parent is None:
+            raise ValueError("DatasetSplit has no parent task")
+
+        runs = parent.runs()
+        all_ids = set(run.id for run in runs)
+        all_ids_in_splits = set()
+        for ids in self.split_contents.values():
+            all_ids_in_splits.update(ids)
+        missing = all_ids_in_splits - all_ids
+        return len(missing)
+
 
 class TaskRequirement(BaseModel):
     """
@@ -376,7 +629,11 @@ class TaskDeterminism(str, Enum):
 class Task(
     KilnParentedModel,
     KilnParentModel,
-    parent_of={"runs": TaskRun},
+    parent_of={
+        "runs": TaskRun,
+        "dataset_splits": DatasetSplit,
+        "finetunes": Finetune,
+    },
 ):
     """
     Represents a specific task to be performed, with associated requirements and validation rules.
@@ -416,6 +673,12 @@ class Task(
     def runs(self) -> list[TaskRun]:
         return super().runs()  # type: ignore
 
+    def dataset_splits(self) -> list[DatasetSplit]:
+        return super().dataset_splits()  # type: ignore
+
+    def finetunes(self) -> list[Finetune]:
+        return super().finetunes()  # type: ignore
+
 
 class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
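Taken together, the strict-mode toggle, dataset splits, and `Finetune` records added above compose as follows. A minimal sketch, assuming a `Task` already saved to disk; the `task_path`, split name, fine-tune name, and `base_model_id` values are illustrative, not defaults from the package:

```python
from kiln_ai.datamodel import (
    DatasetSplit,
    Finetune,
    HighRatingDatasetFilter,
    Task,
    Train80Test20SplitDefinition,
    set_strict_mode,
    strict_mode,
)

my_task = Task.load_from_file("my_project/tasks/abc123/task.kiln")  # hypothetical path

# Strict mode is off by default so the library can load imperfect/legacy data.
# The Kiln App opts in, which makes e.g. TaskOutput.source and TaskRun.input_source
# required on newly created (not file-loaded) records.
assert strict_mode() is False
set_strict_mode(True)

# Freeze the task's current runs into reproducible train/test splits,
# keeping only runs whose output rating is high quality.
split = DatasetSplit.from_task(
    name="frozen-v1",  # illustrative name
    task=my_task,
    splits=Train80Test20SplitDefinition,
    filter=HighRatingDatasetFilter,
)
split.save_to_file()

# split_contents maps split names to task-run IDs: {"train": [...], "test": [...]}
print({name: len(ids) for name, ids in split.split_contents.items()})

# Record a fine-tune job against the frozen split.
finetune = Finetune(
    parent=my_task,
    name="my-finetune",                      # illustrative name
    provider="openai",
    base_model_id="gpt-4o-mini-2024-07-18",  # illustrative provider model ID
    dataset_split_id=str(split.id),
    system_message="You are a helpful assistant.",
)
finetune.save_to_file()
```

`validate_split_percentages` rejects splits whose percentages don't sum to 1.0, and `missing_count()` reports IDs persisted in the split that no longer exist among the task's runs.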
kiln_ai/datamodel/basemodel.py
CHANGED
@@ -1,4 +1,6 @@
 import json
+import os
+import re
 import shutil
 import uuid
 from abc import ABCMeta
@@ -6,7 +8,6 @@ from builtins import classmethod
 from datetime import datetime
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
     List,
@@ -20,12 +21,14 @@ from pydantic import (
     ConfigDict,
     Field,
     ValidationError,
+    ValidationInfo,
     computed_field,
     model_validator,
 )
 from pydantic_core import ErrorDetails
 from typing_extensions import Self
 
+from kiln_ai.datamodel.model_cache import ModelCache
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.formatting import snake_case
 
@@ -39,6 +42,35 @@ T = TypeVar("T", bound="KilnBaseModel")
 PT = TypeVar("PT", bound="KilnParentedModel")
 
 
+# Naming conventions:
+# 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
+# 2) Descriptions are for Kiln users to describe/understand the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.
+
+# Filename compatible names
+NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
+NAME_FIELD = Field(
+    min_length=1,
+    max_length=120,
+    pattern=NAME_REGEX,
+    description="A name for this entity.",
+)
+SHORT_NAME_FIELD = Field(
+    min_length=1,
+    max_length=32,
+    pattern=NAME_REGEX,
+    description="A name for this entity",
+)
+
+
+def string_to_valid_name(name: str) -> str:
+    # Replace any character not allowed by NAME_REGEX with an underscore
+    valid_name = re.sub(r"[^A-Za-z0-9 _-]", "_", name)
+    # Replace consecutive underscores with a single underscore
+    valid_name = re.sub(r"_+", "_", valid_name)
+    # Remove leading and trailing underscores or whitespace
+    return valid_name.strip("_").strip()
+
+
 class KilnBaseModel(BaseModel):
     """Base model for all Kiln data models with common functionality for persistence and versioning.
 
@@ -58,6 +90,8 @@ class KilnBaseModel(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
     created_by: str = Field(default_factory=lambda: Config.shared().user_id)
 
+    _loaded_from_file: bool = False
+
     @computed_field()
     def model_type(self) -> str:
         return self.type_name()
@@ -86,7 +120,7 @@
         return cls.load_from_file(path)
 
     @classmethod
-    def load_from_file(cls: Type[T], path: Path) -> T:
+    def load_from_file(cls: Type[T], path: Path | str) -> T:
         """Load a model instance from a specific file path.
 
         Args:
@@ -97,15 +131,28 @@
 
         Raises:
             ValueError: If the loaded model is not of the expected type or version
+            FileNotFoundError: If the file does not exist
         """
+        if isinstance(path, str):
+            path = Path(path)
+        cached_model = ModelCache.shared().get_model(path, cls)
+        if cached_model is not None:
+            return cached_model
         with open(path, "r") as file:
+            # Modified time of the file, for cache invalidation. Taken from the file descriptor so it's atomic with the read.
+            mtime_ns = os.fstat(file.fileno()).st_mtime_ns
             file_data = file.read()
             # TODO P2 perf: parsing the JSON twice here.
             # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
             parsed_json = json.loads(file_data)
-            m = cls.model_validate_json(file_data, strict=True)
+            m = cls.model_validate_json(
+                file_data,
+                strict=True,
+                context={"loading_from_file": True},
+            )
             if not isinstance(m, cls):
                 raise ValueError(f"Loaded model is not of type {cls.__name__}")
+            m._loaded_from_file = True
             file_data = None
         m.path = path
         if m.v > m.max_schema_version():
@@ -120,8 +167,21 @@
             f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
             f"version: {m.v}, max version: {m.max_schema_version()}"
         )
+        ModelCache.shared().set_model(path, m, mtime_ns)
         return m
 
+    def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
+        # Two ways of indicating the model was loaded from file:
+        # 1) info.context.get("loading_from_file") -> during actual loading, before we can set _loaded_from_file
+        # 2) self._loaded_from_file -> after loading, set by the loader
+        if (
+            info is not None
+            and info.context is not None
+            and info.context.get("loading_from_file", False)
+        ):
+            return True
+        return self._loaded_from_file
+
     def save_to_file(self) -> None:
         """Save the model instance to a file.
 
@@ -140,6 +200,9 @@
             file.write(json_data)
         # save the path so even if something like name changes, the file doesn't move
         self.path = path
+        # We could save to the cache here, but invalidating will trigger a load on next use.
+        # This ensures everything in the cache was loaded from disk, and the cache perfectly reflects what's on disk.
+        ModelCache.shared().invalidate(path)
 
     def delete(self) -> None:
         if self.path is None:
@@ -148,6 +211,7 @@
         if dir_path is None:
             raise ValueError("Cannot delete model because path is not set")
         shutil.rmtree(dir_path)
+        ModelCache.shared().invalidate(self.path)
         self.path = None
 
     def build_path(self) -> Path | None:
@@ -167,51 +231,44 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
     including parent reference handling and file system organization.
 
     Attributes:
-        parent (KilnBaseModel): Reference to the parent model instance
+        parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
     """
 
-
+    # Parent is an in-memory-only reference to the parent. If it's set we use that. If not, we'll try to load it from disk based on the path.
+    # We don't persist the parent reference to disk. See the accessors below for how we make it a clean API (the parent accessor will lazy load from disk).
+    parent: Optional[KilnBaseModel] = Field(default=None, exclude=True)
 
-
-
-
+    def __getattribute__(self, name: str) -> Any:
+        if name == "parent":
+            return self.load_parent()
+        return super().__getattribute__(name)
 
-    def __init__(self, **data):
-        super().__init__(**data)
-        if "parent" in data:
-            self.parent = data["parent"]
+    def cached_parent(self) -> Optional[KilnBaseModel]:
+        return object.__getattribute__(self, "parent")
 
-    @property
-    def parent(self) -> Optional[KilnBaseModel]:
+    def load_parent(self) -> Optional[KilnBaseModel]:
         """Get the parent model instance, loading it from disk if necessary.
 
         Returns:
             Optional[KilnBaseModel]: The parent model instance or None if not set
         """
-        if self._parent is not None:
-            return self._parent
+        cached_parent = self.cached_parent()
+        if cached_parent is not None:
+            return cached_parent
+
         # lazy load parent from path
         if self.path is None:
            return None
-        #
+        # Note: this only works with base_filename. If we ever support custom names, we need to change this.
         parent_path = (
             self.path.parent.parent.parent
             / self.__class__.parent_type().base_filename()
         )
         if parent_path is None:
            return None
-        return self.__class__.parent_type().load_from_file(parent_path)
-
-
-    @parent.setter
-    def parent(self, value: Optional[KilnBaseModel]):
-        if value is not None:
-            expected_parent_type = self.__class__.parent_type()
-            if not isinstance(value, expected_parent_type):
-                raise ValueError(
-                    f"Parent must be of type {expected_parent_type}, but was {type(value)}"
-                )
-        self._parent = value
+        loaded_parent = self.__class__.parent_type().load_from_file(parent_path)
+        self.parent = loaded_parent
+        return loaded_parent
 
     # Dynamically implemented by KilnParentModel method injection
     @classmethod
@@ -225,11 +282,12 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
 
     @model_validator(mode="after")
     def check_parent_type(self) -> Self:
-        if self._parent is not None:
+        cached_parent = self.cached_parent()
+        if cached_parent is not None:
             expected_parent_type = self.__class__.parent_type()
-            if not isinstance(self._parent, expected_parent_type):
+            if not isinstance(cached_parent, expected_parent_type):
                 raise ValueError(
-                    f"Parent must be of type {expected_parent_type}, but was {type(self._parent)}"
+                    f"Parent must be of type {expected_parent_type}, but was {type(cached_parent)}"
                 )
         return self
 
@@ -268,9 +326,7 @@
         )
 
     @classmethod
-    def all_children_of_parent_path(
-        cls: Type[PT], parent_path: Path | None
-    ) -> list[PT]:
+    def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
         if parent_path is None:
             # children are disk based. If not saved, they don't exist
             return []
@@ -292,13 +348,41 @@
             return []
 
         # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
-        children = []
         for child_file in relationship_folder.glob(f"**/{cls.base_filename()}"):
-            child = cls.load_from_file(child_file)
-            children.append(child)
+            yield child_file
 
+    @classmethod
+    def all_children_of_parent_path(
+        cls: Type[PT], parent_path: Path | None
+    ) -> list[PT]:
+        children = []
+        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
+            children.append(cls.load_from_file(child_path))
         return children
 
+    @classmethod
+    def from_id_and_parent_path(
+        cls: Type[PT], id: str, parent_path: Path | None
+    ) -> PT | None:
+        """
+        Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.
+
+        Uses the cache, so it's still slow on first load.
+        """
+        if parent_path is None:
+            return None
+
+        # Note: we're using the in-file ID. We could make this faster using the path ID if this becomes a perf bottleneck, but it's better to have one source of truth.
+        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
+            child_id = ModelCache.shared().get_model_id(child_path, cls)
+            if child_id == id:
+                return cls.load_from_file(child_path)
+            if child_id is None:
+                child = cls.load_from_file(child_path)
+                if child.id == id:
+                    return child
+        return None
+
 
     # Parent create methods for all child relationships
     # You must pass in parent_of in the subclass definition, defining the child relationships