PyPI - kiln-ai - Versions diffs - 0.8.1__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

kiln-ai 0.8.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kiln-ai might be problematic. Click here for more details.

Files changed (88)

kiln_ai/adapters/__init__.py +7 -7
kiln_ai/adapters/adapter_registry.py +81 -10
kiln_ai/adapters/data_gen/data_gen_task.py +21 -3
kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
kiln_ai/adapters/eval/base_eval.py +164 -0
kiln_ai/adapters/eval/eval_runner.py +267 -0
kiln_ai/adapters/eval/g_eval.py +367 -0
kiln_ai/adapters/eval/registry.py +16 -0
kiln_ai/adapters/eval/test_base_eval.py +324 -0
kiln_ai/adapters/eval/test_eval_runner.py +640 -0
kiln_ai/adapters/eval/test_g_eval.py +497 -0
kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
kiln_ai/adapters/fine_tune/test_dataset_formatter.py +472 -129
kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +114 -22
kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
kiln_ai/adapters/ml_model_list.py +434 -93
kiln_ai/adapters/model_adapters/__init__.py +18 -0
kiln_ai/adapters/model_adapters/base_adapter.py +250 -0
kiln_ai/adapters/model_adapters/langchain_adapters.py +309 -0
kiln_ai/adapters/model_adapters/openai_compatible_config.py +10 -0
kiln_ai/adapters/model_adapters/openai_model_adapter.py +289 -0
kiln_ai/adapters/model_adapters/test_base_adapter.py +199 -0
kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +105 -97
kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +216 -0
kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +80 -30
kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +125 -46
kiln_ai/adapters/ollama_tools.py +0 -1
kiln_ai/adapters/parsers/__init__.py +10 -0
kiln_ai/adapters/parsers/base_parser.py +12 -0
kiln_ai/adapters/parsers/json_parser.py +37 -0
kiln_ai/adapters/parsers/parser_registry.py +19 -0
kiln_ai/adapters/parsers/r1_parser.py +69 -0
kiln_ai/adapters/parsers/test_json_parser.py +81 -0
kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
kiln_ai/adapters/prompt_builders.py +193 -49
kiln_ai/adapters/provider_tools.py +91 -36
kiln_ai/adapters/repair/repair_task.py +18 -19
kiln_ai/adapters/repair/test_repair_task.py +7 -7
kiln_ai/adapters/run_output.py +11 -0
kiln_ai/adapters/test_adapter_registry.py +177 -0
kiln_ai/adapters/test_generate_docs.py +69 -0
kiln_ai/adapters/test_ollama_tools.py +0 -1
kiln_ai/adapters/test_prompt_adaptors.py +25 -18
kiln_ai/adapters/test_prompt_builders.py +265 -44
kiln_ai/adapters/test_provider_tools.py +268 -46
kiln_ai/datamodel/__init__.py +51 -772
kiln_ai/datamodel/basemodel.py +31 -11
kiln_ai/datamodel/datamodel_enums.py +58 -0
kiln_ai/datamodel/dataset_filters.py +114 -0
kiln_ai/datamodel/dataset_split.py +170 -0
kiln_ai/datamodel/eval.py +298 -0
kiln_ai/datamodel/finetune.py +105 -0
kiln_ai/datamodel/json_schema.py +14 -3
kiln_ai/datamodel/model_cache.py +8 -3
kiln_ai/datamodel/project.py +23 -0
kiln_ai/datamodel/prompt.py +37 -0
kiln_ai/datamodel/prompt_id.py +83 -0
kiln_ai/datamodel/strict_mode.py +24 -0
kiln_ai/datamodel/task.py +181 -0
kiln_ai/datamodel/task_output.py +321 -0
kiln_ai/datamodel/task_run.py +164 -0
kiln_ai/datamodel/test_basemodel.py +80 -2
kiln_ai/datamodel/test_dataset_filters.py +71 -0
kiln_ai/datamodel/test_dataset_split.py +127 -6
kiln_ai/datamodel/test_datasource.py +3 -2
kiln_ai/datamodel/test_eval_model.py +635 -0
kiln_ai/datamodel/test_example_models.py +34 -17
kiln_ai/datamodel/test_json_schema.py +23 -0
kiln_ai/datamodel/test_model_cache.py +24 -0
kiln_ai/datamodel/test_model_perf.py +125 -0
kiln_ai/datamodel/test_models.py +131 -2
kiln_ai/datamodel/test_prompt_id.py +129 -0
kiln_ai/datamodel/test_task.py +159 -0
kiln_ai/utils/config.py +6 -1
kiln_ai/utils/exhaustive_error.py +6 -0
{kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +45 -7
kiln_ai-0.12.0.dist-info/RECORD +100 -0
kiln_ai/adapters/base_adapter.py +0 -191
kiln_ai/adapters/langchain_adapters.py +0 -256
kiln_ai-0.8.1.dist-info/RECORD +0 -58
{kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
{kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/datamodel/basemodel.py CHANGED Viewed

@@ -120,11 +120,12 @@ class KilnBaseModel(BaseModel):
         return cls.load_from_file(path)
     @classmethod
-    def load_from_file(cls: Type[T], path: Path | str) -> T:
+    def load_from_file(cls: Type[T], path: Path | str, readonly: bool = False) -> T:
         """Load a model instance from a specific file path.
         Args:
             path (Path): Path to the model file
+            readonly (bool): If True, the model will be returned in readonly mode (cached instance, not a copy, not safe to mutate)
         Returns:
             T: Instance of the model
@@ -135,10 +136,10 @@ class KilnBaseModel(BaseModel):
         """
         if isinstance(path, str):
             path = Path(path)
-        cached_model = ModelCache.shared().get_model(path, cls)
+        cached_model = ModelCache.shared().get_model(path, cls, readonly=readonly)
         if cached_model is not None:
             return cached_model
-        with open(path, "r") as file:
+        with open(path, "r", encoding="utf-8") as file:
             # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
             mtime_ns = os.fstat(file.fileno()).st_mtime_ns
             file_data = file.read()
@@ -168,13 +169,20 @@ class KilnBaseModel(BaseModel):
         # Two methods of indicated it's loaded from file:
         # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
         # 2) self._loaded_from_file -> After loading, set by the loader
+        if self.loading_from_file(info):
+            return True
+        return self._loaded_from_file
+    # indicates the model is currently being loaded from file (not mutating it after)
+    def loading_from_file(self, info: ValidationInfo | None = None) -> bool:
+        # info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
         if (
             info is not None
             and info.context is not None
             and info.context.get("loading_from_file", False)
         ):
             return True
-        return self._loaded_from_file
+        return False
     def save_to_file(self) -> None:
         """Save the model instance to a file.
@@ -190,7 +198,7 @@ class KilnBaseModel(BaseModel):
             )
         path.parent.mkdir(parents=True, exist_ok=True)
         json_data = self.model_dump_json(indent=2, exclude={"path"})
-        with open(path, "w") as file:
+        with open(path, "w", encoding="utf-8") as file:
             file.write(json_data)
         # save the path so even if something like name changes, the file doesn't move
         self.path = path
@@ -342,16 +350,28 @@ class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
             return []
         # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
-        for child_file in relationship_folder.glob(f"**/{cls.base_filename()}"):
-            yield child_file
+        # manual code instead of glob for performance (5x speedup over glob)
+        base_filename = cls.base_filename()
+        # Iterate through immediate subdirectories using scandir for better performance
+        # Benchmark: scandir is 10x faster than glob, so worth the extra code
+        with os.scandir(relationship_folder) as entries:
+            for entry in entries:
+                if not entry.is_dir():
+                    continue
+                child_file = Path(entry.path) / base_filename
+                if child_file.is_file():
+                    yield child_file
     @classmethod
     def all_children_of_parent_path(
-        cls: Type[PT], parent_path: Path | None
+        cls: Type[PT], parent_path: Path | None, readonly: bool = False
     ) -> list[PT]:
         children = []
         for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
-            children.append(cls.load_from_file(child_path))
+            item = cls.load_from_file(child_path, readonly=readonly)
+            children.append(item)
         return children
     @classmethod
@@ -394,8 +414,8 @@ class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
     def _create_child_method(
         cls, relationship_name: str, child_class: Type[KilnParentedModel]
     ):
-        def child_method(self) -> list[child_class]:
-            return child_class.all_children_of_parent_path(self.path)
+        def child_method(self, readonly: bool = False) -> list[child_class]:
+            return child_class.all_children_of_parent_path(self.path, readonly=readonly)
         child_method.__name__ = relationship_name
         child_method.__annotations__ = {"return": List[child_class]}

kiln_ai/datamodel/datamodel_enums.py ADDED Viewed

@@ -0,0 +1,58 @@
+from enum import Enum, IntEnum
+class Priority(IntEnum):
+    """Defines priority levels for tasks and requirements, where P0 is highest priority."""
+    p0 = 0
+    p1 = 1
+    p2 = 2
+    p3 = 3
+# Only one rating type for now, but this allows for extensibility if we want to add more in the future
+class TaskOutputRatingType(str, Enum):
+    """Defines the types of rating systems available for task outputs."""
+    five_star = "five_star"
+    pass_fail = "pass_fail"
+    pass_fail_critical = "pass_fail_critical"
+    custom = "custom"
+class StructuredOutputMode(str, Enum):
+    """
+    Enumeration of supported structured output modes.
+    - default: let the adapter decide
+    - json_schema: request json using API capabilities for json_schema
+    - function_calling: request json using API capabilities for function calling
+    - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
+    - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
+    - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
+    """
+    default = "default"
+    json_schema = "json_schema"
+    function_calling_weak = "function_calling_weak"
+    function_calling = "function_calling"
+    json_mode = "json_mode"
+    json_instructions = "json_instructions"
+    json_instruction_and_object = "json_instruction_and_object"
+class FineTuneStatusType(str, Enum):
+    """
+    The status type of a fine-tune (running, completed, failed, etc).
+    """
+    unknown = "unknown"  # server error
+    pending = "pending"
+    running = "running"
+    completed = "completed"
+    failed = "failed"
+class FinetuneDataStrategy(str, Enum):
+    final_only = "final_only"
+    final_and_intermediate = "final_and_intermediate"

kiln_ai/datamodel/dataset_filters.py ADDED Viewed

@@ -0,0 +1,114 @@
+from enum import Enum
+from typing import Annotated, Protocol
+from pydantic import AfterValidator
+from kiln_ai.datamodel.task_run import TaskRun
+class DatasetFilter(Protocol):
+    """A protocol defining the interface for dataset filters.
+    This allows both stateless function-based filters and stateful class-based filters
+    to be used interchangeably, as long as they implement the __call__ method.
+    """
+    def __call__(self, task_run: TaskRun) -> bool:
+        """Return True if the task run should be included in the dataset."""
+        ...
+def AllDatasetFilter(_: TaskRun) -> bool:
+    return True
+def HighRatingDatasetFilter(task_run: TaskRun) -> bool:
+    if task_run.output is None:
+        return False
+    if task_run.repaired_output is not None:
+        # Repairs always considered high quality
+        return True
+    if task_run.output.rating is None:
+        return False
+    return task_run.output.rating.is_high_quality()
+def ThinkingModelDatasetFilter(task_run: TaskRun) -> bool:
+    """
+    A filter that returns True if the task has intermediate outputs we can training a 'thinking' model on (reasoning or chain of thought)
+    """
+    return task_run.has_thinking_training_data()
+def ThinkingModelHighRatedFilter(task_run: TaskRun) -> bool:
+    """
+    A filter that returns True if the task has thinking data and the output is high quality
+    """
+    return ThinkingModelDatasetFilter(task_run) and HighRatingDatasetFilter(task_run)
+class TagFilter:
+    """
+    A filter that returns True if the task has a tag matching the given tag.
+    """
+    def __init__(self, tag: str):
+        self.tag = tag
+    def __call__(self, task_run: TaskRun) -> bool:
+        return self.tag in task_run.tags
+class StaticDatasetFilters(str, Enum):
+    """Dataset filter names."""
+    ALL = "all"
+    HIGH_RATING = "high_rating"
+    THINKING_MODEL = "thinking_model"
+    THINKING_MODEL_HIGH_RATED = "thinking_model_high_rated"
+static_dataset_filters = {
+    StaticDatasetFilters.ALL: AllDatasetFilter,
+    StaticDatasetFilters.HIGH_RATING: HighRatingDatasetFilter,
+    StaticDatasetFilters.THINKING_MODEL: ThinkingModelDatasetFilter,
+    StaticDatasetFilters.THINKING_MODEL_HIGH_RATED: ThinkingModelHighRatedFilter,
+}
+DatasetFilterId = Annotated[
+    str,
+    AfterValidator(lambda v: _check_dataset_filter_id(v)),
+]
+"""
+A pydantic type that validates strings containing a valid dataset filter ID.
+Dataset filter IDs can be one of:
+- A built-in dataset filter name
+- A tag::<tag> filter, where <tag> is a string
+"""
+def _check_dataset_filter_id(id: str) -> str:
+    """
+    Check that the dataset filter ID is valid.
+    """
+    if id in static_dataset_filters:
+        return id
+    if id.startswith("tag::") and len(id) > 5:
+        return id
+    raise ValueError(f"Invalid dataset filter ID: {id}")
+def dataset_filter_from_id(id: DatasetFilterId) -> DatasetFilter:
+    """
+    Get a dataset filter from an ID.
+    """
+    if id.startswith("tag::") and len(id) > 5:
+        return TagFilter(id[5:])
+    if id in static_dataset_filters:
+        return static_dataset_filters[id]
+    raise ValueError(f"Invalid dataset filter ID: {id}")

kiln_ai/datamodel/dataset_split.py ADDED Viewed

@@ -0,0 +1,170 @@
+"""
+Tools for splitting datasets into train/test/validation splits. Includes filters for selecting which task runs to include in each split.
+"""
+import math
+import random
+from typing import TYPE_CHECKING
+from pydantic import BaseModel, Field, model_validator
+from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+from kiln_ai.datamodel.dataset_filters import (
+    DatasetFilter,
+    DatasetFilterId,
+    dataset_filter_from_id,
+)
+if TYPE_CHECKING:
+    from kiln_ai.datamodel.task import Task
+class DatasetSplitDefinition(BaseModel):
+    """
+    A definition of a split in a dataset.
+    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
+    """
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the dataset for you and your team. Not used in training.",
+    )
+    percentage: float = Field(
+        ge=0.0,
+        le=1.0,
+        description="The percentage of the dataset that this split represents (between 0 and 1).",
+    )
+AllSplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="all", percentage=1.0)
+]
+Train80Test20SplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="train", percentage=0.8),
+    DatasetSplitDefinition(name="test", percentage=0.2),
+]
+Train60Test20Val20SplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="train", percentage=0.6),
+    DatasetSplitDefinition(name="test", percentage=0.2),
+    DatasetSplitDefinition(name="val", percentage=0.2),
+]
+Train80Test10Val10SplitDefinition: list[DatasetSplitDefinition] = [
+    DatasetSplitDefinition(name="train", percentage=0.8),
+    DatasetSplitDefinition(name="test", percentage=0.1),
+    DatasetSplitDefinition(name="val", percentage=0.1),
+]
+class DatasetSplit(KilnParentedModel):
+    """
+    A collection of task runs, with optional splits (train, test, validation).
+    Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.
+    Maintains a list of IDs for each split, to avoid data duplication.
+    """
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the dataset for you and your team. Not used in training.",
+    )
+    splits: list[DatasetSplitDefinition] = Field(
+        default_factory=list,
+        description="The splits in the dataset.",
+    )
+    split_contents: dict[str, list[str]] = Field(
+        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
+    )
+    filter: DatasetFilterId | None = Field(
+        default=None,
+        description="The filter used to build the dataset.",
+    )
+    @model_validator(mode="after")
+    def validate_split_percentages(self) -> "DatasetSplit":
+        total = sum(split.percentage for split in self.splits)
+        if not math.isclose(total, 1.0, rel_tol=1e-9):
+            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
+        return self
+    @classmethod
+    def from_task(
+        cls,
+        name: str,
+        task: "Task",
+        splits: list[DatasetSplitDefinition],
+        filter_id: DatasetFilterId = "all",
+        description: str | None = None,
+    ):
+        """
+        Build a dataset split from a task.
+        """
+        filter = dataset_filter_from_id(filter_id)
+        split_contents = cls.build_split_contents(task, splits, filter)
+        return cls(
+            parent=task,
+            name=name,
+            description=description,
+            splits=splits,
+            split_contents=split_contents,
+            filter=filter_id,
+        )
+    @classmethod
+    def build_split_contents(
+        cls,
+        task: "Task",
+        splits: list[DatasetSplitDefinition],
+        filter: DatasetFilter,
+    ) -> dict[str, list[str]]:
+        valid_ids = []
+        for task_run in task.runs():
+            if filter(task_run):
+                valid_ids.append(task_run.id)
+        # Shuffle and split by split percentage
+        random.shuffle(valid_ids)
+        split_contents = {}
+        start_idx = 0
+        remaining_items = len(valid_ids)
+        # Handle all splits except the last one
+        for split in splits[:-1]:
+            split_size = round(len(valid_ids) * split.percentage)
+            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
+            start_idx += split_size
+            remaining_items -= split_size
+        # Last split gets all remaining items (for rounding)
+        if splits:
+            split_contents[splits[-1].name] = valid_ids[start_idx:]
+        return split_contents
+    def parent_task(self) -> "Task | None":
+        # inline import to avoid circular import
+        from kiln_ai.datamodel import Task
+        if not isinstance(self.parent, Task):
+            return None
+        return self.parent
+    def missing_count(self) -> int:
+        """
+        Returns:
+            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
+        """
+        parent = self.parent_task()
+        if parent is None:
+            raise ValueError("DatasetSplit has no parent task")
+        runs = parent.runs(readonly=True)
+        all_ids = set(run.id for run in runs)
+        all_ids_in_splits = set()
+        for ids in self.split_contents.values():
+            all_ids_in_splits.update(ids)
+        missing = all_ids_in_splits - all_ids
+        return len(missing)

kiln-ai 0.8.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

Potentially problematic release.

kiln-ai 0.8.1__py3-none-any.whl → 0.12.0__py3-none-any.whl