kiln-ai 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (80)
  1. kiln_ai/adapters/__init__.py +4 -0
  2. kiln_ai/adapters/adapter_registry.py +163 -39
  3. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  4. kiln_ai/adapters/eval/__init__.py +28 -0
  5. kiln_ai/adapters/eval/base_eval.py +164 -0
  6. kiln_ai/adapters/eval/eval_runner.py +270 -0
  7. kiln_ai/adapters/eval/g_eval.py +368 -0
  8. kiln_ai/adapters/eval/registry.py +16 -0
  9. kiln_ai/adapters/eval/test_base_eval.py +325 -0
  10. kiln_ai/adapters/eval/test_eval_runner.py +641 -0
  11. kiln_ai/adapters/eval/test_g_eval.py +498 -0
  12. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
  14. kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
  15. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  16. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
  17. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  18. kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
  19. kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
  20. kiln_ai/adapters/ml_model_list.py +758 -163
  21. kiln_ai/adapters/model_adapters/__init__.py +2 -4
  22. kiln_ai/adapters/model_adapters/base_adapter.py +61 -43
  23. kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
  24. kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
  25. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  26. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
  27. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  28. kiln_ai/adapters/model_adapters/test_structured_output.py +59 -35
  29. kiln_ai/adapters/ollama_tools.py +3 -3
  30. kiln_ai/adapters/parsers/r1_parser.py +19 -14
  31. kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
  32. kiln_ai/adapters/prompt_builders.py +80 -42
  33. kiln_ai/adapters/provider_tools.py +50 -58
  34. kiln_ai/adapters/repair/repair_task.py +9 -21
  35. kiln_ai/adapters/repair/test_repair_task.py +6 -6
  36. kiln_ai/adapters/run_output.py +3 -0
  37. kiln_ai/adapters/test_adapter_registry.py +26 -29
  38. kiln_ai/adapters/test_generate_docs.py +4 -4
  39. kiln_ai/adapters/test_ollama_tools.py +0 -1
  40. kiln_ai/adapters/test_prompt_adaptors.py +47 -33
  41. kiln_ai/adapters/test_prompt_builders.py +91 -31
  42. kiln_ai/adapters/test_provider_tools.py +26 -81
  43. kiln_ai/datamodel/__init__.py +50 -952
  44. kiln_ai/datamodel/basemodel.py +2 -0
  45. kiln_ai/datamodel/datamodel_enums.py +60 -0
  46. kiln_ai/datamodel/dataset_filters.py +114 -0
  47. kiln_ai/datamodel/dataset_split.py +170 -0
  48. kiln_ai/datamodel/eval.py +298 -0
  49. kiln_ai/datamodel/finetune.py +105 -0
  50. kiln_ai/datamodel/json_schema.py +7 -1
  51. kiln_ai/datamodel/project.py +23 -0
  52. kiln_ai/datamodel/prompt.py +37 -0
  53. kiln_ai/datamodel/prompt_id.py +83 -0
  54. kiln_ai/datamodel/strict_mode.py +24 -0
  55. kiln_ai/datamodel/task.py +181 -0
  56. kiln_ai/datamodel/task_output.py +328 -0
  57. kiln_ai/datamodel/task_run.py +164 -0
  58. kiln_ai/datamodel/test_basemodel.py +19 -11
  59. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  60. kiln_ai/datamodel/test_dataset_split.py +32 -8
  61. kiln_ai/datamodel/test_datasource.py +22 -2
  62. kiln_ai/datamodel/test_eval_model.py +635 -0
  63. kiln_ai/datamodel/test_example_models.py +9 -13
  64. kiln_ai/datamodel/test_json_schema.py +23 -0
  65. kiln_ai/datamodel/test_models.py +2 -2
  66. kiln_ai/datamodel/test_prompt_id.py +129 -0
  67. kiln_ai/datamodel/test_task.py +159 -0
  68. kiln_ai/utils/config.py +43 -1
  69. kiln_ai/utils/dataset_import.py +232 -0
  70. kiln_ai/utils/test_dataset_import.py +596 -0
  71. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +86 -6
  72. kiln_ai-0.13.0.dist-info/RECORD +103 -0
  73. kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -302
  74. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -11
  75. kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -246
  76. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -350
  77. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -225
  78. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  79. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
  80. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/datamodel/finetune.py (new file):
@@ -0,0 +1,105 @@
+from typing import TYPE_CHECKING, Dict, Union
+
+from pydantic import Field, model_validator
+from typing_extensions import Self
+
+from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+from kiln_ai.datamodel.datamodel_enums import (
+    FinetuneDataStrategy,
+    FineTuneStatusType,
+    StructuredOutputMode,
+)
+
+if TYPE_CHECKING:
+    from kiln_ai.datamodel.task import Task
+
+
+class Finetune(KilnParentedModel):
+    """
+    The Kiln fine-tune datamodel.
+
+    Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the fine-tune for you and your team. Not used in training.",
+    )
+    structured_output_mode: StructuredOutputMode | None = Field(
+        default=None,
+        description="The mode to use to train the model for structured output, if it was trained with structured output. Will determine how we call the tuned model, so we call with the matching mode.",
+    )
+    provider: str = Field(
+        description="The provider to use for the fine-tune (e.g. 'openai')."
+    )
+    base_model_id: str = Field(
+        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
+    )
+    provider_id: str | None = Field(
+        default=None,
+        description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
+    )
+    fine_tune_model_id: str | None = Field(
+        default=None,
+        description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
+    )
+    dataset_split_id: str = Field(
+        description="The ID of the dataset split to use for this fine-tune.",
+    )
+    train_split_name: str = Field(
+        default="train",
+        description="The name of the training split to use for this fine-tune.",
+    )
+    validation_split_name: str | None = Field(
+        default=None,
+        description="The name of the validation split to use for this fine-tune. Optional.",
+    )
+    parameters: dict[str, str | int | float | bool] = Field(
+        default={},
+        description="The parameters to use for this fine-tune. These are provider-specific.",
+    )
+    # These two fields are saved exactly as used for training. Even if they map exactly to a custom prompt or generator, those can change, so we want to keep a record of the training prompt.
+    system_message: str = Field(
+        description="The system message to use for this fine-tune.",
+    )
+    thinking_instructions: str | None = Field(
+        default=None,
+        description="The thinking instructions to use for this fine-tune. Only used when data_strategy is final_and_intermediate.",
+    )
+    latest_status: FineTuneStatusType = Field(
+        default=FineTuneStatusType.unknown,
+        description="The latest known status of this fine-tune. Not updated in real time.",
+    )
+    properties: Dict[str, str | int | float] = Field(
+        default={},
+        description="Properties of the fine-tune. Different providers may use different properties.",
+    )
+    data_strategy: FinetuneDataStrategy = Field(
+        default=FinetuneDataStrategy.final_only,
+        description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
+    )
+
+    # Workaround to return a typed parent without importing Task
+    def parent_task(self) -> Union["Task", None]:
+        if self.parent is None or self.parent.__class__.__name__ != "Task":
+            return None
+        return self.parent  # type: ignore
+
+    @model_validator(mode="after")
+    def validate_thinking_instructions(self) -> Self:
+        if (
+            self.thinking_instructions is not None
+            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
+        ):
+            raise ValueError(
+                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
+            )
+        if (
+            self.thinking_instructions is None
+            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
+        ):
+            raise ValueError(
+                "Thinking instructions are required when data_strategy is final_and_intermediate"
+            )
+        return self
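A minimal usage sketch of the new validator (field values are hypothetical; imports follow the module paths shown in this diff):

```python
from kiln_ai.datamodel.datamodel_enums import FinetuneDataStrategy
from kiln_ai.datamodel.finetune import Finetune

# Training on final responses plus intermediate reasoning requires
# thinking instructions; the model_validator enforces this pairing.
ft = Finetune(
    name="Demo Finetune",  # hypothetical values throughout
    provider="openai",
    base_model_id="gpt-4o-mini",
    dataset_split_id="split-1",
    system_message="You are a helpful assistant.",
    data_strategy=FinetuneDataStrategy.final_and_intermediate,
    thinking_instructions="Think step by step before answering.",
)

# Omitting thinking_instructions here (or supplying them with the
# final_only strategy) raises a ValueError at construction time.
```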

kiln_ai/datamodel/json_schema.py:
@@ -1,4 +1,5 @@
 import json
+import re
 from typing import Annotated, Dict
 
 import jsonschema
@@ -48,7 +49,7 @@ def validate_schema(instance: Dict, schema_str: str) -> None:
         v.validate(instance)
     except jsonschema.exceptions.ValidationError as e:
         raise ValueError(
-            f"This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema. Search 'Troubleshooting Structured Data Issues' in our docs for more information. The error from the schema check was: {e.message}"
+            f"This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema. Search 'Troubleshooting Structured Data Issues' in our docs for more information. The error from the schema check was: {e.message}. The JSON was: \n```json\n{instance}\n```"
         ) from e
 
 
@@ -83,3 +84,8 @@ def schema_from_json_str(v: str) -> Dict:
         raise ValueError(f"Invalid JSON: {v}\n {e}")
     except Exception as e:
         raise ValueError(f"Unexpected error parsing JSON schema: {v}\n {e}")
+
+
+def string_to_json_key(s: str) -> str:
+    """Convert a string to a valid JSON key."""
+    return re.sub(r"[^a-z0-9_]", "", s.strip().lower().replace(" ", "_"))
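For reference, a quick sketch of what the new string_to_json_key helper produces (inputs are illustrative):

```python
from kiln_ai.datamodel.json_schema import string_to_json_key

# Lowercases, maps spaces to underscores, then strips anything
# outside [a-z0-9_].
assert string_to_json_key("Overall Rating!") == "overall_rating"
assert string_to_json_key("  G-Eval Score 2 ") == "geval_score_2"
```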

kiln_ai/datamodel/project.py (new file):
@@ -0,0 +1,23 @@
+from pydantic import Field
+
+from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentModel
+from kiln_ai.datamodel.task import Task
+
+
+class Project(KilnParentModel, parent_of={"tasks": Task}):
+    """
+    A collection of related tasks.
+
+    Projects organize tasks into logical groups and provide high-level descriptions
+    of the overall goals.
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
+    )
+
+    # Needed for typechecking. TODO P2: fix this in KilnParentModel
+    def tasks(self) -> list[Task]:
+        return super().tasks()  # type: ignore
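A small construction sketch (names are hypothetical; a Project is a plain pydantic model until saved):

```python
from kiln_ai.datamodel.project import Project

project = Project(
    name="Demo Project",  # hypothetical values
    description="Experiments for the demo summarization task.",
)

# tasks() is generated by KilnParentModel via parent_of={"tasks": Task};
# it lists the child Task records once the project exists on disk.
```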

kiln_ai/datamodel/prompt.py (new file):
@@ -0,0 +1,37 @@
+from pydantic import BaseModel, Field
+
+from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+
+
+class BasePrompt(BaseModel):
+    """
+    A prompt for a task. This is the basic data storage format which can be used throughout a project.
+
+    The "Prompt" model name is reserved for the custom prompts parented by a task.
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A more detailed description of the prompt.",
+    )
+    generator_id: str | None = Field(
+        default=None,
+        description="The id of the generator that created this prompt.",
+    )
+    prompt: str = Field(
+        description="The prompt for the task.",
+        min_length=1,
+    )
+    chain_of_thought_instructions: str | None = Field(
+        default=None,
+        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting. COT will not be used unless this is provided.",
+    )
+
+
+class Prompt(KilnParentedModel, BasePrompt):
+    """
+    A prompt for a task. This is the custom prompt parented by a task.
+    """
+
+    pass
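A brief sketch of the split between the two models (values are hypothetical): BasePrompt is the plain storage format, while Prompt layers task parenting on top of it.

```python
from kiln_ai.datamodel.prompt import BasePrompt

base = BasePrompt(
    name="Concise Answer",  # hypothetical values
    prompt="Answer in one short paragraph.",
    # Chain-of-thought prompting is opt-in: COT is only used when
    # chain_of_thought_instructions is provided.
)
```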

kiln_ai/datamodel/prompt_id.py (new file):
@@ -0,0 +1,83 @@
+from enum import Enum
+from typing import Annotated
+
+from pydantic import AfterValidator
+
+
+# Generators that can take any task and build a prompt
+class PromptGenerators(str, Enum):
+    SIMPLE = "simple_prompt_builder"
+    MULTI_SHOT = "multi_shot_prompt_builder"
+    FEW_SHOT = "few_shot_prompt_builder"
+    REPAIRS = "repairs_prompt_builder"
+    SIMPLE_CHAIN_OF_THOUGHT = "simple_chain_of_thought_prompt_builder"
+    FEW_SHOT_CHAIN_OF_THOUGHT = "few_shot_chain_of_thought_prompt_builder"
+    MULTI_SHOT_CHAIN_OF_THOUGHT = "multi_shot_chain_of_thought_prompt_builder"
+
+
+prompt_generator_values = [pg.value for pg in PromptGenerators]
+
+
+PromptId = Annotated[
+    str,
+    AfterValidator(lambda v: _check_prompt_id(v)),
+]
+"""
+A pydantic type that validates strings containing a valid prompt ID.
+
+Prompt IDs can be one of:
+- A saved prompt ID
+- A fine-tune prompt ID
+- A task run config ID
+- A prompt generator name
+"""
+
+
+def _check_prompt_id(id: str) -> str:
+    """
+    Check that the prompt ID is valid.
+    """
+    if id in prompt_generator_values:
+        return id
+
+    if id.startswith("id::"):
+        # check it has 2 parts divided by :: -- 'id::prompt_id'
+        parts = id.split("::")
+        if len(parts) != 2 or len(parts[1]) == 0:
+            raise ValueError(
+                f"Invalid saved prompt ID: {id}. Expected format: 'id::[prompt_id]'."
+            )
+        return id
+
+    if id.startswith("task_run_config::"):
+        # check it has 4 parts divided by :: -- 'task_run_config::project_id::task_id::task_run_config_id'
+        parts = id.split("::")
+        if len(parts) != 4:
+            raise ValueError(
+                f"Invalid task run config prompt ID: {id}. Expected format: 'task_run_config::[project_id]::[task_id]::[task_run_config_id]'."
+            )
+        return id
+
+    if id.startswith("fine_tune_prompt::"):
+        # check it has a fine_tune_id after the :: -- 'fine_tune_prompt::fine_tune_id'
+        fine_tune_id = id[18:]
+        if len(fine_tune_id) == 0:
+            raise ValueError(
+                f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[fine_tune_id]'."
+            )
+        return id
+
+    raise ValueError(f"Invalid prompt ID: {id}")
+
+
+def is_frozen_prompt(id: PromptId) -> bool:
+    """
+    Check if the prompt ID is a frozen prompt.
+    """
+    if id.startswith("id::"):
+        return True
+    if id.startswith("task_run_config::"):
+        return True
+    if id.startswith("fine_tune_prompt::"):
+        return True
+    return False
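To illustrate the accepted ID shapes, a minimal sketch (the wrapper model is hypothetical; the IDs are made up):

```python
from pydantic import BaseModel, ValidationError

from kiln_ai.datamodel.prompt_id import PromptId, is_frozen_prompt


class Holder(BaseModel):  # hypothetical wrapper, just to trigger validation
    prompt_id: PromptId


# The four accepted shapes: generator name, saved prompt, task run
# config reference, and fine-tune prompt.
Holder(prompt_id="simple_prompt_builder")
Holder(prompt_id="id::prompt_123")
Holder(prompt_id="task_run_config::proj_1::task_1::cfg_1")
Holder(prompt_id="fine_tune_prompt::ft_1")

try:
    Holder(prompt_id="id::a::b")  # 'id::' takes exactly one trailing part
except ValidationError as e:
    print(e)

# Generator names are dynamic; the other three shapes are frozen.
assert not is_frozen_prompt("simple_prompt_builder")
assert is_frozen_prompt("id::prompt_123")
```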

kiln_ai/datamodel/strict_mode.py (new file):
@@ -0,0 +1,24 @@
+"""
+Strict mode is a feature that enables extra validations that we want to enforce in Kiln App, ensuring everything follows the ideal schema.
+
+It's off by default when used through the library. Enable it by calling `set_strict_mode(True)`.
+"""
+
+# We want to be hard on ourselves for data completeness generated by the Kiln App, but don't want to make it hard for users to use the datamodel/library.
+# Strict mode enables extra validations that we want to enforce in Kiln App (and any other client that wants best practices), but not in the library (unless they opt in).
+_strict_mode: bool = False
+
+
+def strict_mode() -> bool:
+    """
+    Get the current strict mode setting.
+    """
+    return _strict_mode
+
+
+def set_strict_mode(value: bool) -> None:
+    """
+    Set the strict mode setting.
+    """
+    global _strict_mode
+    _strict_mode = value
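Usage is a simple module-level toggle; a sketch of opting in and restoring the default:

```python
from kiln_ai.datamodel import strict_mode

# Off by default for library users; apps opt in for stricter validation.
assert strict_mode.strict_mode() is False

strict_mode.set_strict_mode(True)
try:
    # construct datamodel objects with app-grade validation here
    pass
finally:
    strict_mode.set_strict_mode(False)  # restore the library default
```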

kiln_ai/datamodel/task.py (new file):
@@ -0,0 +1,181 @@
+from typing import TYPE_CHECKING, Dict, List, Union
+
+from pydantic import BaseModel, Field
+
+from kiln_ai.datamodel import Finetune
+from kiln_ai.datamodel.basemodel import (
+    ID_FIELD,
+    ID_TYPE,
+    NAME_FIELD,
+    SHORT_NAME_FIELD,
+    KilnParentedModel,
+    KilnParentModel,
+)
+from kiln_ai.datamodel.datamodel_enums import Priority, TaskOutputRatingType
+from kiln_ai.datamodel.dataset_split import DatasetSplit
+from kiln_ai.datamodel.eval import Eval
+from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str
+from kiln_ai.datamodel.prompt import BasePrompt, Prompt
+from kiln_ai.datamodel.prompt_id import PromptId
+from kiln_ai.datamodel.task_run import TaskRun
+
+if TYPE_CHECKING:
+    from kiln_ai.datamodel.project import Project
+
+
+class TaskRequirement(BaseModel):
+    """
+    Defines a specific requirement that should be met by task outputs.
+
+    Includes an identifier, name, description, instruction for meeting the requirement,
+    priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
+    """
+
+    id: ID_TYPE = ID_FIELD
+    name: str = SHORT_NAME_FIELD
+    description: str | None = Field(default=None)
+    instruction: str = Field(min_length=1)
+    priority: Priority = Field(default=Priority.p2)
+    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
+
+
+class RunConfigProperties(BaseModel):
+    """
+    A configuration for running a task.
+
+    This includes everything needed to run a task, except the input and task ID. Running the same RunConfig with the same input should make identical calls to the model (output may vary as models are non-deterministic).
+    """
+
+    model_name: str = Field(description="The model to use for this run config.")
+    model_provider_name: str = Field(
+        description="The provider to use for this run config."
+    )
+    prompt_id: PromptId = Field(
+        description="The prompt to use for this run config. Defaults to building a simple prompt from the task if not provided.",
+    )
+
+
+class RunConfig(RunConfigProperties):
+    """
+    A configuration for running a task.
+
+    This includes everything needed to run a task, except the input. Running the same RunConfig with the same input should make identical calls to the model (output may vary as models are non-deterministic).
+
+    For example: task, model, provider, prompt, etc.
+    """
+
+    task: "Task" = Field(description="The task to run.")
+
+
+class TaskRunConfig(KilnParentedModel):
+    """
+    A Kiln model for persisting a run config in a Kiln Project, nested under a task.
+
+    Typically used to save a method of running a task for evaluation.
+
+    A run config includes everything needed to run a task, except the input. Running the same RunConfig with the same input should make identical calls to the model (output may vary as models are non-deterministic).
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None, description="The description of the task run config."
+    )
+    run_config_properties: RunConfigProperties = Field(
+        description="The run config properties to use for this task run."
+    )
+    # The prompt_id in the run_config_properties is the prompt ID to use for this task run.
+    # However, we want the prompt to be perfectly consistent, and some prompt_ids are dynamic.
+    # If we need to "freeze" a prompt, we can do so here (then point the prompt_id to this frozen prompt).
+    prompt: BasePrompt | None = Field(
+        default=None,
+        description="A prompt to use for this run config.",
+    )
+
+    # Workaround to return a typed parent without importing Task
+    def parent_task(self) -> Union["Task", None]:
+        if self.parent is None or self.parent.__class__.__name__ != "Task":
+            return None
+        return self.parent  # type: ignore
+
+    def run_config(self) -> RunConfig:
+        parent_task = self.parent_task()
+        if parent_task is None:
+            raise ValueError("Run config must be parented to a task")
+        return RunConfig(
+            task=parent_task,
+            model_name=self.run_config_properties.model_name,
+            model_provider_name=self.run_config_properties.model_provider_name,
+            prompt_id=self.run_config_properties.prompt_id,
+        )
+
+
+class Task(
+    KilnParentedModel,
+    KilnParentModel,
+    parent_of={
+        "runs": TaskRun,
+        "dataset_splits": DatasetSplit,
+        "finetunes": Finetune,
+        "prompts": Prompt,
+        "evals": Eval,
+        "run_configs": TaskRunConfig,
+    },
+):
+    """
+    Represents a specific task to be performed, with associated requirements and validation rules.
+
+    Contains the task definition, requirements, input/output schemas, and maintains
+    a collection of task runs.
+    """
+
+    name: str = NAME_FIELD
+    description: str | None = Field(
+        default=None,
+        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
+    )
+    instruction: str = Field(
+        min_length=1,
+        description="The instructions for the task. Will be used in prompts/training/validation.",
+    )
+    requirements: List[TaskRequirement] = Field(default=[])
+    output_json_schema: JsonObjectSchema | None = None
+    input_json_schema: JsonObjectSchema | None = None
+    thinking_instruction: str | None = Field(
+        default=None,
+        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
+    )
+
+    def output_schema(self) -> Dict | None:
+        if self.output_json_schema is None:
+            return None
+        return schema_from_json_str(self.output_json_schema)
+
+    def input_schema(self) -> Dict | None:
+        if self.input_json_schema is None:
+            return None
+        return schema_from_json_str(self.input_json_schema)
+
+    # These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
+    def runs(self, readonly: bool = False) -> list[TaskRun]:
+        return super().runs(readonly=readonly)  # type: ignore
+
+    def dataset_splits(self, readonly: bool = False) -> list[DatasetSplit]:
+        return super().dataset_splits(readonly=readonly)  # type: ignore
+
+    def finetunes(self, readonly: bool = False) -> list[Finetune]:
+        return super().finetunes(readonly=readonly)  # type: ignore
+
+    def prompts(self, readonly: bool = False) -> list[Prompt]:
+        return super().prompts(readonly=readonly)  # type: ignore
+
+    def evals(self, readonly: bool = False) -> list[Eval]:
+        return super().evals(readonly=readonly)  # type: ignore
+
+    def run_configs(self, readonly: bool = False) -> list[TaskRunConfig]:
+        return super().run_configs(readonly=readonly)  # type: ignore
+
+    # Workaround to return a typed parent without importing Project
+    def parent_project(self) -> Union["Project", None]:
+        if self.parent is None or self.parent.__class__.__name__ != "Project":
+            return None
+        return self.parent  # type: ignore
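Finally, a sketch of the new run-config flow end to end (names and IDs are hypothetical; parenting assumes the parent keyword that Kiln's parented models accept):

```python
from kiln_ai.datamodel.task import RunConfigProperties, Task, TaskRunConfig

task = Task(name="Summarize", instruction="Summarize the input text.")

run_config = TaskRunConfig(
    parent=task,  # assumes KilnParentedModel's parent kwarg
    name="Baseline",
    run_config_properties=RunConfigProperties(
        model_name="gpt_4o",  # hypothetical model/provider names
        model_provider_name="openai",
        prompt_id="simple_prompt_builder",  # a generator-style PromptId
    ),
)

# run_config() resolves the parent Task and returns a fully-typed RunConfig.
rc = run_config.run_config()
assert rc.task.name == "Summarize"
assert rc.prompt_id == "simple_prompt_builder"
```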