kiln-ai 0.8.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release.

Files changed (88)
  1. kiln_ai/adapters/__init__.py +7 -7
  2. kiln_ai/adapters/adapter_registry.py +81 -10
  3. kiln_ai/adapters/data_gen/data_gen_task.py +21 -3
  4. kiln_ai/adapters/data_gen/test_data_gen_task.py +23 -3
  5. kiln_ai/adapters/eval/base_eval.py +164 -0
  6. kiln_ai/adapters/eval/eval_runner.py +267 -0
  7. kiln_ai/adapters/eval/g_eval.py +367 -0
  8. kiln_ai/adapters/eval/registry.py +16 -0
  9. kiln_ai/adapters/eval/test_base_eval.py +324 -0
  10. kiln_ai/adapters/eval/test_eval_runner.py +640 -0
  11. kiln_ai/adapters/eval/test_g_eval.py +497 -0
  12. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +5 -1
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +310 -65
  15. kiln_ai/adapters/fine_tune/fireworks_finetune.py +47 -32
  16. kiln_ai/adapters/fine_tune/openai_finetune.py +12 -11
  17. kiln_ai/adapters/fine_tune/test_base_finetune.py +19 -0
  18. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +472 -129
  19. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +114 -22
  20. kiln_ai/adapters/fine_tune/test_openai_finetune.py +125 -14
  21. kiln_ai/adapters/ml_model_list.py +434 -93
  22. kiln_ai/adapters/model_adapters/__init__.py +18 -0
  23. kiln_ai/adapters/model_adapters/base_adapter.py +250 -0
  24. kiln_ai/adapters/model_adapters/langchain_adapters.py +309 -0
  25. kiln_ai/adapters/model_adapters/openai_compatible_config.py +10 -0
  26. kiln_ai/adapters/model_adapters/openai_model_adapter.py +289 -0
  27. kiln_ai/adapters/model_adapters/test_base_adapter.py +199 -0
  28. kiln_ai/adapters/{test_langchain_adapter.py → model_adapters/test_langchain_adapter.py} +105 -97
  29. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +216 -0
  30. kiln_ai/adapters/{test_saving_adapter_results.py → model_adapters/test_saving_adapter_results.py} +80 -30
  31. kiln_ai/adapters/{test_structured_output.py → model_adapters/test_structured_output.py} +125 -46
  32. kiln_ai/adapters/ollama_tools.py +0 -1
  33. kiln_ai/adapters/parsers/__init__.py +10 -0
  34. kiln_ai/adapters/parsers/base_parser.py +12 -0
  35. kiln_ai/adapters/parsers/json_parser.py +37 -0
  36. kiln_ai/adapters/parsers/parser_registry.py +19 -0
  37. kiln_ai/adapters/parsers/r1_parser.py +69 -0
  38. kiln_ai/adapters/parsers/test_json_parser.py +81 -0
  39. kiln_ai/adapters/parsers/test_parser_registry.py +32 -0
  40. kiln_ai/adapters/parsers/test_r1_parser.py +144 -0
  41. kiln_ai/adapters/prompt_builders.py +193 -49
  42. kiln_ai/adapters/provider_tools.py +91 -36
  43. kiln_ai/adapters/repair/repair_task.py +18 -19
  44. kiln_ai/adapters/repair/test_repair_task.py +7 -7
  45. kiln_ai/adapters/run_output.py +11 -0
  46. kiln_ai/adapters/test_adapter_registry.py +177 -0
  47. kiln_ai/adapters/test_generate_docs.py +69 -0
  48. kiln_ai/adapters/test_ollama_tools.py +0 -1
  49. kiln_ai/adapters/test_prompt_adaptors.py +25 -18
  50. kiln_ai/adapters/test_prompt_builders.py +265 -44
  51. kiln_ai/adapters/test_provider_tools.py +268 -46
  52. kiln_ai/datamodel/__init__.py +51 -772
  53. kiln_ai/datamodel/basemodel.py +31 -11
  54. kiln_ai/datamodel/datamodel_enums.py +58 -0
  55. kiln_ai/datamodel/dataset_filters.py +114 -0
  56. kiln_ai/datamodel/dataset_split.py +170 -0
  57. kiln_ai/datamodel/eval.py +298 -0
  58. kiln_ai/datamodel/finetune.py +105 -0
  59. kiln_ai/datamodel/json_schema.py +14 -3
  60. kiln_ai/datamodel/model_cache.py +8 -3
  61. kiln_ai/datamodel/project.py +23 -0
  62. kiln_ai/datamodel/prompt.py +37 -0
  63. kiln_ai/datamodel/prompt_id.py +83 -0
  64. kiln_ai/datamodel/strict_mode.py +24 -0
  65. kiln_ai/datamodel/task.py +181 -0
  66. kiln_ai/datamodel/task_output.py +321 -0
  67. kiln_ai/datamodel/task_run.py +164 -0
  68. kiln_ai/datamodel/test_basemodel.py +80 -2
  69. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  70. kiln_ai/datamodel/test_dataset_split.py +127 -6
  71. kiln_ai/datamodel/test_datasource.py +3 -2
  72. kiln_ai/datamodel/test_eval_model.py +635 -0
  73. kiln_ai/datamodel/test_example_models.py +34 -17
  74. kiln_ai/datamodel/test_json_schema.py +23 -0
  75. kiln_ai/datamodel/test_model_cache.py +24 -0
  76. kiln_ai/datamodel/test_model_perf.py +125 -0
  77. kiln_ai/datamodel/test_models.py +131 -2
  78. kiln_ai/datamodel/test_prompt_id.py +129 -0
  79. kiln_ai/datamodel/test_task.py +159 -0
  80. kiln_ai/utils/config.py +6 -1
  81. kiln_ai/utils/exhaustive_error.py +6 -0
  82. {kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +45 -7
  83. kiln_ai-0.12.0.dist-info/RECORD +100 -0
  84. kiln_ai/adapters/base_adapter.py +0 -191
  85. kiln_ai/adapters/langchain_adapters.py +0 -256
  86. kiln_ai-0.8.1.dist-info/RECORD +0 -58
  87. {kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
  88. {kiln_ai-0.8.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0

kiln_ai/datamodel/eval.py
@@ -0,0 +1,298 @@
+ import json
+ from enum import Enum
+ from typing import TYPE_CHECKING, Any, Dict, List, Union
+
+ from pydantic import BaseModel, Field, model_validator
+ from typing_extensions import Self
+
+ from kiln_ai.datamodel.basemodel import (
+     ID_TYPE,
+     NAME_FIELD,
+     KilnParentedModel,
+     KilnParentModel,
+ )
+ from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType
+ from kiln_ai.datamodel.dataset_filters import DatasetFilterId
+ from kiln_ai.datamodel.json_schema import string_to_json_key
+ from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+
+ if TYPE_CHECKING:
+     from kiln_ai.datamodel.task import Task
+
+ EvalScores = Dict[str, float]
+
+
+ class EvalTemplateId(str, Enum):
+     """
+     An eval template is a pre-defined eval that can be used as a starting point for a new eval.
+     """
+
+     kiln_requirements = "kiln_requirements"
+     toxicity = "toxicity"
+     bias = "bias"
+     maliciousness = "maliciousness"
+     factual_correctness = "factual_correctness"
+     jailbreak = "jailbreak"
+
+
+ class EvalConfigType(str, Enum):
+     g_eval = "g_eval"
+     llm_as_judge = "llm_as_judge"
+
+
+ class EvalOutputScore(BaseModel):
+     """
+     A definition of a score that an evaluator will produce.
+
+     Very similar to TaskRequirement, but conceptually different, so kept in a separate model.
+     """
+
+     name: str = Field(
+         description="The name of the score. Will be provided to the model so use a descriptive name. Should align to the model's TaskRequirement name if you want to use human evals to evaluate the evaluator's performance."
+     )
+     instruction: str | None = Field(
+         default=None,
+         description="A description of the score, used to help the model understand the goal of the score. Will be provided to evaluator models, so should be written for the model, not the team/user.",
+     )
+     type: TaskOutputRatingType = Field(
+         description="The type of rating to use ('five_star', 'pass_fail', 'pass_fail_critical')."
+     )
+
+     def json_key(self) -> str:
+         """
+         The JSON key for the score, used when running the evaluator with an LLM and we need JSON output.
+
+         For example, "Overall Rating" -> "overall_rating"
+         """
+         return string_to_json_key(self.name)
+
+     @model_validator(mode="after")
+     def validate_type(self) -> Self:
+         if self.type == TaskOutputRatingType.custom:
+             raise ValueError(
+                 f"Custom scores are not supported in evaluators. Score '{self.name}' was set to a custom score."
+             )
+         return self
+
+
+ class EvalRun(KilnParentedModel):
+     """
+     The results of running an eval on a single dataset item.
+
+     This is a child of an EvalConfig, which specifies how the scores were generated.
+
+     Eval runs can be one of 2 types:
+     1) eval_config_eval=False: we were evaluating a task run (a method of running the task). We get the task input from the dataset_id.input, run the task with the task_run_config, then run the evaluator on that output. task_run_config_id must be set. The output saved in this model is the output of the task run.
+     2) eval_config_eval=True: we were evaluating an eval config (a method of evaluating the task). We used the existing dataset item input/output, and ran the evaluator on it. task_run_config_id must be None. The input/output saved in this model is the input/output of the dataset item.
+     """
+
+     dataset_id: ID_TYPE = Field(
+         description="The ID of the dataset item that was used for this run. Must belong to the same Task as the grand-parent eval of this EvalRun."
+     )
+     task_run_config_id: ID_TYPE | None = Field(
+         description="The ID of the TaskRunConfig that was run, if this eval run was based on a task run. Must belong to the same Task as this eval. Can be None if this eval run is based on an eval config."
+     )
+     eval_config_eval: bool = Field(
+         description="Whether this eval run evaluates the parent eval config (evaluating the config using an existing dataset item). If true, task_run_config_id must be None, as we're not running the task.",
+         default=False,
+     )
+     # These two may duplicate the dataset_id.input/output, but we're denormalizing intentionally.
+     input: str = Field(
+         description="The input to the task. JSON formatted for structured input, plaintext for unstructured input."
+     )
+     output: str = Field(
+         description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
+     )
+     intermediate_outputs: Dict[str, str] | None = Field(
+         default=None,
+         description="The intermediate outputs of the task (for example, eval thinking).",
+     )
+     scores: EvalScores = Field(
+         description="The output scores of the evaluator (aligning to those required by the grand-parent Eval this object is a child of)."
+     )
+
+     def parent_eval_config(self) -> Union["EvalConfig", None]:
+         if self.parent is not None and self.parent.__class__.__name__ != "EvalConfig":
+             raise ValueError("parent must be an EvalConfig")
+         return self.parent  # type: ignore
+
+     @model_validator(mode="after")
+     def validate_eval_run_types(self) -> Self:
+         if self.eval_config_eval and self.task_run_config_id is not None:
+             raise ValueError(
+                 "task_run_config_id must be None if eval_config_eval is true"
+             )
+         if not self.eval_config_eval and self.task_run_config_id is None:
+             raise ValueError(
+                 "task_run_config_id must be set if eval_config_eval is false"
+             )
+         return self
+
+     @model_validator(mode="after")
+     def validate_scores(self) -> Self:
+         # We're checking the scores have the expected keys from the grand-parent eval
+         if self.scores is None or len(self.scores) == 0:
+             raise ValueError("scores are required, and must have at least one score.")
+
+         parent_eval_config = self.parent_eval_config()
+         eval = parent_eval_config.parent_eval() if parent_eval_config else None
+         if not eval:
+             # Can't validate without the grand-parent eval, allow it to be validated later
+             return self
+
+         output_score_keys = [score.json_key() for score in eval.output_scores]
+         if set(output_score_keys) != set(self.scores.keys()):
+             raise ValueError(
+                 f"The scores produced by the evaluator must match the scores expected by the eval. Got: [{', '.join(self.scores.keys())}] and expected: [{', '.join(output_score_keys)}]"
+             )
+
+         # Check that each score is expected in this eval and the correct type
+         for output_score in eval.output_scores:
+             match output_score.type:
+                 case TaskOutputRatingType.five_star:
+                     five_star_score = self.scores[output_score.json_key()]
+                     if (
+                         not isinstance(five_star_score, float)
+                         or five_star_score < 1.0
+                         or five_star_score > 5.0
+                     ):
+                         raise ValueError(
+                             f"Score {output_score.name} is a five_star rating and must be a float between 1.0 and 5.0 inclusive. Got: {five_star_score}"
+                         )
+                 case TaskOutputRatingType.pass_fail:
+                     pass_fail_score = self.scores[output_score.json_key()]
+                     if (
+                         not isinstance(pass_fail_score, float)
+                         or pass_fail_score < 0.0
+                         or pass_fail_score > 1.0
+                     ):
+                         raise ValueError(
+                             f"Score {output_score.name} is a pass_fail rating and must be a float between 0.0 and 1.0 inclusive. Got: {pass_fail_score}"
+                         )
+                 case TaskOutputRatingType.pass_fail_critical:
+                     pass_fail_critical_score = self.scores[output_score.json_key()]
+                     if (
+                         not isinstance(pass_fail_critical_score, float)
+                         or pass_fail_critical_score < -1.0
+                         or pass_fail_critical_score > 1.0
+                     ):
+                         raise ValueError(
+                             f"Score {output_score.name} is a pass_fail_critical rating and must be a float between -1.0 and 1.0 inclusive. Got: {pass_fail_critical_score}"
+                         )
+                 case TaskOutputRatingType.custom:
+                     raise ValueError(
+                         f"Custom scores are not supported in evaluators. '{output_score.name}' was set to a custom score."
+                     )
+                 case _:
+                     # Catch missing cases
+                     raise_exhaustive_enum_error(output_score.type)
+         return self
+
+
+ class EvalConfig(KilnParentedModel, KilnParentModel, parent_of={"runs": EvalRun}):
+     """
+     A configuration for running an eval. This includes anything needed to run the eval on a dataset, like the prompt, model, thresholds, etc.
+
+     An eval might have many configs, for example running the same eval with 2 different models. Comparing eval results is only valid within the scope of the same config.
+     """
+
+     name: str = NAME_FIELD
+     model_name: str = Field(
+         description="The name of the model to use for this eval config.",
+     )
+     model_provider: str = Field(
+         description="The provider of the model to use for this eval config.",
+     )
+     config_type: EvalConfigType = Field(
+         default=EvalConfigType.g_eval,
+         description="This is used to determine the type of eval to run.",
+     )
+     properties: dict[str, Any] = Field(
+         default={},
+         description="Properties to be used to execute the eval config. This is config_type specific and should serialize to a json dict.",
+     )
+
+     def parent_eval(self) -> Union["Eval", None]:
+         if self.parent is not None and self.parent.__class__.__name__ != "Eval":
+             raise ValueError("parent must be an Eval")
+         return self.parent  # type: ignore
+
+     def runs(self, readonly: bool = False) -> list[EvalRun]:
+         return super().runs(readonly=readonly)  # type: ignore
+
+     @model_validator(mode="after")
+     def validate_properties(self) -> Self:
+         if (
+             self.config_type == EvalConfigType.g_eval
+             or self.config_type == EvalConfigType.llm_as_judge
+         ):
+             if "eval_steps" not in self.properties or not isinstance(
+                 self.properties["eval_steps"], list
+             ):
+                 raise ValueError("eval_steps is required and must be a list for g_eval")
+             if "task_description" in self.properties and not isinstance(
+                 self.properties["task_description"], str
+             ):
+                 raise ValueError(
+                     "task_description is optional, but if provided must be a string"
+                 )
+             return self
+         else:
+             raise ValueError(f"Invalid eval config type: {self.config_type}")
+
+     @model_validator(mode="after")
+     def validate_json_serializable(self) -> "EvalConfig":
+         try:
+             # This will raise a TypeError if the dict contains non-JSON-serializable objects
+             json.dumps(self.properties)
+         except TypeError as e:
+             raise ValueError(f"Properties must be JSON serializable: {str(e)}")
+         return self
+
+
+ class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}):
+     name: str = NAME_FIELD
+     description: str | None = Field(
+         default=None, description="The description of the eval"
+     )
+     template: EvalTemplateId | None = Field(
+         default=None,
+         description="The template selected when creating this eval. Useful for suggesting eval steps and output scores.",
+     )
+     current_config_id: ID_TYPE = Field(
+         default=None,
+         description="The id of the current config to use for this eval. This can be changed over time to run the same eval with different configs.",
+     )
+     eval_set_filter_id: DatasetFilterId = Field(
+         description="The id of the dataset filter which defines which dataset items are included when running this eval. Should be mutually exclusive with eval_configs_filter_id."
+     )
+     eval_configs_filter_id: DatasetFilterId = Field(
+         description="The id of the dataset filter which defines which dataset items are included when comparing the quality of the eval configs under this eval. Should consist of dataset items with ratings. Should be mutually exclusive with eval_set_filter_id."
+     )
+     output_scores: List[EvalOutputScore] = Field(
+         description="The scores this evaluator should produce."
+     )
+
+     # Workaround to return typed parent without importing Task
+     def parent_task(self) -> Union["Task", None]:
+         if self.parent is not None and self.parent.__class__.__name__ != "Task":
+             raise ValueError("parent must be a Task")
+         return self.parent  # type: ignore
+
+     def configs(self, readonly: bool = False) -> list[EvalConfig]:
+         return super().configs(readonly=readonly)  # type: ignore
+
+     @model_validator(mode="after")
+     def validate_scores(self) -> Self:
+         if self.output_scores is None or len(self.output_scores) == 0:
+             raise ValueError(
+                 "output_scores are required, and must have at least one score."
+             )
+
+         # check for duplicate names (once transformed to JSON keys)
+         output_score_keys = [score.json_key() for score in self.output_scores]
+         if len(output_score_keys) != len(set(output_score_keys)):
+             raise ValueError(
+                 f"output_scores must have unique names (once transformed to JSON keys). Got: [{', '.join(output_score_keys)}]"
+             )
+         return self
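
For reference, a minimal usage sketch of the new score model (illustrative only, not part of the package):

    from pydantic import ValidationError

    from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType
    from kiln_ai.datamodel.eval import EvalOutputScore

    # Score names are normalized to JSON keys when the evaluator is asked for JSON output
    score = EvalOutputScore(name="Overall Rating", type=TaskOutputRatingType.five_star)
    print(score.json_key())  # overall_rating

    # The validate_type validator rejects custom rating types
    try:
        EvalOutputScore(name="My Metric", type=TaskOutputRatingType.custom)
    except ValidationError as e:
        print(e)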

kiln_ai/datamodel/finetune.py
@@ -0,0 +1,105 @@
+ from typing import TYPE_CHECKING, Dict, Union
+
+ from pydantic import Field, model_validator
+ from typing_extensions import Self
+
+ from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+ from kiln_ai.datamodel.datamodel_enums import (
+     FinetuneDataStrategy,
+     FineTuneStatusType,
+     StructuredOutputMode,
+ )
+
+ if TYPE_CHECKING:
+     from kiln_ai.datamodel.task import Task
+
+
+ class Finetune(KilnParentedModel):
+     """
+     The Kiln fine-tune datamodel.
+
+     Initially holds a reference to a training job, with the identifiers needed to update its status. When complete, contains the new model ID.
+     """
+
+     name: str = NAME_FIELD
+     description: str | None = Field(
+         default=None,
+         description="A description of the fine-tune for you and your team. Not used in training.",
+     )
+     structured_output_mode: StructuredOutputMode | None = Field(
+         default=None,
+         description="The mode to use to train the model for structured output, if it was trained with structured output. Will determine how we call the tuned model, so we call with the matching mode.",
+     )
+     provider: str = Field(
+         description="The provider to use for the fine-tune (e.g. 'openai')."
+     )
+     base_model_id: str = Field(
+         description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
+     )
+     provider_id: str | None = Field(
+         default=None,
+         description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
+     )
+     fine_tune_model_id: str | None = Field(
+         default=None,
+         description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
+     )
+     dataset_split_id: str = Field(
+         description="The ID of the dataset split to use for this fine-tune.",
+     )
+     train_split_name: str = Field(
+         default="train",
+         description="The name of the training split to use for this fine-tune.",
+     )
+     validation_split_name: str | None = Field(
+         default=None,
+         description="The name of the validation split to use for this fine-tune. Optional.",
+     )
+     parameters: dict[str, str | int | float | bool] = Field(
+         default={},
+         description="The parameters to use for this fine-tune. These are provider-specific.",
+     )
+     # These two fields are saved exactly as used for training. Even if they map exactly to a custom prompt or generator, those can change, so we want to keep a record of the training prompt.
+     system_message: str = Field(
+         description="The system message to use for this fine-tune.",
+     )
+     thinking_instructions: str | None = Field(
+         default=None,
+         description="The thinking instructions to use for this fine-tune. Only used when data_strategy is final_and_intermediate.",
+     )
+     latest_status: FineTuneStatusType = Field(
+         default=FineTuneStatusType.unknown,
+         description="The latest known status of this fine-tune. Not updated in real time.",
+     )
+     properties: Dict[str, str | int | float] = Field(
+         default={},
+         description="Properties of the fine-tune. Different providers may use different properties.",
+     )
+     data_strategy: FinetuneDataStrategy = Field(
+         default=FinetuneDataStrategy.final_only,
+         description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
+     )
+
+     # Workaround to return typed parent without importing Task
+     def parent_task(self) -> Union["Task", None]:
+         if self.parent is None or self.parent.__class__.__name__ != "Task":
+             return None
+         return self.parent  # type: ignore
+
+     @model_validator(mode="after")
+     def validate_thinking_instructions(self) -> Self:
+         if (
+             self.thinking_instructions is not None
+             and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
+         ):
+             raise ValueError(
+                 "Thinking instructions can only be used when data_strategy is final_and_intermediate"
+             )
+         if (
+             self.thinking_instructions is None
+             and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
+         ):
+             raise ValueError(
+                 "Thinking instructions are required when data_strategy is final_and_intermediate"
+             )
+         return self
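
A sketch of the new thinking_instructions rule (illustrative field values, not from the package):

    from kiln_ai.datamodel.datamodel_enums import FinetuneDataStrategy
    from kiln_ai.datamodel.finetune import Finetune

    # Training on intermediate outputs requires thinking instructions
    tune = Finetune(
        name="Support Assistant Tune",  # illustrative
        provider="openai",
        base_model_id="gpt-4o-mini",  # provider-side id, illustrative
        dataset_split_id="split_123",  # illustrative
        system_message="You are a helpful assistant.",
        data_strategy=FinetuneDataStrategy.final_and_intermediate,
        thinking_instructions="Think step by step before answering.",
    )

    # Omitting thinking_instructions with final_and_intermediate (or supplying them with
    # final_only) fails validate_thinking_instructions and raises a pydantic ValidationError.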

kiln_ai/datamodel/json_schema.py
@@ -1,4 +1,5 @@
  import json
+ import re
  from typing import Annotated, Dict

  import jsonschema
@@ -42,9 +43,14 @@ def validate_schema(instance: Dict, schema_str: str) -> None:
          jsonschema.exceptions.ValidationError: If validation fails
          ValueError: If the schema is invalid
      """
-     schema = schema_from_json_str(schema_str)
-     v = jsonschema.Draft202012Validator(schema)
-     return v.validate(instance)
+     try:
+         schema = schema_from_json_str(schema_str)
+         v = jsonschema.Draft202012Validator(schema)
+         v.validate(instance)
+     except jsonschema.exceptions.ValidationError as e:
+         raise ValueError(
+             f"This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema. Search 'Troubleshooting Structured Data Issues' in our docs for more information. The error from the schema check was: {e.message}"
+         ) from e


  def schema_from_json_str(v: str) -> Dict:
@@ -78,3 +84,8 @@ def schema_from_json_str(v: str) -> Dict:
          raise ValueError(f"Invalid JSON: {v}\n {e}")
      except Exception as e:
          raise ValueError(f"Unexpected error parsing JSON schema: {v}\n {e}")
+
+
+ def string_to_json_key(s: str) -> str:
+     """Convert a string to a valid JSON key."""
+     return re.sub(r"[^a-z0-9_]", "", s.strip().lower().replace(" ", "_"))
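
A short sketch of the two behaviors added here (illustrative, not part of the package):

    from kiln_ai.datamodel.json_schema import string_to_json_key, validate_schema

    # New helper: normalize a display name into a JSON key
    print(string_to_json_key("Overall Rating"))  # overall_rating

    # validate_schema now wraps jsonschema's ValidationError in a ValueError with a friendlier message
    schema = '{"type": "object", "properties": {"count": {"type": "integer"}}, "required": ["count"]}'
    validate_schema({"count": 3}, schema)  # passes silently
    try:
        validate_schema({"count": "three"}, schema)  # valid JSON, but doesn't match the schema
    except ValueError as e:
        print(e)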

kiln_ai/datamodel/model_cache.py
@@ -62,12 +62,17 @@ class ModelCache:
              raise ValueError(f"Model at {path} is not of type {model_type.__name__}")
          return model

-     def get_model(self, path: Path, model_type: Type[T]) -> Optional[T]:
-         # We return a copy so in-memory edits don't impact the cache until they are saved
+     def get_model(
+         self, path: Path, model_type: Type[T], readonly: bool = False
+     ) -> Optional[T]:
+         # We return a copy by default, so in-memory edits don't impact the cache until they are saved
          # Benchmark shows about 2x slower, but much more foolproof
          model = self._get_model(path, model_type)
          if model:
-             return model.model_copy(deep=True)
+             if readonly:
+                 return model
+             else:
+                 return model.model_copy(deep=True)
          return None

      def get_model_id(self, path: Path, model_type: Type[T]) -> Optional[str]:
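
A sketch of the new readonly flag. It assumes ModelCache() can be constructed without arguments and that a Task has been saved at the given path; neither is shown in this hunk:

    from pathlib import Path

    from kiln_ai.datamodel.model_cache import ModelCache
    from kiln_ai.datamodel.task import Task

    cache = ModelCache()  # assumed no-arg constructor
    task_path = Path("projects/example/task.kiln")  # hypothetical path to a saved Task

    # Default: a deep copy, so in-memory edits don't leak back into the cache
    editable = cache.get_model(task_path, Task)

    # New: readonly=True skips the roughly 2x-slower deep copy and returns the cached
    # instance directly; callers must treat it as read-only.
    shared = cache.get_model(task_path, Task, readonly=True)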

kiln_ai/datamodel/project.py
@@ -0,0 +1,23 @@
+ from pydantic import Field
+
+ from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentModel
+ from kiln_ai.datamodel.task import Task
+
+
+ class Project(KilnParentModel, parent_of={"tasks": Task}):
+     """
+     A collection of related tasks.
+
+     Projects organize tasks into logical groups and provide high-level descriptions
+     of the overall goals.
+     """
+
+     name: str = NAME_FIELD
+     description: str | None = Field(
+         default=None,
+         description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
+     )
+
+     # Needed for typechecking. TODO P2: fix this in KilnParentModel
+     def tasks(self) -> list[Task]:
+         return super().tasks()  # type: ignore
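
A minimal construction sketch (illustrative values, not part of the package):

    from kiln_ai.datamodel.project import Project

    # A Project is a KilnParentModel; its child tasks are reached via the typed tasks() helper above.
    project = Project(
        name="Customer Support",  # illustrative
        description="Tasks for the customer support assistant.",
    )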

kiln_ai/datamodel/prompt.py
@@ -0,0 +1,37 @@
+ from pydantic import BaseModel, Field
+
+ from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+
+
+ class BasePrompt(BaseModel):
+     """
+     A prompt for a task. This is the basic data storage format which can be used throughout a project.
+
+     The "Prompt" model name is reserved for the custom prompts parented by a task.
+     """
+
+     name: str = NAME_FIELD
+     description: str | None = Field(
+         default=None,
+         description="A more detailed description of the prompt.",
+     )
+     generator_id: str | None = Field(
+         default=None,
+         description="The id of the generator that created this prompt.",
+     )
+     prompt: str = Field(
+         description="The prompt for the task.",
+         min_length=1,
+     )
+     chain_of_thought_instructions: str | None = Field(
+         default=None,
+         description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting. COT will not be used unless this is provided.",
+     )
+
+
+ class Prompt(KilnParentedModel, BasePrompt):
+     """
+     A prompt for a task. This is the custom prompt parented by a task.
+     """
+
+     pass
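
A minimal construction sketch (illustrative values, not part of the package):

    from kiln_ai.datamodel.prompt import BasePrompt

    prompt = BasePrompt(
        name="Concise Answer",  # illustrative
        prompt="Answer the user's question in two sentences or fewer.",
        # Optional: providing this enables chain-of-thought style prompting
        chain_of_thought_instructions="Think through the question before answering.",
    )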

kiln_ai/datamodel/prompt_id.py
@@ -0,0 +1,83 @@
+ from enum import Enum
+ from typing import Annotated
+
+ from pydantic import AfterValidator
+
+
+ # Generators that can take any task and build a prompt
+ class PromptGenerators(str, Enum):
+     SIMPLE = "simple_prompt_builder"
+     MULTI_SHOT = "multi_shot_prompt_builder"
+     FEW_SHOT = "few_shot_prompt_builder"
+     REPAIRS = "repairs_prompt_builder"
+     SIMPLE_CHAIN_OF_THOUGHT = "simple_chain_of_thought_prompt_builder"
+     FEW_SHOT_CHAIN_OF_THOUGHT = "few_shot_chain_of_thought_prompt_builder"
+     MULTI_SHOT_CHAIN_OF_THOUGHT = "multi_shot_chain_of_thought_prompt_builder"
+
+
+ prompt_generator_values = [pg.value for pg in PromptGenerators]
+
+
+ PromptId = Annotated[
+     str,
+     AfterValidator(lambda v: _check_prompt_id(v)),
+ ]
+ """
+ A pydantic type that validates strings containing a valid prompt ID.
+
+ Prompt IDs can be one of:
+ - A saved prompt ID
+ - A fine-tune prompt ID
+ - A task run config ID
+ - A prompt generator name
+ """
+
+
+ def _check_prompt_id(id: str) -> str:
+     """
+     Check that the prompt ID is valid.
+     """
+     if id in prompt_generator_values:
+         return id
+
+     if id.startswith("id::"):
+         # check it has 2 parts divided by :: -- 'id::prompt_id'
+         parts = id.split("::")
+         if len(parts) != 2 or len(parts[1]) == 0:
+             raise ValueError(
+                 f"Invalid saved prompt ID: {id}. Expected format: 'id::[prompt_id]'."
+             )
+         return id
+
+     if id.startswith("task_run_config::"):
+         # check it has a task_run_config_id after the :: -- 'task_run_config::project_id::task_id::task_run_config_id'
+         parts = id.split("::")
+         if len(parts) != 4:
+             raise ValueError(
+                 f"Invalid task run config prompt ID: {id}. Expected format: 'task_run_config::[project_id]::[task_id]::[task_run_config_id]'."
+             )
+         return id
+
+     if id.startswith("fine_tune_prompt::"):
+         # check it has a fine_tune_id after the :: -- 'fine_tune_prompt::fine_tune_id'
+         fine_tune_id = id[18:]
+         if len(fine_tune_id) == 0:
+             raise ValueError(
+                 f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[fine_tune_id]'."
+             )
+         return id
+
+     raise ValueError(f"Invalid prompt ID: {id}")
+
+
+ def is_frozen_prompt(id: PromptId) -> bool:
+     """
+     Check if the prompt ID is a frozen prompt.
+     """
+     if id.startswith("id::"):
+         return True
+     if id.startswith("task_run_config::"):
+         return True
+     if id.startswith("fine_tune_prompt::"):
+         return True
+     return False
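
A sketch of the accepted ID formats (illustrative IDs, not part of the package):

    from kiln_ai.datamodel.prompt_id import _check_prompt_id, is_frozen_prompt

    # Generator names, saved prompts, fine-tune prompts and task run configs all validate
    _check_prompt_id("simple_prompt_builder")
    _check_prompt_id("id::prompt_123")  # illustrative saved prompt ID
    _check_prompt_id("fine_tune_prompt::ft_123")  # illustrative fine-tune ID
    _check_prompt_id("task_run_config::proj_1::task_1::config_1")  # illustrative IDs

    # Only the non-generator forms are treated as frozen prompts
    print(is_frozen_prompt("simple_prompt_builder"))  # False
    print(is_frozen_prompt("id::prompt_123"))  # True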

kiln_ai/datamodel/strict_mode.py
@@ -0,0 +1,24 @@
+ """
+ Strict mode is a feature that enables extra validations that we want to enforce in Kiln App, ensuring everything follows the ideal schema.
+
+ It's off by default when used through the library. Enable it by calling `set_strict_mode(True)`.
+ """
+
+ # We want to be hard on ourselves for data completeness generated by the Kiln App, but don't want to make it hard for users to use the datamodel/library.
+ # Strict mode enables extra validations that we want to enforce in Kiln App (and any other client that wants best practices), but not in the library (unless they opt in)
+ _strict_mode: bool = False
+
+
+ def strict_mode() -> bool:
+     """
+     Get the current strict mode setting.
+     """
+     return _strict_mode
+
+
+ def set_strict_mode(value: bool) -> None:
+     """
+     Set the strict mode setting.
+     """
+     global _strict_mode
+     _strict_mode = value
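
A usage sketch (not part of the package):

    from kiln_ai.datamodel.strict_mode import set_strict_mode, strict_mode

    print(strict_mode())  # False: strict mode is off by default for library users
    set_strict_mode(True)  # opt in to the extra validations the Kiln App enforces
    print(strict_mode())  # True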