kiln-ai 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +4 -0
- kiln_ai/adapters/adapter_registry.py +153 -28
- kiln_ai/adapters/eval/__init__.py +28 -0
- kiln_ai/adapters/eval/eval_runner.py +4 -1
- kiln_ai/adapters/eval/g_eval.py +2 -1
- kiln_ai/adapters/eval/test_base_eval.py +1 -0
- kiln_ai/adapters/eval/test_eval_runner.py +1 -0
- kiln_ai/adapters/eval/test_g_eval.py +1 -0
- kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
- kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
- kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
- kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
- kiln_ai/adapters/ml_model_list.py +638 -155
- kiln_ai/adapters/model_adapters/__init__.py +2 -4
- kiln_ai/adapters/model_adapters/base_adapter.py +14 -11
- kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
- kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
- kiln_ai/adapters/model_adapters/test_structured_output.py +23 -5
- kiln_ai/adapters/ollama_tools.py +3 -2
- kiln_ai/adapters/parsers/r1_parser.py +19 -14
- kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
- kiln_ai/adapters/provider_tools.py +50 -58
- kiln_ai/adapters/repair/test_repair_task.py +3 -3
- kiln_ai/adapters/run_output.py +1 -1
- kiln_ai/adapters/test_adapter_registry.py +17 -20
- kiln_ai/adapters/test_generate_docs.py +2 -2
- kiln_ai/adapters/test_prompt_adaptors.py +30 -19
- kiln_ai/adapters/test_provider_tools.py +26 -81
- kiln_ai/datamodel/basemodel.py +2 -0
- kiln_ai/datamodel/datamodel_enums.py +2 -0
- kiln_ai/datamodel/json_schema.py +1 -1
- kiln_ai/datamodel/task_output.py +13 -6
- kiln_ai/datamodel/test_basemodel.py +9 -0
- kiln_ai/datamodel/test_datasource.py +19 -0
- kiln_ai/utils/config.py +37 -0
- kiln_ai/utils/dataset_import.py +232 -0
- kiln_ai/utils/test_dataset_import.py +596 -0
- {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +51 -7
- {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/RECORD +42 -39
- kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -309
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -10
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -289
- kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -343
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -216
- {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/json_schema.py
CHANGED

@@ -49,7 +49,7 @@ def validate_schema(instance: Dict, schema_str: str) -> None:
        v.validate(instance)
    except jsonschema.exceptions.ValidationError as e:
        raise ValueError(
-            f"This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema. Search 'Troubleshooting Structured Data Issues' in our docs for more information. The error from the schema check was: {e.message}"
+            f"This task requires a specific output schema. While the model produced JSON, that JSON didn't meet the schema. Search 'Troubleshooting Structured Data Issues' in our docs for more information. The error from the schema check was: {e.message}. The JSON was: \n```json\n{instance}\n```"
        ) from e

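The only functional change above is that the schema-mismatch error now embeds the offending JSON. A minimal sketch of what a caller would see (the schema and values below are made up for illustration):

import json

from kiln_ai.datamodel.json_schema import validate_schema

# A toy schema: output must be an object with an integer "rating".
schema = json.dumps(
    {
        "type": "object",
        "properties": {"rating": {"type": "integer"}},
        "required": ["rating"],
    }
)

try:
    # Valid JSON, but "rating" is a string rather than the required integer.
    validate_schema({"rating": "five"}, schema)
except ValueError as e:
    # In 0.13.0 the message now ends with the offending JSON, roughly:
    # "... The error from the schema check was: 'five' is not of type 'integer'.
    #  The JSON was: ```json {'rating': 'five'} ```"
    print(e)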
kiln_ai/datamodel/task_output.py
CHANGED

@@ -171,6 +171,7 @@ class DataSourceType(str, Enum):

    human = "human"
    synthetic = "synthetic"
+    file_import = "file_import"


class DataSourceProperty(BaseModel):

@@ -206,37 +207,43 @@ class DataSource(BaseModel):
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
-            not_allowed_for=[DataSourceType.synthetic],
+            not_allowed_for=[DataSourceType.synthetic, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
-            not_allowed_for=[DataSourceType.human],
+            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
-            not_allowed_for=[DataSourceType.human],
+            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
-            not_allowed_for=[DataSourceType.human],
+            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            # Legacy field -- allow loading from old runs, but we shouldn't be setting it.
            name="prompt_builder_name",
            type=str,
-            not_allowed_for=[DataSourceType.human],
+            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
        ),
        DataSourceProperty(
            # The PromptId of the prompt. Can be a saved prompt, fine-tune, generator name, etc. See PromptId type for more details.
            name="prompt_id",
            type=str,
-            not_allowed_for=[DataSourceType.human],
+            not_allowed_for=[DataSourceType.human, DataSourceType.file_import],
+        ),
+        DataSourceProperty(
+            name="file_name",
+            type=str,
+            required_for=[DataSourceType.file_import],
+            not_allowed_for=[DataSourceType.human, DataSourceType.synthetic],
        ),
    ]

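Together with the new file_import enum value, these property rules mean a file-import data source carries only a file_name, and model-related properties are rejected. A small sketch (file names are placeholders):

from pydantic import ValidationError

from kiln_ai.datamodel import DataSource, DataSourceType

# Valid: file_import sources only require "file_name".
source = DataSource(
    type=DataSourceType.file_import,
    properties={"file_name": "my_dataset.csv"},
)

# Invalid: model_name is not allowed for file_import sources, so this raises.
try:
    DataSource(
        type=DataSourceType.file_import,
        properties={"model_name": "GPT-4"},
    )
except ValidationError as e:
    print(e)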
@@ -510,6 +510,7 @@ async def test_invoke_parsing_flow(adapter):
    # Mock dependencies
    mock_provider = MagicMock()
    mock_provider.parser = "test_parser"
+    mock_provider.reasoning_capable = False

    mock_parser = MagicMock()
    mock_parser.parse_output.return_value = RunOutput(

@@ -547,3 +548,11 @@ async def test_invoke_parsing_flow(adapter):
    assert result.output.output == "parsed test output"
    assert result.intermediate_outputs == {"key": "value"}
    assert result.input == "test input"
+
+    # Test with reasoning required, that we error if no reasoning is returned
+    mock_provider.reasoning_capable = True
+    with pytest.raises(
+        RuntimeError,
+        match="Reasoning is required for this model, but no reasoning was returned.",
+    ):
+        await adapter.invoke("test input")

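The new assertions encode a behavior change, not just test plumbing: when a provider is marked reasoning_capable but the response contains no reasoning, invoke now fails with a RuntimeError. A hypothetical guard around an already-constructed adapter (adapter setup elided):

async def invoke_with_reasoning_guard(adapter, prompt: str):
    # "adapter" stands in for any Kiln model adapter built for a reasoning-capable model.
    try:
        return await adapter.invoke(prompt)
    except RuntimeError as e:
        # Message per the test above:
        # "Reasoning is required for this model, but no reasoning was returned."
        print(f"Run failed: {e}")
        return None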
kiln_ai/datamodel/test_datasource.py
CHANGED

@@ -29,11 +29,30 @@ def test_valid_synthetic_data_source():
    assert data_source.properties["adapter_name"] == "langchain"


+def test_valid_file_import_data_source():
+    data_source = DataSource(
+        type=DataSourceType.file_import,
+        properties={"file_name": "test.txt"},
+    )
+    assert data_source.type == DataSourceType.file_import
+    assert data_source.properties["file_name"] == "test.txt"
+
+
def test_missing_required_property():
    with pytest.raises(ValidationError, match="'created_by' is required for"):
        DataSource(type=DataSourceType.human)


+def test_missing_required_property_file_import():
+    with pytest.raises(ValidationError, match="'file_name' is required for"):
+        DataSource(type=DataSourceType.file_import)
+
+
+def test_not_allowed_property_file_import():
+    with pytest.raises(ValidationError, match="'model_name' is not allowed for"):
+        DataSource(type=DataSourceType.file_import, properties={"model_name": "GPT-4"})
+
+
def test_wrong_property_type():
    with pytest.raises(
        ValidationError,

kiln_ai/utils/config.py
CHANGED

@@ -78,10 +78,47 @@ class Config:
                str,
                env_var="FIREWORKS_ACCOUNT_ID",
            ),
+            "anthropic_api_key": ConfigProperty(
+                str,
+                env_var="ANTHROPIC_API_KEY",
+                sensitive=True,
+            ),
+            "gemini_api_key": ConfigProperty(
+                str,
+                env_var="GEMINI_API_KEY",
+                sensitive=True,
+            ),
            "projects": ConfigProperty(
                list,
                default_lambda=lambda: [],
            ),
+            "azure_openai_api_key": ConfigProperty(
+                str,
+                env_var="AZURE_OPENAI_API_KEY",
+                sensitive=True,
+            ),
+            "azure_openai_endpoint": ConfigProperty(
+                str,
+                env_var="AZURE_OPENAI_ENDPOINT",
+            ),
+            "huggingface_api_key": ConfigProperty(
+                str,
+                env_var="HUGGINGFACE_API_KEY",
+                sensitive=True,
+            ),
+            "vertex_project_id": ConfigProperty(
+                str,
+                env_var="VERTEX_PROJECT_ID",
+            ),
+            "vertex_location": ConfigProperty(
+                str,
+                env_var="VERTEX_LOCATION",
+            ),
+            "together_api_key": ConfigProperty(
+                str,
+                env_var="TOGETHERAI_API_KEY",
+                sensitive=True,
+            ),
            "custom_models": ConfigProperty(
                list,
                default_lambda=lambda: [],

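Each new entry maps a config key to an environment variable (and marks API keys as sensitive), so these credentials can presumably also be supplied via the environment rather than the Kiln config file. The variable names below come straight from the diff; the values are placeholders:

import os

# Placeholder values -- set real credentials outside of source control.
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-placeholder"
os.environ["GEMINI_API_KEY"] = "your-gemini-key"
os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-openai-key"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://example-resource.openai.azure.com"
os.environ["HUGGINGFACE_API_KEY"] = "hf_placeholder"
os.environ["VERTEX_PROJECT_ID"] = "example-gcp-project"
os.environ["VERTEX_LOCATION"] = "us-central1"
os.environ["TOGETHERAI_API_KEY"] = "your-together-key"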
kiln_ai/utils/dataset_import.py
ADDED

@@ -0,0 +1,232 @@
+import csv
+import logging
+import time
+from dataclasses import dataclass
+from enum import Enum
+from typing import Dict, Protocol
+
+from pydantic import BaseModel, Field, ValidationError, field_validator
+
+from kiln_ai.datamodel import DataSource, DataSourceType, Task, TaskOutput, TaskRun
+
+logger = logging.getLogger(__name__)
+
+
+class DatasetImportFormat(str, Enum):
+    """
+    The format of the dataset to import.
+    """
+
+    CSV = "csv"
+
+
+class Importer(Protocol):
+    """Protocol for dataset importers"""
+
+    def __call__(
+        self,
+        task: Task,
+        dataset_path: str,
+        dataset_name: str,
+    ) -> int: ...
+
+
+class CSVRowSchema(BaseModel):
+    """Schema for validating rows in a CSV file."""
+
+    input: str = Field(description="The input to the model")
+    output: str = Field(description="The output of the model")
+    reasoning: str | None = Field(
+        description="The reasoning of the model (optional)",
+        default=None,
+    )
+    chain_of_thought: str | None = Field(
+        description="The chain of thought of the model (optional)",
+        default=None,
+    )
+    tags: list[str] = Field(
+        default_factory=list,
+        description="The tags of the run (optional)",
+    )
+
+
+def generate_import_tags(session_id: str) -> list[str]:
+    return [
+        "imported",
+        f"imported_{session_id}",
+    ]
+
+
+class KilnInvalidImportFormat(Exception):
+    """Raised when the import format is invalid"""
+
+    def __init__(self, message: str, row_number: int | None = None):
+        self.row_number = row_number
+        if row_number is not None:
+            message = f"Error in row {row_number}: {message}"
+        super().__init__(message)
+
+
+def format_validation_error(e: ValidationError) -> str:
+    """Convert a Pydantic validation error into a human-readable message."""
+    error_messages = []
+    for error in e.errors():
+        location = " -> ".join(str(loc) for loc in error["loc"])
+        message = error["msg"]
+        error_messages.append(f"- {location}: {message}")
+
+    return "Validation failed:\n" + "\n".join(error_messages)
+
+
+def deserialize_tags(tags_serialized: str | None) -> list[str]:
+    """Deserialize tags from a comma-separated string to a list of strings."""
+    if tags_serialized:
+        return [tag.strip() for tag in tags_serialized.split(",") if tag.strip()]
+    return []
+
+
+def without_none_values(d: dict) -> dict:
+    """Return a copy of the dictionary with all None values removed."""
+    return {k: v for k, v in d.items() if v is not None}
+
+
+def create_task_run_from_csv_row(
+    task: Task,
+    row: dict[str, str],
+    dataset_name: str,
+    session_id: str,
+) -> TaskRun:
+    """Validate and create a TaskRun from a CSV row, without saving to file"""
+
+    # first we validate the row from the CSV file
+    validated_row = CSVRowSchema.model_validate(
+        {
+            **row,
+            "tags": deserialize_tags(row.get("tags")),
+        }
+    )
+
+    tags = generate_import_tags(session_id)
+    if validated_row.tags:
+        tags.extend(validated_row.tags)
+
+    # note that we don't persist the run yet, we just create and validate it
+    # this instantiation may raise pydantic validation errors
+    run = TaskRun(
+        parent=task,
+        input=validated_row.input,
+        input_source=DataSource(
+            type=DataSourceType.file_import,
+            properties={
+                "file_name": dataset_name,
+            },
+        ),
+        output=TaskOutput(
+            output=validated_row.output,
+            source=DataSource(
+                type=DataSourceType.file_import,
+                properties={
+                    "file_name": dataset_name,
+                },
+            ),
+        ),
+        intermediate_outputs=without_none_values(
+            {
+                "reasoning": validated_row.reasoning,
+                "chain_of_thought": validated_row.chain_of_thought,
+            }
+        )
+        or None,
+        tags=tags,
+    )
+
+    return run
+
+
+def import_csv(task: Task, dataset_path: str, dataset_name: str) -> int:
+    """Import a CSV dataset.
+
+    All rows are validated before any are persisted to files to avoid partial imports."""
+
+    session_id = str(int(time.time()))
+
+    required_headers = {"input", "output"}  # minimum required headers
+    optional_headers = {"reasoning", "tags", "chain_of_thought"}  # optional headers
+
+    rows: list[TaskRun] = []
+    with open(dataset_path, "r", newline="") as csvfile:
+        reader = csv.DictReader(csvfile)
+
+        # Check if we have headers
+        if not reader.fieldnames:
+            raise KilnInvalidImportFormat(
+                "CSV file appears to be empty or missing headers"
+            )
+
+        # Check for required headers
+        actual_headers = set(reader.fieldnames)
+        missing_headers = required_headers - actual_headers
+        if missing_headers:
+            raise KilnInvalidImportFormat(
+                f"Missing required headers: {', '.join(missing_headers)}. "
+                f"Required headers are: {', '.join(required_headers)}"
+            )
+
+        # Warn about unknown headers (not required or optional)
+        unknown_headers = actual_headers - (required_headers | optional_headers)
+        if unknown_headers:
+            logger.warning(
+                f"Unknown headers in CSV file will be ignored: {', '.join(unknown_headers)}"
+            )
+
+        # enumeration starts at 2 because row 1 is headers
+        for row_number, row in enumerate(reader, start=2):
+            try:
+                run = create_task_run_from_csv_row(
+                    task=task,
+                    row=row,
+                    dataset_name=dataset_name,
+                    session_id=session_id,
+                )
+            except ValidationError as e:
+                logger.warning(f"Invalid row {row_number}: {row}", exc_info=True)
+                human_readable = format_validation_error(e)
+                raise KilnInvalidImportFormat(
+                    human_readable,
+                    row_number=row_number,
+                ) from e
+            rows.append(run)
+
+    # now that we know all rows are valid, we can save them
+    for run in rows:
+        run.save_to_file()
+
+    return len(rows)
+
+
+DATASET_IMPORTERS: Dict[DatasetImportFormat, Importer] = {
+    DatasetImportFormat.CSV: import_csv,
+}
+
+
+@dataclass
+class ImportConfig:
+    """Configuration for importing a dataset"""
+
+    dataset_type: DatasetImportFormat
+    dataset_path: str
+    dataset_name: str
+
+
+class DatasetFileImporter:
+    """Import a dataset from a file"""
+
+    def __init__(self, task: Task, config: ImportConfig):
+        self.task = task
+        self.dataset_type = config.dataset_type
+        self.dataset_path = config.dataset_path
+        self.dataset_name = config.dataset_name
+
+    def create_runs_from_file(self) -> int:
+        fn = DATASET_IMPORTERS[self.dataset_type]
+        return fn(self.task, self.dataset_path, self.dataset_name)
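A minimal usage sketch for the new importer, assuming an existing Kiln Task (the task-loading call and the file paths below are illustrative assumptions, not part of this diff):

from kiln_ai.datamodel import Task
from kiln_ai.utils.dataset_import import (
    DatasetFileImporter,
    DatasetImportFormat,
    ImportConfig,
    KilnInvalidImportFormat,
)

# Assumed API for loading an existing, saved task; adjust to however your project obtains a Task.
task = Task.load_from_file("/path/to/project/tasks/my_task/task.kiln")

importer = DatasetFileImporter(
    task,
    ImportConfig(
        dataset_type=DatasetImportFormat.CSV,
        # CSV headers: input, output (required); reasoning, chain_of_thought, tags (optional)
        dataset_path="/path/to/my_dataset.csv",
        dataset_name="my_dataset.csv",
    ),
)

try:
    count = importer.create_runs_from_file()
    print(f"Imported {count} runs")
except KilnInvalidImportFormat as e:
    # Includes the failing row number; nothing is saved unless every row validates.
    print(f"Import failed: {e}")

Note that import_csv tags every run with "imported" plus an "imported_<session_id>" tag, so an entire import session can be identified later.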