kiln-ai 0.6.1__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai-0.7.1/PKG-INFO +237 -0
- kiln_ai-0.7.1/README.md +204 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/adapters/base_adapter.html +489 -485
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +1354 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +264 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +1053 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +1020 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_split.html +997 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +269 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +809 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +268 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/langchain_adapters.html +966 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +2041 -0
- kiln_ai-0.7.1/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +1762 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +106 -152
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +1 -2
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/adapters.html +22 -11
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/datamodel/basemodel.html +1526 -1167
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/datamodel/json_schema.html +29 -28
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/datamodel.html +1525 -1279
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/utils/config.html +286 -246
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +1 -2
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai/utils.html +1 -2
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/kiln_ai.html +217 -9
- kiln_ai-0.7.1/docs/kiln_core_docs/search.js +46 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/__init__.py +2 -0
- kiln_ai-0.7.1/kiln_ai/adapters/adapter_registry.py +19 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/data_gen/test_data_gen_task.py +29 -21
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/__init__.py +14 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/base_finetune.py +186 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/dataset_formatter.py +187 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/finetune_registry.py +11 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/fireworks_finetune.py +308 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/openai_finetune.py +205 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/test_base_finetune.py +290 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +342 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +455 -0
- kiln_ai-0.7.1/kiln_ai/adapters/fine_tune/test_openai_finetune.py +503 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/langchain_adapters.py +103 -13
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/ml_model_list.py +239 -303
- kiln_ai-0.7.1/kiln_ai/adapters/ollama_tools.py +115 -0
- kiln_ai-0.7.1/kiln_ai/adapters/provider_tools.py +308 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/repair/repair_task.py +4 -2
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/repair/test_repair_task.py +6 -11
- kiln_ai-0.7.1/kiln_ai/adapters/test_langchain_adapter.py +335 -0
- kiln_ai-0.7.1/kiln_ai/adapters/test_ollama_tools.py +42 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/test_prompt_adaptors.py +7 -5
- kiln_ai-0.7.1/kiln_ai/adapters/test_provider_tools.py +531 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/test_structured_output.py +22 -43
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/__init__.py +287 -24
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/basemodel.py +122 -38
- kiln_ai-0.7.1/kiln_ai/datamodel/model_cache.py +116 -0
- kiln_ai-0.7.1/kiln_ai/datamodel/registry.py +31 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/test_basemodel.py +167 -4
- kiln_ai-0.7.1/kiln_ai/datamodel/test_dataset_split.py +234 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/test_example_models.py +12 -0
- kiln_ai-0.7.1/kiln_ai/datamodel/test_model_cache.py +244 -0
- kiln_ai-0.7.1/kiln_ai/datamodel/test_models.py +441 -0
- kiln_ai-0.7.1/kiln_ai/datamodel/test_registry.py +96 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/utils/config.py +14 -1
- kiln_ai-0.7.1/kiln_ai/utils/name_generator.py +125 -0
- kiln_ai-0.7.1/kiln_ai/utils/test_name_geneator.py +47 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/pyproject.toml +4 -2
- kiln_ai-0.6.1/PKG-INFO +0 -88
- kiln_ai-0.6.1/README.md +0 -58
- kiln_ai-0.6.1/docs/kiln_core_docs/kiln_ai/adapters/langchain_adapters.html +0 -652
- kiln_ai-0.6.1/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -1925
- kiln_ai-0.6.1/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -1127
- kiln_ai-0.6.1/docs/kiln_core_docs/search.js +0 -46
- kiln_ai-0.6.1/kiln_ai/adapters/test_langchain_adapter.py +0 -124
- kiln_ai-0.6.1/kiln_ai/adapters/test_ml_model_list.py +0 -181
- kiln_ai-0.6.1/kiln_ai/datamodel/test_models.py +0 -227
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/.gitignore +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/.python-version +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/LICENSE.txt +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/index.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters/base_adapter.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters/langchain_adapters.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters/ml_model_list.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters/prompt_builders.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters/repair.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/adapters.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/datamodel/basemodel.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/datamodel/json_schema.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/datamodel.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/utils/config.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/utils/formatting.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai/utils.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/kiln_ai.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/core_library_docs/search.js +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/docs/kiln_core_docs/index.html +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/__init__.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/base_adapter.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/data_gen/__init__.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/prompt_builders.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/repair/__init__.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/test_prompt_builders.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/adapters/test_saving_adapter_results.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/json_schema.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/test_datasource.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/test_json_schema.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/test_nested_save.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/datamodel/test_output_rating.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/utils/__init__.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/utils/formatting.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/kiln_ai/utils/test_config.py +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/setup.cfg +0 -0
- {kiln_ai-0.6.1 → kiln_ai-0.7.1}/uv.lock +0 -0
kiln_ai-0.7.1/PKG-INFO
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kiln-ai
|
|
3
|
+
Version: 0.7.1
|
|
4
|
+
Summary: Kiln AI
|
|
5
|
+
Project-URL: Homepage, https://getkiln.ai
|
|
6
|
+
Project-URL: Repository, https://github.com/Kiln-AI/kiln
|
|
7
|
+
Project-URL: Documentation, https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
|
|
8
|
+
Project-URL: Issues, https://github.com/Kiln-AI/kiln/issues
|
|
9
|
+
Author-email: "Steve Cosman, Chesterfield Laboratories Inc" <scosman@users.noreply.github.com>
|
|
10
|
+
License-File: LICENSE.txt
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: coverage>=7.6.4
|
|
18
|
+
Requires-Dist: jsonschema>=4.23.0
|
|
19
|
+
Requires-Dist: langchain-aws>=0.2.4
|
|
20
|
+
Requires-Dist: langchain-fireworks>=0.2.5
|
|
21
|
+
Requires-Dist: langchain-groq>=0.2.0
|
|
22
|
+
Requires-Dist: langchain-ollama>=0.2.0
|
|
23
|
+
Requires-Dist: langchain-openai>=0.2.4
|
|
24
|
+
Requires-Dist: langchain>=0.3.5
|
|
25
|
+
Requires-Dist: openai>=1.53.0
|
|
26
|
+
Requires-Dist: pdoc>=15.0.0
|
|
27
|
+
Requires-Dist: pydantic>=2.9.2
|
|
28
|
+
Requires-Dist: pytest-benchmark>=5.1.0
|
|
29
|
+
Requires-Dist: pytest-cov>=6.0.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
31
|
+
Requires-Dist: typing-extensions>=4.12.2
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# Kiln AI Core Library
|
|
35
|
+
|
|
36
|
+
<p align="center">
|
|
37
|
+
<picture>
|
|
38
|
+
<img width="205" alt="Kiln AI Logo" src="https://github.com/user-attachments/assets/5fbcbdf7-1feb-45c9-bd73-99a46dd0a47f">
|
|
39
|
+
</picture>
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
[](https://pypi.org/project/kiln-ai)
|
|
43
|
+
[](https://pypi.org/project/kiln-ai)
|
|
44
|
+
[](https://kiln-ai.github.io/Kiln/kiln_core_docs/index.html)
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```console
|
|
51
|
+
pip install kiln_ai
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## About
|
|
55
|
+
|
|
56
|
+
This package is the Kiln AI core library. There is also a separate desktop application and server package. Learn more about Kiln AI at [getkiln.ai](https://getkiln.ai) and on GitHub: [github.com/Kiln-AI/kiln](https://github.com/Kiln-AI/kiln).
|
|
57
|
+
|
|
58
|
+
# Guide: Using the Kiln Python Library
|
|
59
|
+
|
|
60
|
+
In this guide we'll walk through common examples of how to use the library.
|
|
61
|
+
|
|
62
|
+
## Documentation
|
|
63
|
+
|
|
64
|
+
The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kiln_core_docs/index.html).
|
|
65
|
+
|
|
66
|
+
## Table of Contents
|
|
67
|
+
|
|
68
|
+
- [Using the Kiln Data Model](#using-the-kiln-data-model)
|
|
69
|
+
- [Understanding the Kiln Data Model](#understanding-the-kiln-data-model)
|
|
70
|
+
- [Datamodel Overview](#datamodel-overview)
|
|
71
|
+
- [Load a Project](#load-a-project)
|
|
72
|
+
- [Load an Existing Dataset into a Kiln Task Dataset](#load-an-existing-dataset-into-a-kiln-task-dataset)
|
|
73
|
+
- [Using your Kiln Dataset in a Notebook or Project](#using-your-kiln-dataset-in-a-notebook-or-project)
|
|
74
|
+
- [Using Kiln Dataset in Pandas](#using-kiln-dataset-in-pandas)
|
|
75
|
+
- [Advanced Usage](#advanced-usage)
|
|
76
|
+
|
|
77
|
+
## Installation
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install kiln-ai
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Using the Kiln Data Model
|
|
84
|
+
|
|
85
|
+
### Understanding the Kiln Data Model
|
|
86
|
+
|
|
87
|
+
Kiln projects are simply a directory of files (mostly JSON files with the extension `.kiln`) that describe your project, including tasks, runs, and other data.
|
|
88
|
+
|
|
89
|
+
This dataset design was chosen for several reasons:
|
|
90
|
+
|
|
91
|
+
- Git compatibility: Kiln project folders are easy to collaborate on in git. The filenames use unique IDs to avoid conflicts and allow many people to work in parallel. The files are small and easy to compare using standard diff tools.
|
|
92
|
+
- JSON allows you to easily load and manipulate the data using standard tools (pandas, polars, etc)
|
|
93
|
+
|
|
94
|
+
The Kiln Python library provides a set of Python classes that help you easily interact with your Kiln dataset. Using the library to load and manipulate your dataset is the fastest way to get started, and guarantees you don't insert any invalid data into your dataset. There's extensive validation when using the library, so we recommend using it to load and manipulate your dataset over direct JSON manipulation.
|
|
95
|
+
|
|
96
|
+
### Datamodel Overview
|
|
97
|
+
|
|
98
|
+
- Project: a Kiln Project that organizes related tasks
|
|
99
|
+
- Task: a specific task including prompt instructions, input/output schemas, and requirements
|
|
100
|
+
- TaskRun: a sample (run) of a task including input, output and human rating information
|
|
101
|
+
- DatasetSplit: a frozen collection of task runs divided into train/test/validation splits
|
|
102
|
+
- Finetune: configuration and status tracking for fine-tuning models on task data
|
|
103
|
+
|
|
104
|
+
### Load a Project
|
|
105
|
+
|
|
106
|
+
Assuming you've created a project in the Kiln UI, you'll have a `project.kiln` file in your `~/Kiln Projects/Project Name` directory.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from kiln_ai.datamodel import Project
|
|
110
|
+
|
|
111
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
112
|
+
print("Project: ", project.name, " - ", project.description)
|
|
113
|
+
|
|
114
|
+
# List all tasks in the project, and their dataset sizes
|
|
115
|
+
tasks = project.tasks()
|
|
116
|
+
for task in tasks:
|
|
117
|
+
print("Task: ", task.name, " - ", task.description)
|
|
118
|
+
print("Total dataset size:", len(task.runs()))
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Load an Existing Dataset into a Kiln Task Dataset
|
|
122
|
+
|
|
123
|
+
If you already have a dataset in a file, you can load it into a Kiln project.
|
|
124
|
+
|
|
125
|
+
**Important**: Kiln will validate the input and output schemas, and ensure that each datapoint in the dataset is valid for this task.
|
|
126
|
+
|
|
127
|
+
- Plaintext input/output: ensure "output_json_schema" and "input_json_schema" are not set in your Task definition.
|
|
128
|
+
- JSON input/output: ensure "output_json_schema" and "input_json_schema" are valid JSON schemas in your Task definition. Every datapoint in the dataset must be valid JSON fitting the schema.
|
|
129
|
+
|
|
130
|
+
Here's a simple example of how to load a dataset into a Kiln task:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
|
|
134
|
+
import kiln_ai
|
|
135
|
+
import kiln_ai.datamodel
|
|
136
|
+
|
|
137
|
+
# Created a project and task via the UI and put its path here
|
|
138
|
+
task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
|
|
139
|
+
task = kiln_ai.datamodel.Task.load_from_file(task_path)
|
|
140
|
+
|
|
141
|
+
# Add data to the task - loop over your dataset and run this for each item
|
|
142
|
+
item = kiln_ai.datamodel.TaskRun(
|
|
143
|
+
parent=task,
|
|
144
|
+
input='{"topic": "AI"}',
|
|
145
|
+
output=kiln_ai.datamodel.TaskOutput(
|
|
146
|
+
output='{"setup": "What is AI?", "punchline": "content_here"}',
|
|
147
|
+
),
|
|
148
|
+
)
|
|
149
|
+
item.save_to_file()
|
|
150
|
+
print("Saved item to file: ", item.path)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
And here's a more complex example of how to load a dataset into a Kiln task. This example sets the source of the data (human in this case, but you can also set it to be synthetic), the created_by property, and a 5-star rating.
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
import kiln_ai
|
|
157
|
+
import kiln_ai.datamodel
|
|
158
|
+
|
|
159
|
+
# Created a project and task via the UI and put its path here
|
|
160
|
+
task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
|
|
161
|
+
task = kiln_ai.datamodel.Task.load_from_file(task_path)
|
|
162
|
+
|
|
163
|
+
# Add data to the task - loop over your dataset and run this for each item
|
|
164
|
+
item = kiln_ai.datamodel.TaskRun(
|
|
165
|
+
parent=task,
|
|
166
|
+
input='{"topic": "AI"}',
|
|
167
|
+
input_source=kiln_ai.datamodel.DataSource(
|
|
168
|
+
type=kiln_ai.datamodel.DataSourceType.human,
|
|
169
|
+
properties={"created_by": "John Doe"},
|
|
170
|
+
),
|
|
171
|
+
output=kiln_ai.datamodel.TaskOutput(
|
|
172
|
+
output='{"setup": "What is AI?", "punchline": "content_here"}',
|
|
173
|
+
source=kiln_ai.datamodel.DataSource(
|
|
174
|
+
type=kiln_ai.datamodel.DataSourceType.human,
|
|
175
|
+
properties={"created_by": "Jane Doe"},
|
|
176
|
+
),
|
|
177
|
+
rating=kiln_ai.datamodel.TaskOutputRating(score=5,type="five_star"),
|
|
178
|
+
),
|
|
179
|
+
)
|
|
180
|
+
item.save_to_file()
|
|
181
|
+
print("Saved item to file: ", item.path)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Using your Kiln Dataset in a Notebook or Project
|
|
185
|
+
|
|
186
|
+
You can use your Kiln dataset in a notebook or project by loading the task with the Kiln library and iterating over its runs.
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
import kiln_ai
|
|
190
|
+
import kiln_ai.datamodel
|
|
191
|
+
|
|
192
|
+
# Created a project and task via the UI and put its path here
|
|
193
|
+
task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
|
|
194
|
+
task = kiln_ai.datamodel.Task.load_from_file(task_path)
|
|
195
|
+
|
|
196
|
+
runs = task.runs()
|
|
197
|
+
for run in runs:
|
|
198
|
+
print(f"Input: {run.input}")
|
|
199
|
+
print(f"Output: {run.output.output}")
|
|
200
|
+
|
|
201
|
+
print(f"Total runs: {len(runs)}")
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Using Kiln Dataset in Pandas
|
|
205
|
+
|
|
206
|
+
You can also use your Kiln dataset in a pandas dataframe, or a similar script for other tools like polars.
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
import glob
|
|
210
|
+
import json
|
|
211
|
+
import pandas as pd
|
|
212
|
+
from pathlib import Path
|
|
213
|
+
|
|
214
|
+
task_dir = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator"
|
|
215
|
+
dataitem_glob = task_dir + "/runs/*/task_run.kiln"
|
|
216
|
+
|
|
217
|
+
dfs = []
|
|
218
|
+
for file in glob.glob(dataitem_glob):
|
|
219
|
+
js = json.loads(Path(file).read_text())
|
|
220
|
+
|
|
221
|
+
df = pd.DataFrame([{
|
|
222
|
+
"input": js["input"],
|
|
223
|
+
"output": js["output"]["output"],
|
|
224
|
+
}])
|
|
225
|
+
|
|
226
|
+
# Alternatively: you can use pd.json_normalize(js) to get the full json structure
|
|
227
|
+
# df = pd.json_normalize(js)
|
|
228
|
+
dfs.append(df)
|
|
229
|
+
final_df = pd.concat(dfs, ignore_index=True)
|
|
230
|
+
print(final_df)
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Advanced Usage
|
|
234
|
+
|
|
235
|
+
The library can do a lot more than the examples we've shown here.
|
|
236
|
+
|
|
237
|
+
See the [docs](https://kiln-ai.github.io/Kiln/kiln_core_docs/index.html) for more information.
|
kiln_ai-0.7.1/README.md
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# Kiln AI Core Library
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<picture>
|
|
5
|
+
<img width="205" alt="Kiln AI Logo" src="https://github.com/user-attachments/assets/5fbcbdf7-1feb-45c9-bd73-99a46dd0a47f">
|
|
6
|
+
</picture>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
[](https://pypi.org/project/kiln-ai)
|
|
10
|
+
[](https://pypi.org/project/kiln-ai)
|
|
11
|
+
[](https://kiln-ai.github.io/Kiln/kiln_core_docs/index.html)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```console
|
|
18
|
+
pip install kiln_ai
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## About
|
|
22
|
+
|
|
23
|
+
This package is the Kiln AI core library. There is also a separate desktop application and server package. Learn more about Kiln AI at [getkiln.ai](https://getkiln.ai) and on GitHub: [github.com/Kiln-AI/kiln](https://github.com/Kiln-AI/kiln).
|
|
24
|
+
|
|
25
|
+
# Guide: Using the Kiln Python Library
|
|
26
|
+
|
|
27
|
+
In this guide we'll walk through common examples of how to use the library.
|
|
28
|
+
|
|
29
|
+
## Documentation
|
|
30
|
+
|
|
31
|
+
The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kiln_core_docs/index.html).
|
|
32
|
+
|
|
33
|
+
## Table of Contents
|
|
34
|
+
|
|
35
|
+
- [Using the Kiln Data Model](#using-the-kiln-data-model)
|
|
36
|
+
- [Understanding the Kiln Data Model](#understanding-the-kiln-data-model)
|
|
37
|
+
- [Datamodel Overview](#datamodel-overview)
|
|
38
|
+
- [Load a Project](#load-a-project)
|
|
39
|
+
- [Load an Existing Dataset into a Kiln Task Dataset](#load-an-existing-dataset-into-a-kiln-task-dataset)
|
|
40
|
+
- [Using your Kiln Dataset in a Notebook or Project](#using-your-kiln-dataset-in-a-notebook-or-project)
|
|
41
|
+
- [Using Kiln Dataset in Pandas](#using-kiln-dataset-in-pandas)
|
|
42
|
+
- [Advanced Usage](#advanced-usage)
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install kiln-ai
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Using the Kiln Data Model
|
|
51
|
+
|
|
52
|
+
### Understanding the Kiln Data Model
|
|
53
|
+
|
|
54
|
+
Kiln projects are simply a directory of files (mostly JSON files with the extension `.kiln`) that describe your project, including tasks, runs, and other data.
|
|
55
|
+
|
|
56
|
+
This dataset design was chosen for several reasons:
|
|
57
|
+
|
|
58
|
+
- Git compatibility: Kiln project folders are easy to collaborate on in git. The filenames use unique IDs to avoid conflicts and allow many people to work in parallel. The files are small and easy to compare using standard diff tools.
|
|
59
|
+
- JSON allows you to easily load and manipulate the data using standard tools (pandas, polars, etc)
|
|
60
|
+
|
|
61
|
+
The Kiln Python library provides a set of Python classes that help you easily interact with your Kiln dataset. Using the library to load and manipulate your dataset is the fastest way to get started, and guarantees you don't insert any invalid data into your dataset. There's extensive validation when using the library, so we recommend using it to load and manipulate your dataset over direct JSON manipulation.
|
|
62
|
+
|
|
63
|
+
### Datamodel Overview
|
|
64
|
+
|
|
65
|
+
- Project: a Kiln Project that organizes related tasks
|
|
66
|
+
- Task: a specific task including prompt instructions, input/output schemas, and requirements
|
|
67
|
+
- TaskRun: a sample (run) of a task including input, output and human rating information
|
|
68
|
+
- DatasetSplit: a frozen collection of task runs divided into train/test/validation splits
|
|
69
|
+
- Finetune: configuration and status tracking for fine-tuning models on task data
|
|
70
|
+
|
|
71
|
+
### Load a Project
|
|
72
|
+
|
|
73
|
+
Assuming you've created a project in the Kiln UI, you'll have a `project.kiln` file in your `~/Kiln Projects/Project Name` directory.
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from kiln_ai.datamodel import Project
|
|
77
|
+
|
|
78
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
79
|
+
print("Project: ", project.name, " - ", project.description)
|
|
80
|
+
|
|
81
|
+
# List all tasks in the project, and their dataset sizes
|
|
82
|
+
tasks = project.tasks()
|
|
83
|
+
for task in tasks:
|
|
84
|
+
print("Task: ", task.name, " - ", task.description)
|
|
85
|
+
print("Total dataset size:", len(task.runs()))
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Load an Existing Dataset into a Kiln Task Dataset
|
|
89
|
+
|
|
90
|
+
If you already have a dataset in a file, you can load it into a Kiln project.
|
|
91
|
+
|
|
92
|
+
**Important**: Kiln will validate the input and output schemas, and ensure that each datapoint in the dataset is valid for this task.
|
|
93
|
+
|
|
94
|
+
- Plaintext input/output: ensure "output_json_schema" and "input_json_schema" are not set in your Task definition.
|
|
95
|
+
- JSON input/output: ensure "output_json_schema" and "input_json_schema" are valid JSON schemas in your Task definition. Every datapoint in the dataset must be valid JSON fitting the schema.
|
|
96
|
+
|
|
97
|
+
Here's a simple example of how to load a dataset into a Kiln task:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
|
|
101
|
+
import kiln_ai
|
|
102
|
+
import kiln_ai.datamodel
|
|
103
|
+
|
|
104
|
+
# Created a project and task via the UI and put its path here
|
|
105
|
+
task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
|
|
106
|
+
task = kiln_ai.datamodel.Task.load_from_file(task_path)
|
|
107
|
+
|
|
108
|
+
# Add data to the task - loop over your dataset and run this for each item
|
|
109
|
+
item = kiln_ai.datamodel.TaskRun(
|
|
110
|
+
parent=task,
|
|
111
|
+
input='{"topic": "AI"}',
|
|
112
|
+
output=kiln_ai.datamodel.TaskOutput(
|
|
113
|
+
output='{"setup": "What is AI?", "punchline": "content_here"}',
|
|
114
|
+
),
|
|
115
|
+
)
|
|
116
|
+
item.save_to_file()
|
|
117
|
+
print("Saved item to file: ", item.path)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
And here's a more complex example of how to load a dataset into a Kiln task. This example sets the source of the data (human in this case, but you can also set it to be synthetic), the created_by property, and a 5-star rating.
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
import kiln_ai
|
|
124
|
+
import kiln_ai.datamodel
|
|
125
|
+
|
|
126
|
+
# Created a project and task via the UI and put its path here
|
|
127
|
+
task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
|
|
128
|
+
task = kiln_ai.datamodel.Task.load_from_file(task_path)
|
|
129
|
+
|
|
130
|
+
# Add data to the task - loop over your dataset and run this for each item
|
|
131
|
+
item = kiln_ai.datamodel.TaskRun(
|
|
132
|
+
parent=task,
|
|
133
|
+
input='{"topic": "AI"}',
|
|
134
|
+
input_source=kiln_ai.datamodel.DataSource(
|
|
135
|
+
type=kiln_ai.datamodel.DataSourceType.human,
|
|
136
|
+
properties={"created_by": "John Doe"},
|
|
137
|
+
),
|
|
138
|
+
output=kiln_ai.datamodel.TaskOutput(
|
|
139
|
+
output='{"setup": "What is AI?", "punchline": "content_here"}',
|
|
140
|
+
source=kiln_ai.datamodel.DataSource(
|
|
141
|
+
type=kiln_ai.datamodel.DataSourceType.human,
|
|
142
|
+
properties={"created_by": "Jane Doe"},
|
|
143
|
+
),
|
|
144
|
+
rating=kiln_ai.datamodel.TaskOutputRating(score=5,type="five_star"),
|
|
145
|
+
),
|
|
146
|
+
)
|
|
147
|
+
item.save_to_file()
|
|
148
|
+
print("Saved item to file: ", item.path)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Using your Kiln Dataset in a Notebook or Project
|
|
152
|
+
|
|
153
|
+
You can use your Kiln dataset in a notebook or project by loading the task with the Kiln library and iterating over its runs.
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
import kiln_ai
|
|
157
|
+
import kiln_ai.datamodel
|
|
158
|
+
|
|
159
|
+
# Created a project and task via the UI and put its path here
|
|
160
|
+
task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
|
|
161
|
+
task = kiln_ai.datamodel.Task.load_from_file(task_path)
|
|
162
|
+
|
|
163
|
+
runs = task.runs()
|
|
164
|
+
for run in runs:
|
|
165
|
+
print(f"Input: {run.input}")
|
|
166
|
+
print(f"Output: {run.output.output}")
|
|
167
|
+
|
|
168
|
+
print(f"Total runs: {len(runs)}")
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Using Kiln Dataset in Pandas
|
|
172
|
+
|
|
173
|
+
You can also use your Kiln dataset in a pandas dataframe, or a similar script for other tools like polars.
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
import glob
|
|
177
|
+
import json
|
|
178
|
+
import pandas as pd
|
|
179
|
+
from pathlib import Path
|
|
180
|
+
|
|
181
|
+
task_dir = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator"
|
|
182
|
+
dataitem_glob = task_dir + "/runs/*/task_run.kiln"
|
|
183
|
+
|
|
184
|
+
dfs = []
|
|
185
|
+
for file in glob.glob(dataitem_glob):
|
|
186
|
+
js = json.loads(Path(file).read_text())
|
|
187
|
+
|
|
188
|
+
df = pd.DataFrame([{
|
|
189
|
+
"input": js["input"],
|
|
190
|
+
"output": js["output"]["output"],
|
|
191
|
+
}])
|
|
192
|
+
|
|
193
|
+
# Alternatively: you can use pd.json_normalize(js) to get the full json structure
|
|
194
|
+
# df = pd.json_normalize(js)
|
|
195
|
+
dfs.append(df)
|
|
196
|
+
final_df = pd.concat(dfs, ignore_index=True)
|
|
197
|
+
print(final_df)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Advanced Usage
|
|
201
|
+
|
|
202
|
+
The library can do a lot more than the examples we've shown here.
|
|
203
|
+
|
|
204
|
+
See the [docs](https://kiln-ai.github.io/Kiln/kiln_core_docs/index.html) for more information.
|