kiln-ai 0.0.4__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (33)
  1. {kiln_ai-0.0.4 → kiln_ai-0.5.1}/PKG-INFO +22 -1
  2. kiln_ai-0.5.1/README.md +18 -0
  3. kiln_ai-0.5.1/kiln_ai/adapters/base_adapter.py +168 -0
  4. kiln_ai-0.5.1/kiln_ai/adapters/langchain_adapters.py +113 -0
  5. kiln_ai-0.5.1/kiln_ai/adapters/ml_model_list.py +436 -0
  6. kiln_ai-0.5.1/kiln_ai/adapters/prompt_builders.py +122 -0
  7. kiln_ai-0.5.1/kiln_ai/adapters/repair/repair_task.py +71 -0
  8. kiln_ai-0.5.1/kiln_ai/adapters/repair/test_repair_task.py +248 -0
  9. kiln_ai-0.5.1/kiln_ai/adapters/test_langchain_adapter.py +50 -0
  10. kiln_ai-0.5.1/kiln_ai/adapters/test_ml_model_list.py +99 -0
  11. kiln_ai-0.5.1/kiln_ai/adapters/test_prompt_adaptors.py +167 -0
  12. kiln_ai-0.5.1/kiln_ai/adapters/test_prompt_builders.py +315 -0
  13. kiln_ai-0.5.1/kiln_ai/adapters/test_saving_adapter_results.py +168 -0
  14. kiln_ai-0.5.1/kiln_ai/adapters/test_structured_output.py +218 -0
  15. kiln_ai-0.5.1/kiln_ai/datamodel/__init__.py +363 -0
  16. kiln_ai-0.5.1/kiln_ai/datamodel/basemodel.py +372 -0
  17. kiln_ai-0.5.1/kiln_ai/datamodel/json_schema.py +45 -0
  18. kiln_ai-0.5.1/kiln_ai/datamodel/test_basemodel.py +277 -0
  19. kiln_ai-0.5.1/kiln_ai/datamodel/test_datasource.py +107 -0
  20. kiln_ai-0.5.1/kiln_ai/datamodel/test_example_models.py +644 -0
  21. kiln_ai-0.5.1/kiln_ai/datamodel/test_json_schema.py +124 -0
  22. kiln_ai-0.5.1/kiln_ai/datamodel/test_models.py +190 -0
  23. kiln_ai-0.5.1/kiln_ai/datamodel/test_nested_save.py +205 -0
  24. kiln_ai-0.5.1/kiln_ai/datamodel/test_output_rating.py +88 -0
  25. kiln_ai-0.5.1/kiln_ai/utils/config.py +170 -0
  26. kiln_ai-0.5.1/kiln_ai/utils/formatting.py +5 -0
  27. kiln_ai-0.5.1/kiln_ai/utils/test_config.py +245 -0
  28. {kiln_ai-0.0.4 → kiln_ai-0.5.1}/pyproject.toml +6 -1
  29. kiln_ai-0.0.4/kiln_ai/__init.__.py +0 -3
  30. kiln_ai-0.0.4/kiln_ai/coreadd.py +0 -3
  31. kiln_ai-0.0.4/kiln_ai/datamodel/__init__.py +0 -3
  32. kiln_ai-0.0.4/kiln_ai/datamodel/project.py +0 -15
  33. {kiln_ai-0.0.4 → kiln_ai-0.5.1}/LICENSE.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kiln-ai
3
- Version: 0.0.4
3
+ Version: 0.5.1
4
4
  Summary: Kiln AI
5
5
  Home-page: https://kiln-ai.com
6
6
  License: Proprietary
@@ -16,3 +16,24 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Project-URL: Bug Tracker, https://github.com/Kiln-AI/kiln/issues
17
17
  Project-URL: Documentation, https://github.com/Kiln-AI/kiln#readme
18
18
  Project-URL: Repository, https://github.com/Kiln-AI/kiln
19
+ Description-Content-Type: text/markdown
20
+
21
+ # kiln_ai
22
+
23
+ [![PyPI - Version](https://img.shields.io/pypi/v/kiln-ai.svg?logo=pypi&label=PyPI&logoColor=gold)](https://pypi.org/project/kiln-ai)
24
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/kiln-ai.svg)](https://pypi.org/project/kiln-ai)
25
+
26
+ ---
27
+
28
+ ## Installation
29
+
30
+ ```console
31
+ pip install kiln_ai
32
+ ```
33
+
34
+ ## About Kiln AI
35
+
36
+ Learn more about Kiln AI at [kiln-ai.com](https://kiln-ai.com)
37
+
38
+ Github: [github.com/Kiln-AI/kiln](https://github.com/Kiln-AI/kiln)
39
+
@@ -0,0 +1,18 @@
1
+ # kiln_ai
2
+
3
+ [![PyPI - Version](https://img.shields.io/pypi/v/kiln-ai.svg?logo=pypi&label=PyPI&logoColor=gold)](https://pypi.org/project/kiln-ai)
4
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/kiln-ai.svg)](https://pypi.org/project/kiln-ai)
5
+
6
+ ---
7
+
8
+ ## Installation
9
+
10
+ ```console
11
+ pip install kiln_ai
12
+ ```
13
+
14
+ ## About Kiln AI
15
+
16
+ Learn more about Kiln AI at [kiln-ai.com](https://kiln-ai.com)
17
+
18
+ Github: [github.com/Kiln-AI/kiln](https://github.com/Kiln-AI/kiln)
@@ -0,0 +1,168 @@
1
+ import json
2
+ from abc import ABCMeta, abstractmethod
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+ from kiln_ai.datamodel import (
7
+ DataSource,
8
+ DataSourceType,
9
+ Task,
10
+ TaskOutput,
11
+ TaskRun,
12
+ )
13
+ from kiln_ai.datamodel.json_schema import validate_schema
14
+ from kiln_ai.utils.config import Config
15
+
16
+ from .prompt_builders import BasePromptBuilder, SimplePromptBuilder
17
+
18
+
19
@dataclass
class AdapterInfo:
    # Metadata identifying which adapter produced a task output, and with
    # which model / provider / prompt-builder combination. Stored as the
    # properties of the synthetic DataSource on saved TaskOutputs.
    adapter_name: str
    model_name: str
    model_provider: str
    prompt_builder_name: str
25
+
26
+
27
class BaseAdapter(metaclass=ABCMeta):
    """Abstract base class for adapters that execute a Kiln Task against a model.

    Subclasses implement `_run` (the actual model call) and `adapter_info`
    (metadata describing the adapter). This base class handles input/output
    JSON-schema validation, building the resulting TaskRun record, and
    optionally autosaving it to disk.
    """

    def __init__(
        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
    ):
        # Default to the simple prompt builder when none is supplied.
        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
        self.kiln_task = kiln_task
        # JSON schema for structured output/input, or None for plain text.
        self.output_schema = self.kiln_task.output_json_schema
        self.input_schema = self.kiln_task.input_json_schema

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Invoke the task and return only the output value.

        Returns the raw output string for unstructured tasks, or the parsed
        dict for tasks that declare an output JSON schema.
        """
        result = await self.invoke(input, input_source)
        if self.kiln_task.output_json_schema is None:
            return result.output.output
        else:
            # Structured output is stored as a JSON string; parse it back.
            return json.loads(result.output.output)

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Run the task end to end and return a TaskRun.

        Validates input against the task's input schema (when present),
        calls `_run`, validates the result against the output schema, and
        saves the run when autosave is enabled and the task has a path.

        Raises:
            ValueError: if structured input is not a dict.
            RuntimeError: if the model response does not match the task's
                structured/unstructured output contract.
        """
        # validate input
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            # presumably raises on schema mismatch — see validate_schema; TODO confirm
            validate_schema(input, self.input_schema)

        # Run
        result = await self._run(input)

        # validate output
        if self.output_schema is not None:
            if not isinstance(result, dict):
                raise RuntimeError(f"structured response is not a dict: {result}")
            validate_schema(result, self.output_schema)
        else:
            if not isinstance(result, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {result}"
                )

        # Generate the run and output
        run = self.generate_run(input, input_source, result)

        # Save the run if configured to do so, and we have a path to save to
        if Config.shared().autosave_runs and self.kiln_task.path is not None:
            run.save_to_file()
        else:
            # Clear the ID to indicate it's not persisted
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        """True when the task declares an output JSON schema."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        """Return metadata identifying this adapter/model/prompt combination."""
        pass

    @abstractmethod
    async def _run(self, input: Dict | str) -> Dict | str:
        """Execute the underlying model call. Implemented by subclasses."""
        pass

    def build_prompt(self) -> str:
        """Build the system prompt, appending adapter-specific format
        instructions when the subclass provides them."""
        prompt = self.prompt_builder.build_prompt()
        adapter_instructions = self.adapter_specific_instructions()
        if adapter_instructions is not None:
            prompt += f"# Format Instructions\n\n{adapter_instructions}\n\n"
        return prompt

    # override for adapter specific instructions (e.g. tool calling, json format, etc)
    def adapter_specific_instructions(self) -> str | None:
        return None

    # create a run and task output
    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
    ) -> TaskRun:
        """Build a TaskRun for this invocation.

        If an equivalent run (ignoring id/timestamps/path) already exists on
        the task, that existing run is returned instead of a new one.
        """
        # Convert input and output to JSON strings if they are dictionaries
        input_str = json.dumps(input) if isinstance(input, dict) else input
        output_str = json.dumps(output) if isinstance(output, dict) else output

        # If no input source is provided, use the human data source
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        new_task_run = TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=TaskOutput(
                output=output_str,
                # Synthetic since an adapter, not a human, is creating this
                source=DataSource(
                    type=DataSourceType.synthetic,
                    properties=self._properties_for_task_output(),
                ),
            ),
        )

        # Identity/bookkeeping fields are excluded when comparing runs for
        # equality, both on the run itself and on its nested output.
        exclude_fields = {
            "id": True,
            "created_at": True,
            "updated_at": True,
            "path": True,
            "output": {"id": True, "created_at": True, "updated_at": True},
        }
        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)

        # Check if the same run already exists
        existing_task_run = next(
            (
                task_run
                for task_run in self.kiln_task.runs()
                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
            ),
            None,
        )
        if existing_task_run:
            return existing_task_run

        return new_task_run

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Collect adapter metadata to store on the synthetic output source."""
        props = {}

        # adapter info
        adapter_info = self.adapter_info()
        props["adapter_name"] = adapter_info.adapter_name
        props["model_name"] = adapter_info.model_name
        props["model_provider"] = adapter_info.model_provider
        props["prompt_builder_name"] = adapter_info.prompt_builder_name

        return props
@@ -0,0 +1,113 @@
1
+ from typing import Dict
2
+
3
+ import kiln_ai.datamodel as datamodel
4
+ from kiln_ai.adapters.prompt_builders import SimplePromptBuilder
5
+ from langchain_core.language_models.chat_models import BaseChatModel
6
+ from langchain_core.messages import HumanMessage, SystemMessage
7
+ from langchain_core.messages.base import BaseMessage
8
+
9
+ from .base_adapter import AdapterInfo, BaseAdapter, BasePromptBuilder
10
+ from .ml_model_list import langchain_model_from
11
+
12
+
13
class LangChainPromptAdapter(BaseAdapter):
    """Adapter that runs a Kiln Task through a LangChain chat model.

    Construct with either a pre-built `custom_model`, or a `model_name`
    (and optionally `provider`) that is resolved via `langchain_model_from`.
    When the task declares an output JSON schema, the model is wrapped with
    LangChain's structured-output support.
    """

    def __init__(
        self,
        kiln_task: datamodel.Task,
        custom_model: BaseChatModel | None = None,
        model_name: str | None = None,
        provider: str | None = None,
        prompt_builder: BasePromptBuilder | None = None,
    ):
        """Set up the underlying LangChain model and record name/provider
        metadata for adapter_info().

        Raises:
            ValueError: if neither custom_model nor model_name is given, if
                the model lacks with_structured_output for a structured task,
                or if the task's output schema cannot be parsed.
        """
        super().__init__(kiln_task, prompt_builder=prompt_builder)
        if custom_model is not None:
            self.model = custom_model

            # Attempt to infer model provider and name from custom model
            self.model_provider = "custom.langchain:" + custom_model.__class__.__name__
            self.model_name = "custom.langchain:unknown_model"
            # Different LangChain integrations expose the model id under
            # different attribute names; try "model_name" then "model".
            if hasattr(custom_model, "model_name") and isinstance(
                getattr(custom_model, "model_name"), str
            ):
                self.model_name = "custom.langchain:" + getattr(
                    custom_model, "model_name"
                )
            if hasattr(custom_model, "model") and isinstance(
                getattr(custom_model, "model"), str
            ):
                self.model_name = "custom.langchain:" + getattr(custom_model, "model")
        elif model_name is not None:
            self.model = langchain_model_from(model_name, provider)
            self.model_name = model_name
            self.model_provider = provider or "custom.langchain.default_provider"
        else:
            raise ValueError(
                "model_name and provider must be provided if custom_model is not provided"
            )
        if self.has_structured_output():
            if not hasattr(self.model, "with_structured_output") or not callable(
                getattr(self.model, "with_structured_output")
            ):
                raise ValueError(
                    f"model {self.model} does not support structured output, cannot use output_json_schema"
                )
            # Langchain expects title/description to be at top level, on top of json schema
            output_schema = self.kiln_task.output_schema()
            if output_schema is None:
                raise ValueError(
                    f"output_json_schema is not valid json: {self.kiln_task.output_json_schema}"
                )
            output_schema["title"] = "task_response"
            output_schema["description"] = "A response from the task"
            # include_raw=True so parsing failures surface in the response
            # dict rather than raising inside LangChain — TODO confirm intent
            self.model = self.model.with_structured_output(
                output_schema, include_raw=True
            )

    def adapter_specific_instructions(self) -> str | None:
        """Extra system-prompt instructions for structured (tool-call) output."""
        # TODO: would be better to explicitly use bind_tools:tool_choice="task_response" here
        if self.has_structured_output():
            return "Always respond with a tool call. Never respond with a human readable message."
        return None

    async def _run(self, input: Dict | str) -> Dict | str:
        """Build the messages, call the model, and validate the response shape.

        Returns a dict for structured tasks, a string otherwise.
        """
        prompt = self.build_prompt()
        user_msg = self.prompt_builder.build_user_message(input)
        messages = [
            SystemMessage(content=prompt),
            HumanMessage(content=user_msg),
        ]
        # NOTE(review): this is a synchronous invoke() inside an async method,
        # which blocks the event loop during the model call — confirm whether
        # `await self.model.ainvoke(messages)` was intended.
        response = self.model.invoke(messages)
        if self.has_structured_output():
            # with_structured_output(include_raw=True) returns a dict with a
            # "parsed" key — anything else is treated as a failure.
            if (
                not isinstance(response, dict)
                or "parsed" not in response
                or not isinstance(response["parsed"], dict)
            ):
                raise RuntimeError(f"structured response not returned: {response}")
            structured_response = response["parsed"]
            return self._munge_response(structured_response)
        else:
            if not isinstance(response, BaseMessage):
                raise RuntimeError(f"response is not a BaseMessage: {response}")
            text_content = response.content
            if not isinstance(text_content, str):
                raise RuntimeError(f"response is not a string: {text_content}")
            return text_content

    def adapter_info(self) -> AdapterInfo:
        """Metadata identifying this adapter/model/prompt combination."""
        return AdapterInfo(
            model_name=self.model_name,
            model_provider=self.model_provider,
            adapter_name="kiln_langchain_adapter",
            prompt_builder_name=self.prompt_builder.__class__.prompt_builder_name(),
        )

    def _munge_response(self, response: Dict) -> Dict:
        # Mistral Large tool calling format is a bit different. Convert to standard format.
        if (
            "name" in response
            and response["name"] == "task_response"
            and "arguments" in response
        ):
            return response["arguments"]
        return response