kiln-ai 0.0.4-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff shows the changes between package versions as they appear in their public registries. It is provided for informational purposes only.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/base_adapter.py +168 -0
- kiln_ai/adapters/langchain_adapters.py +113 -0
- kiln_ai/adapters/ml_model_list.py +436 -0
- kiln_ai/adapters/prompt_builders.py +122 -0
- kiln_ai/adapters/repair/repair_task.py +71 -0
- kiln_ai/adapters/repair/test_repair_task.py +248 -0
- kiln_ai/adapters/test_langchain_adapter.py +50 -0
- kiln_ai/adapters/test_ml_model_list.py +99 -0
- kiln_ai/adapters/test_prompt_adaptors.py +167 -0
- kiln_ai/adapters/test_prompt_builders.py +315 -0
- kiln_ai/adapters/test_saving_adapter_results.py +168 -0
- kiln_ai/adapters/test_structured_output.py +218 -0
- kiln_ai/datamodel/__init__.py +362 -2
- kiln_ai/datamodel/basemodel.py +372 -0
- kiln_ai/datamodel/json_schema.py +45 -0
- kiln_ai/datamodel/test_basemodel.py +277 -0
- kiln_ai/datamodel/test_datasource.py +107 -0
- kiln_ai/datamodel/test_example_models.py +644 -0
- kiln_ai/datamodel/test_json_schema.py +124 -0
- kiln_ai/datamodel/test_models.py +190 -0
- kiln_ai/datamodel/test_nested_save.py +205 -0
- kiln_ai/datamodel/test_output_rating.py +88 -0
- kiln_ai/utils/config.py +170 -0
- kiln_ai/utils/formatting.py +5 -0
- kiln_ai/utils/test_config.py +245 -0
- {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.1.dist-info}/METADATA +22 -1
- kiln_ai-0.5.1.dist-info/RECORD +29 -0
- kiln_ai/__init.__.py +0 -3
- kiln_ai/coreadd.py +0 -3
- kiln_ai/datamodel/project.py +0 -15
- kiln_ai-0.0.4.dist-info/RECORD +0 -8
- {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.1.dist-info}/LICENSE.txt +0 -0
- {kiln_ai-0.0.4.dist-info → kiln_ai-0.5.1.dist-info}/WHEEL +0 -0
kiln_ai/adapters/base_adapter.py

@@ -0,0 +1,168 @@
+import json
+from abc import ABCMeta, abstractmethod
+from dataclasses import dataclass
+from typing import Dict
+
+from kiln_ai.datamodel import (
+    DataSource,
+    DataSourceType,
+    Task,
+    TaskOutput,
+    TaskRun,
+)
+from kiln_ai.datamodel.json_schema import validate_schema
+from kiln_ai.utils.config import Config
+
+from .prompt_builders import BasePromptBuilder, SimplePromptBuilder
+
+
+@dataclass
+class AdapterInfo:
+    adapter_name: str
+    model_name: str
+    model_provider: str
+    prompt_builder_name: str
+
+
+class BaseAdapter(metaclass=ABCMeta):
+    def __init__(
+        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
+    ):
+        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
+        self.kiln_task = kiln_task
+        self.output_schema = self.kiln_task.output_json_schema
+        self.input_schema = self.kiln_task.input_json_schema
+
+    async def invoke_returning_raw(
+        self,
+        input: Dict | str,
+        input_source: DataSource | None = None,
+    ) -> Dict | str:
+        result = await self.invoke(input, input_source)
+        if self.kiln_task.output_json_schema is None:
+            return result.output.output
+        else:
+            return json.loads(result.output.output)
+
+    async def invoke(
+        self,
+        input: Dict | str,
+        input_source: DataSource | None = None,
+    ) -> TaskRun:
+        # validate input
+        if self.input_schema is not None:
+            if not isinstance(input, dict):
+                raise ValueError(f"structured input is not a dict: {input}")
+            validate_schema(input, self.input_schema)
+
+        # Run
+        result = await self._run(input)
+
+        # validate output
+        if self.output_schema is not None:
+            if not isinstance(result, dict):
+                raise RuntimeError(f"structured response is not a dict: {result}")
+            validate_schema(result, self.output_schema)
+        else:
+            if not isinstance(result, str):
+                raise RuntimeError(
+                    f"response is not a string for non-structured task: {result}"
+                )
+
+        # Generate the run and output
+        run = self.generate_run(input, input_source, result)
+
+        # Save the run if configured to do so, and we have a path to save to
+        if Config.shared().autosave_runs and self.kiln_task.path is not None:
+            run.save_to_file()
+        else:
+            # Clear the ID to indicate it's not persisted
+            run.id = None
+
+        return run
+
+    def has_structured_output(self) -> bool:
+        return self.output_schema is not None
+
+    @abstractmethod
+    def adapter_info(self) -> AdapterInfo:
+        pass
+
+    @abstractmethod
+    async def _run(self, input: Dict | str) -> Dict | str:
+        pass
+
+    def build_prompt(self) -> str:
+        prompt = self.prompt_builder.build_prompt()
+        adapter_instructions = self.adapter_specific_instructions()
+        if adapter_instructions is not None:
+            prompt += f"# Format Instructions\n\n{adapter_instructions}\n\n"
+        return prompt
+
+    # override for adapter specific instructions (e.g. tool calling, json format, etc)
+    def adapter_specific_instructions(self) -> str | None:
+        return None
+
+    # create a run and task output
+    def generate_run(
+        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
+    ) -> TaskRun:
+        # Convert input and output to JSON strings if they are dictionaries
+        input_str = json.dumps(input) if isinstance(input, dict) else input
+        output_str = json.dumps(output) if isinstance(output, dict) else output
+
+        # If no input source is provided, use the human data source
+        if input_source is None:
+            input_source = DataSource(
+                type=DataSourceType.human,
+                properties={"created_by": Config.shared().user_id},
+            )
+
+        new_task_run = TaskRun(
+            parent=self.kiln_task,
+            input=input_str,
+            input_source=input_source,
+            output=TaskOutput(
+                output=output_str,
+                # Synthetic since an adapter, not a human, is creating this
+                source=DataSource(
+                    type=DataSourceType.synthetic,
+                    properties=self._properties_for_task_output(),
+                ),
+            ),
+        )
+
+        exclude_fields = {
+            "id": True,
+            "created_at": True,
+            "updated_at": True,
+            "path": True,
+            "output": {"id": True, "created_at": True, "updated_at": True},
+        }
+        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
+
+        # Check if the same run already exists
+        existing_task_run = next(
+            (
+                task_run
+                for task_run in self.kiln_task.runs()
+                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
+            ),
+            None,
+        )
+        if existing_task_run:
+            return existing_task_run
+
+        return new_task_run
+
+    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
+        props = {}
+
+        # adapter info
+        adapter_info = self.adapter_info()
+        props["adapter_name"] = adapter_info.adapter_name
+        props["model_name"] = adapter_info.model_name
+        props["model_provider"] = adapter_info.model_provider
+        props["prompt_builder_name"] = adapter_info.prompt_builder_name
+
+        return props
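To make the BaseAdapter contract above concrete, here is a minimal sketch of a subclass. It is hypothetical, not part of the diff: the EchoAdapter name and its metadata strings are invented for illustration. Only _run and adapter_info must be implemented; invoke() then layers on schema validation, TaskRun creation, deduplication, and autosaving.

from typing import Dict

from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter


class EchoAdapter(BaseAdapter):
    # Hypothetical adapter: echoes its input instead of calling a model.

    def adapter_info(self) -> AdapterInfo:
        return AdapterInfo(
            adapter_name="echo_adapter",  # invented values, illustration only
            model_name="echo",
            model_provider="none",
            prompt_builder_name=self.prompt_builder.__class__.prompt_builder_name(),
        )

    async def _run(self, input: Dict | str) -> Dict | str:
        # For a task without output_json_schema, _run must return a str;
        # invoke() raises a RuntimeError otherwise.
        return f"echo: {input}"

# Usage, assuming `task` is an existing plain-text kiln_ai.datamodel.Task:
#   run = asyncio.run(EchoAdapter(task).invoke("hello"))
#   print(run.output.output)  # "echo: hello"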
kiln_ai/adapters/langchain_adapters.py

@@ -0,0 +1,113 @@
+from typing import Dict
+
+import kiln_ai.datamodel as datamodel
+from kiln_ai.adapters.prompt_builders import SimplePromptBuilder
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.messages.base import BaseMessage
+
+from .base_adapter import AdapterInfo, BaseAdapter, BasePromptBuilder
+from .ml_model_list import langchain_model_from
+
+
+class LangChainPromptAdapter(BaseAdapter):
+    def __init__(
+        self,
+        kiln_task: datamodel.Task,
+        custom_model: BaseChatModel | None = None,
+        model_name: str | None = None,
+        provider: str | None = None,
+        prompt_builder: BasePromptBuilder | None = None,
+    ):
+        super().__init__(kiln_task, prompt_builder=prompt_builder)
+        if custom_model is not None:
+            self.model = custom_model
+
+            # Attempt to infer model provider and name from custom model
+            self.model_provider = "custom.langchain:" + custom_model.__class__.__name__
+            self.model_name = "custom.langchain:unknown_model"
+            if hasattr(custom_model, "model_name") and isinstance(
+                getattr(custom_model, "model_name"), str
+            ):
+                self.model_name = "custom.langchain:" + getattr(
+                    custom_model, "model_name"
+                )
+            if hasattr(custom_model, "model") and isinstance(
+                getattr(custom_model, "model"), str
+            ):
+                self.model_name = "custom.langchain:" + getattr(custom_model, "model")
+        elif model_name is not None:
+            self.model = langchain_model_from(model_name, provider)
+            self.model_name = model_name
+            self.model_provider = provider or "custom.langchain.default_provider"
+        else:
+            raise ValueError(
+                "model_name and provider must be provided if custom_model is not provided"
+            )
+        if self.has_structured_output():
+            if not hasattr(self.model, "with_structured_output") or not callable(
+                getattr(self.model, "with_structured_output")
+            ):
+                raise ValueError(
+                    f"model {self.model} does not support structured output, cannot use output_json_schema"
+                )
+            # Langchain expects title/description to be at top level, on top of json schema
+            output_schema = self.kiln_task.output_schema()
+            if output_schema is None:
+                raise ValueError(
+                    f"output_json_schema is not valid json: {self.kiln_task.output_json_schema}"
+                )
+            output_schema["title"] = "task_response"
+            output_schema["description"] = "A response from the task"
+            self.model = self.model.with_structured_output(
+                output_schema, include_raw=True
+            )
+
+    def adapter_specific_instructions(self) -> str | None:
+        # TODO: would be better to explicitly use bind_tools:tool_choice="task_response" here
+        if self.has_structured_output():
+            return "Always respond with a tool call. Never respond with a human readable message."
+        return None
+
+    async def _run(self, input: Dict | str) -> Dict | str:
+        prompt = self.build_prompt()
+        user_msg = self.prompt_builder.build_user_message(input)
+        messages = [
+            SystemMessage(content=prompt),
+            HumanMessage(content=user_msg),
+        ]
+        response = self.model.invoke(messages)
+        if self.has_structured_output():
+            if (
+                not isinstance(response, dict)
+                or "parsed" not in response
+                or not isinstance(response["parsed"], dict)
+            ):
+                raise RuntimeError(f"structured response not returned: {response}")
+            structured_response = response["parsed"]
+            return self._munge_response(structured_response)
+        else:
+            if not isinstance(response, BaseMessage):
+                raise RuntimeError(f"response is not a BaseMessage: {response}")
+            text_content = response.content
+            if not isinstance(text_content, str):
+                raise RuntimeError(f"response is not a string: {text_content}")
+            return text_content
+
+    def adapter_info(self) -> AdapterInfo:
+        return AdapterInfo(
+            model_name=self.model_name,
+            model_provider=self.model_provider,
+            adapter_name="kiln_langchain_adapter",
+            prompt_builder_name=self.prompt_builder.__class__.prompt_builder_name(),
+        )
+
+    def _munge_response(self, response: Dict) -> Dict:
+        # Mistral Large tool calling format is a bit different. Convert to standard format.
+        if (
+            "name" in response
+            and response["name"] == "task_response"
+            and "arguments" in response
+        ):
+            return response["arguments"]
+        return response
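And a sketch of driving the LangChain adapter end to end. This is hypothetical usage, not taken from the diff: it assumes an existing `task` (a kiln_ai.datamodel.Task) and the langchain-openai package; any chat model exposing a string `model` or `model_name` attribute would have its name inferred the same way by the constructor above.

import asyncio

from langchain_openai import ChatOpenAI

from kiln_ai.adapters.langchain_adapters import LangChainPromptAdapter

# task = ...  # an existing kiln_ai.datamodel.Task (assumed here)

# Option 1: wrap any LangChain chat model; the constructor infers the name,
# e.g. model_name becomes "custom.langchain:gpt-4o" for this instance.
adapter = LangChainPromptAdapter(task, custom_model=ChatOpenAI(model="gpt-4o"))

# Option 2: resolve a model from kiln's registry via ml_model_list instead:
# adapter = LangChainPromptAdapter(task, model_name=..., provider=...)

# invoke() validates against the task's schemas, wraps the result in a
# TaskRun, and autosaves it when Config.shared().autosave_runs is enabled.
run = asyncio.run(adapter.invoke("What is the capital of France?"))
print(run.output.output)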