kiln-ai 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. kiln_ai/adapters/__init__.py +2 -0
  2. kiln_ai/adapters/adapter_registry.py +22 -44
  3. kiln_ai/adapters/chat/__init__.py +8 -0
  4. kiln_ai/adapters/chat/chat_formatter.py +233 -0
  5. kiln_ai/adapters/chat/test_chat_formatter.py +131 -0
  6. kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
  7. kiln_ai/adapters/data_gen/data_gen_task.py +49 -36
  8. kiln_ai/adapters/data_gen/test_data_gen_task.py +330 -40
  9. kiln_ai/adapters/eval/base_eval.py +7 -6
  10. kiln_ai/adapters/eval/eval_runner.py +9 -2
  11. kiln_ai/adapters/eval/g_eval.py +40 -17
  12. kiln_ai/adapters/eval/test_base_eval.py +174 -17
  13. kiln_ai/adapters/eval/test_eval_runner.py +3 -0
  14. kiln_ai/adapters/eval/test_g_eval.py +116 -5
  15. kiln_ai/adapters/fine_tune/base_finetune.py +3 -8
  16. kiln_ai/adapters/fine_tune/dataset_formatter.py +135 -273
  17. kiln_ai/adapters/fine_tune/test_base_finetune.py +10 -10
  18. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +287 -353
  19. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +3 -3
  20. kiln_ai/adapters/fine_tune/test_openai_finetune.py +6 -6
  21. kiln_ai/adapters/fine_tune/test_together_finetune.py +1 -0
  22. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +6 -11
  23. kiln_ai/adapters/fine_tune/together_finetune.py +13 -2
  24. kiln_ai/adapters/ml_model_list.py +370 -84
  25. kiln_ai/adapters/model_adapters/base_adapter.py +73 -26
  26. kiln_ai/adapters/model_adapters/litellm_adapter.py +88 -97
  27. kiln_ai/adapters/model_adapters/litellm_config.py +3 -2
  28. kiln_ai/adapters/model_adapters/test_base_adapter.py +235 -61
  29. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +104 -21
  30. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -0
  31. kiln_ai/adapters/model_adapters/test_structured_output.py +44 -12
  32. kiln_ai/adapters/parsers/parser_registry.py +0 -2
  33. kiln_ai/adapters/parsers/r1_parser.py +0 -1
  34. kiln_ai/adapters/prompt_builders.py +0 -16
  35. kiln_ai/adapters/provider_tools.py +27 -9
  36. kiln_ai/adapters/remote_config.py +66 -0
  37. kiln_ai/adapters/repair/repair_task.py +1 -6
  38. kiln_ai/adapters/repair/test_repair_task.py +24 -3
  39. kiln_ai/adapters/test_adapter_registry.py +88 -28
  40. kiln_ai/adapters/test_ml_model_list.py +176 -0
  41. kiln_ai/adapters/test_prompt_adaptors.py +17 -7
  42. kiln_ai/adapters/test_prompt_builders.py +3 -16
  43. kiln_ai/adapters/test_provider_tools.py +69 -20
  44. kiln_ai/adapters/test_remote_config.py +100 -0
  45. kiln_ai/datamodel/__init__.py +0 -2
  46. kiln_ai/datamodel/datamodel_enums.py +38 -13
  47. kiln_ai/datamodel/eval.py +32 -0
  48. kiln_ai/datamodel/finetune.py +12 -8
  49. kiln_ai/datamodel/task.py +68 -7
  50. kiln_ai/datamodel/task_output.py +0 -2
  51. kiln_ai/datamodel/task_run.py +0 -2
  52. kiln_ai/datamodel/test_basemodel.py +2 -1
  53. kiln_ai/datamodel/test_dataset_split.py +0 -8
  54. kiln_ai/datamodel/test_eval_model.py +146 -4
  55. kiln_ai/datamodel/test_models.py +33 -10
  56. kiln_ai/datamodel/test_task.py +168 -2
  57. kiln_ai/utils/config.py +3 -2
  58. kiln_ai/utils/dataset_import.py +1 -1
  59. kiln_ai/utils/logging.py +166 -0
  60. kiln_ai/utils/test_config.py +23 -0
  61. kiln_ai/utils/test_dataset_import.py +30 -0
  62. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA +2 -2
  63. kiln_ai-0.18.0.dist-info/RECORD +115 -0
  64. kiln_ai-0.16.0.dist-info/RECORD +0 -108
  65. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/WHEEL +0 -0
  66. {kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/test_task.py CHANGED
@@ -1,7 +1,7 @@
 import pytest
 from pydantic import ValidationError

-from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType
+from kiln_ai.datamodel.datamodel_enums import StructuredOutputMode, TaskOutputRatingType
 from kiln_ai.datamodel.prompt_id import PromptGenerators
 from kiln_ai.datamodel.task import RunConfig, RunConfigProperties, Task, TaskRunConfig
 from kiln_ai.datamodel.task_output import normalize_rating
@@ -15,6 +15,7 @@ def test_runconfig_valid_creation():
         model_name="gpt-4",
         model_provider_name="openai",
         prompt_id=PromptGenerators.SIMPLE,
+        structured_output_mode="json_schema",
     )

     assert config.task == task
@@ -29,12 +30,13 @@ def test_runconfig_missing_required_fields():

     errors = exc_info.value.errors()
     assert (
-        len(errors) == 4
+        len(errors) == 5
     )  # task, model_name, model_provider_name, and prompt_id are required
     assert any(error["loc"][0] == "task" for error in errors)
     assert any(error["loc"][0] == "model_name" for error in errors)
     assert any(error["loc"][0] == "model_provider_name" for error in errors)
     assert any(error["loc"][0] == "prompt_id" for error in errors)
+    assert any(error["loc"][0] == "structured_output_mode" for error in errors)


 def test_runconfig_custom_prompt_id():
@@ -45,6 +47,7 @@ def test_runconfig_custom_prompt_id():
         model_name="gpt-4",
         model_provider_name="openai",
         prompt_id=PromptGenerators.SIMPLE_CHAIN_OF_THOUGHT,
+        structured_output_mode="json_schema",
     )

     assert config.prompt_id == PromptGenerators.SIMPLE_CHAIN_OF_THOUGHT
@@ -61,6 +64,7 @@ def sample_run_config_props(sample_task):
         model_name="gpt-4",
         model_provider_name="openai",
         prompt_id=PromptGenerators.SIMPLE,
+        structured_output_mode="json_schema",
     )


@@ -157,3 +161,165 @@ def test_normalize_rating(rating_type, rating, expected):
 def test_normalize_rating_errors(rating_type, rating):
     with pytest.raises(ValueError):
         normalize_rating(rating, rating_type)
+
+
+def test_run_config_defaults():
+    """RunConfig should require top_p, temperature, and structured_output_mode to be set."""
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    config = RunConfig(
+        task=task,
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE,
+        structured_output_mode="json_schema",
+    )
+    assert config.top_p == 1.0
+    assert config.temperature == 1.0
+
+
+def test_run_config_valid_ranges():
+    """RunConfig should accept valid ranges for top_p and temperature."""
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    # Test valid values
+    config = RunConfig(
+        task=task,
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE,
+        top_p=0.9,
+        temperature=0.7,
+        structured_output_mode=StructuredOutputMode.json_schema,
+    )
+
+    assert config.top_p == 0.9
+    assert config.temperature == 0.7
+    assert config.structured_output_mode == StructuredOutputMode.json_schema
+
+
+@pytest.mark.parametrize("top_p", [0.0, 0.5, 1.0])
+def test_run_config_valid_top_p(top_p):
+    """Test that RunConfig accepts valid top_p values (0-1)."""
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    config = RunConfig(
+        task=task,
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE,
+        top_p=top_p,
+        temperature=1.0,
+        structured_output_mode=StructuredOutputMode.json_schema,
+    )
+
+    assert config.top_p == top_p
+
+
+@pytest.mark.parametrize("top_p", [-0.1, 1.1, 2.0])
+def test_run_config_invalid_top_p(top_p):
+    """Test that RunConfig rejects invalid top_p values."""
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    with pytest.raises(ValueError, match="top_p must be between 0 and 1"):
+        RunConfig(
+            task=task,
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id=PromptGenerators.SIMPLE,
+            top_p=top_p,
+            temperature=1.0,
+            structured_output_mode=StructuredOutputMode.json_schema,
+        )
+
+
+@pytest.mark.parametrize("temperature", [0.0, 1.0, 2.0])
+def test_run_config_valid_temperature(temperature):
+    """Test that RunConfig accepts valid temperature values (0-2)."""
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    config = RunConfig(
+        task=task,
+        model_name="gpt-4",
+        model_provider_name="openai",
+        prompt_id=PromptGenerators.SIMPLE,
+        top_p=0.9,
+        temperature=temperature,
+        structured_output_mode=StructuredOutputMode.json_schema,
+    )
+
+    assert config.temperature == temperature
+
+
+@pytest.mark.parametrize("temperature", [-0.1, 2.1, 3.0])
+def test_run_config_invalid_temperature(temperature):
+    """Test that RunConfig rejects invalid temperature values."""
+    task = Task(id="task1", name="Test Task", instruction="Do something")
+
+    with pytest.raises(ValueError, match="temperature must be between 0 and 2"):
+        RunConfig(
+            task=task,
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id=PromptGenerators.SIMPLE,
+            top_p=0.9,
+            temperature=temperature,
+            structured_output_mode=StructuredOutputMode.json_schema,
+        )
+
+
+def test_run_config_upgrade_old_entries():
+    """Test that TaskRunConfig parses old entries correctly with nested objects, filling in defaults where needed."""
+
+    data = {
+        "v": 1,
+        "name": "test name",
+        "created_at": "2025-06-09T13:33:35.276927",
+        "created_by": "scosman",
+        "run_config_properties": {
+            "model_name": "gpt_4_1_nano",
+            "model_provider_name": "openai",
+            "prompt_id": "task_run_config::189194447826::228174773209::244130257039",
+            "top_p": 0.77,
+            "temperature": 0.77,
+            "structured_output_mode": "json_instruction_and_object",
+        },
+        "prompt": {
+            "name": "Dazzling Unicorn",
+            "description": "Frozen copy of prompt 'simple_prompt_builder', created for evaluations.",
+            "generator_id": "simple_prompt_builder",
+            "prompt": "Generate a joke, given a theme. The theme will be provided as a word or phrase as the input to the model. The assistant should output a joke that is funny and relevant to the theme. If a style is provided, the joke should be in that style. The output should include a setup and punchline.\n\nYour response should respect the following requirements:\n1) Keep the joke on topic. If the user specifies a theme, the joke must be related to that theme.\n2) Avoid any jokes that are offensive or inappropriate. Keep the joke clean and appropriate for all audiences.\n3) Make the joke funny and engaging. It should be something that someone would want to tell to their friends. Something clever, not just a simple pun.\n",
+            "chain_of_thought_instructions": None,
+        },
+        "model_type": "task_run_config",
+    }
+
+    # Parse the data - this should be TaskRunConfig, not RunConfig
+    parsed = TaskRunConfig.model_validate(data)
+    assert parsed.name == "test name"
+    assert parsed.created_by == "scosman"
+    assert (
+        parsed.run_config_properties.structured_output_mode
+        == "json_instruction_and_object"
+    )
+
+    # should still work if loading from file
+    parsed = TaskRunConfig.model_validate(data, context={"loading_from_file": True})
+    assert parsed.name == "test name"
+    assert parsed.created_by == "scosman"
+    assert (
+        parsed.run_config_properties.structured_output_mode
+        == "json_instruction_and_object"
+    )
+
+    # Remove structured_output_mode from run_config_properties and parse again
+    del data["run_config_properties"]["structured_output_mode"]
+
+    with pytest.raises(ValidationError):
+        # should error if not loading from file
+        parsed = TaskRunConfig.model_validate(data)
+
+    parsed = TaskRunConfig.model_validate(data, context={"loading_from_file": True})
+    assert parsed.name == "test name"
+    assert parsed.created_by == "scosman"
+    assert parsed.run_config_properties.structured_output_mode == "unknown"
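The new tests above pin down two behaviors of the run config models: top_p must stay within [0, 1] and temperature within [0, 2], and structured_output_mode is now a required field, with a "loading_from_file" validation context flag that upgrades older saved entries by filling in "unknown". A minimal Pydantic sketch of that upgrade pattern, not Kiln's actual implementation: the class name RunConfigPropertiesSketch is hypothetical, the field names are copied from the tests, and plain Field constraints stand in for Kiln's custom validators and error messages.

from typing import Any

from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, model_validator


class RunConfigPropertiesSketch(BaseModel):
    # Permit field names starting with "model_" without Pydantic namespace warnings.
    model_config = ConfigDict(protected_namespaces=())

    model_name: str
    model_provider_name: str
    prompt_id: str
    structured_output_mode: str  # required; older saved entries may lack it
    top_p: float = Field(default=1.0, ge=0, le=1)
    temperature: float = Field(default=1.0, ge=0, le=2)

    @model_validator(mode="before")
    @classmethod
    def _upgrade_old_entries(cls, data: Any, info: ValidationInfo) -> Any:
        # Only when explicitly loading from an existing file, default the missing
        # field to "unknown" instead of failing validation.
        if (
            isinstance(data, dict)
            and "structured_output_mode" not in data
            and (info.context or {}).get("loading_from_file")
        ):
            data = {**data, "structured_output_mode": "unknown"}
        return data


old_entry = {
    "model_name": "gpt_4_1_nano",
    "model_provider_name": "openai",
    "prompt_id": "simple_prompt_builder",
}
props = RunConfigPropertiesSketch.model_validate(
    old_entry, context={"loading_from_file": True}
)
assert props.structured_output_mode == "unknown"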
kiln_ai/utils/config.py CHANGED
@@ -138,6 +138,7 @@ class Config:
                 sensitive_keys=["api_key"],
             ),
         }
+        self._lock = threading.Lock()
         self._settings = self.load_settings()

     @classmethod
@@ -180,7 +181,7 @@ class Config:
         return None if value is None else property_config.type(value)

     def __setattr__(self, name, value):
-        if name in ("_properties", "_settings"):
+        if name in ("_properties", "_settings", "_lock"):
             super().__setattr__(name, value)
         elif name in self._properties:
             self.update_settings({name: value})
@@ -234,7 +235,7 @@

     def update_settings(self, new_settings: Dict[str, Any]):
         # Lock to prevent race conditions in multi-threaded scenarios
-        with threading.Lock():
+        with self._lock:
             # Fresh load to avoid clobbering changes from other instances
             current_settings = self.load_settings()
             current_settings.update(new_settings)
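The update_settings change above fixes a subtle concurrency bug: the old code entered `with threading.Lock():`, which creates, acquires, and discards a brand-new lock on every call, so concurrent callers never actually excluded each other. Storing a single lock on the instance in __init__ (and letting __setattr__ pass "_lock" through untouched) makes the load-update-save sequence genuinely serialized, which the new thread-safety test further down exercises. A stripped-down illustration of the pattern, using a hypothetical SettingsStore class rather than Kiln's Config:

import threading


class SettingsStore:
    def __init__(self) -> None:
        self._lock = threading.Lock()  # one lock shared by all callers of this instance
        self._settings: dict[str, object] = {}

    def update_settings_broken(self, new_settings: dict[str, object]) -> None:
        # Bug: a fresh Lock is created per call, so nothing ever contends for it
        # and the read-modify-write below is not protected at all.
        with threading.Lock():
            self._settings.update(new_settings)

    def update_settings(self, new_settings: dict[str, object]) -> None:
        # Fix: every caller acquires the same instance-level lock.
        with self._lock:
            self._settings.update(new_settings)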
kiln_ai/utils/dataset_import.py CHANGED
@@ -221,7 +221,7 @@ def import_csv(
     optional_headers = {"reasoning", "tags", "chain_of_thought"}  # optional headers

     rows: list[TaskRun] = []
-    with open(dataset_path, "r", newline="") as csvfile:
+    with open(dataset_path, "r", newline="", encoding="utf-8") as csvfile:
         reader = csv.DictReader(csvfile)

         # Check if we have headers
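The explicit encoding="utf-8" matters because open() otherwise falls back to the platform's preferred locale encoding (often cp1252 on Windows), which mangles multi-byte input such as the strings exercised by the new test_import_csv_utf8_encoding test further down. A small self-contained illustration, independent of Kiln's importer:

import csv
import io

# The same kind of non-ASCII row the new importer test uses.
raw_bytes = "input,output\nEspañol entrada 你好,salida áéí\n".encode("utf-8")

# Decoding explicitly as UTF-8 round-trips the data intact.
rows = list(csv.DictReader(io.StringIO(raw_bytes.decode("utf-8"))))
assert rows[0]["input"] == "Español entrada 你好"

# Decoding the same bytes with a legacy single-byte codec yields mojibake.
print(raw_bytes.decode("cp1252", errors="replace"))  # e.g. "EspaÃ±ol entrada ..."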
kiln_ai/utils/logging.py ADDED
@@ -0,0 +1,166 @@
+import datetime
+import json
+import logging
+import logging.handlers
+import os
+
+import litellm
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import Logging
+
+from kiln_ai.utils.config import Config
+
+
+def get_default_formatter() -> str:
+    return "%(asctime)s.%(msecs)03d - %(levelname)s - %(name)s - %(message)s"
+
+
+def get_log_file_path(filename: str) -> str:
+    """Get the path to the log file, using environment override if specified.
+
+    Returns:
+        str: The path to the log file
+    """
+    log_path_default = os.path.join(Config.settings_dir(), "logs", filename)
+    log_path = os.getenv("KILN_LOG_FILE", log_path_default)
+
+    # Ensure the log directory exists
+    os.makedirs(os.path.dirname(log_path), exist_ok=True)
+    return log_path
+
+
+class CustomLiteLLMLogger(CustomLogger):
+    def __init__(self, logger: logging.Logger):
+        self.logger = logger
+
+    def log_pre_api_call(self, model, messages, kwargs):
+        api_base = kwargs.get("litellm_params", {}).get("api_base", "")
+        headers = kwargs.get("additional_args", {}).get("headers", {})
+        data = kwargs.get("additional_args", {}).get("complete_input_dict", {})
+
+        try:
+            # Print the curl command for the request
+            logger = Logging(
+                model=model,
+                messages=messages,
+                stream=False,
+                call_type="completion",
+                start_time=datetime.datetime.now(),
+                litellm_call_id="",
+                function_id="na",
+                kwargs=kwargs,
+            )
+            curl_command = logger._get_request_curl_command(
+                api_base=api_base,
+                headers=headers,
+                additional_args=kwargs,
+                data=data,
+            )
+            self.logger.info(f"{curl_command}")
+        except Exception as e:
+            self.logger.info(f"Curl Command: Could not print {e}")
+
+        # Print the formatted input data for the request in API format, pretty print
+        try:
+            self.logger.info(
+                f"Formatted Input Data (API):\n{json.dumps(data, indent=2, ensure_ascii=False)}"
+            )
+        except Exception as e:
+            self.logger.info(f"Formatted Input Data (API): Could not print {e}")
+
+        # Print the messages for the request in LiteLLM Message list, pretty print
+        try:
+            json_messages = json.dumps(messages, indent=2, ensure_ascii=False)
+            self.logger.info(f"Messages:\n{json_messages}")
+        except Exception as e:
+            self.logger.info(f"Messages: Could not print {e}")
+
+    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
+        # No op
+        pass
+
+    def log_success_event(self, kwargs, response_obj, start_time, end_time):
+        litellm_logger = logging.getLogger("LiteLLM")
+        litellm_logger.error(
+            "Used a sync call in Litellm. Kiln should use async calls."
+        )
+
+    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        # This logging method is supposed to be called by Litellm in synchronous error cases (Kiln should use async calls instead)
+        # but it appears to also be getting called in async calls that fail early (e.g. UnsupportedParamsError).
+        litellm_logger = logging.getLogger("LiteLLM")
+        litellm_logger.error(
+            "LiteLLM logged a synchronous failure event. This may result from a sync call, or from an async call failing early (e.g. invalid parameters). Make sure you are using async calls.",
+        )
+
+    #### ASYNC #### - for acompletion/aembeddings
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            if len(response_obj.choices) == 1:
+                if response_obj.choices[0].message.tool_calls:
+                    for tool_call in response_obj.choices[0].message.tool_calls:
+                        try:
+                            args = tool_call.function.arguments
+                            function_name = tool_call.function.name
+                            self.logger.info(
+                                f"Model Response Tool Call Arguments [{function_name}]:\n{args}"
+                            )
+                        except Exception:
+                            self.logger.info(f"Model Response Tool Call:\n{tool_call}")
+
+                content = response_obj.choices[0].message.content
+                if content:
+                    try:
+                        # JSON format logs if possible
+                        json_content = json.loads(content)
+                        self.logger.info(
+                            f"Model Response Content:\n{json.dumps(json_content, indent=2, ensure_ascii=False)}"
+                        )
+                    except Exception:
+                        self.logger.info(f"Model Response Content:\n{content}")
+            elif len(response_obj.choices) > 1:
+                self.logger.info(
+                    f"Model Response (multiple choices):\n{response_obj.choices}"
+                )
+            else:
+                self.logger.info("Model Response: No choices returned")
+
+        except Exception as e:
+            self.logger.info(f"Model Response: Could not print {e}")
+
+    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        self.logger.info(f"LiteLLM Failure: {response_obj}")
+
+
+def setup_litellm_logging(filename: str = "model_calls.log"):
+    # Check if we already have a custom litellm logger
+    for callback in litellm.callbacks or []:
+        if isinstance(callback, CustomLiteLLMLogger):
+            return  # We already have a custom litellm logger
+
+    # If we don't have a custom litellm logger, create one
+    # Disable the default litellm logger except for errors. It's ugly, hard to use, and we don't want it to mix with kiln logs.
+    litellm_logger = logging.getLogger("LiteLLM")
+    litellm_logger.setLevel(logging.ERROR)
+
+    # Create a logger that logs to files, with a max size of 5MB and 3 backup files
+    handler = logging.handlers.RotatingFileHandler(
+        get_log_file_path(filename),
+        maxBytes=5 * 1024 * 1024,  # 5MB
+        backupCount=3,
+        encoding="utf-8",
+    )
+
+    # Set formatter to match the default formatting
+    formatter = logging.Formatter(get_default_formatter())
+    handler.setFormatter(formatter)
+
+    # Create a new logger for model calls
+    model_calls_logger = logging.getLogger("ModelCalls")
+    model_calls_logger.setLevel(logging.INFO)
+    model_calls_logger.propagate = False  # Only log to file
+    model_calls_logger.addHandler(handler)
+
+    # Tell litellm to use our custom logger
+    litellm.callbacks = [CustomLiteLLMLogger(model_calls_logger)]
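For context on how the new module is meant to be used: calling setup_litellm_logging() once at startup installs the CustomLiteLLMLogger callback, silences LiteLLM's default logger except for errors, and sends request/response details for async LiteLLM calls to a rotating model_calls.log under the Kiln settings directory (or the path given by KILN_LOG_FILE). A minimal usage sketch; the model id and prompt are placeholders, and it assumes provider credentials are already configured for LiteLLM:

import asyncio

import litellm

from kiln_ai.utils.logging import setup_litellm_logging


async def main() -> None:
    # Install the file-based model-call logger before making any LiteLLM requests.
    setup_litellm_logging()

    # The curl command, request payload, and response content of this call are
    # written to the rotating log file by CustomLiteLLMLogger.
    response = await litellm.acompletion(
        model="openai/gpt-4o-mini",  # placeholder model id
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())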
kiln_ai/utils/test_config.py CHANGED
@@ -1,5 +1,6 @@
 import getpass
 import os
+import threading
 from unittest.mock import patch

 import pytest
@@ -299,3 +300,25 @@ def test_yaml_persistence_structured_data(config_with_yaml, mock_yaml_file):
     with open(mock_yaml_file, "r") as f:
         saved_settings = yaml.safe_load(f)
     assert saved_settings["list_of_objects"] == new_settings
+
+
+def test_update_settings_thread_safety(config_with_yaml):
+    config = config_with_yaml
+
+    exceptions = []
+
+    def update(val):
+        try:
+            config.update_settings({"int_property": val})
+        except Exception as e:
+            exceptions.append(e)
+
+    threads = [threading.Thread(target=update, args=(i,)) for i in range(5)]
+
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+
+    assert not exceptions
+    assert config.int_property in range(5)
kiln_ai/utils/test_dataset_import.py CHANGED
@@ -261,6 +261,36 @@ def test_import_csv_plain_text_missing_output(base_task: Task, tmp_path):
     assert "Missing required headers" in str(e.value)


+def test_import_csv_utf8_encoding(base_task: Task, tmp_path):
+    """Ensure UTF-8 encoded files are read correctly."""
+
+    row_data = [
+        {
+            "input": "Español entrada 你好👋",
+            "output": "salida áéí 你好👋",
+            "tags": "",
+        },
+    ]
+
+    file_path = dicts_to_file_as_csv(row_data, "utf8.csv", tmp_path)
+
+    importer = DatasetFileImporter(
+        base_task,
+        ImportConfig(
+            dataset_type=DatasetImportFormat.CSV,
+            dataset_path=file_path,
+            dataset_name="utf8.csv",
+        ),
+    )
+
+    importer.create_runs_from_file()
+
+    assert len(base_task.runs()) == 1
+    run = base_task.runs()[0]
+    assert run.input == "Español entrada 你好👋"
+    assert run.output.output == "salida áéí 你好👋"
+
+
 def test_import_csv_structured_output(task_with_structured_output: Task, tmp_path):
     row_data = [
         {
{kiln_ai-0.16.0.dist-info → kiln_ai-0.18.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.16.0
+Version: 0.18.0
 Summary: Kiln AI
 Project-URL: Homepage, https://getkiln.ai
 Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -19,7 +19,7 @@ Requires-Dist: boto3>=1.37.10
 Requires-Dist: coverage>=7.6.4
 Requires-Dist: google-cloud-aiplatform>=1.84.0
 Requires-Dist: jsonschema>=4.23.0
-Requires-Dist: litellm>=1.67.0
+Requires-Dist: litellm>=1.72.6
 Requires-Dist: openai>=1.53.0
 Requires-Dist: pdoc>=15.0.0
 Requires-Dist: pydantic>=2.9.2
kiln_ai-0.18.0.dist-info/RECORD ADDED
@@ -0,0 +1,115 @@
+kiln_ai/__init__.py,sha256=Sc4z8LRVFMwJUoc_DPVUriSXTZ6PO9MaJ80PhRbKyB8,34
+kiln_ai/adapters/__init__.py,sha256=5GTN1bnEWz1NxWrlzsI6CfUio-D1jG74_-fx_PXJkY8,1027
+kiln_ai/adapters/adapter_registry.py,sha256=oEY6Zy6rtzs5mYLSZVhCzNWE4UJiUzIkVmmE_UYnBbE,8529
+kiln_ai/adapters/ml_model_list.py,sha256=eO9epRT73M-LRwTpMA9g8OkRHU87hZ5pg_PkJadmq0c,97972
+kiln_ai/adapters/ollama_tools.py,sha256=uObtLWfqKb9RXHN-TGGw2Y1FQlEMe0u8FgszI0zQn6U,3550
+kiln_ai/adapters/prompt_builders.py,sha256=R5IgZ7I2Ftx7i67xQb9UwcHE5gbPxgvgys_UAs3fc6A,15165
+kiln_ai/adapters/provider_tools.py,sha256=HIGsU38cAWKGONi8-Rh66WNOved3O0w9OYbj9tU_HU4,16794
+kiln_ai/adapters/remote_config.py,sha256=nUaQAWNSAC3F0lcY13t-ZI4iIoD8FV6gExFdhaWNEf4,1869
+kiln_ai/adapters/run_output.py,sha256=RAi2Qp6dmqJVNm3CxbNTdAuhitHfH5NiUGbf6ygUP-k,257
+kiln_ai/adapters/test_adapter_registry.py,sha256=W4KoZ5cFTZwmW3i19cCgwzmcG802tpFC0GrUnZcD8_4,8378
+kiln_ai/adapters/test_ml_model_list.py,sha256=soE1SI5p9HlN4SJfHEYrlUUTu3kcKDNDj3oL1rpmPno,7358
+kiln_ai/adapters/test_ollama_tools.py,sha256=xAUzL0IVmmXadVehJu1WjqbhpKEYGAgGt3pWx7hrubc,2514
+kiln_ai/adapters/test_prompt_adaptors.py,sha256=ikEeaWvBI4OQ_bVRYkUGPtjZIz37Ej2P8bAr85DGYNQ,8180
+kiln_ai/adapters/test_prompt_builders.py,sha256=SqGkluKZd7m7SPuq64NAprG0db11RDqoYai1m-1QgeQ,22585
+kiln_ai/adapters/test_provider_tools.py,sha256=QCknT944HN7jKLeSi9qS0cH_O5jKMMLThX7kWfAcJqg,32090
+kiln_ai/adapters/test_remote_config.py,sha256=Ui7L5aVrU05cbRBCWVC6bdCEo-jdbTGyzLXWQugDXXA,3022
+kiln_ai/adapters/chat/__init__.py,sha256=ELydKUGeCcTiBJogzZUT8VXqr5kDtmoB8_GoyO28qR0,192
+kiln_ai/adapters/chat/chat_formatter.py,sha256=lr2zIj-jbZNStaM1VZSvoaN_e6KDKNWiNNPC6RDQ6ug,8321
+kiln_ai/adapters/chat/test_chat_formatter.py,sha256=MVEZTSIFBwLvplOmit-4TDdcmPXsaMZMQEwoXWmq1FI,4603
+kiln_ai/adapters/data_gen/__init__.py,sha256=QTZWaf7kq5BorhPvexJfwDEKmjRmIbhwW9ei8LW2SIs,276
+kiln_ai/adapters/data_gen/data_gen_prompts.py,sha256=06gqU7uJDBr-G-txeGV9a4_NgKlbxDxhcylWclcVVxg,8792
+kiln_ai/adapters/data_gen/data_gen_task.py,sha256=Afz36cXgsUYpLlM76zm_HrW66bNOAjzIK18-1K95VAo,7153
+kiln_ai/adapters/data_gen/test_data_gen_task.py,sha256=1eebTrwScbn7o3oK1v3KGAq-r4lkcD7G9Cu_j5G0y8o,21188
+kiln_ai/adapters/eval/__init__.py,sha256=0ptbK0ZxWuraxGn_WMgmE1tcaq0k5t-g-52kVohvWCg,693
+kiln_ai/adapters/eval/base_eval.py,sha256=g7BWaljeuRquP-ygOBO6jwwzuWyNTYy_QhF0lxZlr-A,7476
+kiln_ai/adapters/eval/eval_runner.py,sha256=W3qBVWBhKQzgxW2jp4ht_WALyjqwDJnsbX9L_85XUDc,8996
+kiln_ai/adapters/eval/g_eval.py,sha256=NVq6iXgKbOG8qimivTciEh2C6lmgrKFXcbNu9yBBeSQ,16304
+kiln_ai/adapters/eval/registry.py,sha256=gZ_s0VgEx79Fswkgi1tS4yOl7lzpkvUBJZ62RldhM_w,626
+kiln_ai/adapters/eval/test_base_eval.py,sha256=JRts5jsvD2XMqdegsenVl2yy_sL6ru9W7n9wPEbKD7U,15994
+kiln_ai/adapters/eval/test_eval_runner.py,sha256=37cmZPjvZaNlNDzmstmOEUwQEFhpafD9jRhDD1UH6PE,18718
+kiln_ai/adapters/eval/test_g_eval.py,sha256=4sn0sfZdKHgSW_Y-fbZAE88JAUERl93MhlZxKFkCMiY,21002
+kiln_ai/adapters/eval/test_g_eval_data.py,sha256=8caiZfLWnXVX8alrBPrH7L7gqqSS9vO7u6PzcHurQcA,27769
+kiln_ai/adapters/fine_tune/__init__.py,sha256=DxdTR60chwgck1aEoVYWyfWi6Ed2ZkdJj0lar-SEAj4,257
+kiln_ai/adapters/fine_tune/base_finetune.py,sha256=OUmcfdjKlx32ERlGEMKHoeW3FZsvVoMxvM4nVrjIGTE,5838
+kiln_ai/adapters/fine_tune/dataset_formatter.py,sha256=oWYXOdkRaVZR_mIeox2gLf5_LZJ79hHV9PThk82zHtU,12928
+kiln_ai/adapters/fine_tune/finetune_registry.py,sha256=9RJLjviSoN3dQnKJE9Ss7df7dtdJgbuShB8IUcI-q9k,726
+kiln_ai/adapters/fine_tune/fireworks_finetune.py,sha256=ze0QxghpHAqwO9nXOTkDEC9irmqduX5bjIhZDU0DCZQ,20101
+kiln_ai/adapters/fine_tune/openai_finetune.py,sha256=Dz9E_0BWfrIkvv8ArZe-RKPwbIKPZ3v8rfbc3JELyTY,8571
+kiln_ai/adapters/fine_tune/test_base_finetune.py,sha256=atZI54SC6mVili5dEIuqwv2groLCZWYKSMzDZYjbUxQ,10705
+kiln_ai/adapters/fine_tune/test_dataset_formatter.py,sha256=OmF3jW78FBHvDes6lusY8hWgLLHz5XBgHdDVWv2ccAI,29898
+kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py,sha256=dzOOFIzgRUGGNlfA7-L1URI4qwMVHg1fuUc8RBqrDNI,36765
+kiln_ai/adapters/fine_tune/test_openai_finetune.py,sha256=NtMFo3LZjpAsNHFvIMHsFmWRrHrOKjniRvrZTx4nXhg,20001
+kiln_ai/adapters/fine_tune/test_together_finetune.py,sha256=ZwYmT4pPkhU-45Q-5fs4WL16dQ4OyvI7KXPevsrA24E,18122
+kiln_ai/adapters/fine_tune/test_vertex_finetune.py,sha256=DBQbNUgpqDEvjjk7eHwBvDXPCEkhf4yfUw8QPi5fFPE,19178
+kiln_ai/adapters/fine_tune/together_finetune.py,sha256=KpJBZt0NBNUNLsVEYYC4Dzob1jnlOFQnVI8Q-4kY9d8,14766
+kiln_ai/adapters/fine_tune/vertex_finetune.py,sha256=Ik6Ov711-oruJnMHpVZTPimWJY2W_JnfdKIdR2djGrc,8545
+kiln_ai/adapters/model_adapters/__init__.py,sha256=m5GRtOHwVVvp_XDOss8c1X3NFf1wQQlC2eBgI4tXQhM,212
+kiln_ai/adapters/model_adapters/base_adapter.py,sha256=mGF3ozrFp14_elYXghgzZiWrEKo6EGY9lSdo3aRaJ-A,13101
+kiln_ai/adapters/model_adapters/litellm_adapter.py,sha256=rAhkIrUdqT_i54vpU4xa-s_vetN5X9BPo1sgCafuWD8,17413
+kiln_ai/adapters/model_adapters/litellm_config.py,sha256=zOQEkYKeoQ0FIbxTkyyoaGtaQiB9eYK3IuyUgqSwzLE,485
+kiln_ai/adapters/model_adapters/test_base_adapter.py,sha256=_32Jz4AX7RYwj2z8HDyfyNrGdsvp1WtjOIyKgg-MXAA,15968
+kiln_ai/adapters/model_adapters/test_litellm_adapter.py,sha256=lGbr_8zRl7KQEX3WIEnlR9K_-lp-aqbcqGqIcygGyvE,19049
+kiln_ai/adapters/model_adapters/test_saving_adapter_results.py,sha256=xkd_b9GS-6ybst97FsS_XrvyTXMlEufYF1Q8CGpE1V8,9697
+kiln_ai/adapters/model_adapters/test_structured_output.py,sha256=DQ_FBXOx6WdngqzkFY8hx-DWmqF9Y4JiYdL9RCLYDUo,13476
+kiln_ai/adapters/parsers/__init__.py,sha256=TGJS_8JhjUwg5Bnq4cDmwt5eIRo4vowmcL2A72L1Hzk,202
+kiln_ai/adapters/parsers/base_parser.py,sha256=AE8UYCttmVXbilpICotnDdFYTFhGMiBJHrxIUgPTAWM,280
+kiln_ai/adapters/parsers/json_parser.py,sha256=IszrBrhIFrrVr76UZsuejkBdqpZG27mU72264HVgVzE,1274
+kiln_ai/adapters/parsers/parser_registry.py,sha256=BbY12zbXj-FgdJ8WqEcBLg26WXoDk2fmriLRo2LYCSk,710
+kiln_ai/adapters/parsers/r1_parser.py,sha256=fbO-Ab2CVj6K756GaLGwNL6kkNqBBq7dWHtenL1oiNI,3559
+kiln_ai/adapters/parsers/request_formatters.py,sha256=NdZO8zcZy9tkea8JaD5c_OeoeVjoYYTG0GjwF812STw,1124
+kiln_ai/adapters/parsers/test_json_parser.py,sha256=9kdWe_vRC5wjP8A1Ym6Zu6enDIz4ARCNiRpcZr7_3ak,1971
+kiln_ai/adapters/parsers/test_parser_registry.py,sha256=FFJQgaKVu67yK4W7w_b26tuEYPPYGppfttJ0y5ctWUo,1041
+kiln_ai/adapters/parsers/test_r1_parser.py,sha256=XbB0saThkcgOvoYNq3TxDgILUu5Me38yeKoQonqaN-g,6296
+kiln_ai/adapters/parsers/test_request_formatters.py,sha256=t3FlKT_Tte2u8zXJTMl8VaE8IrSzumuBysahbGesrbU,2090
+kiln_ai/adapters/repair/__init__.py,sha256=dOO9MEpEhjiwzDVFg3MNfA2bKMPlax9iekDatpTkX8E,217
+kiln_ai/adapters/repair/repair_task.py,sha256=yKiewxL-JSuc7Sw9kI64wM2cBeeYnfTa_GAUvMHkeg8,3281
+kiln_ai/adapters/repair/test_repair_task.py,sha256=fMlad29edA2tTt4t7cv6cXVWhuWOJ9x1Mpb3wJFTD1I,8603
+kiln_ai/datamodel/__init__.py,sha256=eHDUB9ltKmnsx0TAX310o5OMLnZbSznMJLFRnNk6HlA,1927
+kiln_ai/datamodel/basemodel.py,sha256=Qsr3Kxq5B5rfPZxQFrfOMTGwInnfYy5srd8nK0nwOs0,22204
+kiln_ai/datamodel/datamodel_enums.py,sha256=7BpyiXEtNsz43el7zTAeaaozDVUZUJCFKHsoaXoTomQ,3796
+kiln_ai/datamodel/dataset_filters.py,sha256=zXS5QeCqWa1vAxs3Pyy5FJtySx9yYJzxsCMANd23-3c,5502
+kiln_ai/datamodel/dataset_split.py,sha256=GDF3Pj3NLG42X8kjR606R_aN14rpqx8Ig56FG8NCn9k,5790
+kiln_ai/datamodel/eval.py,sha256=S4Ae1hSD_xaig-yVsIxBNiiUD4p4DcLQ9h-kEaVoJVk,15800
+kiln_ai/datamodel/finetune.py,sha256=i3XB0whLz-8dPrd3NyezYuLDfnp70dUJaiq99h1Vonk,4783
+kiln_ai/datamodel/json_schema.py,sha256=o50wSp8frRXjT-NZjml4-Is7LNoF7DQP4g3AaaYzBfI,3379
+kiln_ai/datamodel/model_cache.py,sha256=9X4aAigbkFdytckgw8InCMh86uBna0ME_1HJSeMPEn0,4495
+kiln_ai/datamodel/project.py,sha256=uVH2_3TDFtsG_tpts81A-zbd9uPDFxAwMCKZt_km3IE,727
+kiln_ai/datamodel/prompt.py,sha256=70JPYHfgyX18cHW_DXoMzIOA28Jbaz6gyabElmpycyc,1161
+kiln_ai/datamodel/prompt_id.py,sha256=S4Wcrt05USN-JrO8BeDlNYGrcoTOVocR3iUxBbgoq1c,2566
+kiln_ai/datamodel/registry.py,sha256=XwGFXJFKZtOpR1Z9ven6SftggfADdZRm8TFxCEVtfUQ,957
+kiln_ai/datamodel/strict_mode.py,sha256=sm4Xka8mnJHCShtbh6MMU5dDQv-cLj8lHgHkmFKpsl0,849
+kiln_ai/datamodel/task.py,sha256=ZjTfOxk4Op83CAVp1KFsuHeaeqBNAvdVS-QQCKCNIug,9253
+kiln_ai/datamodel/task_output.py,sha256=E0Tfmc1Pa90TpYST32E4zay3emkAvyMdch-OiF5PwqA,13198
+kiln_ai/datamodel/task_run.py,sha256=rTPqmN0VOiw7P8Fy0cVQXzV7h0sO2-SWqhGsYN0owLU,8759
+kiln_ai/datamodel/test_basemodel.py,sha256=1__3dVyxCVMQH7jhBYYEYgaCgtc08faSIlVOLfPdMZ0,18021
+kiln_ai/datamodel/test_dataset_filters.py,sha256=TFHQZLB0rJxnxsFjD546eXVFxZNAJi0sPZ8j24wYd1w,5322
+kiln_ai/datamodel/test_dataset_split.py,sha256=mXB8udml_6U4BXR0xAGHsFINnhdcTDB1qhuZbQemk-w,11055
+kiln_ai/datamodel/test_datasource.py,sha256=H4Kc-Im9eM7WnADWZXdoiOIrOl05RtkyuhTCKiRimyU,3905
+kiln_ai/datamodel/test_eval_model.py,sha256=plq3r7X1Rm0HKIN0qLud_SmsFNdnTkMka-GDaXvQR_8,23977
+kiln_ai/datamodel/test_example_models.py,sha256=dwLAAOXLvdKupE5Q1m6VNcHtfdhpi2qWtoEbC0nfJg8,26156
+kiln_ai/datamodel/test_json_schema.py,sha256=R0Cfc9WbieMslgvYsj2HFx8RHIq2fF9NcT5jH-kEqh4,4793
+kiln_ai/datamodel/test_model_cache.py,sha256=Fy-ucYNzS5JEG-8SFY4nVHA8iRbXXxai20f8_oGl97o,8184
+kiln_ai/datamodel/test_model_perf.py,sha256=9_76f__5XtZSHSjuaoiHRj2t-z3OWn-sSA4S9kH1jpY,3306
+kiln_ai/datamodel/test_models.py,sha256=wENuBiZ2Y-N_puFkitNZ1T-lBpVgQWotfhGug6k4AMY,21674
+kiln_ai/datamodel/test_nested_save.py,sha256=xciCddqvPyKyoyjC5Lx_3Kh1t4LJv1xYRAPazR3SRcs,5588
+kiln_ai/datamodel/test_output_rating.py,sha256=zvPIp2shAgCs2RQBgwYoL09fRA3krHvgAqUa91RlWR0,15125
+kiln_ai/datamodel/test_prompt_id.py,sha256=ihyXVPQi0dSLGnBM7rTXRnVaiWXhh7HJmSy4nZZKmso,4225
+kiln_ai/datamodel/test_registry.py,sha256=PhS4anLi5Bf_023obuTlO5DALhtPB8WIc_bX12Yg6Po,2705
+kiln_ai/datamodel/test_task.py,sha256=X85YgGt7Y9kuv6-jE9kl5b8mPz3cgrEFqiZAZFWfpO0,11890
+kiln_ai/utils/__init__.py,sha256=PTD0MwBCKAMIOGsTAwsFaJOusTJJoRFTfOGqRvCaU-E,142
+kiln_ai/utils/async_job_runner.py,sha256=1gjoEq5yc2MOVjDo05O1wztguEuMC6l3haDZsltlvuw,3457
+kiln_ai/utils/config.py,sha256=Pk9w7C985jinE9_SDJzgkG1CkPtIaitLZqia_dtjhwQ,8520
+kiln_ai/utils/dataset_import.py,sha256=EqBBBopCEUy1JH4-EAsBETwGp4MFjzZGfUUBZ6FLfGY,9011
+kiln_ai/utils/exhaustive_error.py,sha256=TkkRixIAR3CPEKHeAJzyv0mtxp6BxUBKMvobA3vzQug,262
+kiln_ai/utils/formatting.py,sha256=VtB9oag0lOGv17dwT7OPX_3HzBfaU9GsLH-iLete0yM,97
+kiln_ai/utils/logging.py,sha256=ixtv2Mm6-XWB2PVUPAWVRXnPJULvoTDrsp5NTAk3yp0,6815
+kiln_ai/utils/name_generator.py,sha256=v26TgpCwQbhQFcZvzgjZvURinjrOyyFhxpsI6NQrHKc,1914
+kiln_ai/utils/test_async_job_runner.py,sha256=8AwPfOlR09qzfhVm-djpkmuoyHxJJ19QEzs0WV5KFSQ,6813
+kiln_ai/utils/test_config.py,sha256=8goGdVLOO1OiSPnuDLUHbEJWJ8hAjaAIDg0myhR2A00,9687
+kiln_ai/utils/test_dataset_import.py,sha256=BEl38D95HQYpc7_jeB1N-ocOnKM1DLutp669cNrVOuE,25765
+kiln_ai/utils/test_name_geneator.py,sha256=9-hSTBshyakqlPbFnNcggwLrL7lcPTitauBYHg9jFWI,1513
+kiln_ai-0.18.0.dist-info/METADATA,sha256=-u5qNtlQ89DH86wdNxtEY11nuWJyR9q35tkl3_WxeI0,12263
+kiln_ai-0.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kiln_ai-0.18.0.dist-info/licenses/LICENSE.txt,sha256=_NA5pnTYgRRr4qH6lE3X-TuZJ8iRcMUi5ASoGr-lEx8,1209
+kiln_ai-0.18.0.dist-info/RECORD,,