kiln-ai 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)
  1. kiln_ai/adapters/adapter_registry.py +28 -0
  2. kiln_ai/adapters/data_gen/data_gen_task.py +2 -2
  3. kiln_ai/adapters/data_gen/test_data_gen_task.py +7 -3
  4. kiln_ai/adapters/eval/test_eval_runner.py +6 -12
  5. kiln_ai/adapters/eval/test_g_eval_data.py +1 -1
  6. kiln_ai/adapters/fine_tune/base_finetune.py +1 -0
  7. kiln_ai/adapters/fine_tune/fireworks_finetune.py +32 -20
  8. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +30 -21
  9. kiln_ai/adapters/ml_model_list.py +635 -83
  10. kiln_ai/adapters/model_adapters/base_adapter.py +11 -7
  11. kiln_ai/adapters/model_adapters/litellm_adapter.py +14 -1
  12. kiln_ai/adapters/model_adapters/test_base_adapter.py +1 -1
  13. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +22 -3
  14. kiln_ai/adapters/model_adapters/test_structured_output.py +10 -10
  15. kiln_ai/adapters/parsers/test_r1_parser.py +1 -1
  16. kiln_ai/adapters/provider_tools.py +20 -19
  17. kiln_ai/adapters/remote_config.py +57 -10
  18. kiln_ai/adapters/repair/repair_task.py +1 -1
  19. kiln_ai/adapters/test_adapter_registry.py +30 -2
  20. kiln_ai/adapters/test_ml_model_list.py +12 -0
  21. kiln_ai/adapters/test_provider_tools.py +18 -12
  22. kiln_ai/adapters/test_remote_config.py +372 -16
  23. kiln_ai/datamodel/basemodel.py +54 -28
  24. kiln_ai/datamodel/datamodel_enums.py +2 -0
  25. kiln_ai/datamodel/dataset_split.py +5 -3
  26. kiln_ai/datamodel/eval.py +3 -3
  27. kiln_ai/datamodel/finetune.py +2 -2
  28. kiln_ai/datamodel/project.py +3 -3
  29. kiln_ai/datamodel/prompt.py +2 -2
  30. kiln_ai/datamodel/prompt_id.py +4 -4
  31. kiln_ai/datamodel/task.py +6 -6
  32. kiln_ai/datamodel/task_output.py +1 -1
  33. kiln_ai/datamodel/test_basemodel.py +210 -18
  34. kiln_ai/datamodel/test_eval_model.py +6 -6
  35. kiln_ai/datamodel/test_model_perf.py +1 -1
  36. kiln_ai/datamodel/test_prompt_id.py +5 -1
  37. kiln_ai/datamodel/test_task.py +5 -0
  38. kiln_ai/utils/config.py +10 -0
  39. {kiln_ai-0.18.0.dist-info → kiln_ai-0.19.0.dist-info}/METADATA +32 -2
  40. {kiln_ai-0.18.0.dist-info → kiln_ai-0.19.0.dist-info}/RECORD +42 -42
  41. {kiln_ai-0.18.0.dist-info → kiln_ai-0.19.0.dist-info}/WHEEL +0 -0
  42. {kiln_ai-0.18.0.dist-info → kiln_ai-0.19.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/prompt.py CHANGED
@@ -1,6 +1,6 @@
  from pydantic import BaseModel, Field

- from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+ from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel


  class BasePrompt(BaseModel):
@@ -10,7 +10,7 @@ class BasePrompt(BaseModel):
  The "Prompt" model name is reserved for the custom prompts parented by a task.
  """

- name: str = NAME_FIELD
+ name: FilenameString = Field(description="The name of the prompt.")
  description: str | None = Field(
  default=None,
  description="A more detailed description of the prompt.",
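
Note: across the datamodel this release replaces the shared `NAME_FIELD` constant with a `FilenameString` annotated type, so name validation now travels with the type rather than a shared `Field(...)` definition. Below is a minimal sketch of how such a type can be assembled with Pydantic v2; the validator behavior mirrors the `name_validator` tests later in this diff, but the length bounds and the use of `BeforeValidator` are assumptions, not the package's actual implementation.

```py
# Sketch only: a FilenameString-style Annotated type for Pydantic v2.
# The length bounds below are placeholders; kiln_ai.datamodel.basemodel
# defines the real ones.
from typing import Annotated

from pydantic import BaseModel, BeforeValidator, Field


def name_validator(min_length: int, max_length: int):
    def validate(name):
        if name is None:
            raise ValueError("Name is required")
        if len(name) < min_length:
            raise ValueError(f"Name must be at least {min_length} characters")
        if len(name) > max_length:
            raise ValueError(f"Name must be at most {max_length} characters")
        return name

    return validate


FilenameString = Annotated[str, BeforeValidator(name_validator(min_length=1, max_length=120))]


class BasePrompt(BaseModel):
    name: FilenameString = Field(description="The name of the prompt.")
```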
kiln_ai/datamodel/prompt_id.py CHANGED
@@ -60,11 +60,11 @@ def _check_prompt_id(id: str) -> str:
  return id

  if id.startswith("fine_tune_prompt::"):
- # check it had a fine_tune_id after the :: -- 'fine_tune_prompt::fine_tune_id'
- fine_tune_id = id[18:]
- if len(fine_tune_id) == 0:
+ # check it had a fine_tune_id after the :: -- 'fine_tune_prompt::[project_id]::[task_id]::fine_tune_id'
+ parts = id.split("::")
+ if len(parts) != 4 or len(parts[3]) == 0:
  raise ValueError(
- f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[fine_tune_id]'."
+ f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[project_id]::[task_id]::[fine_tune_id]'."
  )
  return id

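The hunk above changes the accepted format for fine-tune prompt IDs: they must now embed the project and task IDs, and the old two-part form is rejected (see the updated `test_prompt_id.py` cases further down this diff). A standalone sketch of the new check, distilled from the diffed code with a hypothetical helper name:

```py
# Illustration of the new check: fine-tune prompt IDs now carry project
# and task scope. The helper name here is just for the example.
def check_fine_tune_prompt_id(id: str) -> str:
    if id.startswith("fine_tune_prompt::"):
        parts = id.split("::")
        if len(parts) != 4 or len(parts[3]) == 0:
            raise ValueError(
                f"Invalid fine-tune prompt ID: {id}. Expected format: "
                "'fine_tune_prompt::[project_id]::[task_id]::[fine_tune_id]'."
            )
    return id


check_fine_tune_prompt_id("fine_tune_prompt::project_123::task_456::ft_123456")  # accepted

try:
    check_fine_tune_prompt_id("fine_tune_prompt::ft_123456")  # old 0.18 format
except ValueError as e:
    print(e)  # rejected: project and task IDs are now required
```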
kiln_ai/datamodel/task.py CHANGED
@@ -7,8 +7,8 @@ from kiln_ai.datamodel import Finetune
  from kiln_ai.datamodel.basemodel import (
  ID_FIELD,
  ID_TYPE,
- NAME_FIELD,
- SHORT_NAME_FIELD,
+ FilenameString,
+ FilenameStringShort,
  KilnParentedModel,
  KilnParentModel,
  )
@@ -38,7 +38,7 @@ class TaskRequirement(BaseModel):
  """

  id: ID_TYPE = ID_FIELD
- name: str = SHORT_NAME_FIELD
+ name: FilenameStringShort = Field(description="The name of the task requirement.")
  description: str | None = Field(default=None)
  instruction: str = Field(min_length=1)
  priority: Priority = Field(default=Priority.p2)
@@ -103,7 +103,7 @@ class TaskRunConfig(KilnParentedModel):
  A run config includes everything needed to run a task, except the input. Running the same RunConfig with the same input should make identical calls to the model (output may vary as models are non-deterministic).
  """

- name: str = NAME_FIELD
+ name: FilenameString = Field(description="The name of the task run config.")
  description: str | None = Field(
  default=None, description="The description of the task run config."
  )
@@ -189,7 +189,7 @@ class Task(
  a collection of task runs.
  """

- name: str = NAME_FIELD
+ name: FilenameString = Field(description="The name of the task.")
  description: str | None = Field(
  default=None,
  description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
@@ -216,7 +216,7 @@ class Task(
  return None
  return schema_from_json_str(self.input_json_schema)

- # These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
+ # These wrappers help for typechecking. We should fix this in KilnParentModel
  def runs(self, readonly: bool = False) -> list[TaskRun]:
  return super().runs(readonly=readonly) # type: ignore

kiln_ai/datamodel/task_output.py CHANGED
@@ -307,7 +307,7 @@ class TaskOutput(KilnBaseModel):
  if task.output_json_schema is not None:
  try:
  output_parsed = json.loads(self.output)
- except json.JSONDecodeError as e:
+ except json.JSONDecodeError:
  raise ValueError("Output is not a valid JSON object")

  validate_schema_with_value_error(
kiln_ai/datamodel/test_basemodel.py CHANGED
@@ -1,5 +1,6 @@
  import datetime
  import json
+ import uuid
  from pathlib import Path
  from typing import Optional
  from unittest.mock import MagicMock, patch
@@ -12,6 +13,7 @@ from kiln_ai.datamodel import Task, TaskRun
  from kiln_ai.datamodel.basemodel import (
  KilnBaseModel,
  KilnParentedModel,
+ name_validator,
  string_to_valid_name,
  )
  from kiln_ai.datamodel.model_cache import ModelCache
@@ -328,28 +330,81 @@ def test_delete_no_path():
  model.delete()


- def test_string_to_valid_name():
- # Test basic valid strings remain unchanged
- assert string_to_valid_name("Hello World") == "Hello World"
- assert string_to_valid_name("Test-123") == "Test-123"
- assert string_to_valid_name("my_file_name") == "my_file_name"
+ @pytest.mark.parametrize(
+ "name,expected",
+ [
+ # Basic valid strings remain unchanged
+ ("Hello World", "Hello World"),
+ ("Test-123", "Test-123"),
+ ("my_file_name", "my_file_name"),
+ ("multiple!!!symbols", "multiple!!!symbols"),
+ # Emoji
+ ("Hello 👍", "Hello 👍"),
+ # Invalid characters are replaced
+ ("Hello@World!", "Hello@World!"),
+ ("File.name.txt", "File_name_txt"),
+ ("Special%%%Chars", "Special_Chars"),
+ ("Special#$%Chars", "Special#$_Chars"),
+ # Consecutive invalid characters are replaced
+ ("Special%%%Chars", "Special_Chars"),
+ ("path/to/file", "path_to_file"),
+ # Leading/trailing special characters are removed
+ ("__test__", "test"),
+ ("...test...", "test"),
+ # Whitespace is replaced
+ ("", ""),
+ (" ", ""),
+ ("Hello World", "Hello World"),
+ # Unicode characters are replaced
+ ("你好", "你好"),
+ ("你好_世界", "你好_世界"),
+ ("你好_世界_你好", "你好_世界_你好"),
+ # Newlines, tabs, and other control characters are replaced
+ ("Hello\nworld", "Hello_world"),
+ ("Hello\tworld", "Hello_world"),
+ ("Hello\rworld", "Hello_world"),
+ ("Hello\fworld", "Hello_world"),
+ ("Hello\bworld", "Hello_world"),
+ ("Hello\vworld", "Hello_world"),
+ ("Hello\0world", "Hello_world"),
+ ("Hello\x00world", "Hello_world"),
+ ],
+ )
+ def test_string_to_valid_name(tmp_path, name, expected):
+ assert string_to_valid_name(name) == expected

- # Test invalid characters are replaced
- assert string_to_valid_name("Hello@World!") == "Hello_World"
- assert string_to_valid_name("File.name.txt") == "File_name_txt"
- assert string_to_valid_name("Special#$%Chars") == "Special_Chars"
+ # check we can create a folder with the valid name
+ dir_path = tmp_path / str(uuid.uuid4()) / expected
+ dir_path.mkdir(parents=True)

- # Test consecutive invalid characters
- assert string_to_valid_name("multiple!!!symbols") == "multiple_symbols"
- assert string_to_valid_name("path/to/file") == "path_to_file"

- # Test leading/trailing special characters
- assert string_to_valid_name("__test__") == "test"
- assert string_to_valid_name("...test...") == "test"
+ @pytest.mark.parametrize(
+ "name,min_length,max_length,should_pass",
+ [
+ # Valid cases
+ ("ValidName", 5, 20, True),
+ ("Short", 1, 10, True),
+ ("LongerValidName", 5, 20, True),
+ # None case (line 53)
+ (None, 5, 20, False),
+ # Too short cases (lines 57-59)
+ ("Hi", 5, 20, False),
+ ("", 1, 20, False),
+ ("a", 2, 20, False),
+ # Too long cases (lines 61-63)
+ ("ThisNameIsTooLong", 5, 10, False),
+ ("VeryVeryVeryLongName", 1, 15, False),
+ ],
+ )
+ def test_name_validator_error_conditions(name, min_length, max_length, should_pass):
+ validator = name_validator(min_length=min_length, max_length=max_length)

- # Test empty string and whitespace
- assert string_to_valid_name("") == ""
- assert string_to_valid_name(" ") == ""
+ if should_pass:
+ result = validator(name)
+ assert result == name
+ else:
+ with pytest.raises(ValueError):
+ validator(name)


  def test_load_from_file_with_cache(test_base_file, tmp_model_cache):
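
Read together, the new parametrized cases document the contract of `string_to_valid_name` in 0.19.0. A small usage example distilled directly from those cases (runnable against the released package):

```py
# Behavior pinned by the parametrized test above: path separators, dots and
# control characters become "_", leading/trailing separators are stripped,
# and emoji/CJK names pass through unchanged, so the result is safe to use
# as a folder name (which the test verifies with mkdir).
import tempfile
from pathlib import Path

from kiln_ai.datamodel.basemodel import string_to_valid_name

assert string_to_valid_name("path/to/file") == "path_to_file"
assert string_to_valid_name("File.name.txt") == "File_name_txt"
assert string_to_valid_name("__test__") == "test"
assert string_to_valid_name("你好_世界") == "你好_世界"

with tempfile.TemporaryDirectory() as tmp:
    (Path(tmp) / string_to_valid_name("Hello\nworld")).mkdir()  # creates "Hello_world"
```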
@@ -553,3 +608,140 @@ async def test_invoke_parsing_flow(adapter):
  match="Reasoning is required for this model, but no reasoning was returned.",
  ):
  await adapter.invoke("test input")
+
+
+ async def test_invoke_parsing_flow_basic_no_reasoning(adapter):
+ """Test for reasoning_optional_for_structured_output
+ when reasoning is not required.
+ This is a special case where we want to return the output as is.
+ """
+ # Mock dependencies
+ mock_provider = MagicMock()
+ mock_provider.parser = "test_parser"
+ mock_provider.formatter = None
+ mock_provider.reasoning_capable = False
+ mock_provider.reasoning_optional_for_structured_output = True
+
+ mock_parser = MagicMock()
+ mock_parser.parse_output.return_value = RunOutput(
+ output="parsed test output", intermediate_outputs={"key": "value"}
+ )
+
+ with (
+ patch.object(adapter, "model_provider", return_value=mock_provider),
+ patch(
+ "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+ return_value=mock_parser,
+ ),
+ patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+ ):
+ # Disable autosaving for this test
+ mock_config.shared.return_value.autosave_runs = False
+ mock_config.shared.return_value.user_id = "test_user_id"
+
+ # Execute
+ result = await adapter.invoke("test input")
+
+ # Verify parsing occurred
+ mock_parser.parse_output.assert_called_once()
+ parsed_args = mock_parser.parse_output.call_args[1]
+ assert isinstance(parsed_args["original_output"], RunOutput)
+ assert parsed_args["original_output"].output == "test output"
+
+ # Verify result contains parsed output
+ assert isinstance(result, TaskRun)
+ assert result.output.output == "parsed test output"
+ assert result.intermediate_outputs == {"key": "value"}
+ assert result.input == "test input"
+
+
+ async def test_invoke_parsing_flow_no_reasoning_with_structured_output(adapter):
+ """Test for reasoning_optional_for_structured_output
+ when reasoning is required but not provided, with structured output enabled.
+ This is a special case where we don't want to error, but we want to return the output as is.
+ """
+ # Mock dependencies
+ mock_provider = MagicMock()
+ mock_provider.parser = "test_parser"
+ mock_provider.formatter = None
+ mock_provider.reasoning_capable = True
+ mock_provider.reasoning_optional_for_structured_output = True
+
+ mock_parser = MagicMock()
+ mock_parser.parse_output.return_value = RunOutput(
+ output="parsed test output", intermediate_outputs={"key": "value"}
+ )
+
+ with (
+ patch.object(adapter, "model_provider", return_value=mock_provider),
+ patch(
+ "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+ return_value=mock_parser,
+ ),
+ patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+ patch.object(adapter, "has_structured_output", return_value=True),
+ ):
+ # Disable autosaving for this test
+ mock_config.shared.return_value.autosave_runs = False
+ mock_config.shared.return_value.user_id = "test_user_id"
+
+ # Execute
+ result = await adapter.invoke("test input")
+
+ # Verify parsing occurred
+ mock_parser.parse_output.assert_called_once()
+ parsed_args = mock_parser.parse_output.call_args[1]
+ assert isinstance(parsed_args["original_output"], RunOutput)
+ assert parsed_args["original_output"].output == "test output"
+
+ # Verify result contains parsed output
+ assert isinstance(result, TaskRun)
+ assert result.output.output == "parsed test output"
+ assert result.intermediate_outputs == {"key": "value"}
+ assert result.input == "test input"
+
+
+ async def test_invoke_parsing_flow_with_reasoning_and_structured_output(adapter):
+ """Test for reasoning_optional_for_structured_output
+ when reasoning is provided with structured output enabled.
+ This is a special case where we want to return the output as is.
+ """
+ # Mock dependencies
+ mock_provider = MagicMock()
+ mock_provider.parser = "test_parser"
+ mock_provider.formatter = None
+ mock_provider.reasoning_capable = True
+ mock_provider.reasoning_optional_for_structured_output = True
+
+ mock_parser = MagicMock()
+ mock_parser.parse_output.return_value = RunOutput(
+ output="parsed test output", intermediate_outputs={"reasoning": "value"}
+ )
+
+ with (
+ patch.object(adapter, "model_provider", return_value=mock_provider),
+ patch(
+ "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+ return_value=mock_parser,
+ ),
+ patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+ patch.object(adapter, "has_structured_output", return_value=True),
+ ):
+ # Disable autosaving for this test
+ mock_config.shared.return_value.autosave_runs = False
+ mock_config.shared.return_value.user_id = "test_user_id"
+
+ # Execute
+ result = await adapter.invoke("test input")
+
+ # Verify parsing occurred
+ mock_parser.parse_output.assert_called_once()
+ parsed_args = mock_parser.parse_output.call_args[1]
+ assert isinstance(parsed_args["original_output"], RunOutput)
+ assert parsed_args["original_output"].output == "test output"
+
+ # Verify result contains parsed output
+ assert isinstance(result, TaskRun)
+ assert result.output.output == "parsed test output"
+ assert result.intermediate_outputs == {"reasoning": "value"}
+ assert result.input == "test input"
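
The three tests added above pin down the new `reasoning_optional_for_structured_output` provider flag. The adapter's internal code is not shown in this diff, so the following is only a hypothetical sketch of the decision rule the tests imply, not the actual implementation:

```py
# Hypothetical rule implied by the new tests: a reasoning-capable provider
# normally requires a "reasoning" intermediate output, but when
# reasoning_optional_for_structured_output is set and the task has structured
# output, a missing "reasoning" key is tolerated and the output is returned as is.
def reasoning_is_required(provider, has_structured_output: bool) -> bool:
    if not provider.reasoning_capable:
        return False
    if provider.reasoning_optional_for_structured_output and has_structured_output:
        return False
    return True


def check_reasoning(provider, has_structured_output: bool, intermediate_outputs: dict) -> None:
    if reasoning_is_required(provider, has_structured_output) and not intermediate_outputs.get("reasoning"):
        raise RuntimeError(
            "Reasoning is required for this model, but no reasoning was returned."
        )
```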
kiln_ai/datamodel/test_eval_model.py CHANGED
@@ -517,13 +517,13 @@ def test_eval_run_score_keys_must_match(valid_eval_config, valid_eval_run_data):
  valid_eval_config.parent = eval

  # Correct
- run = EvalRun(
+ EvalRun(
  parent=valid_eval_config,
  **{**valid_eval_run_data, "scores": {"accuracy": 4.5, "critical": 1.0}},
  )

  # Correct but wrong order still okay
- run = EvalRun(
+ EvalRun(
  parent=valid_eval_config,
  **{**valid_eval_run_data, "scores": {"critical": 1.0, "accuracy": 4.5}},
  )
@@ -533,7 +533,7 @@ def test_eval_run_score_keys_must_match(valid_eval_config, valid_eval_run_data):
  ValueError,
  match="The scores produced by the evaluator must match the scores expected by the eval",
  ):
- run = EvalRun(
+ EvalRun(
  parent=valid_eval_config,
  **{**valid_eval_run_data, "scores": {"accuracy": 4.5}},
  )
@@ -543,7 +543,7 @@ def test_eval_run_score_keys_must_match(valid_eval_config, valid_eval_run_data):
  ValueError,
  match="The scores produced by the evaluator must match the scores expected by the eval",
  ):
- run = EvalRun(
+ EvalRun(
  parent=valid_eval_config,
  **{
  **valid_eval_run_data,
@@ -556,7 +556,7 @@ def test_eval_run_score_keys_must_match(valid_eval_config, valid_eval_run_data):
  ValueError,
  match="The scores produced by the evaluator must match the scores expected by the eval",
  ):
- run = EvalRun(
+ EvalRun(
  parent=valid_eval_config,
  **{**valid_eval_run_data, "scores": {"accuracy": 4.5, "wrong": 1.0}},
  )
@@ -566,7 +566,7 @@ def test_eval_run_custom_scores_not_allowed(valid_eval_config, valid_eval_run_da
  with pytest.raises(
  ValueError, match="Custom scores are not supported in evaluators"
  ):
- eval = Eval(
+ Eval(
  name="Test Eval",
  eval_set_filter_id="tag::tag1",
  eval_configs_filter_id="tag::tag2",
kiln_ai/datamodel/test_model_perf.py CHANGED
@@ -121,6 +121,6 @@ def test_benchmark_load_from_file(benchmark, task_run):

  # I get 8k ops per second on my MBP. Lower value here for CI and parallel testing.
  # Prior to optimization was 290 ops per second.
- print(f"Ops per second: {ops_per_second:.6f}")
+ # sys.stdout.write(f"Ops per second: {ops_per_second:.6f}")
  if ops_per_second < 500:
  pytest.fail(f"Ops per second: {ops_per_second:.6f}, expected more than 1k ops")
kiln_ai/datamodel/test_prompt_id.py CHANGED
@@ -29,7 +29,7 @@ def test_valid_saved_prompt_id():

  def test_valid_fine_tune_prompt_id():
  """Test that valid fine-tune prompt IDs are accepted"""
- valid_id = "fine_tune_prompt::ft_123456"
+ valid_id = "fine_tune_prompt::project_123::task_456::ft_123456"
  model = ModelTester(prompt_id=valid_id)
  assert model.prompt_id == valid_id

@@ -53,6 +53,10 @@ def test_invalid_saved_prompt_id_format(invalid_id):
  [
  ("fine_tune_prompt::", "Invalid fine-tune prompt ID: fine_tune_prompt::"),
  ("fine_tune_prompt", "Invalid prompt ID: fine_tune_prompt"),
+ (
+ "fine_tune_prompt::ft_123456",
+ "Invalid fine-tune prompt ID: fine_tune_prompt::ft_123456",
+ ),
  ],
  )
  def test_invalid_fine_tune_prompt_id_format(invalid_id, expected_error):
kiln_ai/datamodel/test_task.py CHANGED
@@ -323,3 +323,8 @@ def test_run_config_upgrade_old_entries():
  assert parsed.name == "test name"
  assert parsed.created_by == "scosman"
  assert parsed.run_config_properties.structured_output_mode == "unknown"
+
+
+ def test_task_name_unicode_name():
+ task = Task(name="你好", instruction="Do something")
+ assert task.name == "你好"
kiln_ai/utils/config.py CHANGED
@@ -124,6 +124,11 @@ class Config:
  env_var="WANDB_API_KEY",
  sensitive=True,
  ),
+ "siliconflow_cn_api_key": ConfigProperty(
+ str,
+ env_var="SILICONFLOW_CN_API_KEY",
+ sensitive=True,
+ ),
  "wandb_base_url": ConfigProperty(
  str,
  env_var="WANDB_BASE_URL",
@@ -137,6 +142,11 @@ class Config:
  default_lambda=lambda: [],
  sensitive_keys=["api_key"],
  ),
+ "cerebras_api_key": ConfigProperty(
+ str,
+ env_var="CEREBRAS_API_KEY",
+ sensitive=True,
+ ),
  }
  self._lock = threading.Lock()
  self._settings = self.load_settings()
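
These two entries add Cerebras and SiliconFlow (CN) credentials to the config, each declared with an environment-variable fallback. A minimal sketch of supplying them via the environment rather than `~/.kiln_ai/settings.yml`; the key values are placeholders, and `Config.shared()` is the accessor mocked by the tests in this diff:

```py
# Sketch: provide the new 0.19.0 provider credentials via their environment
# variables. The key values below are placeholders, not real credentials.
import os

os.environ["CEREBRAS_API_KEY"] = "csk-..."
os.environ["SILICONFLOW_CN_API_KEY"] = "sk-..."

from kiln_ai.utils.config import Config

# Shared config; properties declared with env_var= fall back to the environment
# when no value is stored in settings.yml.
config = Config.shared()
```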
{kiln_ai-0.18.0.dist-info → kiln_ai-0.19.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: kiln-ai
- Version: 0.18.0
+ Version: 0.19.0
  Summary: Kiln AI
  Project-URL: Homepage, https://getkiln.ai
  Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -65,6 +65,7 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil

  ## Table of Contents

+ - [Connecting AI Providers](#connecting-ai-providers-openai-openrouter-ollama-etc)
  - [Using the Kiln Data Model](#using-the-kiln-data-model)
  - [Understanding the Kiln Data Model](#understanding-the-kiln-data-model)
  - [Datamodel Overview](#datamodel-overview)
@@ -73,6 +74,7 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
  - [Using your Kiln Dataset in a Notebook or Project](#using-your-kiln-dataset-in-a-notebook-or-project)
  - [Using Kiln Dataset in Pandas](#using-kiln-dataset-in-pandas)
  - [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
+ - [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
  - [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
  - [Full API Reference](#full-api-reference)

@@ -82,6 +84,12 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
  pip install kiln-ai
  ```

+ ## Connecting AI Providers (OpenAI, OpenRouter, Ollama, etc)
+
+ The easiest way to connect AI providers is to use the Kiln app UI. Once connected in the UI, credentials will be stored to `~/.kiln_ai/settings.yml`, which will be available to the library.
+
+ For configuring credentials from code or connecting custom servers/model, see [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code).
+
  ## Using the Kiln Data Model

  ### Understanding the Kiln Data Model
@@ -179,7 +187,10 @@ item = kiln_ai.datamodel.TaskRun(
  type=kiln_ai.datamodel.DataSourceType.human,
  properties={"created_by": "Jane Doe"},
  ),
- rating=kiln_ai.datamodel.TaskOutputRating(score=5,type="five_star"),
+ rating=kiln_ai.datamodel.TaskOutputRating(
+ value=5,
+ type=kiln_ai.datamodel.datamodel_enums.five_star,
+ ),
  ),
  )
  item.save_to_file()
@@ -270,6 +281,25 @@ for run in task.runs():

  ```

+ ## Tagging Task Runs Programmatically
+
+ You can also tag your Kiln Task runs programmatically:
+
+ ```py
+ # Load your Kiln Task from disk
+ task_path = "/Users/youruser/Kiln Projects/test project/tasks/632780983478 - Joke Generator/task.kiln"
+ task = kiln_ai.datamodel.Task.load_from_file(task_path)
+
+ for run in task.runs():
+ # Parse the task output from JSON
+ output = json.loads(run.output.output)
+
+ # Add a tag if the punchline is unusually short
+ if len(output["punchline"]) < 100:
+ run.tags.append("very_short")
+ run.save_to_file() # Persist the updated tags
+ ```
+
  ### Adding Custom Model or AI Provider from Code

  You can add additional AI models and providers to Kiln.