kiln-ai 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kiln-ai might be problematic.

Files changed (63)
  1. kiln_ai/adapters/adapter_registry.py +12 -13
  2. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  3. kiln_ai/adapters/eval/base_eval.py +164 -0
  4. kiln_ai/adapters/eval/eval_runner.py +267 -0
  5. kiln_ai/adapters/eval/g_eval.py +367 -0
  6. kiln_ai/adapters/eval/registry.py +16 -0
  7. kiln_ai/adapters/eval/test_base_eval.py +324 -0
  8. kiln_ai/adapters/eval/test_eval_runner.py +640 -0
  9. kiln_ai/adapters/eval/test_g_eval.py +497 -0
  10. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  11. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  12. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
  13. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  14. kiln_ai/adapters/ml_model_list.py +141 -29
  15. kiln_ai/adapters/model_adapters/base_adapter.py +50 -35
  16. kiln_ai/adapters/model_adapters/langchain_adapters.py +27 -20
  17. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -1
  18. kiln_ai/adapters/model_adapters/openai_model_adapter.py +93 -50
  19. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  20. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +7 -14
  21. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +55 -64
  22. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  23. kiln_ai/adapters/model_adapters/test_structured_output.py +36 -30
  24. kiln_ai/adapters/ollama_tools.py +0 -1
  25. kiln_ai/adapters/prompt_builders.py +80 -42
  26. kiln_ai/adapters/repair/repair_task.py +9 -21
  27. kiln_ai/adapters/repair/test_repair_task.py +3 -3
  28. kiln_ai/adapters/run_output.py +3 -0
  29. kiln_ai/adapters/test_adapter_registry.py +10 -10
  30. kiln_ai/adapters/test_generate_docs.py +6 -6
  31. kiln_ai/adapters/test_ollama_tools.py +0 -1
  32. kiln_ai/adapters/test_prompt_adaptors.py +17 -14
  33. kiln_ai/adapters/test_prompt_builders.py +91 -31
  34. kiln_ai/datamodel/__init__.py +50 -952
  35. kiln_ai/datamodel/datamodel_enums.py +58 -0
  36. kiln_ai/datamodel/dataset_filters.py +114 -0
  37. kiln_ai/datamodel/dataset_split.py +170 -0
  38. kiln_ai/datamodel/eval.py +298 -0
  39. kiln_ai/datamodel/finetune.py +105 -0
  40. kiln_ai/datamodel/json_schema.py +6 -0
  41. kiln_ai/datamodel/project.py +23 -0
  42. kiln_ai/datamodel/prompt.py +37 -0
  43. kiln_ai/datamodel/prompt_id.py +83 -0
  44. kiln_ai/datamodel/strict_mode.py +24 -0
  45. kiln_ai/datamodel/task.py +181 -0
  46. kiln_ai/datamodel/task_output.py +321 -0
  47. kiln_ai/datamodel/task_run.py +164 -0
  48. kiln_ai/datamodel/test_basemodel.py +10 -11
  49. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  50. kiln_ai/datamodel/test_dataset_split.py +32 -8
  51. kiln_ai/datamodel/test_datasource.py +3 -2
  52. kiln_ai/datamodel/test_eval_model.py +635 -0
  53. kiln_ai/datamodel/test_example_models.py +9 -13
  54. kiln_ai/datamodel/test_json_schema.py +23 -0
  55. kiln_ai/datamodel/test_models.py +2 -2
  56. kiln_ai/datamodel/test_prompt_id.py +129 -0
  57. kiln_ai/datamodel/test_task.py +159 -0
  58. kiln_ai/utils/config.py +6 -1
  59. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +37 -1
  60. kiln_ai-0.12.0.dist-info/RECORD +100 -0
  61. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  62. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
  63. {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,8 +1,9 @@
 import json
+import logging
 
 import pytest
 
-from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo, BaseAdapter
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
 from kiln_ai.adapters.model_adapters.test_structured_output import (
     build_structured_output_test_task,
 )
@@ -16,8 +17,9 @@ from kiln_ai.adapters.prompt_builders import (
     SavedPromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
+    TaskRunConfigPromptBuilder,
     chain_of_thought_prompt,
-    prompt_builder_from_ui_name,
+    prompt_builder_from_id,
 )
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
 from kiln_ai.datamodel import (
@@ -32,6 +34,9 @@ from kiln_ai.datamodel import (
     TaskOutputRating,
     TaskRun,
 )
+from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig
+
+logger = logging.getLogger(__name__)
 
 
 def test_simple_prompt_builder(tmp_path):
@@ -57,12 +62,8 @@ class MockAdapter(BaseAdapter):
     def _run(self, input: str) -> str:
         return "mock response"
 
-    def adapter_info(self) -> AdapterInfo:
-        return AdapterInfo(
-            adapter_name="mock_adapter",
-            model_name="mock_model",
-            model_provider="mock_provider",
-        )
+    def adapter_name(self) -> str:
+        return "mock_adapter"
 
 
 def test_simple_prompt_builder_structured_output(tmp_path):
@@ -269,7 +270,6 @@ def test_few_shot_prompt_builder(tmp_path):
                 rating=TaskOutputRating(value=4 + (i % 2), reason="Good joke"),
             ),
         )
-        print("RATING", "Joke Initial Output ", i + 1, " - RATED:", 4 + (i % 2), "\n")
         if i < 2:
             run = run.model_copy(
                 update={
@@ -290,7 +290,7 @@
     prompt = prompt_builder.build_prompt(include_json_instructions=False)
     assert prompt.count("## Example") == 4
 
-    print("PROMPT", prompt)
+    logger.info("PROMPT: %s", prompt)
     # Verify the order of examples (2 repaired, then 2 highest-rated)
     assert "Repaired Joke 1" in prompt
     assert "Repaired Joke 2" in prompt
@@ -314,54 +314,53 @@ def check_example_outputs(task: Task, count: int):
     assert f"## Example {count}" in prompt
 
 
-def test_prompt_builder_name():
-    assert SimplePromptBuilder.prompt_builder_name() == "simple_prompt_builder"
-    assert MultiShotPromptBuilder.prompt_builder_name() == "multi_shot_prompt_builder"
-    assert RepairsPromptBuilder.prompt_builder_name() == "repairs_prompt_builder"
-
-
-def test_prompt_builder_from_ui_name(task_with_examples):
+def test_prompt_builder_from_id(task_with_examples):
     task = task_with_examples
-    assert isinstance(prompt_builder_from_ui_name("basic", task), SimplePromptBuilder)
     assert isinstance(
-        prompt_builder_from_ui_name("few_shot", task), FewShotPromptBuilder
+        prompt_builder_from_id("simple_prompt_builder", task), SimplePromptBuilder
     )
     assert isinstance(
-        prompt_builder_from_ui_name("many_shot", task), MultiShotPromptBuilder
+        prompt_builder_from_id("few_shot_prompt_builder", task),
+        FewShotPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("repairs", task), RepairsPromptBuilder
+        prompt_builder_from_id("multi_shot_prompt_builder", task),
+        MultiShotPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("simple_chain_of_thought", task),
+        prompt_builder_from_id("repairs_prompt_builder", task),
+        RepairsPromptBuilder,
+    )
+    assert isinstance(
+        prompt_builder_from_id("simple_chain_of_thought_prompt_builder", task),
         SimpleChainOfThoughtPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("few_shot_chain_of_thought", task),
+        prompt_builder_from_id("few_shot_chain_of_thought_prompt_builder", task),
         FewShotChainOfThoughtPromptBuilder,
     )
     assert isinstance(
-        prompt_builder_from_ui_name("multi_shot_chain_of_thought", task),
+        prompt_builder_from_id("multi_shot_chain_of_thought_prompt_builder", task),
         MultiShotChainOfThoughtPromptBuilder,
     )
 
-    with pytest.raises(ValueError, match="Unknown prompt builder: invalid_name"):
-        prompt_builder_from_ui_name("invalid_name", task)
+    with pytest.raises(ValueError, match="Unknown prompt generator: invalid_name"):
+        prompt_builder_from_id("invalid_name", task)
 
     with pytest.raises(ValueError, match="Prompt ID not found: 123"):
-        prompt_builder_from_ui_name("id::123", task)
+        prompt_builder_from_id("id::123", task)
 
     with pytest.raises(
         ValueError,
         match="Invalid fine-tune ID format. Expected 'project_id::task_id::fine_tune_id'",
    ):
-        prompt_builder_from_ui_name("fine_tune_prompt::123", task)
+        prompt_builder_from_id("fine_tune_prompt::123", task)
 
     with pytest.raises(
         ValueError,
         match="Fine-tune ID not found",
     ):
-        prompt_builder_from_ui_name("fine_tune_prompt::123::456::789", task)
+        prompt_builder_from_id("fine_tune_prompt::123::456::789", task)
 
     prompt = Prompt(
         name="test_prompt_name",
@@ -370,7 +369,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
         parent=task,
     )
     prompt.save_to_file()
-    pb = prompt_builder_from_ui_name("id::" + prompt.id, task)
+    pb = prompt_builder_from_id("id::" + prompt.id, task)
     assert isinstance(pb, SavedPromptBuilder)
     assert pb.prompt_id() == prompt.id
     assert pb.build_prompt(include_json_instructions=False) == "test_prompt"
@@ -390,7 +389,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
     nested_fine_tune_id = (
         task_with_examples.parent.id + "::" + task_with_examples.id + "::" + finetune.id
     )
-    pb = prompt_builder_from_ui_name(
+    pb = prompt_builder_from_id(
         "fine_tune_prompt::" + nested_fine_tune_id,
         task_with_examples,
     )
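The hunks above replace the 0.11.x UI-name lookup (prompt_builder_from_ui_name) with ID-based lookup (prompt_builder_from_id) and change the failure message to "Unknown prompt generator". A minimal migration sketch for calling code, derived only from the isinstance and error assertions shown above; the mapping table and helper function are illustrative and are not part of the kiln_ai API:

# Illustrative only: maps the removed 0.11.x UI names to the 0.12.0 generator
# IDs, as implied by the test assertions above. Not kiln_ai code.
OLD_UI_NAME_TO_PROMPT_ID = {
    "basic": "simple_prompt_builder",
    "few_shot": "few_shot_prompt_builder",
    "many_shot": "multi_shot_prompt_builder",
    "repairs": "repairs_prompt_builder",
    "simple_chain_of_thought": "simple_chain_of_thought_prompt_builder",
    "few_shot_chain_of_thought": "few_shot_chain_of_thought_prompt_builder",
    "multi_shot_chain_of_thought": "multi_shot_chain_of_thought_prompt_builder",
}


def migrate_prompt_builder_id(old_ui_name: str) -> str:
    # A caller that previously did prompt_builder_from_ui_name(old_ui_name, task)
    # would pass the returned ID to prompt_builder_from_id(new_id, task) instead.
    try:
        return OLD_UI_NAME_TO_PROMPT_ID[old_ui_name]
    except KeyError:
        # Mirrors the error wording asserted in the updated test.
        raise ValueError(f"Unknown prompt generator: {old_ui_name}") from None


assert migrate_prompt_builder_id("basic") == "simple_prompt_builder"

Saved prompts keep the "id::<prompt_id>" form and fine-tune prompts keep "fine_tune_prompt::<project_id>::<task_id>::<fine_tune_id>", as the error-path assertions above show; only the generator names and the entry-point function change.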
@@ -587,3 +586,64 @@ def test_build_prompt_with_json_instructions(tmp_path):
     assert task.instruction in prompt_with_json
     for requirement in task.requirements:
         assert requirement.instruction in prompt_with_json
+
+
+def test_task_run_config_prompt_builder(tmp_path):
+    task = build_test_task(tmp_path)
+
+    run_config = TaskRunConfig(
+        name="test_run_config",
+        parent=task,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id="simple_prompt_builder",
+        ),
+        prompt=Prompt(
+            name="test prompt name",
+            prompt="test prompt content",
+            chain_of_thought_instructions="test step by step",
+        ),
+    )
+    run_config.save_to_file()
+
+    # Construct the eval prompt ID
+    run_config_prompt_id = (
+        f"task_run_config::{task.parent.id}::{task.id}::{run_config.id}"
+    )
+
+    # Test successful creation 2 ways: constructor and ID creation
+    builders = [
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id=run_config_prompt_id
+        ),
+        prompt_builder_from_id(run_config_prompt_id, task),
+    ]
+
+    for builder in builders:
+        assert (
+            builder.build_prompt(include_json_instructions=False)
+            == "test prompt content"
+        )
+        assert builder.chain_of_thought_prompt() == "test step by step"
+        assert builder.prompt_id() == run_config_prompt_id
+
+
+def test_task_run_config_prompt_builder_validation_errors(tmp_path):
+    task = build_test_task(tmp_path)
+
+    # Test invalid format
+    with pytest.raises(ValueError, match="Invalid task run config prompt ID"):
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id="task_run_config::wrong::format"
+        )
+
+    # Test task ID mismatch
+    wrong_task_id = f"task_run_config::{task.parent.id}::wrong_task_id::config_id"
+    with pytest.raises(ValueError, match="Task ID mismatch"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=wrong_task_id)
+
+    # Test eval not found
+    nonexistent_eval = f"task_run_config::{task.parent.id}::{task.id}::nonexistent_id"
+    with pytest.raises(ValueError, match="Task run config ID not found"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=nonexistent_eval)
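The new tests above pin down a fourth prompt ID scheme, task_run_config::<project_id>::<task_id>::<run_config_id>, together with three failure modes (invalid format, task ID mismatch, unknown run config). A standalone sketch of the parsing those assertions imply, assuming nothing beyond the ID shape and error wording shown in the tests; the dataclass and function names below are invented for illustration and are not the kiln_ai implementation:

from dataclasses import dataclass


@dataclass
class ParsedTaskRunConfigPromptId:
    project_id: str
    task_id: str
    run_config_id: str


def parse_task_run_config_prompt_id(
    prompt_id: str, expected_task_id: str
) -> ParsedTaskRunConfigPromptId:
    # Expected shape: task_run_config::<project_id>::<task_id>::<run_config_id>
    parts = prompt_id.split("::")
    if len(parts) != 4 or parts[0] != "task_run_config":
        raise ValueError(f"Invalid task run config prompt ID: {prompt_id}")
    _, project_id, task_id, run_config_id = parts
    if task_id != expected_task_id:
        # The builder rejects IDs whose task segment does not match the given task.
        raise ValueError(f"Task ID mismatch: {task_id} != {expected_task_id}")
    return ParsedTaskRunConfigPromptId(project_id, task_id, run_config_id)


# Well-formed IDs parse; the malformed ID from the validation test would raise.
parsed = parse_task_run_config_prompt_id(
    "task_run_config::proj_1::task_1::cfg_1", expected_task_id="task_1"
)
assert parsed.run_config_id == "cfg_1"

The remaining check in the tests, "Task run config ID not found", is a lookup against the saved TaskRunConfig children of the task rather than a parsing step, so it is not reproduced in this sketch.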