kiln-ai 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of kiln-ai might be problematic.
- kiln_ai/adapters/adapter_registry.py +12 -13
- kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
- kiln_ai/adapters/eval/base_eval.py +164 -0
- kiln_ai/adapters/eval/eval_runner.py +267 -0
- kiln_ai/adapters/eval/g_eval.py +367 -0
- kiln_ai/adapters/eval/registry.py +16 -0
- kiln_ai/adapters/eval/test_base_eval.py +324 -0
- kiln_ai/adapters/eval/test_eval_runner.py +640 -0
- kiln_ai/adapters/eval/test_g_eval.py +497 -0
- kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +1 -1
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
- kiln_ai/adapters/ml_model_list.py +141 -29
- kiln_ai/adapters/model_adapters/base_adapter.py +50 -35
- kiln_ai/adapters/model_adapters/langchain_adapters.py +27 -20
- kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -1
- kiln_ai/adapters/model_adapters/openai_model_adapter.py +93 -50
- kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
- kiln_ai/adapters/model_adapters/test_langchain_adapter.py +7 -14
- kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +55 -64
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
- kiln_ai/adapters/model_adapters/test_structured_output.py +36 -30
- kiln_ai/adapters/ollama_tools.py +0 -1
- kiln_ai/adapters/prompt_builders.py +80 -42
- kiln_ai/adapters/repair/repair_task.py +9 -21
- kiln_ai/adapters/repair/test_repair_task.py +3 -3
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +10 -10
- kiln_ai/adapters/test_generate_docs.py +6 -6
- kiln_ai/adapters/test_ollama_tools.py +0 -1
- kiln_ai/adapters/test_prompt_adaptors.py +17 -14
- kiln_ai/adapters/test_prompt_builders.py +91 -31
- kiln_ai/datamodel/__init__.py +50 -952
- kiln_ai/datamodel/datamodel_enums.py +58 -0
- kiln_ai/datamodel/dataset_filters.py +114 -0
- kiln_ai/datamodel/dataset_split.py +170 -0
- kiln_ai/datamodel/eval.py +298 -0
- kiln_ai/datamodel/finetune.py +105 -0
- kiln_ai/datamodel/json_schema.py +6 -0
- kiln_ai/datamodel/project.py +23 -0
- kiln_ai/datamodel/prompt.py +37 -0
- kiln_ai/datamodel/prompt_id.py +83 -0
- kiln_ai/datamodel/strict_mode.py +24 -0
- kiln_ai/datamodel/task.py +181 -0
- kiln_ai/datamodel/task_output.py +321 -0
- kiln_ai/datamodel/task_run.py +164 -0
- kiln_ai/datamodel/test_basemodel.py +10 -11
- kiln_ai/datamodel/test_dataset_filters.py +71 -0
- kiln_ai/datamodel/test_dataset_split.py +32 -8
- kiln_ai/datamodel/test_datasource.py +3 -2
- kiln_ai/datamodel/test_eval_model.py +635 -0
- kiln_ai/datamodel/test_example_models.py +9 -13
- kiln_ai/datamodel/test_json_schema.py +23 -0
- kiln_ai/datamodel/test_models.py +2 -2
- kiln_ai/datamodel/test_prompt_id.py +129 -0
- kiln_ai/datamodel/test_task.py +159 -0
- kiln_ai/utils/config.py +6 -1
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/METADATA +37 -1
- kiln_ai-0.12.0.dist-info/RECORD +100 -0
- kiln_ai-0.11.1.dist-info/RECORD +0 -76
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.11.1.dist-info → kiln_ai-0.12.0.dist-info}/licenses/LICENSE.txt +0 -0
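The biggest structural change in 0.12.0 is the split of the monolithic kiln_ai/datamodel/__init__.py (-952 lines) into focused submodules (datamodel_enums, dataset_filters, dataset_split, eval, finetune, project, prompt, prompt_id, strict_mode, task, task_output, task_run), plus a new kiln_ai/adapters/eval package. A minimal import sketch, based only on the import changes visible in the test diff below; whether every type remains re-exported from the package root is not confirmed by this listing:

# Sketch only: import paths taken from the 0.12.0 test diff below.
from kiln_ai.datamodel import TaskOutputRating, TaskRun                 # still imported from the package root in the diff
from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig   # new submodule introduced in 0.12.0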
kiln_ai/adapters/test_prompt_builders.py

@@ -1,8 +1,9 @@
 import json
+import logging

 import pytest

-from kiln_ai.adapters.model_adapters.base_adapter import
+from kiln_ai.adapters.model_adapters.base_adapter import BaseAdapter
 from kiln_ai.adapters.model_adapters.test_structured_output import (
     build_structured_output_test_task,
 )
@@ -16,8 +17,9 @@ from kiln_ai.adapters.prompt_builders import (
     SavedPromptBuilder,
     SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
+    TaskRunConfigPromptBuilder,
     chain_of_thought_prompt,
-
+    prompt_builder_from_id,
 )
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
 from kiln_ai.datamodel import (
@@ -32,6 +34,9 @@ from kiln_ai.datamodel import (
     TaskOutputRating,
     TaskRun,
 )
+from kiln_ai.datamodel.task import RunConfigProperties, TaskRunConfig
+
+logger = logging.getLogger(__name__)


 def test_simple_prompt_builder(tmp_path):
@@ -57,12 +62,8 @@ class MockAdapter(BaseAdapter):
     def _run(self, input: str) -> str:
         return "mock response"

-    def
-        return
-            adapter_name="mock_adapter",
-            model_name="mock_model",
-            model_provider="mock_provider",
-        )
+    def adapter_name(self) -> str:
+        return "mock_adapter"


 def test_simple_prompt_builder_structured_output(tmp_path):
@@ -269,7 +270,6 @@ def test_few_shot_prompt_builder(tmp_path):
                 rating=TaskOutputRating(value=4 + (i % 2), reason="Good joke"),
             ),
         )
-        print("RATING", "Joke Initial Output ", i + 1, " - RATED:", 4 + (i % 2), "\n")
         if i < 2:
             run = run.model_copy(
                 update={
@@ -290,7 +290,7 @@ def test_few_shot_prompt_builder(tmp_path):
     prompt = prompt_builder.build_prompt(include_json_instructions=False)
     assert prompt.count("## Example") == 4

-
+    logger.info("PROMPT: %s", prompt)
     # Verify the order of examples (2 repaired, then 2 highest-rated)
     assert "Repaired Joke 1" in prompt
     assert "Repaired Joke 2" in prompt
@@ -314,54 +314,53 @@ def check_example_outputs(task: Task, count: int):
     assert f"## Example {count}" in prompt


-def
-    assert SimplePromptBuilder.prompt_builder_name() == "simple_prompt_builder"
-    assert MultiShotPromptBuilder.prompt_builder_name() == "multi_shot_prompt_builder"
-    assert RepairsPromptBuilder.prompt_builder_name() == "repairs_prompt_builder"
-
-
-def test_prompt_builder_from_ui_name(task_with_examples):
+def test_prompt_builder_from_id(task_with_examples):
     task = task_with_examples
-    assert isinstance(prompt_builder_from_ui_name("basic", task), SimplePromptBuilder)
     assert isinstance(
-
+        prompt_builder_from_id("simple_prompt_builder", task), SimplePromptBuilder
     )
     assert isinstance(
-
+        prompt_builder_from_id("few_shot_prompt_builder", task),
+        FewShotPromptBuilder,
     )
     assert isinstance(
-
+        prompt_builder_from_id("multi_shot_prompt_builder", task),
+        MultiShotPromptBuilder,
     )
     assert isinstance(
-
+        prompt_builder_from_id("repairs_prompt_builder", task),
+        RepairsPromptBuilder,
+    )
+    assert isinstance(
+        prompt_builder_from_id("simple_chain_of_thought_prompt_builder", task),
         SimpleChainOfThoughtPromptBuilder,
     )
     assert isinstance(
-
+        prompt_builder_from_id("few_shot_chain_of_thought_prompt_builder", task),
         FewShotChainOfThoughtPromptBuilder,
     )
     assert isinstance(
-
+        prompt_builder_from_id("multi_shot_chain_of_thought_prompt_builder", task),
         MultiShotChainOfThoughtPromptBuilder,
     )

-    with pytest.raises(ValueError, match="Unknown prompt
-
+    with pytest.raises(ValueError, match="Unknown prompt generator: invalid_name"):
+        prompt_builder_from_id("invalid_name", task)

     with pytest.raises(ValueError, match="Prompt ID not found: 123"):
-
+        prompt_builder_from_id("id::123", task)

     with pytest.raises(
         ValueError,
         match="Invalid fine-tune ID format. Expected 'project_id::task_id::fine_tune_id'",
     ):
-
+        prompt_builder_from_id("fine_tune_prompt::123", task)

     with pytest.raises(
         ValueError,
         match="Fine-tune ID not found",
     ):
-
+        prompt_builder_from_id("fine_tune_prompt::123::456::789", task)

     prompt = Prompt(
         name="test_prompt_name",
@@ -370,7 +369,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
         parent=task,
     )
     prompt.save_to_file()
-    pb =
+    pb = prompt_builder_from_id("id::" + prompt.id, task)
     assert isinstance(pb, SavedPromptBuilder)
     assert pb.prompt_id() == prompt.id
     assert pb.build_prompt(include_json_instructions=False) == "test_prompt"
@@ -390,7 +389,7 @@ def test_prompt_builder_from_ui_name(task_with_examples):
     nested_fine_tune_id = (
         task_with_examples.parent.id + "::" + task_with_examples.id + "::" + finetune.id
     )
-    pb =
+    pb = prompt_builder_from_id(
         "fine_tune_prompt::" + nested_fine_tune_id,
         task_with_examples,
     )
@@ -587,3 +586,64 @@ def test_build_prompt_with_json_instructions(tmp_path):
     assert task.instruction in prompt_with_json
     for requirement in task.requirements:
         assert requirement.instruction in prompt_with_json
+
+
+def test_task_run_config_prompt_builder(tmp_path):
+    task = build_test_task(tmp_path)
+
+    run_config = TaskRunConfig(
+        name="test_run_config",
+        parent=task,
+        run_config_properties=RunConfigProperties(
+            model_name="gpt-4",
+            model_provider_name="openai",
+            prompt_id="simple_prompt_builder",
+        ),
+        prompt=Prompt(
+            name="test prompt name",
+            prompt="test prompt content",
+            chain_of_thought_instructions="test step by step",
+        ),
+    )
+    run_config.save_to_file()
+
+    # Construct the eval prompt ID
+    run_config_prompt_id = (
+        f"task_run_config::{task.parent.id}::{task.id}::{run_config.id}"
+    )
+
+    # Test successful creation 2 ways: constructor and ID creation
+    builders = [
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id=run_config_prompt_id
+        ),
+        prompt_builder_from_id(run_config_prompt_id, task),
+    ]
+
+    for builder in builders:
+        assert (
+            builder.build_prompt(include_json_instructions=False)
+            == "test prompt content"
+        )
+        assert builder.chain_of_thought_prompt() == "test step by step"
+        assert builder.prompt_id() == run_config_prompt_id
+
+
+def test_task_run_config_prompt_builder_validation_errors(tmp_path):
+    task = build_test_task(tmp_path)
+
+    # Test invalid format
+    with pytest.raises(ValueError, match="Invalid task run config prompt ID"):
+        TaskRunConfigPromptBuilder(
+            task=task, run_config_prompt_id="task_run_config::wrong::format"
+        )
+
+    # Test task ID mismatch
+    wrong_task_id = f"task_run_config::{task.parent.id}::wrong_task_id::config_id"
+    with pytest.raises(ValueError, match="Task ID mismatch"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=wrong_task_id)
+
+    # Test eval not found
+    nonexistent_eval = f"task_run_config::{task.parent.id}::{task.id}::nonexistent_id"
+    with pytest.raises(ValueError, match="Task run config ID not found"):
+        TaskRunConfigPromptBuilder(task=task, run_config_prompt_id=nonexistent_eval)