kiln-ai 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
- kiln_ai/adapters/__init__.py +9 -1
- kiln_ai/adapters/base_adapter.py +24 -35
- kiln_ai/adapters/data_gen/__init__.py +11 -0
- kiln_ai/adapters/data_gen/data_gen_prompts.py +73 -0
- kiln_ai/adapters/data_gen/data_gen_task.py +185 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +293 -0
- kiln_ai/adapters/langchain_adapters.py +39 -7
- kiln_ai/adapters/ml_model_list.py +55 -1
- kiln_ai/adapters/prompt_builders.py +66 -0
- kiln_ai/adapters/repair/test_repair_task.py +4 -1
- kiln_ai/adapters/test_langchain_adapter.py +73 -0
- kiln_ai/adapters/test_ml_model_list.py +56 -0
- kiln_ai/adapters/test_prompt_adaptors.py +52 -18
- kiln_ai/adapters/test_prompt_builders.py +97 -7
- kiln_ai/adapters/test_saving_adapter_results.py +16 -6
- kiln_ai/adapters/test_structured_output.py +33 -5
- kiln_ai/datamodel/__init__.py +28 -7
- kiln_ai/datamodel/json_schema.py +1 -0
- kiln_ai/datamodel/test_models.py +44 -8
- kiln_ai/utils/config.py +3 -2
- kiln_ai/utils/test_config.py +7 -0
- {kiln_ai-0.5.5.dist-info → kiln_ai-0.6.1.dist-info}/METADATA +1 -2
- kiln_ai-0.6.1.dist-info/RECORD +37 -0
- {kiln_ai-0.5.5.dist-info → kiln_ai-0.6.1.dist-info}/WHEEL +1 -1
- kiln_ai-0.5.5.dist-info/RECORD +0 -33
- {kiln_ai-0.5.5.dist-info → kiln_ai-0.6.1.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/test_langchain_adapter.py
CHANGED

@@ -1,6 +1,10 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_groq import ChatGroq
 
 from kiln_ai.adapters.langchain_adapters import LangChainPromptAdapter
+from kiln_ai.adapters.prompt_builders import SimpleChainOfThoughtPromptBuilder
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task
 
 
@@ -49,3 +53,72 @@ def test_langchain_adapter_info(tmp_path):
     assert model_info.adapter_name == "kiln_langchain_adapter"
     assert model_info.model_name == "llama_3_1_8b"
     assert model_info.model_provider == "ollama"
+
+
+async def test_langchain_adapter_with_cot(tmp_path):
+    task = build_test_task(tmp_path)
+    task.output_json_schema = (
+        '{"type": "object", "properties": {"count": {"type": "integer"}}}'
+    )
+    lca = LangChainPromptAdapter(
+        kiln_task=task,
+        model_name="llama_3_1_8b",
+        provider="ollama",
+        prompt_builder=SimpleChainOfThoughtPromptBuilder(task),
+    )
+
+    # Mock the base model and its invoke method
+    mock_base_model = MagicMock()
+    mock_base_model.invoke.return_value = AIMessage(
+        content="Chain of thought reasoning..."
+    )
+
+    # Create a separate mock for self.model()
+    mock_model_instance = MagicMock()
+    mock_model_instance.invoke.return_value = {"parsed": {"count": 1}}
+
+    # Mock the langchain_model_from function to return the base model
+    mock_model_from = AsyncMock(return_value=mock_base_model)
+
+    # Patch both the langchain_model_from function and self.model()
+    with (
+        patch(
+            "kiln_ai.adapters.langchain_adapters.langchain_model_from", mock_model_from
+        ),
+        patch.object(LangChainPromptAdapter, "model", return_value=mock_model_instance),
+    ):
+        response = await lca._run("test input")
+
+    # First 3 messages are the same for both calls
+    for invoke_args in [
+        mock_base_model.invoke.call_args[0][0],
+        mock_model_instance.invoke.call_args[0][0],
+    ]:
+        assert isinstance(
+            invoke_args[0], SystemMessage
+        )  # First message should be system prompt
+        assert (
+            "You are an assistant which performs math tasks provided in plain text."
+            in invoke_args[0].content
+        )
+        assert isinstance(invoke_args[1], HumanMessage)
+        assert "test input" in invoke_args[1].content
+        assert isinstance(invoke_args[2], SystemMessage)
+        assert "step by step" in invoke_args[2].content
+
+    # the COT should only have 3 messages
+    assert len(mock_base_model.invoke.call_args[0][0]) == 3
+    assert len(mock_model_instance.invoke.call_args[0][0]) == 5
+
+    # the final response should have the COT content and the final instructions
+    invoke_args = mock_model_instance.invoke.call_args[0][0]
+    assert isinstance(invoke_args[3], AIMessage)
+    assert "Chain of thought reasoning..." in invoke_args[3].content
+    assert isinstance(invoke_args[4], SystemMessage)
+    assert "Considering the above, return a final result." in invoke_args[4].content
+
+    assert (
+        response.intermediate_outputs["chain_of_thought"]
+        == "Chain of thought reasoning..."
+    )
+    assert response.output == {"count": 1}
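Note: `test_langchain_adapter_with_cot` pins down a two-phase chain-of-thought flow: the base model is invoked once with three messages (system prompt, user input, thinking instruction) to produce free-form reasoning, then the structured-output model is invoked with five messages, with the reasoning appended as an `AIMessage` followed by a closing instruction. A minimal sketch of that message assembly, using only the message texts the test asserts on (the helper names are illustrative, not Kiln's API):

```python
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

FINAL_INSTRUCTION = "Considering the above, return a final result."


def cot_phase_messages(system_prompt: str, user_input: str, thinking_instruction: str):
    # Phase 1: ask the base model to think out loud (3 messages).
    return [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_input),
        SystemMessage(content=thinking_instruction),
    ]


def final_phase_messages(cot_messages: list, reasoning: str):
    # Phase 2: replay the same 3 messages, then append the model's reasoning
    # and a final instruction before requesting the structured answer (5 messages).
    return cot_messages + [
        AIMessage(content=reasoning),
        SystemMessage(content=FINAL_INSTRUCTION),
    ]
```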
kiln_ai/adapters/test_ml_model_list.py
CHANGED

@@ -4,9 +4,11 @@ from unittest.mock import patch
 import pytest
 
 from kiln_ai.adapters.ml_model_list import (
+    ModelName,
     ModelProviderName,
     OllamaConnection,
     check_provider_warnings,
+    get_model_and_provider,
     ollama_model_supported,
     parse_ollama_tags,
     provider_name_from_id,
@@ -123,3 +125,57 @@ def test_ollama_model_supported():
     assert ollama_model_supported(conn, "llama3.1:latest")
     assert ollama_model_supported(conn, "llama3.1")
     assert not ollama_model_supported(conn, "unknown_model")
+
+
+def test_get_model_and_provider_valid():
+    # Test with a known valid model and provider combination
+    model, provider = get_model_and_provider(
+        ModelName.phi_3_5, ModelProviderName.ollama
+    )
+
+    assert model is not None
+    assert provider is not None
+    assert model.name == ModelName.phi_3_5
+    assert provider.name == ModelProviderName.ollama
+    assert provider.provider_options["model"] == "phi3.5"
+
+
+def test_get_model_and_provider_invalid_model():
+    # Test with an invalid model name
+    model, provider = get_model_and_provider(
+        "nonexistent_model", ModelProviderName.ollama
+    )
+
+    assert model is None
+    assert provider is None
+
+
+def test_get_model_and_provider_invalid_provider():
+    # Test with a valid model but invalid provider
+    model, provider = get_model_and_provider(ModelName.phi_3_5, "nonexistent_provider")
+
+    assert model is None
+    assert provider is None
+
+
+def test_get_model_and_provider_valid_model_wrong_provider():
+    # Test with a valid model but a provider that doesn't support it
+    model, provider = get_model_and_provider(
+        ModelName.phi_3_5, ModelProviderName.amazon_bedrock
+    )
+
+    assert model is None
+    assert provider is None
+
+
+def test_get_model_and_provider_multiple_providers():
+    # Test with a model that has multiple providers
+    model, provider = get_model_and_provider(
+        ModelName.llama_3_1_70b, ModelProviderName.groq
+    )
+
+    assert model is not None
+    assert provider is not None
+    assert model.name == ModelName.llama_3_1_70b
+    assert provider.name == ModelProviderName.groq
+    assert provider.provider_options["model"] == "llama-3.1-70b-versatile"
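These tests fix `get_model_and_provider`'s contract: a `(model, provider)` pair when the combination exists, and `(None, None)` for an unknown model, an unknown provider, or a valid model paired with a provider that doesn't serve it. A sketch of a lookup honoring that contract (the registry shape is inferred from how the tests iterate `model.providers`; this is not the package's actual implementation):

```python
def get_model_and_provider_sketch(built_in_models, model_name, provider_name):
    # Return (model, provider) on an exact match; (None, None) otherwise.
    # Lookups never raise, even for entirely unknown names.
    for model in built_in_models:
        if model.name == model_name:
            for provider in model.providers:
                if provider.name == provider_name:
                    return model, provider
            return None, None  # known model, but this provider doesn't serve it
    return None, None  # unknown model
```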
kiln_ai/adapters/test_prompt_adaptors.py
CHANGED

@@ -7,6 +7,18 @@ from langchain_core.language_models.fake_chat_models import FakeListChatModel
 import kiln_ai.datamodel as datamodel
 from kiln_ai.adapters.langchain_adapters import LangChainPromptAdapter
 from kiln_ai.adapters.ml_model_list import built_in_models, ollama_online
+from kiln_ai.adapters.prompt_builders import (
+    BasePromptBuilder,
+    SimpleChainOfThoughtPromptBuilder,
+)
+
+
+def get_all_models_and_providers():
+    model_provider_pairs = []
+    for model in built_in_models:
+        for provider in model.providers:
+            model_provider_pairs.append((model.name, provider.name))
+    return model_provider_pairs
 
 
 @pytest.mark.paid

@@ -30,6 +42,7 @@ async def test_groq(tmp_path):
         "llama_3_2_90b",
         "claude_3_5_haiku",
         "claude_3_5_sonnet",
+        "phi_3_5",
     ],
 )
 @pytest.mark.paid

@@ -119,15 +132,19 @@ async def test_mock_returning_run(tmp_path):
 
 @pytest.mark.paid
 @pytest.mark.ollama
-
+@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
+async def test_all_models_providers_plaintext(tmp_path, model_name, provider_name):
     task = build_test_task(tmp_path)
-
-
-
-
-
-
-
+    await run_simple_task(task, model_name, provider_name)
+
+
+@pytest.mark.paid
+@pytest.mark.ollama
+@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
+async def test_cot_prompt_builder(tmp_path, model_name, provider_name):
+    task = build_test_task(tmp_path)
+    pb = SimpleChainOfThoughtPromptBuilder(task)
+    await run_simple_task(task, model_name, provider_name, pb)
 
 
 def build_test_task(tmp_path: Path):

@@ -159,13 +176,25 @@ def build_test_task(tmp_path: Path):
     return task
 
 
-async def run_simple_test(
+async def run_simple_test(
+    tmp_path: Path,
+    model_name: str,
+    provider: str | None = None,
+    prompt_builder: BasePromptBuilder | None = None,
+):
     task = build_test_task(tmp_path)
-    return await run_simple_task(task, model_name, provider)
+    return await run_simple_task(task, model_name, provider, prompt_builder)
 
 
-async def run_simple_task(
-
+async def run_simple_task(
+    task: datamodel.Task,
+    model_name: str,
+    provider: str,
+    prompt_builder: BasePromptBuilder | None = None,
+) -> datamodel.TaskRun:
+    adapter = LangChainPromptAdapter(
+        task, model_name=model_name, provider=provider, prompt_builder=prompt_builder
+    )
 
     run = await adapter.invoke(
         "You should answer the following question: four plus six times 10"

@@ -176,9 +205,14 @@ async def run_simple_task(task: datamodel.Task, model_name: str, provider: str):
         run.input == "You should answer the following question: four plus six times 10"
     )
     assert "64" in run.output.output
-
-
-
-
-
-
+    source_props = run.output.source.properties
+    assert source_props["adapter_name"] == "kiln_langchain_adapter"
+    assert source_props["model_name"] == model_name
+    assert source_props["model_provider"] == provider
+    expected_prompt_builder_name = (
+        prompt_builder.__class__.prompt_builder_name()
+        if prompt_builder
+        else "simple_prompt_builder"
+    )
+    assert source_props["prompt_builder_name"] == expected_prompt_builder_name
+    return run
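This refactor replaces looped live tests with `@pytest.mark.parametrize` over `get_all_models_and_providers()`, so every model/provider pair becomes its own test node and one failing backend no longer masks the rest. The pattern in isolation (the pair list below is illustrative, standing in for the real registry):

```python
import pytest

# Illustrative stand-in for get_all_models_and_providers().
MODEL_PROVIDER_PAIRS = [
    ("llama_3_1_8b", "ollama"),
    ("llama_3_1_70b", "groq"),
]


@pytest.mark.parametrize("model_name,provider_name", MODEL_PROVIDER_PAIRS)
def test_pair_is_well_formed(model_name, provider_name):
    # pytest generates one independent test case per (model, provider) tuple.
    assert model_name and provider_name
```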
kiln_ai/adapters/test_prompt_builders.py
CHANGED

@@ -4,10 +4,14 @@ import pytest
 
 from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter
 from kiln_ai.adapters.prompt_builders import (
+    FewShotChainOfThoughtPromptBuilder,
     FewShotPromptBuilder,
+    MultiShotChainOfThoughtPromptBuilder,
     MultiShotPromptBuilder,
     RepairsPromptBuilder,
+    SimpleChainOfThoughtPromptBuilder,
     SimplePromptBuilder,
+    chain_of_thought_prompt,
     prompt_builder_from_ui_name,
 )
 from kiln_ai.adapters.test_prompt_adaptors import build_test_task

@@ -43,9 +47,6 @@ def test_simple_prompt_builder(tmp_path):
 
 
 class MockAdapter(BaseAdapter):
-    def adapter_specific_instructions(self) -> str | None:
-        return "You are a mock, send me the response!"
-
     def _run(self, input: str) -> str:
         return "mock response"
 

@@ -64,10 +65,6 @@ def test_simple_prompt_builder_structured_output(tmp_path):
     prompt = builder.build_prompt()
     assert "You are an assistant which tells a joke, given a subject." in prompt
 
-    # check adapter instructions are included
-    run_adapter = MockAdapter(task, prompt_builder=builder)
-    assert "You are a mock, send me the response!" in run_adapter.build_prompt()
-
     user_msg = builder.build_user_message(input)
     assert input in user_msg
     assert input not in prompt

@@ -313,6 +310,18 @@ def test_prompt_builder_from_ui_name():
     assert prompt_builder_from_ui_name("few_shot") == FewShotPromptBuilder
     assert prompt_builder_from_ui_name("many_shot") == MultiShotPromptBuilder
     assert prompt_builder_from_ui_name("repairs") == RepairsPromptBuilder
+    assert (
+        prompt_builder_from_ui_name("simple_chain_of_thought")
+        == SimpleChainOfThoughtPromptBuilder
+    )
+    assert (
+        prompt_builder_from_ui_name("few_shot_chain_of_thought")
+        == FewShotChainOfThoughtPromptBuilder
+    )
+    assert (
+        prompt_builder_from_ui_name("multi_shot_chain_of_thought")
+        == MultiShotChainOfThoughtPromptBuilder
+    )
 
     with pytest.raises(ValueError, match="Unknown prompt builder: invalid_name"):
         prompt_builder_from_ui_name("invalid_name")

@@ -336,3 +345,84 @@ def test_repair_multi_shot_prompt_builder(task_with_examples):
         'Initial Output Which Was Insufficient: {"joke": "Moo I am a cow joke."}'
         in prompt
     )
+
+
+def test_chain_of_thought_prompt(tmp_path):
+    # Test with default thinking instruction
+    task = Task(
+        name="Test Task",
+        instruction="Test instruction",
+        parent=None,
+        thinking_instruction=None,
+    )
+    assert (
+        chain_of_thought_prompt(task)
+        == "Think step by step, explaining your reasoning."
+    )
+
+    # Test with custom thinking instruction
+    custom_instruction = "First analyze the problem, then break it down into steps."
+    task = Task(
+        name="Test Task",
+        instruction="Test instruction",
+        parent=None,
+        thinking_instruction=custom_instruction,
+    )
+    assert chain_of_thought_prompt(task) == custom_instruction
+
+
+@pytest.mark.parametrize(
+    "builder_class",
+    [
+        SimpleChainOfThoughtPromptBuilder,
+        FewShotChainOfThoughtPromptBuilder,
+        MultiShotChainOfThoughtPromptBuilder,
+    ],
+)
+def test_chain_of_thought_prompt_builders(builder_class, task_with_examples):
+    # Test with default thinking instruction
+    builder = builder_class(task=task_with_examples)
+    assert (
+        builder.chain_of_thought_prompt()
+        == "Think step by step, explaining your reasoning."
+    )
+
+    # Test with custom thinking instruction
+    custom_instruction = "First analyze the problem, then break it down into steps."
+    task_with_custom = task_with_examples.model_copy(
+        update={"thinking_instruction": custom_instruction}
+    )
+    builder = builder_class(task=task_with_custom)
+    assert builder.chain_of_thought_prompt() == custom_instruction
+
+
+def test_build_prompt_for_ui(tmp_path):
+    # Test regular prompt builder
+    task = build_test_task(tmp_path)
+    simple_builder = SimplePromptBuilder(task=task)
+    ui_prompt = simple_builder.build_prompt_for_ui()
+
+    # Should match regular prompt since no chain of thought
+    assert ui_prompt == simple_builder.build_prompt()
+    assert "# Thinking Instructions" not in ui_prompt
+
+    # Test chain of thought prompt builder
+    cot_builder = SimpleChainOfThoughtPromptBuilder(task=task)
+    ui_prompt_cot = cot_builder.build_prompt_for_ui()
+
+    # Should include both base prompt and thinking instructions
+    assert cot_builder.build_prompt() in ui_prompt_cot
+    assert "# Thinking Instructions" in ui_prompt_cot
+    assert "Think step by step" in ui_prompt_cot
+
+    # Test with custom thinking instruction
+    custom_instruction = "First analyze the problem, then solve it."
+    task_with_custom = task.model_copy(
+        update={"thinking_instruction": custom_instruction}
+    )
+    custom_cot_builder = SimpleChainOfThoughtPromptBuilder(task=task_with_custom)
+    ui_prompt_custom = custom_cot_builder.build_prompt_for_ui()
+
+    assert custom_cot_builder.build_prompt() in ui_prompt_custom
+    assert "# Thinking Instructions" in ui_prompt_custom
+    assert custom_instruction in ui_prompt_custom
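Per the assertions above, `chain_of_thought_prompt` returns the task's `thinking_instruction` when one is set and otherwise falls back to a default, while `build_prompt_for_ui` appends the thinking instruction under a `# Thinking Instructions` heading only for chain-of-thought builders. A sketch consistent with those assertions (function bodies are inferred from the tests, not quoted from the package):

```python
DEFAULT_COT = "Think step by step, explaining your reasoning."


def chain_of_thought_prompt_sketch(task) -> str:
    # A custom instruction wins; otherwise use the default wording the tests expect.
    return task.thinking_instruction or DEFAULT_COT


def build_prompt_for_ui_sketch(base_prompt: str, cot_instruction: str | None) -> str:
    # Plain builders show the base prompt unchanged; chain-of-thought builders
    # surface the thinking instruction in a visible section.
    if cot_instruction is None:
        return base_prompt
    return f"{base_prompt}\n\n# Thinking Instructions\n\n{cot_instruction}"
```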
kiln_ai/adapters/test_saving_adapter_results.py
CHANGED

@@ -2,7 +2,7 @@ from unittest.mock import patch
 
 import pytest
 
-from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter
+from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter, RunOutput
 from kiln_ai.datamodel import (
     DataSource,
     DataSourceType,

@@ -14,7 +14,7 @@ from kiln_ai.utils.config import Config
 
 class MockAdapter(BaseAdapter):
     async def _run(self, input: dict | str) -> dict | str:
-        return "Test output"
+        return RunOutput(output="Test output", intermediate_outputs=None)
 
     def adapter_info(self) -> AdapterInfo:
         return AdapterInfo(

@@ -42,9 +42,13 @@ def test_save_run_isolation(test_task):
     adapter = MockAdapter(test_task)
     input_data = "Test input"
     output_data = "Test output"
+    run_output = RunOutput(
+        output=output_data,
+        intermediate_outputs={"chain_of_thought": "Test chain of thought"},
+    )
 
     task_run = adapter.generate_run(
-        input=input_data, input_source=None,
+        input=input_data, input_source=None, run_output=run_output
     )
     task_run.save_to_file()
 

@@ -52,6 +56,9 @@ def test_save_run_isolation(test_task):
     assert task_run.parent == test_task
     assert task_run.input == input_data
     assert task_run.input_source.type == DataSourceType.human
+    assert task_run.intermediate_outputs == {
+        "chain_of_thought": "Test chain of thought"
+    }
     created_by = Config.shared().user_id
     if created_by and created_by != "":
         assert task_run.input_source.properties["created_by"] == created_by

@@ -86,13 +93,16 @@ def test_save_run_isolation(test_task):
     )
 
     # Run again, with same input and different output. Should create a new TaskRun.
-
+    different_run_output = RunOutput(
+        output="Different output", intermediate_outputs=None
+    )
+    task_output = adapter.generate_run(input_data, None, different_run_output)
     task_output.save_to_file()
     assert len(test_task.runs()) == 2
     assert "Different output" in set(run.output.output for run in test_task.runs())
 
     # run again with same input and same output. Should not create a new TaskRun.
-    task_output = adapter.generate_run(input_data, None,
+    task_output = adapter.generate_run(input_data, None, run_output)
     task_output.save_to_file()
     assert len(test_task.runs()) == 2
     assert "Different output" in set(run.output.output for run in test_task.runs())

@@ -110,7 +120,7 @@ def test_save_run_isolation(test_task):
                 "adapter_name": "mock_adapter",
             },
         ),
-
+        run_output,
     )
     task_output.save_to_file()
     assert len(test_task.runs()) == 3
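The thread through these hunks: `_run` now returns a `RunOutput` instead of a bare value, `generate_run` accepts it as a third argument, and any intermediate outputs are persisted onto `TaskRun.intermediate_outputs`. A dataclass-style sketch of the shape implied by the tests (the real class lives in `kiln_ai.adapters.base_adapter`; this stand-in only mirrors the two fields the tests use):

```python
from dataclasses import dataclass


@dataclass
class RunOutputSketch:
    # The model's final answer: plain text or parsed structured output.
    output: str | dict
    # Optional named intermediate steps, e.g. {"chain_of_thought": "..."}.
    intermediate_outputs: dict[str, str] | None = None
```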
kiln_ai/adapters/test_structured_output.py
CHANGED

@@ -6,12 +6,17 @@ import jsonschema.exceptions
 import pytest
 
 import kiln_ai.datamodel as datamodel
-from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter
+from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter, RunOutput
 from kiln_ai.adapters.langchain_adapters import LangChainPromptAdapter
 from kiln_ai.adapters.ml_model_list import (
     built_in_models,
     ollama_online,
 )
+from kiln_ai.adapters.prompt_builders import (
+    BasePromptBuilder,
+    SimpleChainOfThoughtPromptBuilder,
+)
+from kiln_ai.adapters.test_prompt_adaptors import get_all_models_and_providers
 from kiln_ai.datamodel.test_json_schema import json_joke_schema, json_triangle_schema
 
 

@@ -59,8 +64,8 @@ class MockAdapter(BaseAdapter):
         super().__init__(kiln_task)
         self.response = response
 
-    async def _run(self, input: str) ->
-        return self.response
+    async def _run(self, input: str) -> RunOutput:
+        return RunOutput(output=self.response, intermediate_outputs=None)
 
     def adapter_info(self) -> AdapterInfo:
         return AdapterInfo(

@@ -190,7 +195,18 @@ def build_structured_input_test_task(tmp_path: Path):
 
 async def run_structured_input_test(tmp_path: Path, model_name: str, provider: str):
     task = build_structured_input_test_task(tmp_path)
-
+    await run_structured_input_task(task, model_name, provider)
+
+
+async def run_structured_input_task(
+    task: datamodel.Task,
+    model_name: str,
+    provider: str,
+    pb: BasePromptBuilder | None = None,
+):
+    a = LangChainPromptAdapter(
+        task, model_name=model_name, provider=provider, prompt_builder=pb
+    )
     with pytest.raises(ValueError):
         # not structured input in dictionary
         await a.invoke("a=1, b=2, c=3")

@@ -203,7 +219,10 @@ async def run_structured_input_test(tmp_path: Path, model_name: str, provider: str):
     assert isinstance(response, str)
     assert "[[equilateral]]" in response
     adapter_info = a.adapter_info()
-
+    expected_pb_name = "simple_prompt_builder"
+    if pb is not None:
+        expected_pb_name = pb.__class__.prompt_builder_name()
+    assert adapter_info.prompt_builder_name == expected_pb_name
     assert adapter_info.model_name == model_name
     assert adapter_info.model_provider == provider
     assert adapter_info.adapter_name == "kiln_langchain_adapter"

@@ -224,3 +243,12 @@ async def test_all_built_in_models_structured_input(tmp_path):
                 await run_structured_input_test(tmp_path, model.name, provider.name)
             except Exception as e:
                 raise RuntimeError(f"Error running {model.name} {provider}") from e
+
+
+@pytest.mark.paid
+@pytest.mark.ollama
+@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
+async def test_structured_cot_prompt_builder(tmp_path, model_name, provider_name):
+    task = build_structured_input_test_task(tmp_path)
+    pb = SimpleChainOfThoughtPromptBuilder(task)
+    await run_structured_input_task(task, model_name, provider_name, pb)
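The `pytest.raises(ValueError)` block documents the adapter's input contract: a task with an input schema rejects plain-string input before any model call. A sketch of that validation step (an assumed shape mirroring the behavior the test requires, not Kiln's actual code):

```python
import jsonschema


def validate_structured_input(input_data, input_schema: dict | None) -> None:
    # Tasks with an input schema require a dict that validates against it;
    # a plain string like "a=1, b=2, c=3" must raise ValueError.
    if input_schema is None:
        return
    if not isinstance(input_data, dict):
        raise ValueError("structured input task requires a dictionary input")
    jsonschema.validate(instance=input_data, schema=input_schema)
```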
kiln_ai/datamodel/__init__.py
CHANGED

@@ -48,8 +48,18 @@ __all__ = [
 
 # Filename compatible names
 NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
-NAME_FIELD = Field(
-
+NAME_FIELD = Field(
+    min_length=1,
+    max_length=120,
+    pattern=NAME_REGEX,
+    description="A name for this entity.",
+)
+SHORT_NAME_FIELD = Field(
+    min_length=1,
+    max_length=32,
+    pattern=NAME_REGEX,
+    description="A name for this entity",
+)
 
 
 class Priority(IntEnum):

@@ -280,6 +290,10 @@ class TaskRun(KilnParentedModel):
         default=None,
         description="An version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
     )
+    intermediate_outputs: Dict[str, str] | None = Field(
+        default=None,
+        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
+    )
 
     def parent_task(self) -> Task | None:
         if not isinstance(self.parent, Task):

@@ -372,14 +386,21 @@ class Task(
     """
 
     name: str = NAME_FIELD
-    description: str = Field(
-
-
-
+    description: str | None = Field(
+        default=None,
+        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
+    )
+    instruction: str = Field(
+        min_length=1,
+        description="The instructions for the task. Will be used in prompts/training/validation.",
+    )
     requirements: List[TaskRequirement] = Field(default=[])
-    # TODO: make this required, or formalize the default message output schema
     output_json_schema: JsonObjectSchema | None = None
     input_json_schema: JsonObjectSchema | None = None
+    thinking_instruction: str | None = Field(
+        default=None,
+        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
+    )
 
     def output_schema(self) -> Dict | None:
         if self.output_json_schema is None:
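In practice the new fields combine like this: `thinking_instruction` feeds the chain-of-thought prompt builders, and `intermediate_outputs` stores what the model produced along the way. A small usage sketch constructing a detached Task, as the tests do with `parent=None` (field values are illustrative; only the field names and constraints come from the diff):

```python
from kiln_ai.datamodel import Task

# `description` may now be omitted; `instruction` must be non-empty (min_length=1);
# `thinking_instruction` customizes chain-of-thought prompting.
task = Task(
    name="Math Helper",  # NAME_FIELD: 1-120 chars matching ^[A-Za-z0-9 _-]+$
    instruction="Answer arithmetic questions given in plain text.",
    parent=None,
    thinking_instruction="Work through the problem in numbered steps first.",
)
```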
kiln_ai/datamodel/json_schema.py
CHANGED

@@ -64,6 +64,7 @@ def schema_from_json_str(v: str) -> Dict:
     jsonschema.Draft202012Validator.check_schema(parsed)
     if not isinstance(parsed, dict):
         raise ValueError(f"JSON schema must be a dict, not {type(parsed)}")
+    # Top level arrays are valid JSON schemas, but we don't want to allow them here as they often cause issues
     if (
         "type" not in parsed
         or parsed["type"] != "object"
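The hunk cuts off mid-condition, but the quoted lines fix the behavior: after draft validation, `schema_from_json_str` insists the top-level schema is an object. A condensed sketch of that path (the surrounding parse step and the error wording outside the quoted lines are assumptions):

```python
import json

import jsonschema


def schema_from_json_str_sketch(v: str) -> dict:
    parsed = json.loads(v)
    # Reject anything that isn't valid JSON Schema (draft 2020-12).
    jsonschema.Draft202012Validator.check_schema(parsed)
    if not isinstance(parsed, dict):
        raise ValueError(f"JSON schema must be a dict, not {type(parsed)}")
    # Top-level arrays are valid JSON Schema, but are rejected here
    # because they often cause issues downstream.
    if "type" not in parsed or parsed["type"] != "object":
        raise ValueError("JSON schema must be of type 'object' at the top level")
    return parsed
```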
|