kiln-ai 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.


Files changed (58)
  1. kiln_ai/adapters/adapter_registry.py +28 -0
  2. kiln_ai/adapters/chat/chat_formatter.py +0 -1
  3. kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
  4. kiln_ai/adapters/data_gen/data_gen_task.py +51 -38
  5. kiln_ai/adapters/data_gen/test_data_gen_task.py +318 -37
  6. kiln_ai/adapters/eval/base_eval.py +6 -7
  7. kiln_ai/adapters/eval/eval_runner.py +5 -1
  8. kiln_ai/adapters/eval/g_eval.py +17 -12
  9. kiln_ai/adapters/eval/test_base_eval.py +8 -2
  10. kiln_ai/adapters/eval/test_eval_runner.py +6 -12
  11. kiln_ai/adapters/eval/test_g_eval.py +115 -5
  12. kiln_ai/adapters/eval/test_g_eval_data.py +1 -1
  13. kiln_ai/adapters/fine_tune/base_finetune.py +2 -6
  14. kiln_ai/adapters/fine_tune/dataset_formatter.py +1 -5
  15. kiln_ai/adapters/fine_tune/fireworks_finetune.py +32 -20
  16. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +1 -1
  17. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +30 -21
  18. kiln_ai/adapters/fine_tune/test_vertex_finetune.py +2 -7
  19. kiln_ai/adapters/fine_tune/together_finetune.py +1 -1
  20. kiln_ai/adapters/ml_model_list.py +926 -125
  21. kiln_ai/adapters/model_adapters/base_adapter.py +11 -7
  22. kiln_ai/adapters/model_adapters/litellm_adapter.py +23 -1
  23. kiln_ai/adapters/model_adapters/test_base_adapter.py +1 -2
  24. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +70 -3
  25. kiln_ai/adapters/model_adapters/test_structured_output.py +13 -13
  26. kiln_ai/adapters/parsers/parser_registry.py +0 -2
  27. kiln_ai/adapters/parsers/r1_parser.py +0 -1
  28. kiln_ai/adapters/parsers/test_r1_parser.py +1 -1
  29. kiln_ai/adapters/provider_tools.py +20 -19
  30. kiln_ai/adapters/remote_config.py +113 -0
  31. kiln_ai/adapters/repair/repair_task.py +2 -7
  32. kiln_ai/adapters/test_adapter_registry.py +30 -2
  33. kiln_ai/adapters/test_ml_model_list.py +30 -0
  34. kiln_ai/adapters/test_prompt_adaptors.py +0 -4
  35. kiln_ai/adapters/test_provider_tools.py +18 -12
  36. kiln_ai/adapters/test_remote_config.py +456 -0
  37. kiln_ai/datamodel/basemodel.py +54 -28
  38. kiln_ai/datamodel/datamodel_enums.py +2 -0
  39. kiln_ai/datamodel/dataset_split.py +5 -3
  40. kiln_ai/datamodel/eval.py +35 -3
  41. kiln_ai/datamodel/finetune.py +2 -3
  42. kiln_ai/datamodel/project.py +3 -3
  43. kiln_ai/datamodel/prompt.py +2 -2
  44. kiln_ai/datamodel/prompt_id.py +4 -4
  45. kiln_ai/datamodel/task.py +6 -6
  46. kiln_ai/datamodel/task_output.py +1 -3
  47. kiln_ai/datamodel/task_run.py +0 -2
  48. kiln_ai/datamodel/test_basemodel.py +210 -18
  49. kiln_ai/datamodel/test_eval_model.py +152 -10
  50. kiln_ai/datamodel/test_model_perf.py +1 -1
  51. kiln_ai/datamodel/test_prompt_id.py +5 -1
  52. kiln_ai/datamodel/test_task.py +5 -0
  53. kiln_ai/utils/config.py +10 -0
  54. kiln_ai/utils/logging.py +4 -3
  55. {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/METADATA +33 -3
  56. {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/RECORD +58 -56
  57. {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/WHEEL +0 -0
  58. {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/dataset_split.py CHANGED
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING
 
 from pydantic import BaseModel, Field, model_validator
 
-from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel
 from kiln_ai.datamodel.dataset_filters import (
     DatasetFilter,
     DatasetFilterId,
@@ -26,7 +26,9 @@ class DatasetSplitDefinition(BaseModel):
     Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(
+        description="The name of the dataset split definition."
+    )
     description: str | None = Field(
         default=None,
         description="A description of the dataset for you and your team. Not used in training.",
@@ -70,7 +72,7 @@ class DatasetSplit(KilnParentedModel):
     Maintains a list of IDs for each split, to avoid data duplication.
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the dataset split.")
     description: str | None = Field(
         default=None,
         description="A description of the dataset for you and your team. Not used in training.",
kiln_ai/datamodel/eval.py CHANGED
@@ -7,13 +7,14 @@ from typing_extensions import Self
 
 from kiln_ai.datamodel.basemodel import (
     ID_TYPE,
-    NAME_FIELD,
+    FilenameString,
     KilnParentedModel,
     KilnParentModel,
 )
 from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType
 from kiln_ai.datamodel.dataset_filters import DatasetFilterId
 from kiln_ai.datamodel.json_schema import string_to_json_key
+from kiln_ai.datamodel.task_run import Usage
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 
 if TYPE_CHECKING:
@@ -28,6 +29,7 @@ class EvalTemplateId(str, Enum):
     """
 
     kiln_requirements = "kiln_requirements"
+    issue = "kiln_issue"
     toxicity = "toxicity"
     bias = "bias"
     maliciousness = "maliciousness"
@@ -110,6 +112,10 @@ class EvalRun(KilnParentedModel):
     scores: EvalScores = Field(
         description="The output scores of the evaluator (aligning to those required by the grand-parent Eval this object is a child of)."
     )
+    task_run_usage: Usage | None = Field(
+        default=None,
+        description="The usage of the task run that produced this eval run output (not the usage by the evaluation model).",
+    )
 
     def parent_eval_config(self) -> Union["EvalConfig", None]:
         if self.parent is not None and self.parent.__class__.__name__ != "EvalConfig":
@@ -196,7 +202,7 @@ class EvalConfig(KilnParentedModel, KilnParentModel, parent_of={"runs": EvalRun}
     A eval might have many configs, example running the same eval with 2 different models. Comparing eval results is only valid within the scope of the same config.
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the eval config.")
     model_name: str = Field(
         description="The name of the model to use for this eval config. ",
     )
@@ -251,7 +257,7 @@ class EvalConfig(KilnParentedModel, KilnParentModel, parent_of={"runs": EvalRun}
 
 
 class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}):
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the eval.")
     description: str | None = Field(
         default=None, description="The description of the eval"
     )
@@ -280,6 +286,10 @@ class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}
         default=False,
         description="Whether this eval is a favourite of the user. Rendered as a star icon in the UI.",
     )
+    template_properties: dict[str, str | int | bool | float] = Field(
+        default={},
+        description="Properties to be used to execute the eval. This is template_type specific and should serialize to a json dict.",
+    )
 
     # Workaround to return typed parent without importing Task
     def parent_task(self) -> Union["Task", None]:
@@ -304,3 +314,25 @@ class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}
                 f"output_scores must have unique names (once transformed to JSON keys). Got: [{', '.join(output_score_keys)}]"
             )
         return self
+
+    @model_validator(mode="after")
+    def validate_template_properties(self) -> Self:
+        # Check for properties that are required for the issue template
+        if self.template == EvalTemplateId.issue:
+            if "issue_prompt" not in self.template_properties or not isinstance(
+                self.template_properties["issue_prompt"], str
+            ):
+                raise ValueError("issue_prompt is required for issue template")
+            if "failure_example" in self.template_properties and not isinstance(
+                self.template_properties["failure_example"], str
+            ):
+                raise ValueError(
+                    "failure_example is optional for issue template, but if provided must be a string"
+                )
+            if "pass_example" in self.template_properties and not isinstance(
+                self.template_properties["pass_example"], str
+            ):
+                raise ValueError(
+                    "pass_example is optional for issue template, but if provided must be a string"
+                )
+        return self
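
The new validate_template_properties validator gives the kiln_issue template a small contract: template_properties must contain a string issue_prompt, and failure_example / pass_example must be strings when present. A standalone sketch of that contract (illustration only, not a function the package exports):

    # Mirrors the rules enforced by validate_template_properties above for
    # EvalTemplateId.issue.
    def check_issue_template_properties(props: dict) -> None:
        if "issue_prompt" not in props or not isinstance(props["issue_prompt"], str):
            raise ValueError("issue_prompt is required for issue template")
        for optional_key in ("failure_example", "pass_example"):
            if optional_key in props and not isinstance(props[optional_key], str):
                raise ValueError(f"{optional_key} must be a string if provided")

    check_issue_template_properties(
        {
            "issue_prompt": "The assistant promises refunds we do not offer.",
            "failure_example": "Sure, refund processed!",       # optional
            "pass_example": "Please contact billing support.",  # optional
        }
    )  # passes; omit issue_prompt (or pass a non-string) and it raises ValueError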
kiln_ai/datamodel/finetune.py CHANGED
@@ -3,9 +3,8 @@ from typing import TYPE_CHECKING, Dict, Union
 from pydantic import Field, model_validator
 from typing_extensions import Self
 
-from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel
 from kiln_ai.datamodel.datamodel_enums import (
-    THINKING_DATA_STRATEGIES,
     ChatStrategy,
     FineTuneStatusType,
     StructuredOutputMode,
@@ -27,7 +26,7 @@ class Finetune(KilnParentedModel):
     Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the fine-tune.")
     description: str | None = Field(
         default=None,
         description="A description of the fine-tune for you and your team. Not used in training.",
kiln_ai/datamodel/project.py CHANGED
@@ -1,6 +1,6 @@
 from pydantic import Field
 
-from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentModel
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentModel
 from kiln_ai.datamodel.task import Task
 
 
@@ -12,12 +12,12 @@ class Project(KilnParentModel, parent_of={"tasks": Task}):
     of the overall goals.
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the project.")
     description: str | None = Field(
         default=None,
         description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
     )
 
-    # Needed for typechecking. TODO P2: fix this in KilnParentModel
+    # Needed for typechecking. We should fix this in KilnParentModel
     def tasks(self) -> list[Task]:
         return super().tasks()  # type: ignore
kiln_ai/datamodel/prompt.py CHANGED
@@ -1,6 +1,6 @@
 from pydantic import BaseModel, Field
 
-from kiln_ai.datamodel.basemodel import NAME_FIELD, KilnParentedModel
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel
 
 
 class BasePrompt(BaseModel):
@@ -10,7 +10,7 @@ class BasePrompt(BaseModel):
     The "Prompt" model name is reserved for the custom prompts parented by a task.
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the prompt.")
     description: str | None = Field(
         default=None,
         description="A more detailed description of the prompt.",
kiln_ai/datamodel/prompt_id.py CHANGED
@@ -60,11 +60,11 @@ def _check_prompt_id(id: str) -> str:
         return id
 
     if id.startswith("fine_tune_prompt::"):
-        # check it had a fine_tune_id after the :: -- 'fine_tune_prompt::fine_tune_id'
-        fine_tune_id = id[18:]
-        if len(fine_tune_id) == 0:
+        # check it had a fine_tune_id after the :: -- 'fine_tune_prompt::[project_id]::[task_id]::fine_tune_id'
+        parts = id.split("::")
+        if len(parts) != 4 or len(parts[3]) == 0:
             raise ValueError(
-                f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[fine_tune_id]'."
+                f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[project_id]::[task_id]::[fine_tune_id]'."
             )
         return id
 
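Fine-tune prompt IDs are now fully qualified with project and task IDs. A runnable sketch of the accepted shape, mirroring the check above (is_valid_fine_tune_prompt_id is a hypothetical helper, not part of the package):

    def is_valid_fine_tune_prompt_id(id: str) -> bool:
        # 'fine_tune_prompt::[project_id]::[task_id]::[fine_tune_id]'
        if not id.startswith("fine_tune_prompt::"):
            return False
        parts = id.split("::")
        return len(parts) == 4 and len(parts[3]) > 0

    assert is_valid_fine_tune_prompt_id("fine_tune_prompt::proj_123::task_456::ft_789")
    assert not is_valid_fine_tune_prompt_id("fine_tune_prompt::ft_789")  # pre-0.19 two-part form
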
kiln_ai/datamodel/task.py CHANGED
@@ -7,8 +7,8 @@ from kiln_ai.datamodel import Finetune
 from kiln_ai.datamodel.basemodel import (
     ID_FIELD,
     ID_TYPE,
-    NAME_FIELD,
-    SHORT_NAME_FIELD,
+    FilenameString,
+    FilenameStringShort,
     KilnParentedModel,
     KilnParentModel,
 )
@@ -38,7 +38,7 @@ class TaskRequirement(BaseModel):
     """
 
     id: ID_TYPE = ID_FIELD
-    name: str = SHORT_NAME_FIELD
+    name: FilenameStringShort = Field(description="The name of the task requirement.")
     description: str | None = Field(default=None)
     instruction: str = Field(min_length=1)
     priority: Priority = Field(default=Priority.p2)
@@ -103,7 +103,7 @@ class TaskRunConfig(KilnParentedModel):
     A run config includes everything needed to run a task, except the input. Running the same RunConfig with the same input should make identical calls to the model (output may vary as models are non-deterministic).
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the task run config.")
     description: str | None = Field(
         default=None, description="The description of the task run config."
     )
@@ -189,7 +189,7 @@ class Task(
     a collection of task runs.
     """
 
-    name: str = NAME_FIELD
+    name: FilenameString = Field(description="The name of the task.")
     description: str | None = Field(
         default=None,
         description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
@@ -216,7 +216,7 @@ class Task(
             return None
         return schema_from_json_str(self.input_json_schema)
 
-    # These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
+    # These wrappers help for typechecking. We should fix this in KilnParentModel
     def runs(self, readonly: bool = False) -> list[TaskRun]:
         return super().runs(readonly=readonly)  # type: ignore
 
kiln_ai/datamodel/task_output.py CHANGED
@@ -2,8 +2,6 @@ import json
 from enum import Enum
 from typing import TYPE_CHECKING, Dict, List, Type, Union
 
-import jsonschema
-import jsonschema.exceptions
 from pydantic import BaseModel, Field, ValidationInfo, model_validator
 from typing_extensions import Self
 
@@ -309,7 +307,7 @@ class TaskOutput(KilnBaseModel):
         if task.output_json_schema is not None:
             try:
                 output_parsed = json.loads(self.output)
-            except json.JSONDecodeError as e:
+            except json.JSONDecodeError:
                 raise ValueError("Output is not a valid JSON object")
 
             validate_schema_with_value_error(
kiln_ai/datamodel/task_run.py CHANGED
@@ -1,8 +1,6 @@
 import json
 from typing import TYPE_CHECKING, Dict, List, Union
 
-import jsonschema
-import jsonschema.exceptions
 from pydantic import BaseModel, Field, ValidationInfo, model_validator
 from typing_extensions import Self
 
kiln_ai/datamodel/test_basemodel.py CHANGED
@@ -1,5 +1,6 @@
 import datetime
 import json
+import uuid
 from pathlib import Path
 from typing import Optional
 from unittest.mock import MagicMock, patch
@@ -12,6 +13,7 @@ from kiln_ai.datamodel import Task, TaskRun
 from kiln_ai.datamodel.basemodel import (
     KilnBaseModel,
     KilnParentedModel,
+    name_validator,
     string_to_valid_name,
 )
 from kiln_ai.datamodel.model_cache import ModelCache
@@ -328,28 +330,81 @@ def test_delete_no_path():
     model.delete()
 
 
-def test_string_to_valid_name():
-    # Test basic valid strings remain unchanged
-    assert string_to_valid_name("Hello World") == "Hello World"
-    assert string_to_valid_name("Test-123") == "Test-123"
-    assert string_to_valid_name("my_file_name") == "my_file_name"
+@pytest.mark.parametrize(
+    "name,expected",
+    [
+        # Basic valid strings remain unchanged
+        ("Hello World", "Hello World"),
+        ("Test-123", "Test-123"),
+        ("my_file_name", "my_file_name"),
+        ("multiple!!!symbols", "multiple!!!symbols"),
+        # Emoji
+        ("Hello 👍", "Hello 👍"),
+        # Invalid characters are replaced
+        ("Hello@World!", "Hello@World!"),
+        ("File.name.txt", "File_name_txt"),
+        ("Special%%%Chars", "Special_Chars"),
+        ("Special#$%Chars", "Special#$_Chars"),
+        # Consecutive invalid characters are replaced
+        ("Special%%%Chars", "Special_Chars"),
+        ("path/to/file", "path_to_file"),
+        # Leading/trailing special characters are removed
+        ("__test__", "test"),
+        ("...test...", "test"),
+        # Whitespace is replaced
+        ("", ""),
+        (" ", ""),
+        ("Hello World", "Hello World"),
+        # Unicode characters are replaced
+        ("你好", "你好"),
+        ("你好_世界", "你好_世界"),
+        ("你好_世界_你好", "你好_世界_你好"),
+        # Newlines, tabs, and other control characters are replaced
+        ("Hello\nworld", "Hello_world"),
+        ("Hello\tworld", "Hello_world"),
+        ("Hello\rworld", "Hello_world"),
+        ("Hello\fworld", "Hello_world"),
+        ("Hello\bworld", "Hello_world"),
+        ("Hello\vworld", "Hello_world"),
+        ("Hello\0world", "Hello_world"),
+        ("Hello\x00world", "Hello_world"),
+    ],
+)
+def test_string_to_valid_name(tmp_path, name, expected):
+    assert string_to_valid_name(name) == expected
 
-    # Test invalid characters are replaced
-    assert string_to_valid_name("Hello@World!") == "Hello_World"
-    assert string_to_valid_name("File.name.txt") == "File_name_txt"
-    assert string_to_valid_name("Special#$%Chars") == "Special_Chars"
+    # check we can create a folder with the valid name
+    dir_path = tmp_path / str(uuid.uuid4()) / expected
+    dir_path.mkdir(parents=True)
 
-    # Test consecutive invalid characters
-    assert string_to_valid_name("multiple!!!symbols") == "multiple_symbols"
-    assert string_to_valid_name("path/to/file") == "path_to_file"
 
-    # Test leading/trailing special characters
-    assert string_to_valid_name("__test__") == "test"
-    assert string_to_valid_name("...test...") == "test"
+@pytest.mark.parametrize(
+    "name,min_length,max_length,should_pass",
+    [
+        # Valid cases
+        ("ValidName", 5, 20, True),
+        ("Short", 1, 10, True),
+        ("LongerValidName", 5, 20, True),
+        # None case (line 53)
+        (None, 5, 20, False),
+        # Too short cases (lines 57-59)
+        ("Hi", 5, 20, False),
+        ("", 1, 20, False),
+        ("a", 2, 20, False),
+        # Too long cases (lines 61-63)
+        ("ThisNameIsTooLong", 5, 10, False),
+        ("VeryVeryVeryLongName", 1, 15, False),
+    ],
+)
+def test_name_validator_error_conditions(name, min_length, max_length, should_pass):
    validator = name_validator(min_length=min_length, max_length=max_length)
 
-    # Test empty string and whitespace
-    assert string_to_valid_name("") == ""
-    assert string_to_valid_name(" ") == ""
+    if should_pass:
+        result = validator(name)
+        assert result == name
+    else:
+        with pytest.raises(ValueError):
+            validator(name)
 
 
 def test_load_from_file_with_cache(test_base_file, tmp_model_cache):
@@ -553,3 +608,140 @@ async def test_invoke_parsing_flow(adapter):
         match="Reasoning is required for this model, but no reasoning was returned.",
     ):
         await adapter.invoke("test input")
+
+
+async def test_invoke_parsing_flow_basic_no_reasoning(adapter):
+    """Test for reasoning_optional_for_structured_output
+    when reasoning is not required.
+    This is a special case where we want to return the output as is.
+    """
+    # Mock dependencies
+    mock_provider = MagicMock()
+    mock_provider.parser = "test_parser"
+    mock_provider.formatter = None
+    mock_provider.reasoning_capable = False
+    mock_provider.reasoning_optional_for_structured_output = True
+
+    mock_parser = MagicMock()
+    mock_parser.parse_output.return_value = RunOutput(
+        output="parsed test output", intermediate_outputs={"key": "value"}
+    )
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+            return_value=mock_parser,
+        ),
+        patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+    ):
+        # Disable autosaving for this test
+        mock_config.shared.return_value.autosave_runs = False
+        mock_config.shared.return_value.user_id = "test_user_id"
+
+        # Execute
+        result = await adapter.invoke("test input")
+
+        # Verify parsing occurred
+        mock_parser.parse_output.assert_called_once()
+        parsed_args = mock_parser.parse_output.call_args[1]
+        assert isinstance(parsed_args["original_output"], RunOutput)
+        assert parsed_args["original_output"].output == "test output"
+
+        # Verify result contains parsed output
+        assert isinstance(result, TaskRun)
+        assert result.output.output == "parsed test output"
+        assert result.intermediate_outputs == {"key": "value"}
+        assert result.input == "test input"
+
+
+async def test_invoke_parsing_flow_no_reasoning_with_structured_output(adapter):
+    """Test for reasoning_optional_for_structured_output
+    when reasoning is required but not provided, with structured output enabled.
+    This is a special case where we don't want to error, but we want to return the output as is.
+    """
+    # Mock dependencies
+    mock_provider = MagicMock()
+    mock_provider.parser = "test_parser"
+    mock_provider.formatter = None
+    mock_provider.reasoning_capable = True
+    mock_provider.reasoning_optional_for_structured_output = True
+
+    mock_parser = MagicMock()
+    mock_parser.parse_output.return_value = RunOutput(
+        output="parsed test output", intermediate_outputs={"key": "value"}
+    )
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+            return_value=mock_parser,
+        ),
+        patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+        patch.object(adapter, "has_structured_output", return_value=True),
+    ):
+        # Disable autosaving for this test
+        mock_config.shared.return_value.autosave_runs = False
+        mock_config.shared.return_value.user_id = "test_user_id"
+
+        # Execute
+        result = await adapter.invoke("test input")
+
+        # Verify parsing occurred
+        mock_parser.parse_output.assert_called_once()
+        parsed_args = mock_parser.parse_output.call_args[1]
+        assert isinstance(parsed_args["original_output"], RunOutput)
+        assert parsed_args["original_output"].output == "test output"
+
+        # Verify result contains parsed output
+        assert isinstance(result, TaskRun)
+        assert result.output.output == "parsed test output"
+        assert result.intermediate_outputs == {"key": "value"}
+        assert result.input == "test input"
+
+
+async def test_invoke_parsing_flow_with_reasoning_and_structured_output(adapter):
+    """Test for reasoning_optional_for_structured_output
+    when reasoning is provided with structured output enabled.
+    This is a special case where we want to return the output as is.
+    """
+    # Mock dependencies
+    mock_provider = MagicMock()
+    mock_provider.parser = "test_parser"
+    mock_provider.formatter = None
+    mock_provider.reasoning_capable = True
+    mock_provider.reasoning_optional_for_structured_output = True
+
+    mock_parser = MagicMock()
+    mock_parser.parse_output.return_value = RunOutput(
+        output="parsed test output", intermediate_outputs={"reasoning": "value"}
+    )
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+            return_value=mock_parser,
+        ),
+        patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+        patch.object(adapter, "has_structured_output", return_value=True),
+    ):
+        # Disable autosaving for this test
+        mock_config.shared.return_value.autosave_runs = False
+        mock_config.shared.return_value.user_id = "test_user_id"
+
+        # Execute
+        result = await adapter.invoke("test input")
+
+        # Verify parsing occurred
+        mock_parser.parse_output.assert_called_once()
+        parsed_args = mock_parser.parse_output.call_args[1]
+        assert isinstance(parsed_args["original_output"], RunOutput)
+        assert parsed_args["original_output"].output == "test output"
+
+        # Verify result contains parsed output
+        assert isinstance(result, TaskRun)
+        assert result.output.output == "parsed test output"
+        assert result.intermediate_outputs == {"reasoning": "value"}
+        assert result.input == "test input"
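
Taken together, the three new tests pin down when a missing reasoning trace is tolerated. A hypothetical distillation of that decision, inferred from the tests rather than taken from the adapter's actual code:

    from dataclasses import dataclass

    @dataclass
    class Provider:
        reasoning_capable: bool
        reasoning_optional_for_structured_output: bool

    def missing_reasoning_is_error(
        provider: Provider, has_structured_output: bool, reasoning: str | None
    ) -> bool:
        if not provider.reasoning_capable or reasoning:
            return False  # nothing was required, or reasoning was returned
        # A reasoning-capable model returned none: tolerated only when the
        # provider marks reasoning optional for structured output and
        # structured output is enabled.
        return not (
            provider.reasoning_optional_for_structured_output and has_structured_output
        )

    # Matches the three tests above:
    assert not missing_reasoning_is_error(Provider(False, True), False, None)
    assert not missing_reasoning_is_error(Provider(True, True), True, None)
    assert not missing_reasoning_is_error(Provider(True, True), True, "chain of thought")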