kiln-ai 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic.
- kiln_ai/adapters/adapter_registry.py +28 -0
- kiln_ai/adapters/chat/chat_formatter.py +0 -1
- kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
- kiln_ai/adapters/data_gen/data_gen_task.py +51 -38
- kiln_ai/adapters/data_gen/test_data_gen_task.py +318 -37
- kiln_ai/adapters/eval/base_eval.py +6 -7
- kiln_ai/adapters/eval/eval_runner.py +5 -1
- kiln_ai/adapters/eval/g_eval.py +17 -12
- kiln_ai/adapters/eval/test_base_eval.py +8 -2
- kiln_ai/adapters/eval/test_eval_runner.py +6 -12
- kiln_ai/adapters/eval/test_g_eval.py +115 -5
- kiln_ai/adapters/eval/test_g_eval_data.py +1 -1
- kiln_ai/adapters/fine_tune/base_finetune.py +2 -6
- kiln_ai/adapters/fine_tune/dataset_formatter.py +1 -5
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +32 -20
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +1 -1
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +30 -21
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +2 -7
- kiln_ai/adapters/fine_tune/together_finetune.py +1 -1
- kiln_ai/adapters/ml_model_list.py +926 -125
- kiln_ai/adapters/model_adapters/base_adapter.py +11 -7
- kiln_ai/adapters/model_adapters/litellm_adapter.py +23 -1
- kiln_ai/adapters/model_adapters/test_base_adapter.py +1 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +70 -3
- kiln_ai/adapters/model_adapters/test_structured_output.py +13 -13
- kiln_ai/adapters/parsers/parser_registry.py +0 -2
- kiln_ai/adapters/parsers/r1_parser.py +0 -1
- kiln_ai/adapters/parsers/test_r1_parser.py +1 -1
- kiln_ai/adapters/provider_tools.py +20 -19
- kiln_ai/adapters/remote_config.py +113 -0
- kiln_ai/adapters/repair/repair_task.py +2 -7
- kiln_ai/adapters/test_adapter_registry.py +30 -2
- kiln_ai/adapters/test_ml_model_list.py +30 -0
- kiln_ai/adapters/test_prompt_adaptors.py +0 -4
- kiln_ai/adapters/test_provider_tools.py +18 -12
- kiln_ai/adapters/test_remote_config.py +456 -0
- kiln_ai/datamodel/basemodel.py +54 -28
- kiln_ai/datamodel/datamodel_enums.py +2 -0
- kiln_ai/datamodel/dataset_split.py +5 -3
- kiln_ai/datamodel/eval.py +35 -3
- kiln_ai/datamodel/finetune.py +2 -3
- kiln_ai/datamodel/project.py +3 -3
- kiln_ai/datamodel/prompt.py +2 -2
- kiln_ai/datamodel/prompt_id.py +4 -4
- kiln_ai/datamodel/task.py +6 -6
- kiln_ai/datamodel/task_output.py +1 -3
- kiln_ai/datamodel/task_run.py +0 -2
- kiln_ai/datamodel/test_basemodel.py +210 -18
- kiln_ai/datamodel/test_eval_model.py +152 -10
- kiln_ai/datamodel/test_model_perf.py +1 -1
- kiln_ai/datamodel/test_prompt_id.py +5 -1
- kiln_ai/datamodel/test_task.py +5 -0
- kiln_ai/utils/config.py +10 -0
- kiln_ai/utils/logging.py +4 -3
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/METADATA +33 -3
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/RECORD +58 -56
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/dataset_split.py
CHANGED

@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING
 
 from pydantic import BaseModel, Field, model_validator
 
-from kiln_ai.datamodel.basemodel import
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel
 from kiln_ai.datamodel.dataset_filters import (
     DatasetFilter,
     DatasetFilterId,
@@ -26,7 +26,9 @@ class DatasetSplitDefinition(BaseModel):
     Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
     """
 
-    name:
+    name: FilenameString = Field(
+        description="The name of the dataset split definition."
+    )
     description: str | None = Field(
         default=None,
         description="A description of the dataset for you and your team. Not used in training.",
@@ -70,7 +72,7 @@ class DatasetSplit(KilnParentedModel):
     Maintains a list of IDs for each split, to avoid data duplication.
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the dataset split.")
     description: str | None = Field(
         default=None,
         description="A description of the dataset for you and your team. Not used in training.",
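For readers skimming the diff: the practical effect of moving `name` from a plain string to the `FilenameString` annotation is that names are validated when the model is constructed. A minimal sketch of the intent, assuming `FilenameString` rejects names that cannot serve as safe file/folder names (the exact rules live in kiln_ai.datamodel.basemodel and are not shown in this diff):

from pydantic import ValidationError

from kiln_ai.datamodel.dataset_split import DatasetSplitDefinition

# A conventional split name validates as before.
train_split = DatasetSplitDefinition(name="train", percentage=0.8)

# Under the FilenameString annotation, a name that cannot serve as a safe
# file/folder name (here: empty) is expected to be rejected at construction
# time rather than failing later when the split is written to disk.
try:
    DatasetSplitDefinition(name="", percentage=0.2)
except ValidationError as err:
    print(err)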
kiln_ai/datamodel/eval.py
CHANGED
@@ -7,13 +7,14 @@ from typing_extensions import Self
 
 from kiln_ai.datamodel.basemodel import (
     ID_TYPE,
-
+    FilenameString,
     KilnParentedModel,
     KilnParentModel,
 )
 from kiln_ai.datamodel.datamodel_enums import TaskOutputRatingType
 from kiln_ai.datamodel.dataset_filters import DatasetFilterId
 from kiln_ai.datamodel.json_schema import string_to_json_key
+from kiln_ai.datamodel.task_run import Usage
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 
 if TYPE_CHECKING:
@@ -28,6 +29,7 @@ class EvalTemplateId(str, Enum):
     """
 
     kiln_requirements = "kiln_requirements"
+    issue = "kiln_issue"
    toxicity = "toxicity"
    bias = "bias"
    maliciousness = "maliciousness"
@@ -110,6 +112,10 @@ class EvalRun(KilnParentedModel):
     scores: EvalScores = Field(
         description="The output scores of the evaluator (aligning to those required by the grand-parent Eval this object is a child of)."
     )
+    task_run_usage: Usage | None = Field(
+        default=None,
+        description="The usage of the task run that produced this eval run output (not the usage by the evaluation model).",
+    )
 
     def parent_eval_config(self) -> Union["EvalConfig", None]:
         if self.parent is not None and self.parent.__class__.__name__ != "EvalConfig":
@@ -196,7 +202,7 @@ class EvalConfig(KilnParentedModel, KilnParentModel, parent_of={"runs": EvalRun}
     A eval might have many configs, example running the same eval with 2 different models. Comparing eval results is only valid within the scope of the same config.
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the eval config.")
     model_name: str = Field(
         description="The name of the model to use for this eval config. ",
     )
@@ -251,7 +257,7 @@ class EvalConfig(KilnParentedModel, KilnParentModel, parent_of={"runs": EvalRun}
 
 
 class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}):
-    name:
+    name: FilenameString = Field(description="The name of the eval.")
     description: str | None = Field(
         default=None, description="The description of the eval"
     )
@@ -280,6 +286,10 @@ class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}
         default=False,
         description="Whether this eval is a favourite of the user. Rendered as a star icon in the UI.",
     )
+    template_properties: dict[str, str | int | bool | float] = Field(
+        default={},
+        description="Properties to be used to execute the eval. This is template_type specific and should serialize to a json dict.",
+    )
 
     # Workaround to return typed parent without importing Task
     def parent_task(self) -> Union["Task", None]:
@@ -304,3 +314,25 @@ class Eval(KilnParentedModel, KilnParentModel, parent_of={"configs": EvalConfig}
                 f"output_scores must have unique names (once transformed to JSON keys). Got: [{', '.join(output_score_keys)}]"
             )
         return self
+
+    @model_validator(mode="after")
+    def validate_template_properties(self) -> Self:
+        # Check for properties that are required for the issue template
+        if self.template == EvalTemplateId.issue:
+            if "issue_prompt" not in self.template_properties or not isinstance(
+                self.template_properties["issue_prompt"], str
+            ):
+                raise ValueError("issue_prompt is required for issue template")
+            if "failure_example" in self.template_properties and not isinstance(
+                self.template_properties["failure_example"], str
+            ):
+                raise ValueError(
+                    "failure_example is optional for issue template, but if provided must be a string"
+                )
+            if "pass_example" in self.template_properties and not isinstance(
+                self.template_properties["pass_example"], str
+            ):
+                raise ValueError(
+                    "pass_example is optional for issue template, but if provided must be a string"
+                )
+        return self
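The new `issue` template is driven by `template_properties`, with its shape enforced by the `validate_template_properties` validator above: `issue_prompt` is required and must be a string, while `failure_example` and `pass_example` are optional strings. A minimal sketch of the expected payload (the string values are illustrative, and the other required `Eval` fields such as `output_scores` are unchanged and omitted here):

# Illustrative template_properties for an Eval using EvalTemplateId.issue
# ("kiln_issue"). Keys mirror the validator in the diff above; the values
# are made-up examples.
issue_template_properties = {
    # Required: describes the issue the eval should catch.
    "issue_prompt": "The assistant promises refunds that the business does not offer.",
    # Optional: an example output that should fail the eval.
    "failure_example": "Sure, I've processed a full refund for you!",
    # Optional: an example output that should pass the eval.
    "pass_example": "I can't issue refunds, but I can connect you with support.",
}

# Omitting "issue_prompt" (or passing a non-string value) on an issue-template
# Eval raises ValueError("issue_prompt is required for issue template") when
# the model is validated.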
kiln_ai/datamodel/finetune.py
CHANGED
@@ -3,9 +3,8 @@ from typing import TYPE_CHECKING, Dict, Union
 from pydantic import Field, model_validator
 from typing_extensions import Self
 
-from kiln_ai.datamodel.basemodel import
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel
 from kiln_ai.datamodel.datamodel_enums import (
-    THINKING_DATA_STRATEGIES,
     ChatStrategy,
     FineTuneStatusType,
     StructuredOutputMode,
@@ -27,7 +26,7 @@ class Finetune(KilnParentedModel):
     Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the fine-tune.")
     description: str | None = Field(
         default=None,
         description="A description of the fine-tune for you and your team. Not used in training.",
kiln_ai/datamodel/project.py
CHANGED
@@ -1,6 +1,6 @@
 from pydantic import Field
 
-from kiln_ai.datamodel.basemodel import
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentModel
 from kiln_ai.datamodel.task import Task
 
 
@@ -12,12 +12,12 @@ class Project(KilnParentModel, parent_of={"tasks": Task}):
     of the overall goals.
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the project.")
     description: str | None = Field(
         default=None,
         description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
     )
 
-    # Needed for typechecking.
+    # Needed for typechecking. We should fix this in KilnParentModel
     def tasks(self) -> list[Task]:
         return super().tasks()  # type: ignore
kiln_ai/datamodel/prompt.py
CHANGED
@@ -1,6 +1,6 @@
 from pydantic import BaseModel, Field
 
-from kiln_ai.datamodel.basemodel import
+from kiln_ai.datamodel.basemodel import FilenameString, KilnParentedModel
 
 
 class BasePrompt(BaseModel):
@@ -10,7 +10,7 @@ class BasePrompt(BaseModel):
     The "Prompt" model name is reserved for the custom prompts parented by a task.
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the prompt.")
     description: str | None = Field(
         default=None,
         description="A more detailed description of the prompt.",
kiln_ai/datamodel/prompt_id.py
CHANGED
@@ -60,11 +60,11 @@ def _check_prompt_id(id: str) -> str:
         return id
 
     if id.startswith("fine_tune_prompt::"):
-        # check it had a fine_tune_id after the :: -- 'fine_tune_prompt::fine_tune_id'
-
-        if len(
+        # check it had a fine_tune_id after the :: -- 'fine_tune_prompt::[project_id]::[task_id]::fine_tune_id'
+        parts = id.split("::")
+        if len(parts) != 4 or len(parts[3]) == 0:
             raise ValueError(
-                f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[fine_tune_id]'."
+                f"Invalid fine-tune prompt ID: {id}. Expected format: 'fine_tune_prompt::[project_id]::[task_id]::[fine_tune_id]'."
             )
         return id
 
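The fine-tune prompt ID therefore now encodes the project and task alongside the fine-tune itself. A quick sketch of the accepted shape (the IDs are placeholders):

# New four-part format: fine_tune_prompt::[project_id]::[task_id]::[fine_tune_id]
good_id = "fine_tune_prompt::proj_123::task_456::ft_789"
bad_id = "fine_tune_prompt::ft_789"  # old two-part form, now rejected


def looks_valid(prompt_id: str) -> bool:
    # Mirrors the check in _check_prompt_id above.
    parts = prompt_id.split("::")
    return len(parts) == 4 and len(parts[3]) > 0


assert looks_valid(good_id)
assert not looks_valid(bad_id)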
kiln_ai/datamodel/task.py
CHANGED
@@ -7,8 +7,8 @@ from kiln_ai.datamodel import Finetune
 from kiln_ai.datamodel.basemodel import (
     ID_FIELD,
     ID_TYPE,
-
-
+    FilenameString,
+    FilenameStringShort,
     KilnParentedModel,
     KilnParentModel,
 )
@@ -38,7 +38,7 @@ class TaskRequirement(BaseModel):
     """
 
     id: ID_TYPE = ID_FIELD
-    name:
+    name: FilenameStringShort = Field(description="The name of the task requirement.")
     description: str | None = Field(default=None)
     instruction: str = Field(min_length=1)
     priority: Priority = Field(default=Priority.p2)
@@ -103,7 +103,7 @@ class TaskRunConfig(KilnParentedModel):
     A run config includes everything needed to run a task, except the input. Running the same RunConfig with the same input should make identical calls to the model (output may vary as models are non-deterministic).
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the task run config.")
     description: str | None = Field(
         default=None, description="The description of the task run config."
     )
@@ -189,7 +189,7 @@ class Task(
     a collection of task runs.
     """
 
-    name:
+    name: FilenameString = Field(description="The name of the task.")
     description: str | None = Field(
         default=None,
         description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
@@ -216,7 +216,7 @@ class Task(
             return None
         return schema_from_json_str(self.input_json_schema)
 
-    # These wrappers help for typechecking.
+    # These wrappers help for typechecking. We should fix this in KilnParentModel
     def runs(self, readonly: bool = False) -> list[TaskRun]:
         return super().runs(readonly=readonly)  # type: ignore
 
kiln_ai/datamodel/task_output.py
CHANGED
@@ -2,8 +2,6 @@ import json
 from enum import Enum
 from typing import TYPE_CHECKING, Dict, List, Type, Union
 
-import jsonschema
-import jsonschema.exceptions
 from pydantic import BaseModel, Field, ValidationInfo, model_validator
 from typing_extensions import Self
 
@@ -309,7 +307,7 @@ class TaskOutput(KilnBaseModel):
         if task.output_json_schema is not None:
             try:
                 output_parsed = json.loads(self.output)
-            except json.JSONDecodeError
+            except json.JSONDecodeError:
                 raise ValueError("Output is not a valid JSON object")
 
             validate_schema_with_value_error(
kiln_ai/datamodel/task_run.py
CHANGED
kiln_ai/datamodel/test_basemodel.py
CHANGED

@@ -1,5 +1,6 @@
 import datetime
 import json
+import uuid
 from pathlib import Path
 from typing import Optional
 from unittest.mock import MagicMock, patch
@@ -12,6 +13,7 @@ from kiln_ai.datamodel import Task, TaskRun
 from kiln_ai.datamodel.basemodel import (
     KilnBaseModel,
     KilnParentedModel,
+    name_validator,
     string_to_valid_name,
 )
 from kiln_ai.datamodel.model_cache import ModelCache
@@ -328,28 +330,81 @@ def test_delete_no_path():
         model.delete()
 
 
-
-
-
-
-
+@pytest.mark.parametrize(
+    "name,expected",
+    [
+        # Basic valid strings remain unchanged
+        ("Hello World", "Hello World"),
+        ("Test-123", "Test-123"),
+        ("my_file_name", "my_file_name"),
+        ("multiple!!!symbols", "multiple!!!symbols"),
+        # Emoji
+        ("Hello 👍", "Hello 👍"),
+        # Invalid characters are replaced
+        ("Hello@World!", "Hello@World!"),
+        ("File.name.txt", "File_name_txt"),
+        ("Special%%%Chars", "Special_Chars"),
+        ("Special#$%Chars", "Special#$_Chars"),
+        # Consecutive invalid characters are replaced
+        ("Special%%%Chars", "Special_Chars"),
+        ("path/to/file", "path_to_file"),
+        # Leading/trailing special characters are removed
+        ("__test__", "test"),
+        ("...test...", "test"),
+        # Whitespace is replaced
+        ("", ""),
+        (" ", ""),
+        ("Hello World", "Hello World"),
+        # Unicode characters are replaced
+        ("你好", "你好"),
+        ("你好_世界", "你好_世界"),
+        ("你好_世界_你好", "你好_世界_你好"),
+        # Newlines, tabs, and other control characters are replaced
+        ("Hello\nworld", "Hello_world"),
+        ("Hello\tworld", "Hello_world"),
+        ("Hello\rworld", "Hello_world"),
+        ("Hello\fworld", "Hello_world"),
+        ("Hello\bworld", "Hello_world"),
+        ("Hello\vworld", "Hello_world"),
+        ("Hello\0world", "Hello_world"),
+        ("Hello\x00world", "Hello_world"),
+    ],
+)
+def test_string_to_valid_name(tmp_path, name, expected):
+    assert string_to_valid_name(name) == expected
 
-    #
-
-
-    assert string_to_valid_name("Special#$%Chars") == "Special_Chars"
+    # check we can create a folder with the valid name
+    dir_path = tmp_path / str(uuid.uuid4()) / expected
+    dir_path.mkdir(parents=True)
 
-    # Test consecutive invalid characters
-    assert string_to_valid_name("multiple!!!symbols") == "multiple_symbols"
-    assert string_to_valid_name("path/to/file") == "path_to_file"
 
-
-
-
+@pytest.mark.parametrize(
+    "name,min_length,max_length,should_pass",
+    [
+        # Valid cases
+        ("ValidName", 5, 20, True),
+        ("Short", 1, 10, True),
+        ("LongerValidName", 5, 20, True),
+        # None case (line 53)
+        (None, 5, 20, False),
+        # Too short cases (lines 57-59)
+        ("Hi", 5, 20, False),
+        ("", 1, 20, False),
+        ("a", 2, 20, False),
+        # Too long cases (lines 61-63)
+        ("ThisNameIsTooLong", 5, 10, False),
+        ("VeryVeryVeryLongName", 1, 15, False),
+    ],
+)
+def test_name_validator_error_conditions(name, min_length, max_length, should_pass):
+    validator = name_validator(min_length=min_length, max_length=max_length)
 
-
-
-
+    if should_pass:
+        result = validator(name)
+        assert result == name
+    else:
+        with pytest.raises(ValueError):
+            validator(name)
 
 
 def test_load_from_file_with_cache(test_base_file, tmp_model_cache):
@@ -553,3 +608,140 @@ async def test_invoke_parsing_flow(adapter):
         match="Reasoning is required for this model, but no reasoning was returned.",
     ):
         await adapter.invoke("test input")
+
+
+async def test_invoke_parsing_flow_basic_no_reasoning(adapter):
+    """Test for reasoning_optional_for_structured_output
+    when reasoning is not required.
+    This is a special case where we want to return the output as is.
+    """
+    # Mock dependencies
+    mock_provider = MagicMock()
+    mock_provider.parser = "test_parser"
+    mock_provider.formatter = None
+    mock_provider.reasoning_capable = False
+    mock_provider.reasoning_optional_for_structured_output = True
+
+    mock_parser = MagicMock()
+    mock_parser.parse_output.return_value = RunOutput(
+        output="parsed test output", intermediate_outputs={"key": "value"}
+    )
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+            return_value=mock_parser,
+        ),
+        patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+    ):
+        # Disable autosaving for this test
+        mock_config.shared.return_value.autosave_runs = False
+        mock_config.shared.return_value.user_id = "test_user_id"
+
+        # Execute
+        result = await adapter.invoke("test input")
+
+        # Verify parsing occurred
+        mock_parser.parse_output.assert_called_once()
+        parsed_args = mock_parser.parse_output.call_args[1]
+        assert isinstance(parsed_args["original_output"], RunOutput)
+        assert parsed_args["original_output"].output == "test output"
+
+        # Verify result contains parsed output
+        assert isinstance(result, TaskRun)
+        assert result.output.output == "parsed test output"
+        assert result.intermediate_outputs == {"key": "value"}
+        assert result.input == "test input"
+
+
+async def test_invoke_parsing_flow_no_reasoning_with_structured_output(adapter):
+    """Test for reasoning_optional_for_structured_output
+    when reasoning is required but not provided, with structured output enabled.
+    This is a special case where we don't want to error, but we want to return the output as is.
+    """
+    # Mock dependencies
+    mock_provider = MagicMock()
+    mock_provider.parser = "test_parser"
+    mock_provider.formatter = None
+    mock_provider.reasoning_capable = True
+    mock_provider.reasoning_optional_for_structured_output = True
+
+    mock_parser = MagicMock()
+    mock_parser.parse_output.return_value = RunOutput(
+        output="parsed test output", intermediate_outputs={"key": "value"}
+    )
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+            return_value=mock_parser,
+        ),
+        patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+        patch.object(adapter, "has_structured_output", return_value=True),
+    ):
+        # Disable autosaving for this test
+        mock_config.shared.return_value.autosave_runs = False
+        mock_config.shared.return_value.user_id = "test_user_id"
+
+        # Execute
+        result = await adapter.invoke("test input")
+
+        # Verify parsing occurred
+        mock_parser.parse_output.assert_called_once()
+        parsed_args = mock_parser.parse_output.call_args[1]
+        assert isinstance(parsed_args["original_output"], RunOutput)
+        assert parsed_args["original_output"].output == "test output"
+
+        # Verify result contains parsed output
+        assert isinstance(result, TaskRun)
+        assert result.output.output == "parsed test output"
+        assert result.intermediate_outputs == {"key": "value"}
+        assert result.input == "test input"
+
+
+async def test_invoke_parsing_flow_with_reasoning_and_structured_output(adapter):
+    """Test for reasoning_optional_for_structured_output
+    when reasoning is provided with structured output enabled.
+    This is a special case where we want to return the output as is.
+    """
+    # Mock dependencies
+    mock_provider = MagicMock()
+    mock_provider.parser = "test_parser"
+    mock_provider.formatter = None
+    mock_provider.reasoning_capable = True
+    mock_provider.reasoning_optional_for_structured_output = True
+
+    mock_parser = MagicMock()
+    mock_parser.parse_output.return_value = RunOutput(
+        output="parsed test output", intermediate_outputs={"reasoning": "value"}
+    )
+
+    with (
+        patch.object(adapter, "model_provider", return_value=mock_provider),
+        patch(
+            "kiln_ai.adapters.model_adapters.base_adapter.model_parser_from_id",
+            return_value=mock_parser,
+        ),
+        patch("kiln_ai.adapters.model_adapters.base_adapter.Config") as mock_config,
+        patch.object(adapter, "has_structured_output", return_value=True),
+    ):
+        # Disable autosaving for this test
+        mock_config.shared.return_value.autosave_runs = False
+        mock_config.shared.return_value.user_id = "test_user_id"
+
+        # Execute
+        result = await adapter.invoke("test input")
+
+        # Verify parsing occurred
+        mock_parser.parse_output.assert_called_once()
+        parsed_args = mock_parser.parse_output.call_args[1]
+        assert isinstance(parsed_args["original_output"], RunOutput)
+        assert parsed_args["original_output"].output == "test output"
+
+        # Verify result contains parsed output
+        assert isinstance(result, TaskRun)
+        assert result.output.output == "parsed test output"
+        assert result.intermediate_outputs == {"reasoning": "value"}
+        assert result.input == "test input"
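Taken together, the new tests document the two name utilities the datamodel changes above lean on: `string_to_valid_name` sanitizes a display name into something filesystem-safe, while `name_validator` returns a callable that raises on names outside the configured length bounds. A short sketch, with expectations taken from the parametrized cases in the diff:

from kiln_ai.datamodel.basemodel import name_validator, string_to_valid_name

# Sanitization: path separators and control characters become underscores,
# and leading/trailing punctuation is stripped.
assert string_to_valid_name("path/to/file") == "path_to_file"
assert string_to_valid_name("__test__") == "test"
assert string_to_valid_name("Hello\nworld") == "Hello_world"

# Validation: name_validator builds a validator enforcing length bounds and
# returning the name unchanged when it passes.
validate = name_validator(min_length=5, max_length=20)
assert validate("ValidName") == "ValidName"
# validate("Hi")  # would raise ValueError (shorter than min_length)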