kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +8 -2
- kiln_ai/adapters/adapter_registry.py +43 -208
- kiln_ai/adapters/chat/chat_formatter.py +8 -12
- kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
- kiln_ai/adapters/chunkers/__init__.py +13 -0
- kiln_ai/adapters/chunkers/base_chunker.py +42 -0
- kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
- kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
- kiln_ai/adapters/chunkers/helpers.py +23 -0
- kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
- kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
- kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
- kiln_ai/adapters/chunkers/test_helpers.py +75 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
- kiln_ai/adapters/docker_model_runner_tools.py +119 -0
- kiln_ai/adapters/embedding/__init__.py +0 -0
- kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
- kiln_ai/adapters/embedding/embedding_registry.py +32 -0
- kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
- kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
- kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
- kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
- kiln_ai/adapters/eval/base_eval.py +2 -2
- kiln_ai/adapters/eval/eval_runner.py +9 -3
- kiln_ai/adapters/eval/g_eval.py +2 -2
- kiln_ai/adapters/eval/test_base_eval.py +2 -4
- kiln_ai/adapters/eval/test_g_eval.py +4 -5
- kiln_ai/adapters/extractors/__init__.py +18 -0
- kiln_ai/adapters/extractors/base_extractor.py +72 -0
- kiln_ai/adapters/extractors/encoding.py +20 -0
- kiln_ai/adapters/extractors/extractor_registry.py +44 -0
- kiln_ai/adapters/extractors/extractor_runner.py +112 -0
- kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
- kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
- kiln_ai/adapters/extractors/test_encoding.py +54 -0
- kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
- kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
- kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
- kiln_ai/adapters/fine_tune/__init__.py +1 -1
- kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
- kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
- kiln_ai/adapters/ml_embedding_model_list.py +192 -0
- kiln_ai/adapters/ml_model_list.py +761 -37
- kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
- kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
- kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
- kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
- kiln_ai/adapters/ollama_tools.py +69 -12
- kiln_ai/adapters/parsers/__init__.py +1 -1
- kiln_ai/adapters/provider_tools.py +205 -47
- kiln_ai/adapters/rag/deduplication.py +49 -0
- kiln_ai/adapters/rag/progress.py +252 -0
- kiln_ai/adapters/rag/rag_runners.py +844 -0
- kiln_ai/adapters/rag/test_deduplication.py +195 -0
- kiln_ai/adapters/rag/test_progress.py +785 -0
- kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
- kiln_ai/adapters/remote_config.py +80 -8
- kiln_ai/adapters/repair/test_repair_task.py +12 -9
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +657 -85
- kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
- kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
- kiln_ai/adapters/test_ml_model_list.py +251 -1
- kiln_ai/adapters/test_ollama_tools.py +340 -1
- kiln_ai/adapters/test_prompt_adaptors.py +13 -6
- kiln_ai/adapters/test_prompt_builders.py +1 -1
- kiln_ai/adapters/test_provider_tools.py +254 -8
- kiln_ai/adapters/test_remote_config.py +651 -58
- kiln_ai/adapters/vector_store/__init__.py +1 -0
- kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
- kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
- kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
- kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
- kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
- kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
- kiln_ai/datamodel/__init__.py +39 -34
- kiln_ai/datamodel/basemodel.py +170 -1
- kiln_ai/datamodel/chunk.py +158 -0
- kiln_ai/datamodel/datamodel_enums.py +28 -0
- kiln_ai/datamodel/embedding.py +64 -0
- kiln_ai/datamodel/eval.py +1 -1
- kiln_ai/datamodel/external_tool_server.py +298 -0
- kiln_ai/datamodel/extraction.py +303 -0
- kiln_ai/datamodel/json_schema.py +25 -10
- kiln_ai/datamodel/project.py +40 -1
- kiln_ai/datamodel/rag.py +79 -0
- kiln_ai/datamodel/registry.py +0 -15
- kiln_ai/datamodel/run_config.py +62 -0
- kiln_ai/datamodel/task.py +2 -77
- kiln_ai/datamodel/task_output.py +6 -1
- kiln_ai/datamodel/task_run.py +41 -0
- kiln_ai/datamodel/test_attachment.py +649 -0
- kiln_ai/datamodel/test_basemodel.py +4 -4
- kiln_ai/datamodel/test_chunk_models.py +317 -0
- kiln_ai/datamodel/test_dataset_split.py +1 -1
- kiln_ai/datamodel/test_embedding_models.py +448 -0
- kiln_ai/datamodel/test_eval_model.py +6 -6
- kiln_ai/datamodel/test_example_models.py +175 -0
- kiln_ai/datamodel/test_external_tool_server.py +691 -0
- kiln_ai/datamodel/test_extraction_chunk.py +206 -0
- kiln_ai/datamodel/test_extraction_model.py +470 -0
- kiln_ai/datamodel/test_rag.py +641 -0
- kiln_ai/datamodel/test_registry.py +8 -3
- kiln_ai/datamodel/test_task.py +15 -47
- kiln_ai/datamodel/test_tool_id.py +320 -0
- kiln_ai/datamodel/test_vector_store.py +320 -0
- kiln_ai/datamodel/tool_id.py +105 -0
- kiln_ai/datamodel/vector_store.py +141 -0
- kiln_ai/tools/__init__.py +8 -0
- kiln_ai/tools/base_tool.py +82 -0
- kiln_ai/tools/built_in_tools/__init__.py +13 -0
- kiln_ai/tools/built_in_tools/math_tools.py +124 -0
- kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
- kiln_ai/tools/mcp_server_tool.py +95 -0
- kiln_ai/tools/mcp_session_manager.py +246 -0
- kiln_ai/tools/rag_tools.py +157 -0
- kiln_ai/tools/test_base_tools.py +199 -0
- kiln_ai/tools/test_mcp_server_tool.py +457 -0
- kiln_ai/tools/test_mcp_session_manager.py +1585 -0
- kiln_ai/tools/test_rag_tools.py +848 -0
- kiln_ai/tools/test_tool_registry.py +562 -0
- kiln_ai/tools/tool_registry.py +85 -0
- kiln_ai/utils/__init__.py +3 -0
- kiln_ai/utils/async_job_runner.py +62 -17
- kiln_ai/utils/config.py +24 -2
- kiln_ai/utils/env.py +15 -0
- kiln_ai/utils/filesystem.py +14 -0
- kiln_ai/utils/filesystem_cache.py +60 -0
- kiln_ai/utils/litellm.py +94 -0
- kiln_ai/utils/lock.py +100 -0
- kiln_ai/utils/mime_type.py +38 -0
- kiln_ai/utils/open_ai_types.py +94 -0
- kiln_ai/utils/pdf_utils.py +38 -0
- kiln_ai/utils/project_utils.py +17 -0
- kiln_ai/utils/test_async_job_runner.py +151 -35
- kiln_ai/utils/test_config.py +138 -1
- kiln_ai/utils/test_env.py +142 -0
- kiln_ai/utils/test_filesystem_cache.py +316 -0
- kiln_ai/utils/test_litellm.py +206 -0
- kiln_ai/utils/test_lock.py +185 -0
- kiln_ai/utils/test_mime_type.py +66 -0
- kiln_ai/utils/test_open_ai_types.py +131 -0
- kiln_ai/utils/test_pdf_utils.py +73 -0
- kiln_ai/utils/test_uuid.py +111 -0
- kiln_ai/utils/test_validation.py +524 -0
- kiln_ai/utils/uuid.py +9 -0
- kiln_ai/utils/validation.py +90 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
- kiln_ai-0.21.0.dist-info/RECORD +211 -0
- kiln_ai-0.19.0.dist-info/RECORD +0 -115
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -0,0 +1,649 @@
|
|
|
1
|
+
import filecmp
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Optional
|
|
7
|
+
from unittest.mock import patch
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
from pydantic import BaseModel, Field, SerializationInfo, field_serializer
|
|
11
|
+
|
|
12
|
+
from conftest import MockFileFactoryMimeType
|
|
13
|
+
from kiln_ai.datamodel.basemodel import KilnAttachmentModel, KilnBaseModel
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ModelWithAttachment(KilnBaseModel):
    """Test model holding attachments directly, inside a list, and inside a dict."""

    # a single attachment stored directly on the model
    attachment: KilnAttachmentModel = Field(default=None)

    # attachments nested in a list
    attachment_list: Optional[List[KilnAttachmentModel]] = Field(default=None)

    # attachments nested in a dict with string keys
    attachment_dict: Optional[Dict[str, KilnAttachmentModel]] = Field(default=None)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ContainerModel(BaseModel):
    """Plain pydantic model (not a KilnBaseModel) that carries attachment fields.

    It is embedded in other models so attachments are reached indirectly,
    through a nested object.
    """

    # single nested attachment
    indirect_attachment: Optional[KilnAttachmentModel] = Field(default=None)

    # nested attachments in a list
    indirect_attachment_list: Optional[List[KilnAttachmentModel]] = Field(default=None)

    # nested attachments in a dict with string keys
    indirect_attachment_dict: Optional[Dict[str, KilnAttachmentModel]] = Field(
        default=None
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ModelWithIndirectAttachment(KilnBaseModel):
    """Test model whose attachments live inside a nested ``ContainerModel``."""

    # This nested model contains attachment fields. A default_factory is used
    # so that every instance gets its own fresh ContainerModel rather than a
    # default object constructed once at class-definition time.
    container: ContainerModel = Field(default_factory=ContainerModel)

    # same nested model, but allowed to be absent
    container_optional: Optional[ContainerModel] = Field(default=None)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def hash_file(p: Path) -> str:
    """Return the hex MD5 digest of the bytes stored in file *p*.

    The digest is only used as a content fingerprint to match files in the
    tests, never for security. It is therefore flagged
    ``usedforsecurity=False`` so the call also works on Python builds where
    MD5 is restricted for security use (e.g. FIPS mode).
    """
    return hashlib.md5(p.read_bytes(), usedforsecurity=False).hexdigest()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.fixture
def test_base_kiln_file(tmp_path) -> Path:
    """Write a minimal model JSON file into ``tmp_path`` and return its path."""
    model_file = tmp_path / "test_model.json"
    payload = {"v": 1, "model_type": "kiln_base_model"}
    model_file.write_text(json.dumps(payload, indent=4))
    return model_file
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_save_to_file_with_attachment_single(test_base_kiln_file, mock_file_factory):
    """Saving a model with one attachment persists the file next to the model."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    model = ModelWithAttachment(
        path=test_base_kiln_file,
        attachment=KilnAttachmentModel.from_file(source_pdf),
    )

    # the attachment path is unset until the model is saved
    assert model.attachment.path is None
    model.save_to_file()
    assert model.attachment.path is not None

    saved = json.loads(test_base_kiln_file.read_text())
    stored_path = saved["attachment"]["path"]

    # the JSON stores the path as a string, and it is not absolute
    assert isinstance(stored_path, str)
    assert not Path(stored_path).is_absolute()

    # the stored path resolves against the model's folder to an identical copy
    assert model.path is not None
    persisted_file = model.path.parent / stored_path
    assert persisted_file.exists()
    assert filecmp.cmp(persisted_file, source_pdf)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_save_to_file_with_attachment_list(test_base_kiln_file, mock_file_factory):
    """Saving a model with a list of attachments persists every file."""
    sources = [
        mock_file_factory(mime)
        for mime in (
            MockFileFactoryMimeType.PDF,
            MockFileFactoryMimeType.PNG,
            MockFileFactoryMimeType.MP4,
            MockFileFactoryMimeType.OGG,
        )
    ]

    # index the originals by content hash so each persisted copy can be
    # matched back to the file it came from
    originals_by_hash = {hash_file(src): src for src in sources}

    model = ModelWithAttachment(
        path=test_base_kiln_file,
        attachment_list=[KilnAttachmentModel.from_file(src) for src in sources],
    )

    for item in model.attachment_list:
        assert item.path is None

    model.save_to_file()

    for item in model.attachment_list:
        assert item.path is not None

    saved = json.loads(test_base_kiln_file.read_text())
    stored_items = saved["attachment_list"]

    # every stored path is a non-absolute string
    for entry in stored_items:
        assert isinstance(entry["path"], str)
        assert not Path(entry["path"]).is_absolute()

    # nothing was dropped on the way to disk
    assert len(stored_items) == len(sources)

    # each stored path points at an existing, byte-identical copy
    for entry in stored_items:
        stored_path = entry["path"]
        assert isinstance(stored_path, str)
        assert not Path(stored_path).is_absolute()

        assert model.path is not None
        persisted_file = model.path.parent / stored_path
        assert persisted_file.exists()

        matching_original = originals_by_hash[hash_file(persisted_file)]
        assert filecmp.cmp(persisted_file, matching_original)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def test_save_to_file_with_attachment_dict(test_base_kiln_file, mock_file_factory):
    """Saving a model with a dict of attachments persists every file."""
    sources = [
        mock_file_factory(mime)
        for mime in (
            MockFileFactoryMimeType.PDF,
            MockFileFactoryMimeType.PNG,
            MockFileFactoryMimeType.MP4,
            MockFileFactoryMimeType.OGG,
        )
    ]

    # index the originals by content hash so each persisted copy can be
    # matched back to the file it came from
    originals_by_hash = {hash_file(src): src for src in sources}

    model = ModelWithAttachment(
        path=test_base_kiln_file,
        attachment_dict={
            f"file_{index}": KilnAttachmentModel.from_file(src)
            for index, src in enumerate(sources)
        },
    )

    for item in model.attachment_dict.values():
        assert item.path is None

    model.save_to_file()

    for item in model.attachment_dict.values():
        assert item.path is not None

    saved = json.loads(test_base_kiln_file.read_text())
    stored_items = saved["attachment_dict"]

    # every stored path is a non-absolute string
    for entry in stored_items.values():
        assert isinstance(entry["path"], str)
        assert not Path(entry["path"]).is_absolute()

    # nothing was dropped on the way to disk
    assert len(stored_items) == len(sources)

    # each stored path points at an existing, byte-identical copy
    for entry in stored_items.values():
        stored_path = entry["path"]
        assert isinstance(stored_path, str)
        assert not Path(stored_path).is_absolute()

        assert model.path is not None
        persisted_file = model.path.parent / stored_path
        assert persisted_file.exists()

        matching_original = originals_by_hash[hash_file(persisted_file)]
        assert filecmp.cmp(persisted_file, matching_original)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def test_save_to_file_with_indirect_attachment(test_base_kiln_file, mock_file_factory):
    """An attachment nested in a required sub-model is persisted on save."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    nested = ContainerModel(
        indirect_attachment=KilnAttachmentModel.from_file(source_pdf)
    )
    model = ModelWithIndirectAttachment(path=test_base_kiln_file, container=nested)

    assert model.container.indirect_attachment.path is None
    model.save_to_file()
    assert model.container.indirect_attachment.path is not None

    saved = json.loads(test_base_kiln_file.read_text())
    stored_path = saved["container"]["indirect_attachment"]["path"]

    # the JSON stores the path as a string, and it is not absolute
    assert isinstance(stored_path, str)
    assert not Path(stored_path).is_absolute()

    # resolving against the model's folder yields an identical copy
    assert model.path is not None
    persisted_file = model.path.parent / stored_path
    assert persisted_file.exists()
    assert filecmp.cmp(persisted_file, source_pdf)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def test_save_to_file_with_indirect_attachment_optional(
    test_base_kiln_file, mock_file_factory
):
    """An attachment nested in an optional sub-model is persisted on save.

    Mirrors ``test_save_to_file_with_indirect_attachment`` but goes through
    the ``container_optional`` field, including the same check that the
    stored path is a non-absolute string.
    """
    test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
    model = ModelWithIndirectAttachment(
        path=test_base_kiln_file,
        container_optional=ContainerModel(
            indirect_attachment=KilnAttachmentModel.from_file(test_media_file_document)
        ),
    )
    assert model.container_optional.indirect_attachment.path is None

    model.save_to_file()

    assert model.container_optional.indirect_attachment.path is not None

    with open(test_base_kiln_file, "r") as file:
        data = json.load(file)

    # the optional container was serialized
    assert data["container_optional"] is not None

    # check the path is relative to model.path.parent
    attachment_path = data["container_optional"]["indirect_attachment"]["path"]
    assert isinstance(attachment_path, str)
    assert not Path(attachment_path).is_absolute()

    # check the file is the same as the original
    assert model.path is not None
    expected_full_path = model.path.parent / attachment_path
    assert expected_full_path.exists()
    assert filecmp.cmp(expected_full_path, test_media_file_document)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def test_save_to_file_with_indirect_attachment_optional_none(test_base_kiln_file):
    """Saving with ``container_optional=None`` must not copy any attachment."""
    with patch.object(
        KilnAttachmentModel, "copy_file_to", return_value=Path("fake.txt")
    ) as copy_spy:
        model = ModelWithIndirectAttachment(
            path=test_base_kiln_file,
            container_optional=None,
        )
        model.save_to_file()

        saved = json.loads(test_base_kiln_file.read_text())

        # the absent container is stored as null
        assert saved["container_optional"] is None

        # KilnAttachmentModel.copy_file_to() must never have been invoked
        copy_spy.assert_not_called()
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def test_dump_dest_path(test_base_kiln_file, mock_file_factory):
    """Check the ``dest_path`` validation performed when dumping with attachments."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    model = ModelWithAttachment(
        path=test_base_kiln_file,
        attachment=KilnAttachmentModel.from_file(source_pdf),
    )

    invalid_path_error = "dest_path must be a valid Path object when saving attachments"
    not_a_directory_error = "dest_path must be a directory when saving attachments"

    # should raise when dest_path is missing from the context
    with pytest.raises(ValueError, match=invalid_path_error):
        model.model_dump_json(context={"save_attachments": True})

    # should raise when dest_path is not a Path object (a str here)
    with pytest.raises(ValueError, match=invalid_path_error):
        model.model_dump_json(
            context={"save_attachments": True, "dest_path": str(source_pdf)}
        )

    # should raise when dest_path is a Path to a file instead of a directory
    with pytest.raises(ValueError, match=not_a_directory_error):
        model.model_dump_json(
            context={"save_attachments": True, "dest_path": source_pdf}
        )

    # should not raise when dest_path is set
    model.model_dump_json(context={"dest_path": test_base_kiln_file.parent})
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def test_resolve_path(test_base_kiln_file, mock_file_factory):
    """Before saving, ``resolve_path`` returns the original input file path."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    model = ModelWithAttachment(
        path=test_base_kiln_file,
        attachment=KilnAttachmentModel.from_file(source_pdf),
    )

    resolved = model.attachment.resolve_path(test_base_kiln_file.parent)
    assert resolved == source_pdf
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def test_create_from_data(test_base_kiln_file, mock_file_factory):
    """An attachment built from raw bytes can be resolved, saved and compared."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    raw_bytes = source_pdf.read_bytes()
    model_dir = test_base_kiln_file.parent

    attachment = KilnAttachmentModel.from_data(raw_bytes, "application/pdf")
    # the attachment already resolves to an existing file before any save
    assert attachment.resolve_path(model_dir).exists()

    model = ModelWithAttachment(path=test_base_kiln_file, attachment=attachment)
    assert model.attachment.path is None

    model.save_to_file()
    assert model.attachment.path is not None

    saved = json.loads(test_base_kiln_file.read_text())

    # the path written to JSON matches the one held by the in-memory model
    assert str(saved["attachment"]["path"]) == str(model.attachment.path)
    assert filecmp.cmp(source_pdf, attachment.resolve_path(model_dir))
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def test_attachment_file_does_not_exist(test_base_kiln_file):
    """``from_file`` raises ``ValueError`` for a path that does not exist."""
    # a random file name under a folder that is not expected to exist
    missing_file = Path(f"/not/found/{uuid.uuid4()!s}.txt")

    with pytest.raises(ValueError):
        KilnAttachmentModel.from_file(missing_file)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def test_attachment_is_folder(test_base_kiln_file, tmp_path):
    """Using a directory as an attachment raises ``ValueError``."""
    directory = tmp_path / "test_folder"
    directory.mkdir()

    # both the attachment and the model are built inside the raises block,
    # so the error may come from either step
    with pytest.raises(ValueError):
        ModelWithAttachment(
            path=test_base_kiln_file,
            attachment=KilnAttachmentModel.from_file(directory),
        )
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
@pytest.mark.parametrize(
    "mime_type",
    [
        MockFileFactoryMimeType.PDF,
        MockFileFactoryMimeType.PNG,
        MockFileFactoryMimeType.MP4,
        MockFileFactoryMimeType.OGG,
    ],
)
def test_attachment_lifecycle(test_base_kiln_file, mock_file_factory, mime_type):
    """Follow an attachment through create, save, reload and a second save."""
    source_file = mock_file_factory(mime_type)
    model_dir = test_base_kiln_file.parent
    model = ModelWithAttachment(
        path=test_base_kiln_file,
        attachment=KilnAttachmentModel.from_file(source_file),
    )

    # --- before the first save: only input_path is set ---
    assert model.attachment.input_path is not None
    assert model.attachment.path is None

    # resolving with the model's folder yields a file equal to the source
    resolved_before_save = model.attachment.resolve_path(model_dir)
    assert resolved_before_save is not None
    assert filecmp.cmp(resolved_before_save, source_file)

    # resolving without a parent folder gives the same result
    resolved_before_save_no_parent = model.attachment.resolve_path()
    assert resolved_before_save_no_parent is not None
    assert filecmp.cmp(resolved_before_save_no_parent, source_file)

    assert resolved_before_save_no_parent == resolved_before_save

    # --- first save: path becomes set and input_path is cleared ---
    model.save_to_file()

    assert model.attachment.path is not None
    assert model.attachment.input_path is None

    # --- reload from disk: same state as the saved in-memory model ---
    reloaded = ModelWithAttachment.load_from_file(test_base_kiln_file)
    assert reloaded.attachment.path is not None
    assert reloaded.attachment.input_path is None

    # the reloaded attachment must be resolved against the model's folder
    resolved_after_save = reloaded.attachment.resolve_path(model_dir)
    assert resolved_after_save is not None
    assert filecmp.cmp(resolved_after_save, source_file)

    # the JSON on disk stores "path" but never "input_path"
    saved = json.loads(test_base_kiln_file.read_text())
    assert "input_path" not in saved["attachment"]
    assert "path" in saved["attachment"]

    # --- second save: the attachment path must stay the same ---
    model.save_to_file()
    assert model.attachment.path is not None
    assert model.attachment.path == Path(saved["attachment"]["path"])

    reloaded = ModelWithAttachment.load_from_file(test_base_kiln_file)
    assert reloaded.attachment.path is not None
    assert reloaded.attachment.input_path is None
    assert reloaded.attachment.path == Path(saved["attachment"]["path"])
    assert filecmp.cmp(
        reloaded.attachment.resolve_path(model_dir),
        source_file,
    )
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def test_attachment_rejects_relative_path_input(mock_file_factory):
    """``from_file`` raises ``ValueError`` when given a relative path."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)

    # strip the parent folder so only the relative file name remains
    relative_only = source_pdf.relative_to(source_pdf.parent)

    with pytest.raises(ValueError):
        KilnAttachmentModel.from_file(relative_only)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def test_loading_from_file(test_base_kiln_file, mock_file_factory):
    """A saved model can be loaded from disk and then saved again."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    model_dir = test_base_kiln_file.parent
    model_file = model_dir / "test_model.json"

    original = ModelWithAttachment(
        path=model_file,
        attachment=KilnAttachmentModel.from_file(source_pdf),
    )
    assert original.attachment.path is None

    original.save_to_file()
    assert original.attachment.path is not None

    # the model can be read back from the JSON file
    reloaded = ModelWithAttachment.load_from_file(model_file)
    assert reloaded.attachment.path is not None

    # the loaded path is joined onto the model's folder to reach the file
    assert filecmp.cmp(model_dir / reloaded.attachment.path, source_pdf)

    # saving the reloaded model must work as well: the path has to be
    # hydrated so validation does not treat the file as missing
    reloaded.save_to_file()
    assert reloaded.attachment.path is not None
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
class ModelWithAttachmentNameOverride(KilnBaseModel):
    """Test model whose serializer sets a ``filename_prefix`` for its attachment."""

    attachment: KilnAttachmentModel = Field(default=None)

    @field_serializer("attachment")
    def serialize_attachment(
        self, attachment: KilnAttachmentModel, info: SerializationInfo
    ) -> dict:
        """Dump the attachment with ``filename_prefix`` added to the context.

        The incoming context dict is reused (and updated in place) when it is
        non-empty; otherwise a new dict is created.
        """
        ctx = info.context or {}
        ctx.update({"filename_prefix": "attachment_override"})
        return attachment.model_dump(mode="json", context=ctx)
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def test_attachment_filename_override(test_base_kiln_file, mock_file_factory):
    """The serializer's ``filename_prefix`` is reflected in the stored file name."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    model_dir = test_base_kiln_file.parent
    model_file = model_dir / "test_model.json"

    model = ModelWithAttachmentNameOverride(
        path=model_file,
        attachment=KilnAttachmentModel.from_file(source_pdf),
    )
    model.save_to_file()

    saved = json.loads(test_base_kiln_file.read_text())
    stored_path = saved["attachment"]["path"]

    # the stored name starts with the override prefix and keeps the .pdf suffix
    assert stored_path.startswith("attachment_override_")
    assert stored_path.endswith(".pdf")
    assert filecmp.cmp(model_dir / stored_path, source_pdf)
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
class ModelWithAttachmentNameOverrideList(KilnBaseModel):
    """Test model whose serializer sets a ``filename_prefix`` for a list of attachments."""

    # default_factory avoids declaring a mutable list literal as the default
    attachment_list: List[KilnAttachmentModel] = Field(default_factory=list)

    @field_serializer("attachment_list")
    def serialize_attachment_list(
        self, attachment_list: List[KilnAttachmentModel], info: SerializationInfo
    ) -> List[dict]:
        """Dump each attachment with ``filename_prefix`` added to the context.

        Returns one dumped dict per attachment, in the original order. The
        return annotation matters: pydantic infers the serializer's return
        type from it, so it must describe a list rather than a single dict.
        """
        # reuse (and update in place) the incoming context when it is non-empty
        context = info.context or {}
        context["filename_prefix"] = "attachment_override"
        return [
            attachment.model_dump(mode="json", context=context)
            for attachment in attachment_list
        ]
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def test_attachment_filename_override_list(test_base_kiln_file, mock_file_factory):
    """Every attachment in a list field is saved with the overridden prefix."""
    mime_types = (
        MockFileFactoryMimeType.PDF,
        MockFileFactoryMimeType.PNG,
        MockFileFactoryMimeType.MP4,
        MockFileFactoryMimeType.OGG,
    )
    test_media_file_paths = [mock_file_factory(mime_type) for mime_type in mime_types]
    root_path = test_base_kiln_file.parent

    ModelWithAttachmentNameOverrideList(
        path=root_path / "test_model.json",
        attachment_list=[
            KilnAttachmentModel.from_file(source) for source in test_media_file_paths
        ],
    ).save_to_file()

    with open(test_base_kiln_file, "r") as file:
        saved_attachments = json.load(file)["attachment_list"]

    for saved, source in zip(saved_attachments, test_media_file_paths):
        saved_path = saved["path"]
        # file persisted to disk will be named like: attachment_override_<random_numbers>.pdf
        assert saved_path.startswith("attachment_override_")
        # The saved name keeps the suffix of the source file.
        assert saved_path.endswith(source.suffix)
        assert filecmp.cmp(root_path / saved_path, source)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
class ModelWithAttachmentNoNameOverride(KilnBaseModel):
    """Test model with a single attachment field and no custom field serializer."""

    # No field_serializer is declared here, so nothing sets "filename_prefix".
    attachment: KilnAttachmentModel = Field(default=None)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def test_attachment_filename_no_override(test_base_kiln_file, mock_file_factory):
    """Without a prefix override the saved attachment name is purely numeric."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)
    root_path = test_base_kiln_file.parent

    ModelWithAttachmentNoNameOverride(
        path=root_path / "test_model.json",
        attachment=KilnAttachmentModel.from_file(source_pdf),
    ).save_to_file()

    with open(test_base_kiln_file, "r") as file:
        saved_path = json.load(file)["attachment"]["path"]

    # file persisted to disk will be named like: <random_numbers>.pdf
    stem = saved_path.split(".")[0]
    assert stem.isdigit()
    assert saved_path.endswith(".pdf")
    # The persisted copy must have the same content as the source file.
    assert filecmp.cmp(root_path / saved_path, source_pdf)
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
@pytest.mark.parametrize(
    "mime_type, extension",
    [
        (MockFileFactoryMimeType.PDF, ".pdf"),
        (MockFileFactoryMimeType.PNG, ".png"),
        (MockFileFactoryMimeType.MP4, ".mp4"),
        (MockFileFactoryMimeType.OGG, ".ogg"),
        (MockFileFactoryMimeType.MD, ".md"),
        (MockFileFactoryMimeType.TXT, ".txt"),
        (MockFileFactoryMimeType.HTML, ".html"),
        (MockFileFactoryMimeType.CSV, ".csv"),
        (MockFileFactoryMimeType.JPEG, ".jpeg"),
        (MockFileFactoryMimeType.MP3, ".mp3"),
        (MockFileFactoryMimeType.WAV, ".wav"),
        (MockFileFactoryMimeType.MOV, ".mov"),
    ],
)
def test_attachment_extension_from_data(
    test_base_kiln_file, mock_file_factory, mime_type, extension
):
    """An attachment built from raw bytes is saved with the expected extension.

    The (OGG, ".ogg") case used to be listed twice; it now appears once so the
    same scenario is not run again under a second test id.
    """
    test_media_file_document = mock_file_factory(mime_type)
    root_path = test_base_kiln_file.parent
    json_path = root_path / "test_model.json"

    # Build the attachment from in-memory bytes plus the mime type, not from a path.
    data_bytes = test_media_file_document.read_bytes()

    model = ModelWithAttachment(
        path=json_path,
        attachment=KilnAttachmentModel.from_data(data_bytes, mime_type),
    )
    model.save_to_file()

    with open(test_base_kiln_file, "r") as file:
        data = json.load(file)

    assert data["attachment"]["path"].endswith(extension), (
        f"{data['attachment']['path']} does not end with {extension}"
    )
    # The persisted copy must have the same content as the source file.
    assert filecmp.cmp(root_path / data["attachment"]["path"], test_media_file_document)
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
@pytest.mark.parametrize(
    "mime_type, extension",
    [
        ("application/octet-stream", ".unknown"),
        ("fake-mimetype", ".unknown"),
    ],
)
def test_attachment_extension_from_data_unknown_mime_type(
    test_base_kiln_file, mock_file_factory, mime_type, extension
):
    """Attachments built from bytes with these mime types are saved as ".unknown"."""
    root_path = test_base_kiln_file.parent

    ModelWithAttachment(
        path=root_path / "test_model.json",
        attachment=KilnAttachmentModel.from_data(b"fake data", mime_type),
    ).save_to_file()

    with open(test_base_kiln_file, "r") as file:
        data = json.load(file)

    assert data["attachment"]["path"].endswith(extension), (
        f"{data['attachment']['path']} does not end with {extension}"
    )
|
|
@@ -17,7 +17,7 @@ from kiln_ai.datamodel.basemodel import (
|
|
|
17
17
|
string_to_valid_name,
|
|
18
18
|
)
|
|
19
19
|
from kiln_ai.datamodel.model_cache import ModelCache
|
|
20
|
-
from kiln_ai.datamodel.task import
|
|
20
|
+
from kiln_ai.datamodel.task import RunConfigProperties
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@pytest.fixture
|
|
@@ -552,8 +552,8 @@ def base_task():
|
|
|
552
552
|
@pytest.fixture
|
|
553
553
|
def adapter(base_task):
|
|
554
554
|
return MockAdapter(
|
|
555
|
-
|
|
556
|
-
|
|
555
|
+
task=base_task,
|
|
556
|
+
run_config=RunConfigProperties(
|
|
557
557
|
model_name="test_model",
|
|
558
558
|
model_provider_name="openai",
|
|
559
559
|
prompt_id="simple_prompt_builder",
|
|
@@ -605,7 +605,7 @@ async def test_invoke_parsing_flow(adapter):
|
|
|
605
605
|
mock_provider.reasoning_capable = True
|
|
606
606
|
with pytest.raises(
|
|
607
607
|
RuntimeError,
|
|
608
|
-
match="Reasoning is required for this model, but no reasoning was returned
|
|
608
|
+
match=r"^Reasoning is required for this model, but no reasoning was returned.$",
|
|
609
609
|
):
|
|
610
610
|
await adapter.invoke("test input")
|
|
611
611
|
|