kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158)
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,649 @@
1
+ import filecmp
2
+ import hashlib
3
+ import json
4
+ import uuid
5
+ from pathlib import Path
6
+ from typing import Dict, List, Optional
7
+ from unittest.mock import patch
8
+
9
+ import pytest
10
+ from pydantic import BaseModel, Field, SerializationInfo, field_serializer
11
+
12
+ from conftest import MockFileFactoryMimeType
13
+ from kiln_ai.datamodel.basemodel import KilnAttachmentModel, KilnBaseModel
14
+
15
+
16
+ class ModelWithAttachment(KilnBaseModel):
17
+ attachment: KilnAttachmentModel = Field(default=None)
18
+ attachment_list: Optional[List[KilnAttachmentModel]] = Field(default=None)
19
+ attachment_dict: Optional[Dict[str, KilnAttachmentModel]] = Field(default=None)
20
+
21
+
22
+ class ContainerModel(BaseModel):
23
+ indirect_attachment: Optional[KilnAttachmentModel] = Field(default=None)
24
+ indirect_attachment_list: Optional[List[KilnAttachmentModel]] = Field(default=None)
25
+ indirect_attachment_dict: Optional[Dict[str, KilnAttachmentModel]] = Field(
26
+ default=None
27
+ )
28
+
29
+
30
+ class ModelWithIndirectAttachment(KilnBaseModel):
31
+ # this nested model contains an attachment field
32
+ container: ContainerModel = Field(default=ContainerModel())
33
+ container_optional: Optional[ContainerModel] = Field(default=None)
34
+
35
+
36
+ def hash_file(p: Path) -> str:
37
+ return hashlib.md5(p.read_bytes()).hexdigest()
38
+
39
+
40
+ @pytest.fixture
41
+ def test_base_kiln_file(tmp_path) -> Path:
42
+ test_file_path = tmp_path / "test_model.json"
43
+ data = {"v": 1, "model_type": "kiln_base_model"}
44
+
45
+ with open(test_file_path, "w") as file:
46
+ json.dump(data, file, indent=4)
47
+
48
+ return test_file_path
49
+
50
+
51
+ def test_save_to_file_with_attachment_single(test_base_kiln_file, mock_file_factory):
52
+ test_file = mock_file_factory(MockFileFactoryMimeType.PDF)
53
+ model = ModelWithAttachment(
54
+ path=test_base_kiln_file,
55
+ attachment=KilnAttachmentModel.from_file(test_file),
56
+ )
57
+
58
+ assert model.attachment.path is None
59
+
60
+ model.save_to_file()
61
+
62
+ assert model.attachment.path is not None
63
+
64
+ with open(test_base_kiln_file, "r") as file:
65
+ data = json.load(file)
66
+
67
+ # the path after saving
68
+ attachment_path = data["attachment"]["path"]
69
+
70
+ # check it is a string, and not an absolute path
71
+ assert isinstance(attachment_path, str)
72
+ assert not Path(attachment_path).is_absolute()
73
+
74
+ # check persisted path is relative to model.path.parent
75
+ assert model.path is not None
76
+ expected_full_path = model.path.parent / attachment_path
77
+ assert expected_full_path.exists()
78
+ assert filecmp.cmp(expected_full_path, test_file)
79
+
80
+
81
+ def test_save_to_file_with_attachment_list(test_base_kiln_file, mock_file_factory):
82
+ media_file_paths = [
83
+ mock_file_factory(MockFileFactoryMimeType.PDF),
84
+ mock_file_factory(MockFileFactoryMimeType.PNG),
85
+ mock_file_factory(MockFileFactoryMimeType.MP4),
86
+ mock_file_factory(MockFileFactoryMimeType.OGG),
87
+ ]
88
+
89
+ # we map hashes to their files, so we can find the corresponding file after the save
90
+ media_file_hashes = {hash_file(p): p for p in media_file_paths}
91
+
92
+ model = ModelWithAttachment(
93
+ path=test_base_kiln_file,
94
+ attachment_list=[KilnAttachmentModel.from_file(p) for p in media_file_paths],
95
+ )
96
+
97
+ for attachment in model.attachment_list:
98
+ assert attachment.path is None
99
+
100
+ model.save_to_file()
101
+
102
+ for attachment in model.attachment_list:
103
+ assert attachment.path is not None
104
+
105
+ with open(test_base_kiln_file, "r") as file:
106
+ data = json.load(file)
107
+
108
+ # check the paths are relative to model.path.parent
109
+ for attachment in data["attachment_list"]:
110
+ attachment_path = attachment["path"]
111
+ assert isinstance(attachment_path, str)
112
+ assert not Path(attachment_path).is_absolute()
113
+
114
+ # check all the files were persisted
115
+ attachment_list = data["attachment_list"]
116
+ assert len(attachment_list) == len(media_file_paths)
117
+
118
+ # check the files are present and correct in model.path.parent
119
+ for attachment in attachment_list:
120
+ attachment_path = attachment["path"]
121
+ # check the path is a string, and not an absolute path
122
+ assert isinstance(attachment_path, str)
123
+ assert not Path(attachment_path).is_absolute()
124
+
125
+ # check the file is the same as the original
126
+ assert model.path is not None
127
+ expected_full_path = model.path.parent / attachment_path
128
+ assert expected_full_path.exists()
129
+
130
+ # find the original file it corresponds to, and check content hash is identical
131
+ original_file = media_file_hashes[hash_file(expected_full_path)]
132
+ assert filecmp.cmp(expected_full_path, original_file)
133
+
134
+
135
+ def test_save_to_file_with_attachment_dict(test_base_kiln_file, mock_file_factory):
136
+ media_file_paths = [
137
+ mock_file_factory(MockFileFactoryMimeType.PDF),
138
+ mock_file_factory(MockFileFactoryMimeType.PNG),
139
+ mock_file_factory(MockFileFactoryMimeType.MP4),
140
+ mock_file_factory(MockFileFactoryMimeType.OGG),
141
+ ]
142
+ # we map hashes to their files, so we can find the corresponding file after the save
143
+ media_file_hashes = {hash_file(p): p for p in media_file_paths}
144
+
145
+ attachment_dict = {
146
+ f"file_{i}": KilnAttachmentModel.from_file(p)
147
+ for i, p in enumerate(media_file_paths)
148
+ }
149
+ model = ModelWithAttachment(
150
+ path=test_base_kiln_file,
151
+ attachment_dict=attachment_dict,
152
+ )
153
+ for attachment in model.attachment_dict.values():
154
+ assert attachment.path is None
155
+
156
+ model.save_to_file()
157
+
158
+ for attachment in model.attachment_dict.values():
159
+ assert attachment.path is not None
160
+
161
+ with open(test_base_kiln_file, "r") as file:
162
+ data = json.load(file)
163
+
164
+ # check the paths are relative to model.path.parent
165
+ for attachment in data["attachment_dict"].values():
166
+ attachment_path = attachment["path"]
167
+ assert isinstance(attachment_path, str)
168
+ assert not Path(attachment_path).is_absolute()
169
+
170
+ # check all the files were persisted
171
+ attachment_dict = data["attachment_dict"]
172
+ assert len(attachment_dict) == len(media_file_paths)
173
+
174
+ # check the files are present and correct in model.path.parent
175
+ for attachment in attachment_dict.values():
176
+ attachment_path = attachment["path"]
177
+ # check the path is a string, and not an absolute path
178
+ assert isinstance(attachment_path, str)
179
+ assert not Path(attachment_path).is_absolute()
180
+
181
+ # check the file is the same as the original
182
+ assert model.path is not None
183
+ expected_full_path = model.path.parent / attachment_path
184
+ assert expected_full_path.exists()
185
+
186
+ # find the original file it corresponds to, and check content hash is identical
187
+ original_file = media_file_hashes[hash_file(expected_full_path)]
188
+ assert filecmp.cmp(expected_full_path, original_file)
189
+
190
+
191
+ def test_save_to_file_with_indirect_attachment(test_base_kiln_file, mock_file_factory):
192
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
193
+ model = ModelWithIndirectAttachment(
194
+ path=test_base_kiln_file,
195
+ container=ContainerModel(
196
+ indirect_attachment=KilnAttachmentModel.from_file(test_media_file_document)
197
+ ),
198
+ )
199
+ assert model.container.indirect_attachment.path is None
200
+
201
+ model.save_to_file()
202
+
203
+ assert model.container.indirect_attachment.path is not None
204
+
205
+ with open(test_base_kiln_file, "r") as file:
206
+ data = json.load(file)
207
+
208
+ # check the path is relative to model.path.parent
209
+ assert isinstance(data["container"]["indirect_attachment"]["path"], str)
210
+ assert not Path(data["container"]["indirect_attachment"]["path"]).is_absolute()
211
+
212
+ # check the file is the same as the original
213
+ assert model.path is not None
214
+ expected_full_path = (
215
+ model.path.parent / data["container"]["indirect_attachment"]["path"]
216
+ )
217
+ assert expected_full_path.exists()
218
+ assert filecmp.cmp(expected_full_path, test_media_file_document)
219
+
220
+
221
+ def test_save_to_file_with_indirect_attachment_optional(
222
+ test_base_kiln_file, mock_file_factory
223
+ ):
224
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
225
+ model = ModelWithIndirectAttachment(
226
+ path=test_base_kiln_file,
227
+ container_optional=ContainerModel(
228
+ indirect_attachment=KilnAttachmentModel.from_file(test_media_file_document)
229
+ ),
230
+ )
231
+ assert model.container_optional.indirect_attachment.path is None
232
+
233
+ model.save_to_file()
234
+
235
+ assert model.container_optional.indirect_attachment.path is not None
236
+
237
+ with open(test_base_kiln_file, "r") as file:
238
+ data = json.load(file)
239
+
240
+ # check the optional container was persisted (not null)
241
+ assert data["container_optional"] is not None
242
+
243
+ # check the file is the same as the original
244
+ assert model.path is not None
245
+ expected_full_path = (
246
+ model.path.parent / data["container_optional"]["indirect_attachment"]["path"]
247
+ )
248
+ assert expected_full_path.exists()
249
+ assert filecmp.cmp(expected_full_path, test_media_file_document)
250
+
251
+
252
+ def test_save_to_file_with_indirect_attachment_optional_none(test_base_kiln_file):
253
+ # check we don't copy the attachment if it is None
254
+ with patch.object(KilnAttachmentModel, "copy_file_to") as mock_save_to_file:
255
+ mock_save_to_file.return_value = Path("fake.txt")
256
+ model = ModelWithIndirectAttachment(
257
+ path=test_base_kiln_file,
258
+ container_optional=None,
259
+ )
260
+ model.save_to_file()
261
+
262
+ with open(test_base_kiln_file, "r") as file:
263
+ data = json.load(file)
264
+
265
+ # check the optional container was persisted as null
266
+ assert data["container_optional"] is None
267
+
268
+ # check KilnAttachmentModel.copy_file_to() was not called
269
+ mock_save_to_file.assert_not_called()
270
+
271
+
272
+ def test_dump_dest_path(test_base_kiln_file, mock_file_factory):
273
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
274
+ model = ModelWithAttachment(
275
+ path=test_base_kiln_file,
276
+ attachment=KilnAttachmentModel.from_file(test_media_file_document),
277
+ )
278
+
279
+ with pytest.raises(
280
+ ValueError,
281
+ match="dest_path must be a valid Path object when saving attachments",
282
+ ):
283
+ model.model_dump_json(context={"save_attachments": True})
284
+
285
+ # should raise when dest_path is not a Path object
286
+ with pytest.raises(
287
+ ValueError,
288
+ match="dest_path must be a valid Path object when saving attachments",
289
+ ):
290
+ model.model_dump_json(
291
+ context={
292
+ "save_attachments": True,
293
+ "dest_path": str(test_media_file_document),
294
+ }
295
+ )
296
+
297
+ # should raise when dest_path is not a directory
298
+ with pytest.raises(
299
+ ValueError,
300
+ match="dest_path must be a directory when saving attachments",
301
+ ):
302
+ model.model_dump_json(
303
+ context={"save_attachments": True, "dest_path": test_media_file_document}
304
+ )
305
+
306
+ # should not raise when dest_path is set
307
+ model.model_dump_json(context={"dest_path": test_base_kiln_file.parent})
308
+
309
+
310
+ def test_resolve_path(test_base_kiln_file, mock_file_factory):
311
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
312
+ model = ModelWithAttachment(
313
+ path=test_base_kiln_file,
314
+ attachment=KilnAttachmentModel.from_file(test_media_file_document),
315
+ )
316
+ assert (
317
+ model.attachment.resolve_path(test_base_kiln_file.parent)
318
+ == test_media_file_document
319
+ )
320
+
321
+
322
+ def test_create_from_data(test_base_kiln_file, mock_file_factory):
323
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
324
+ with open(test_media_file_document, "rb") as file:
325
+ data = file.read()
326
+
327
+ attachment = KilnAttachmentModel.from_data(data, "application/pdf")
328
+ assert attachment.resolve_path(test_base_kiln_file.parent).exists()
329
+
330
+ model = ModelWithAttachment(
331
+ path=test_base_kiln_file,
332
+ attachment=attachment,
333
+ )
334
+ assert model.attachment.path is None
335
+
336
+ model.save_to_file()
337
+
338
+ assert model.attachment.path is not None
339
+
340
+ with open(test_base_kiln_file, "r") as file:
341
+ data = json.load(file)
342
+
343
+ assert str(data["attachment"]["path"]) == str(model.attachment.path)
344
+ assert filecmp.cmp(
345
+ test_media_file_document, attachment.resolve_path(test_base_kiln_file.parent)
346
+ )
347
+
348
+
349
+ def test_attachment_file_does_not_exist(test_base_kiln_file):
350
+ not_found_file = Path(f"/not/found/{uuid.uuid4()!s}.txt")
351
+
352
+ # should raise when we assign a file that does not exist
353
+ with pytest.raises(ValueError):
354
+ KilnAttachmentModel.from_file(not_found_file)
355
+
356
+
357
+ def test_attachment_is_folder(test_base_kiln_file, tmp_path):
358
+ # create folder in tmp_path
359
+ folder = tmp_path / "test_folder"
360
+ folder.mkdir()
361
+
362
+ # should raise when we assign a folder
363
+ with pytest.raises(ValueError):
364
+ ModelWithAttachment(
365
+ path=test_base_kiln_file,
366
+ attachment=KilnAttachmentModel.from_file(folder),
367
+ )
368
+
369
+
370
+ @pytest.mark.parametrize(
371
+ "mime_type",
372
+ [
373
+ MockFileFactoryMimeType.PDF,
374
+ MockFileFactoryMimeType.PNG,
375
+ MockFileFactoryMimeType.MP4,
376
+ MockFileFactoryMimeType.OGG,
377
+ ],
378
+ )
379
+ def test_attachment_lifecycle(test_base_kiln_file, mock_file_factory, mime_type):
380
+ test_media_file_document = mock_file_factory(mime_type)
381
+ model = ModelWithAttachment(
382
+ path=test_base_kiln_file,
383
+ attachment=KilnAttachmentModel.from_file(test_media_file_document),
384
+ )
385
+
386
+ # before save, the attachment has an absolute path and its stable path does not exist yet
387
+ assert model.attachment.input_path is not None
388
+ assert model.attachment.path is None
389
+
390
+ # before save, resolve_path should resolve to the original absolute path
391
+ path_resolved_pre_saved = model.attachment.resolve_path(test_base_kiln_file.parent)
392
+ assert path_resolved_pre_saved is not None
393
+ assert filecmp.cmp(path_resolved_pre_saved, test_media_file_document)
394
+
395
+ # check it also returns the absolute path when we don't provide the parent path
396
+ path_resolved_pre_saved_no_parent = model.attachment.resolve_path()
397
+ assert path_resolved_pre_saved_no_parent is not None
398
+ assert filecmp.cmp(path_resolved_pre_saved_no_parent, test_media_file_document)
399
+
400
+ assert path_resolved_pre_saved_no_parent == path_resolved_pre_saved
401
+
402
+ # now we save the model, the attachment is persisted to disk, the absolute path is cleared,
403
+ # and the stable path (relative to the model's path) is set
404
+ model.save_to_file()
405
+
406
+ # after save, the attachment has a stable path and its absolute path is cleared
407
+ assert model.attachment.path is not None
408
+ assert model.attachment.input_path is None
409
+
410
+ # when we load the model from file, the attachment has its stable relative path, and no absolute path
411
+ model_loaded_from_file = ModelWithAttachment.load_from_file(test_base_kiln_file)
412
+ assert model_loaded_from_file.attachment.path is not None
413
+ assert model_loaded_from_file.attachment.input_path is None
414
+
415
+ # the attachment is not aware of its full absolute path, so we need to resolve it, and it should reconstruct it
416
+ path_resolved_post_saved = model_loaded_from_file.attachment.resolve_path(
417
+ test_base_kiln_file.parent
418
+ )
419
+ assert path_resolved_post_saved is not None
420
+ assert filecmp.cmp(path_resolved_post_saved, test_media_file_document)
421
+
422
+ # verify the model JSON file does not contain the input_path
423
+ with open(test_base_kiln_file, "r") as file:
424
+ data = json.load(file)
425
+ assert "input_path" not in data["attachment"]
426
+ assert "path" in data["attachment"]
427
+
428
+ # test idempotency - saving again should not change the attachment path
429
+ model.save_to_file()
430
+ assert model.attachment.path is not None
431
+ assert model.attachment.path == Path(data["attachment"]["path"])
432
+
433
+ model_loaded_from_file = ModelWithAttachment.load_from_file(test_base_kiln_file)
434
+ assert model_loaded_from_file.attachment.path is not None
435
+ assert model_loaded_from_file.attachment.input_path is None
436
+ assert model_loaded_from_file.attachment.path == Path(data["attachment"]["path"])
437
+ assert filecmp.cmp(
438
+ model_loaded_from_file.attachment.resolve_path(test_base_kiln_file.parent),
439
+ test_media_file_document,
440
+ )
441
+
442
+
443
+ def test_attachment_rejects_relative_path_input(mock_file_factory):
444
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
445
+ # the input path should be absolute, and we should reject relative paths
446
+ with pytest.raises(ValueError):
447
+ KilnAttachmentModel.from_file(
448
+ test_media_file_document.relative_to(test_media_file_document.parent)
449
+ )
450
+
451
+
452
+ def test_loading_from_file(test_base_kiln_file, mock_file_factory):
453
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
454
+ root_path = test_base_kiln_file.parent
455
+ json_path = root_path / "test_model.json"
456
+ model = ModelWithAttachment(
457
+ path=json_path,
458
+ attachment=KilnAttachmentModel.from_file(test_media_file_document),
459
+ )
460
+ assert model.attachment.path is None
461
+
462
+ model.save_to_file()
463
+
464
+ assert model.attachment.path is not None
465
+
466
+ # check we can load the model from the file
467
+ model = ModelWithAttachment.load_from_file(json_path)
468
+
469
+ assert model.attachment.path is not None
470
+
471
+ # when we load from JSON, the attachment path is only the relative segment
472
+ assert filecmp.cmp(root_path / model.attachment.path, test_media_file_document)
473
+
474
+ # we need to make sure that the path is hydrated correctly so the next save
475
+ # won't think the file does not exist during validation
476
+ model.save_to_file()
477
+
478
+ assert model.attachment.path is not None
479
+
480
+
481
+ class ModelWithAttachmentNameOverride(KilnBaseModel):
482
+ attachment: KilnAttachmentModel = Field(default=None)
483
+
484
+ @field_serializer("attachment")
485
+ def serialize_attachment(
486
+ self, attachment: KilnAttachmentModel, info: SerializationInfo
487
+ ) -> dict:
488
+ context = info.context or {}
489
+ context["filename_prefix"] = "attachment_override"
490
+ return attachment.model_dump(mode="json", context=context)
491
+
492
+
493
+ def test_attachment_filename_override(test_base_kiln_file, mock_file_factory):
494
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
495
+ root_path = test_base_kiln_file.parent
496
+ json_path = root_path / "test_model.json"
497
+ model = ModelWithAttachmentNameOverride(
498
+ path=json_path,
499
+ attachment=KilnAttachmentModel.from_file(test_media_file_document),
500
+ )
501
+
502
+ model.save_to_file()
503
+
504
+ with open(test_base_kiln_file, "r") as file:
505
+ data = json.load(file)
506
+
507
+ # file persisted to disk will be named like: attachment_override_<random_numbers>.pdf
508
+ assert data["attachment"]["path"].startswith("attachment_override_")
509
+ assert data["attachment"]["path"].endswith(".pdf")
510
+ assert filecmp.cmp(root_path / data["attachment"]["path"], test_media_file_document)
511
+
512
+
513
+ class ModelWithAttachmentNameOverrideList(KilnBaseModel):
514
+ attachment_list: List[KilnAttachmentModel] = Field(default=[])
515
+
516
+ @field_serializer("attachment_list")
517
+ def serialize_attachment_list(
518
+ self, attachment_list: List[KilnAttachmentModel], info: SerializationInfo
519
+ ) -> dict:
520
+ context = info.context or {}
521
+ context["filename_prefix"] = "attachment_override"
522
+ return [
523
+ attachment.model_dump(mode="json", context=context)
524
+ for attachment in attachment_list
525
+ ]
526
+
527
+
528
+ def test_attachment_filename_override_list(test_base_kiln_file, mock_file_factory):
529
+ test_media_file_paths = [
530
+ mock_file_factory(MockFileFactoryMimeType.PDF),
531
+ mock_file_factory(MockFileFactoryMimeType.PNG),
532
+ mock_file_factory(MockFileFactoryMimeType.MP4),
533
+ mock_file_factory(MockFileFactoryMimeType.OGG),
534
+ ]
535
+ root_path = test_base_kiln_file.parent
536
+ json_path = root_path / "test_model.json"
537
+ model = ModelWithAttachmentNameOverrideList(
538
+ path=json_path,
539
+ attachment_list=[
540
+ KilnAttachmentModel.from_file(p) for p in test_media_file_paths
541
+ ],
542
+ )
543
+
544
+ model.save_to_file()
545
+
546
+ with open(test_base_kiln_file, "r") as file:
547
+ data = json.load(file)
548
+
549
+ for attachment, file_path in zip(data["attachment_list"], test_media_file_paths):
550
+ # file persisted to disk will be named like: attachment_override_<random_numbers><extension>
551
+ assert attachment["path"].startswith("attachment_override_")
552
+ extension = file_path.suffix
553
+ assert attachment["path"].endswith(extension)
554
+ assert filecmp.cmp(root_path / attachment["path"], file_path)
555
+
556
+
557
+ class ModelWithAttachmentNoNameOverride(KilnBaseModel):
558
+ attachment: KilnAttachmentModel = Field(default=None)
559
+
560
+
561
+ def test_attachment_filename_no_override(test_base_kiln_file, mock_file_factory):
562
+ test_media_file_document = mock_file_factory(MockFileFactoryMimeType.PDF)
563
+ root_path = test_base_kiln_file.parent
564
+ json_path = root_path / "test_model.json"
565
+ model = ModelWithAttachmentNoNameOverride(
566
+ path=json_path,
567
+ attachment=KilnAttachmentModel.from_file(test_media_file_document),
568
+ )
569
+
570
+ model.save_to_file()
571
+
572
+ with open(test_base_kiln_file, "r") as file:
573
+ data = json.load(file)
574
+
575
+ # file persisted to disk will be named like: <random_numbers>.pdf
576
+ assert data["attachment"]["path"].split(".")[0].isdigit()
577
+ assert data["attachment"]["path"].endswith(".pdf")
578
+ assert filecmp.cmp(root_path / data["attachment"]["path"], test_media_file_document)
579
+
580
+
581
+ @pytest.mark.parametrize(
582
+ "mime_type, extension",
583
+ [
584
+ (MockFileFactoryMimeType.PDF, ".pdf"),
585
+ (MockFileFactoryMimeType.PNG, ".png"),
586
+ (MockFileFactoryMimeType.MP4, ".mp4"),
587
+ (MockFileFactoryMimeType.OGG, ".ogg"),
588
+ (MockFileFactoryMimeType.MD, ".md"),
589
+ (MockFileFactoryMimeType.TXT, ".txt"),
590
+ (MockFileFactoryMimeType.HTML, ".html"),
591
+ (MockFileFactoryMimeType.CSV, ".csv"),
592
+ (MockFileFactoryMimeType.JPEG, ".jpeg"),
593
+ (MockFileFactoryMimeType.MP3, ".mp3"),
594
+ (MockFileFactoryMimeType.WAV, ".wav"),
595
+ (MockFileFactoryMimeType.OGG, ".ogg"),
596
+ (MockFileFactoryMimeType.MOV, ".mov"),
597
+ ],
598
+ )
599
+ def test_attachment_extension_from_data(
600
+ test_base_kiln_file, mock_file_factory, mime_type, extension
601
+ ):
602
+ test_media_file_document = mock_file_factory(mime_type)
603
+ root_path = test_base_kiln_file.parent
604
+ json_path = root_path / "test_model.json"
605
+
606
+ data_bytes = test_media_file_document.read_bytes()
607
+
608
+ model = ModelWithAttachment(
609
+ path=json_path,
610
+ attachment=KilnAttachmentModel.from_data(data_bytes, mime_type),
611
+ )
612
+ model.save_to_file()
613
+
614
+ with open(test_base_kiln_file, "r") as file:
615
+ data = json.load(file)
616
+
617
+ assert data["attachment"]["path"].endswith(extension), (
618
+ f"{data['attachment']['path']} does not end with {extension}"
619
+ )
620
+ assert filecmp.cmp(root_path / data["attachment"]["path"], test_media_file_document)
621
+
622
+
623
+ @pytest.mark.parametrize(
624
+ "mime_type, extension",
625
+ [
626
+ ("application/octet-stream", ".unknown"),
627
+ ("fake-mimetype", ".unknown"),
628
+ ],
629
+ )
630
+ def test_attachment_extension_from_data_unknown_mime_type(
631
+ test_base_kiln_file, mock_file_factory, mime_type, extension
632
+ ):
633
+ root_path = test_base_kiln_file.parent
634
+ json_path = root_path / "test_model.json"
635
+
636
+ data_bytes = b"fake data"
637
+
638
+ model = ModelWithAttachment(
639
+ path=json_path,
640
+ attachment=KilnAttachmentModel.from_data(data_bytes, mime_type),
641
+ )
642
+ model.save_to_file()
643
+
644
+ with open(test_base_kiln_file, "r") as file:
645
+ data = json.load(file)
646
+
647
+ assert data["attachment"]["path"].endswith(extension), (
648
+ f"{data['attachment']['path']} does not end with {extension}"
649
+ )
@@ -17,7 +17,7 @@ from kiln_ai.datamodel.basemodel import (
17
17
  string_to_valid_name,
18
18
  )
19
19
  from kiln_ai.datamodel.model_cache import ModelCache
20
- from kiln_ai.datamodel.task import RunConfig
20
+ from kiln_ai.datamodel.task import RunConfigProperties
21
21
 
22
22
 
23
23
  @pytest.fixture
@@ -552,8 +552,8 @@ def base_task():
552
552
  @pytest.fixture
553
553
  def adapter(base_task):
554
554
  return MockAdapter(
555
- run_config=RunConfig(
556
- task=base_task,
555
+ task=base_task,
556
+ run_config=RunConfigProperties(
557
557
  model_name="test_model",
558
558
  model_provider_name="openai",
559
559
  prompt_id="simple_prompt_builder",
@@ -605,7 +605,7 @@ async def test_invoke_parsing_flow(adapter):
605
605
  mock_provider.reasoning_capable = True
606
606
  with pytest.raises(
607
607
  RuntimeError,
608
- match="Reasoning is required for this model, but no reasoning was returned.",
608
+ match=r"^Reasoning is required for this model, but no reasoning was returned.$",
609
609
  ):
610
610
  await adapter.invoke("test input")
611
611