kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +8 -2
- kiln_ai/adapters/adapter_registry.py +43 -208
- kiln_ai/adapters/chat/chat_formatter.py +8 -12
- kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
- kiln_ai/adapters/chunkers/__init__.py +13 -0
- kiln_ai/adapters/chunkers/base_chunker.py +42 -0
- kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
- kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
- kiln_ai/adapters/chunkers/helpers.py +23 -0
- kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
- kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
- kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
- kiln_ai/adapters/chunkers/test_helpers.py +75 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
- kiln_ai/adapters/docker_model_runner_tools.py +119 -0
- kiln_ai/adapters/embedding/__init__.py +0 -0
- kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
- kiln_ai/adapters/embedding/embedding_registry.py +32 -0
- kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
- kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
- kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
- kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
- kiln_ai/adapters/eval/base_eval.py +2 -2
- kiln_ai/adapters/eval/eval_runner.py +9 -3
- kiln_ai/adapters/eval/g_eval.py +2 -2
- kiln_ai/adapters/eval/test_base_eval.py +2 -4
- kiln_ai/adapters/eval/test_g_eval.py +4 -5
- kiln_ai/adapters/extractors/__init__.py +18 -0
- kiln_ai/adapters/extractors/base_extractor.py +72 -0
- kiln_ai/adapters/extractors/encoding.py +20 -0
- kiln_ai/adapters/extractors/extractor_registry.py +44 -0
- kiln_ai/adapters/extractors/extractor_runner.py +112 -0
- kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
- kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
- kiln_ai/adapters/extractors/test_encoding.py +54 -0
- kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
- kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
- kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
- kiln_ai/adapters/fine_tune/__init__.py +1 -1
- kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
- kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
- kiln_ai/adapters/ml_embedding_model_list.py +192 -0
- kiln_ai/adapters/ml_model_list.py +761 -37
- kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
- kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
- kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
- kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
- kiln_ai/adapters/ollama_tools.py +69 -12
- kiln_ai/adapters/parsers/__init__.py +1 -1
- kiln_ai/adapters/provider_tools.py +205 -47
- kiln_ai/adapters/rag/deduplication.py +49 -0
- kiln_ai/adapters/rag/progress.py +252 -0
- kiln_ai/adapters/rag/rag_runners.py +844 -0
- kiln_ai/adapters/rag/test_deduplication.py +195 -0
- kiln_ai/adapters/rag/test_progress.py +785 -0
- kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
- kiln_ai/adapters/remote_config.py +80 -8
- kiln_ai/adapters/repair/test_repair_task.py +12 -9
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +657 -85
- kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
- kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
- kiln_ai/adapters/test_ml_model_list.py +251 -1
- kiln_ai/adapters/test_ollama_tools.py +340 -1
- kiln_ai/adapters/test_prompt_adaptors.py +13 -6
- kiln_ai/adapters/test_prompt_builders.py +1 -1
- kiln_ai/adapters/test_provider_tools.py +254 -8
- kiln_ai/adapters/test_remote_config.py +651 -58
- kiln_ai/adapters/vector_store/__init__.py +1 -0
- kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
- kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
- kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
- kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
- kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
- kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
- kiln_ai/datamodel/__init__.py +39 -34
- kiln_ai/datamodel/basemodel.py +170 -1
- kiln_ai/datamodel/chunk.py +158 -0
- kiln_ai/datamodel/datamodel_enums.py +28 -0
- kiln_ai/datamodel/embedding.py +64 -0
- kiln_ai/datamodel/eval.py +1 -1
- kiln_ai/datamodel/external_tool_server.py +298 -0
- kiln_ai/datamodel/extraction.py +303 -0
- kiln_ai/datamodel/json_schema.py +25 -10
- kiln_ai/datamodel/project.py +40 -1
- kiln_ai/datamodel/rag.py +79 -0
- kiln_ai/datamodel/registry.py +0 -15
- kiln_ai/datamodel/run_config.py +62 -0
- kiln_ai/datamodel/task.py +2 -77
- kiln_ai/datamodel/task_output.py +6 -1
- kiln_ai/datamodel/task_run.py +41 -0
- kiln_ai/datamodel/test_attachment.py +649 -0
- kiln_ai/datamodel/test_basemodel.py +4 -4
- kiln_ai/datamodel/test_chunk_models.py +317 -0
- kiln_ai/datamodel/test_dataset_split.py +1 -1
- kiln_ai/datamodel/test_embedding_models.py +448 -0
- kiln_ai/datamodel/test_eval_model.py +6 -6
- kiln_ai/datamodel/test_example_models.py +175 -0
- kiln_ai/datamodel/test_external_tool_server.py +691 -0
- kiln_ai/datamodel/test_extraction_chunk.py +206 -0
- kiln_ai/datamodel/test_extraction_model.py +470 -0
- kiln_ai/datamodel/test_rag.py +641 -0
- kiln_ai/datamodel/test_registry.py +8 -3
- kiln_ai/datamodel/test_task.py +15 -47
- kiln_ai/datamodel/test_tool_id.py +320 -0
- kiln_ai/datamodel/test_vector_store.py +320 -0
- kiln_ai/datamodel/tool_id.py +105 -0
- kiln_ai/datamodel/vector_store.py +141 -0
- kiln_ai/tools/__init__.py +8 -0
- kiln_ai/tools/base_tool.py +82 -0
- kiln_ai/tools/built_in_tools/__init__.py +13 -0
- kiln_ai/tools/built_in_tools/math_tools.py +124 -0
- kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
- kiln_ai/tools/mcp_server_tool.py +95 -0
- kiln_ai/tools/mcp_session_manager.py +246 -0
- kiln_ai/tools/rag_tools.py +157 -0
- kiln_ai/tools/test_base_tools.py +199 -0
- kiln_ai/tools/test_mcp_server_tool.py +457 -0
- kiln_ai/tools/test_mcp_session_manager.py +1585 -0
- kiln_ai/tools/test_rag_tools.py +848 -0
- kiln_ai/tools/test_tool_registry.py +562 -0
- kiln_ai/tools/tool_registry.py +85 -0
- kiln_ai/utils/__init__.py +3 -0
- kiln_ai/utils/async_job_runner.py +62 -17
- kiln_ai/utils/config.py +24 -2
- kiln_ai/utils/env.py +15 -0
- kiln_ai/utils/filesystem.py +14 -0
- kiln_ai/utils/filesystem_cache.py +60 -0
- kiln_ai/utils/litellm.py +94 -0
- kiln_ai/utils/lock.py +100 -0
- kiln_ai/utils/mime_type.py +38 -0
- kiln_ai/utils/open_ai_types.py +94 -0
- kiln_ai/utils/pdf_utils.py +38 -0
- kiln_ai/utils/project_utils.py +17 -0
- kiln_ai/utils/test_async_job_runner.py +151 -35
- kiln_ai/utils/test_config.py +138 -1
- kiln_ai/utils/test_env.py +142 -0
- kiln_ai/utils/test_filesystem_cache.py +316 -0
- kiln_ai/utils/test_litellm.py +206 -0
- kiln_ai/utils/test_lock.py +185 -0
- kiln_ai/utils/test_mime_type.py +66 -0
- kiln_ai/utils/test_open_ai_types.py +131 -0
- kiln_ai/utils/test_pdf_utils.py +73 -0
- kiln_ai/utils/test_uuid.py +111 -0
- kiln_ai/utils/test_validation.py +524 -0
- kiln_ai/utils/uuid.py +9 -0
- kiln_ai/utils/validation.py +90 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
- kiln_ai-0.21.0.dist-info/RECORD +211 -0
- kiln_ai-0.19.0.dist-info/RECORD +0 -115
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
import uuid
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from kiln_ai.datamodel.basemodel import KilnAttachmentModel
|
|
9
|
+
from kiln_ai.datamodel.chunk import Chunk, ChunkedDocument, ChunkerConfig, ChunkerType
|
|
10
|
+
from kiln_ai.datamodel.project import Project
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.fixture
def mock_project(tmp_path):
    """Create and save a ``Project`` inside a uniquely named directory.

    A fresh sub-directory (named with a random UUID) is made under pytest's
    ``tmp_path``; the project is constructed with its ``path`` pointing at
    ``project.kiln`` in that directory, saved via ``save_to_file()``, and
    returned to the requesting test.
    """
    root_dir = tmp_path / str(uuid.uuid4())
    root_dir.mkdir()

    project_file = root_dir / "project.kiln"
    saved_project = Project(
        name="Test Project",
        description="Test description",
        path=project_file,
    )
    saved_project.save_to_file()
    return saved_project
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TestFixedWindowChunkerProperties:
    """Validation of the ``properties`` dict on a fixed-window ``ChunkerConfig``."""

    @staticmethod
    def _fixed_window_config(properties):
        """Build a fixed-window ``ChunkerConfig`` named "test-chunker" with ``properties``."""
        return ChunkerConfig(
            name="test-chunker",
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=properties,
        )

    def test_required_fields(self):
        """An empty properties dict raises ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({})

    def test_custom_values(self):
        """Supplied values are stored and exposed through the accessor methods."""
        config = self._fixed_window_config({"chunk_size": 512, "chunk_overlap": 20})

        assert config.properties == {
            "chunk_size": 512,
            "chunk_overlap": 20,
        }
        assert config.chunk_size() == 512
        assert config.chunk_overlap() == 20

    def test_validation_positive_values(self):
        """A chunk size of 1 with an overlap of 0 is accepted."""
        config = self._fixed_window_config({"chunk_size": 1, "chunk_overlap": 0})

        assert config.properties == {
            "chunk_size": 1,
            "chunk_overlap": 0,
        }
        assert config.chunk_size() == 1
        assert config.chunk_overlap() == 0

    def test_validation_negative_values(self):
        """Negative chunk size and overlap raise ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": -1, "chunk_overlap": -1})

    def test_validation_zero_chunk_size(self):
        """A chunk size of zero raises ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": 0, "chunk_overlap": 0})

    def test_validation_overlap_greater_than_chunk_size(self):
        """An overlap larger than the chunk size raises ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": 100, "chunk_overlap": 101})

    def test_validation_overlap_less_than_zero(self):
        """A negative overlap with a valid chunk size raises ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": 100, "chunk_overlap": -1})

    def test_validation_overlap_without_chunk_size(self):
        """Providing only ``chunk_overlap`` raises ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_overlap": 10})

    def test_validation_chunk_size_without_overlap(self):
        """Providing only ``chunk_size`` raises ``ValueError`` mentioning the missing overlap."""
        with pytest.raises(ValueError, match=r"Chunk overlap is required."):
            self._fixed_window_config({"chunk_size": 100})

    def test_validation_wrong_type(self):
        """A string ``chunk_size`` raises ``ValueError``."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": "100", "chunk_overlap": 10})

    def test_validation_none_values(self):
        """``None`` values raise ``ValueError`` - omitting the key is preferred over a None."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": None, "chunk_overlap": 15})
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class TestChunkerType:
    """Checks on the ``ChunkerType`` enum."""

    def test_enum_values(self):
        """``FIXED_WINDOW`` compares equal to the string "fixed_window"."""
        assert ChunkerType.FIXED_WINDOW == "fixed_window"

    def test_enum_inheritance(self):
        """``ChunkerType`` is a subclass of both ``str`` and ``Enum``."""
        for expected_base in (str, Enum):
            assert issubclass(ChunkerType, expected_base)

    def test_enum_comparison(self):
        """The member equals its string form, and so does its ``.value``."""
        member = ChunkerType.FIXED_WINDOW
        assert member == "fixed_window"
        assert member.value == "fixed_window"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class TestChunkerConfig:
    """Tests for the name, description and parent handling of ``ChunkerConfig``."""

    @staticmethod
    def _valid_properties():
        """Return a fresh properties dict that the other tests in this file accept."""
        return {
            "chunk_size": 100,
            "chunk_overlap": 10,
        }

    def test_optional_description(self):
        """``description`` is ``None`` when omitted and stored when given."""
        without_description = ChunkerConfig(
            name="test-chunker",
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=self._valid_properties(),
        )
        assert without_description.description is None

        with_description = ChunkerConfig(
            name="test-chunker",
            description="A test chunker",
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=self._valid_properties(),
        )
        assert with_description.description == "A test chunker"

    def test_name_validation(self):
        """A well-formed name is kept; the malformed-name cases raise ``ValueError``."""
        # Accepted: letters, digits, dash and underscore
        accepted = ChunkerConfig(
            name="valid-name_123",
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=self._valid_properties(),
        )
        assert accepted.name == "valid-name_123"

        # NOTE(review): both rejection cases below pass ``properties={}``, and
        # TestFixedWindowChunkerProperties.test_required_fields shows that an
        # empty properties dict raises ValueError by itself. These cases may
        # therefore pass without the name ever being checked - confirm, and
        # consider supplying valid properties here.
        # First a name with a special character, then an empty name.
        for rejected_name in ("invalid@name", ""):
            with pytest.raises(ValueError):
                ChunkerConfig(
                    name=rejected_name,
                    chunker_type=ChunkerType.FIXED_WINDOW,
                    properties={},
                )

    def test_parent_project_method_no_parent(self):
        """``parent_project()`` returns ``None`` for a config built without a parent."""
        orphan = ChunkerConfig(
            name="test-chunker",
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=self._valid_properties(),
        )
        assert orphan.parent_project() is None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class TestChunk:
    """Test the Chunk class."""

    def test_required_fields(self):
        """A ``Chunk`` built from an attachment exposes that attachment as ``content``."""
        # The file is removed when the ``with`` block exits (delete=True), so
        # everything that relies on the path stays inside the block.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # The handle is buffered: flush so the bytes are actually on disk
            # before the path is handed to code that may open the file itself.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk = Chunk(content=attachment)
            assert chunk.content == attachment

    def test_content_validation(self):
        """``content`` accepts an attachment and rejects ``None`` with ``ValueError``."""
        # The file is removed when the ``with`` block exits (delete=True), so
        # everything that relies on the path stays inside the block.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # Flush the buffered write so the file on disk is not empty.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            # Test with valid attachment
            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk = Chunk(content=attachment)
            assert chunk.content == attachment

            # Test that attachment is required
            with pytest.raises(ValueError):
                Chunk(content=None)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class TestChunkedDocument:
    """Test the ChunkedDocument class."""

    def test_required_fields(self):
        """A document can be built from a chunk list and a chunker config id."""
        chunks = []
        doc = ChunkedDocument(chunks=chunks, chunker_config_id="fake-id")
        assert doc.chunks == chunks

    def test_with_chunks(self):
        """A document built from two chunks keeps both of them."""
        # The file is removed when the ``with`` block exits (delete=True), so
        # everything that relies on the path stays inside the block.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # The handle is buffered: flush so the bytes are actually on disk
            # before the path is handed to code that may open the file itself.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk1 = Chunk(content=attachment)
            chunk2 = Chunk(content=attachment)

            chunks = [chunk1, chunk2]
            doc = ChunkedDocument(chunks=chunks, chunker_config_id="fake-id")
            assert doc.chunks == chunks
            assert len(doc.chunks) == 2

    def test_parent_extraction_method_no_parent(self):
        """``parent_extraction()`` returns ``None`` for a document built without a parent."""
        doc = ChunkedDocument(chunks=[], chunker_config_id="fake-id")
        assert doc.parent_extraction() is None

    def test_empty_chunks_list(self):
        """An empty chunk list is accepted and stored as an empty list."""
        doc = ChunkedDocument(chunks=[], chunker_config_id="fake-id")
        assert doc.chunks == []
        assert len(doc.chunks) == 0

    def test_chunks_validation(self):
        """``chunks`` accepts a list of chunks and rejects a bare ``Chunk``."""
        # The file is removed when the ``with`` block exits (delete=True), so
        # everything that relies on the path stays inside the block.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # Flush the buffered write so the file on disk is not empty.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            # Test with valid list of chunks
            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk = Chunk(content=attachment)
            chunks = [chunk]

            doc = ChunkedDocument(
                chunks=chunks,
                chunker_config_id="fake-id",
            )
            assert doc.chunks == chunks

            # Test that chunks must be a list: a single Chunk is passed on purpose
            with pytest.raises(ValueError):
                ChunkedDocument(
                    chunks=chunk,
                    chunker_config_id="fake-id",
                )
|
|
@@ -120,7 +120,7 @@ def test_dataset_split_validation():
|
|
|
120
120
|
DatasetSplitDefinition(name="train", percentage=0.8),
|
|
121
121
|
DatasetSplitDefinition(name="test", percentage=0.3),
|
|
122
122
|
]
|
|
123
|
-
with pytest.raises(ValueError, match="sum of split percentages must be 1.0"):
|
|
123
|
+
with pytest.raises(ValueError, match=r"sum of split percentages must be 1.0"):
|
|
124
124
|
DatasetSplit(
|
|
125
125
|
name="test_split",
|
|
126
126
|
splits=invalid_splits,
|