kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158) hide show
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,317 @@
1
+ import tempfile
2
+ import uuid
3
+ from enum import Enum
4
+ from pathlib import Path
5
+
6
+ import pytest
7
+
8
+ from kiln_ai.datamodel.basemodel import KilnAttachmentModel
9
+ from kiln_ai.datamodel.chunk import Chunk, ChunkedDocument, ChunkerConfig, ChunkerType
10
+ from kiln_ai.datamodel.project import Project
11
+
12
+
@pytest.fixture
def mock_project(tmp_path):
    """Create a project in a uniquely named directory under ``tmp_path``, save it, and return it."""
    # A random directory name keeps each project file in its own folder.
    root_dir = tmp_path / str(uuid.uuid4())
    root_dir.mkdir()

    saved_project = Project(
        name="Test Project",
        description="Test description",
        path=root_dir / "project.kiln",
    )
    saved_project.save_to_file()
    return saved_project
24
+
25
+
class TestFixedWindowChunkerProperties:
    """Exercise ``ChunkerConfig`` property validation for the fixed-window chunker type."""

    @staticmethod
    def _fixed_window_config(properties):
        """Build a fixed-window ``ChunkerConfig`` named "test-chunker" with the given properties."""
        return ChunkerConfig(
            name="test-chunker",
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=properties,
        )

    def test_required_fields(self):
        """An empty properties dict is rejected."""
        with pytest.raises(ValueError):
            self._fixed_window_config({})

    def test_custom_values(self):
        """Supplied chunk size and overlap are stored and exposed via the accessors."""
        config = self._fixed_window_config({"chunk_size": 512, "chunk_overlap": 20})

        expected_properties = {
            "chunk_size": 512,
            "chunk_overlap": 20,
        }
        assert config.properties == expected_properties

        assert config.chunk_size() == 512
        assert config.chunk_overlap() == 20

    def test_validation_positive_values(self):
        """A chunk size of 1 with an overlap of 0 is accepted."""
        config = self._fixed_window_config({"chunk_size": 1, "chunk_overlap": 0})

        expected_properties = {
            "chunk_size": 1,
            "chunk_overlap": 0,
        }
        assert config.properties == expected_properties

        assert config.chunk_size() == 1
        assert config.chunk_overlap() == 0

    def test_validation_negative_values(self):
        """Negative chunk size together with negative overlap is rejected."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": -1, "chunk_overlap": -1})

    def test_validation_zero_chunk_size(self):
        """A chunk size of zero is rejected."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": 0, "chunk_overlap": 0})

    def test_validation_overlap_greater_than_chunk_size(self):
        """An overlap larger than the chunk size is rejected."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": 100, "chunk_overlap": 101})

    def test_validation_overlap_less_than_zero(self):
        """A negative overlap is rejected even when the chunk size is valid."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": 100, "chunk_overlap": -1})

    def test_validation_overlap_without_chunk_size(self):
        """Providing only the overlap (no chunk size) is rejected."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_overlap": 10})

    def test_validation_chunk_size_without_overlap(self):
        """Providing only the chunk size (no overlap) raises the overlap-required error."""
        # NOTE(review): the trailing "." in the pattern is a regex wildcard, so
        # any character after "required" matches -- confirm whether a literal
        # period (r"\.") is what is intended here.
        with pytest.raises(ValueError, match=r"Chunk overlap is required."):
            self._fixed_window_config({"chunk_size": 100})

    def test_validation_wrong_type(self):
        """A string chunk size is rejected."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": "100", "chunk_overlap": 10})

    def test_validation_none_values(self):
        """Reject None values - we prefer the property to be absent rather than None."""
        with pytest.raises(ValueError):
            self._fixed_window_config({"chunk_size": None, "chunk_overlap": 15})
139
+
140
+
class TestChunkerType:
    """Checks on the ``ChunkerType`` enum."""

    def test_enum_values(self):
        """``FIXED_WINDOW`` compares equal to the string "fixed_window"."""
        member = ChunkerType.FIXED_WINDOW
        assert member == "fixed_window"

    def test_enum_inheritance(self):
        """``ChunkerType`` is a subclass of both ``str`` and ``Enum``."""
        for base in (str, Enum):
            assert issubclass(ChunkerType, base)

    def test_enum_comparison(self):
        """Both the member itself and its ``.value`` equal "fixed_window"."""
        member = ChunkerType.FIXED_WINDOW
        assert member == "fixed_window"
        assert member.value == "fixed_window"
157
+
158
+
class TestChunkerConfig:
    """Checks on the general ``ChunkerConfig`` fields (name, description, parent)."""

    @staticmethod
    def _config(name="test-chunker", properties=None, **extra):
        """Build a fixed-window ``ChunkerConfig``.

        When ``properties`` is omitted, a fresh valid pair
        (chunk_size=100, chunk_overlap=10) is used. Extra keyword arguments
        are forwarded to ``ChunkerConfig`` unchanged.
        """
        if properties is None:
            properties = {
                "chunk_size": 100,
                "chunk_overlap": 10,
            }
        return ChunkerConfig(
            name=name,
            chunker_type=ChunkerType.FIXED_WINDOW,
            properties=properties,
            **extra,
        )

    def test_optional_description(self):
        """``description`` is None when omitted and is stored when given."""
        without_desc = self._config()
        assert without_desc.description is None

        config_with_desc = self._config(description="A test chunker")
        assert config_with_desc.description == "A test chunker"

    def test_name_validation(self):
        """A valid name is kept; special characters and the empty string are rejected."""
        # Valid name: letters, digits, dash and underscore.
        accepted = self._config(name="valid-name_123")
        assert accepted.name == "valid-name_123"

        # Invalid name (contains special characters).
        with pytest.raises(ValueError):
            self._config(name="invalid@name", properties={})

        # Empty name.
        with pytest.raises(ValueError):
            self._config(name="", properties={})

    def test_parent_project_method_no_parent(self):
        """``parent_project()`` returns None for a config created without a parent."""
        orphan = self._config()
        assert orphan.parent_project() is None
225
+
226
+
class TestChunk:
    """Test the Chunk class."""

    def test_required_fields(self):
        """A Chunk built from a file-backed attachment keeps it as ``content``."""
        # delete=True removes the file when the block exits, so the attachment
        # is created while the file still exists.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # The handle is buffered: flush so the bytes are actually on disk
            # before the file is accessed through its path.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk = Chunk(content=attachment)
            assert chunk.content == attachment

    def test_content_validation(self):
        """``content`` accepts an attachment and rejects None."""
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # Flush the buffered write so the file on disk is not empty.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            # Test with valid attachment
            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk = Chunk(content=attachment)
            assert chunk.content == attachment

        # Test that attachment is required (no file involved in this check)
        with pytest.raises(ValueError):
            Chunk(content=None)
256
+
257
+
class TestChunkedDocument:
    """Test the ChunkedDocument class."""

    def test_required_fields(self):
        """A document can be created from a chunk list and a chunker config id."""
        chunks = []
        doc = ChunkedDocument(chunks=chunks, chunker_config_id="fake-id")
        assert doc.chunks == chunks

    def test_with_chunks(self):
        """A document built from two chunks exposes both of them."""
        # delete=True removes the file when the block exits, so everything that
        # refers to the attachment runs while the file still exists.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # The handle is buffered: flush so the bytes are actually on disk
            # before the file is accessed through its path.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk1 = Chunk(content=attachment)
            chunk2 = Chunk(content=attachment)

            chunks = [chunk1, chunk2]
            doc = ChunkedDocument(chunks=chunks, chunker_config_id="fake-id")
            assert doc.chunks == chunks
            assert len(doc.chunks) == 2

    def test_parent_extraction_method_no_parent(self):
        """``parent_extraction()`` returns None when no parent is set."""
        doc = ChunkedDocument(chunks=[], chunker_config_id="fake-id")
        assert doc.parent_extraction() is None

    def test_empty_chunks_list(self):
        """An empty chunks list is valid."""
        doc = ChunkedDocument(chunks=[], chunker_config_id="fake-id")
        assert doc.chunks == []
        assert len(doc.chunks) == 0

    def test_chunks_validation(self):
        """``chunks`` accepts a list of chunks and rejects a bare Chunk."""
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file.write(b"test content")
            # Flush the buffered write so the file on disk is not empty.
            tmp_file.flush()
            tmp_path = Path(tmp_file.name)

            # Test with valid list of chunks
            attachment = KilnAttachmentModel.from_file(tmp_path)
            chunk = Chunk(content=attachment)
            chunks = [chunk]

            doc = ChunkedDocument(
                chunks=chunks,
                chunker_config_id="fake-id",
            )
            assert doc.chunks == chunks

            # Test that chunks must be a list. Kept inside the ``with`` so the
            # backing file still exists and the non-list value is the only
            # invalid input.
            with pytest.raises(ValueError):
                ChunkedDocument(
                    chunks=chunk,
                    chunker_config_id="fake-id",
                )
@@ -120,7 +120,7 @@ def test_dataset_split_validation():
120
120
  DatasetSplitDefinition(name="train", percentage=0.8),
121
121
  DatasetSplitDefinition(name="test", percentage=0.3),
122
122
  ]
123
- with pytest.raises(ValueError, match="sum of split percentages must be 1.0"):
123
+ with pytest.raises(ValueError, match=r"sum of split percentages must be 1.0"):
124
124
  DatasetSplit(
125
125
  name="test_split",
126
126
  splits=invalid_splits,