kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158)
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,848 @@
1
+ from typing import List
2
+ from unittest.mock import AsyncMock, Mock, patch
3
+
4
+ import pytest
5
+
6
+ from kiln_ai.adapters.vector_store.base_vector_store_adapter import SearchResult
7
+ from kiln_ai.datamodel.embedding import EmbeddingConfig
8
+ from kiln_ai.datamodel.project import Project
9
+ from kiln_ai.datamodel.rag import RagConfig
10
+ from kiln_ai.datamodel.vector_store import VectorStoreConfig, VectorStoreType
11
+ from kiln_ai.tools.rag_tools import ChunkContext, RagTool, format_search_results
12
+
13
+
14
class TestChunkContext:
    """Checks how ChunkContext.serialize() renders metadata and text."""

    def test_chunk_context_serialize_basic(self):
        """Two metadata entries render as a bracketed header line above the text."""
        context = ChunkContext(
            text="This is test content.",
            metadata={"document_id": "doc1", "chunk_idx": 0},
        )

        serialized = context.serialize()

        assert (
            serialized
            == "[document_id: doc1, chunk_idx: 0]\nThis is test content.\n\n"
        )

    def test_chunk_context_serialize_empty_metadata(self):
        """An empty metadata dict still produces an (empty) bracketed header."""
        context = ChunkContext(text="Content without metadata.", metadata={})

        assert context.serialize() == "[]\nContent without metadata.\n\n"

    def test_chunk_context_serialize_multiple_metadata(self):
        """Every metadata entry and the text body appear in the serialized output."""
        context = ChunkContext(
            text="Multi-metadata content.",
            metadata={
                "document_id": "doc123",
                "chunk_idx": 5,
                "score": 0.95,
                "source": "file.txt",
            },
        )

        serialized = context.serialize()

        # Only presence is checked (not ordering), so the test does not depend
        # on the order in which metadata entries are rendered.
        required_fragments = (
            "[",
            "]",
            "document_id: doc123",
            "chunk_idx: 5",
            "score: 0.95",
            "source: file.txt",
            "\nMulti-metadata content.\n\n",
        )
        for fragment in required_fragments:
            assert fragment in serialized

    def test_chunk_context_serialize_empty_text(self):
        """Empty text leaves only the header followed by the trailing newlines."""
        context = ChunkContext(text="", metadata={"document_id": "doc1"})

        assert context.serialize() == "[document_id: doc1]\n\n\n"
64
+
65
+
66
class TestFormatSearchResults:
    """Checks the text produced by format_search_results for lists of SearchResult."""

    def test_format_search_results_single_result(self):
        """One result is rendered as a single header-plus-text chunk with no delimiter."""
        hits = [
            SearchResult(
                document_id="doc1",
                chunk_idx=0,
                chunk_text="First chunk content",
                similarity=0.95,
            )
        ]

        formatted = format_search_results(hits)

        assert (
            formatted == "[document_id: doc1, chunk_idx: 0]\nFirst chunk content\n\n"
        )

    def test_format_search_results_multiple_results(self):
        """Several results are each rendered and joined with the delimiter line."""
        first_hit = SearchResult(
            document_id="doc1",
            chunk_idx=0,
            chunk_text="First chunk",
            similarity=0.95,
        )
        second_hit = SearchResult(
            document_id="doc2",
            chunk_idx=1,
            chunk_text="Second chunk",
            similarity=0.85,
        )

        formatted = format_search_results([first_hit, second_hit])

        # Both chunks must be present, with the delimiter somewhere in the output.
        assert "[document_id: doc1, chunk_idx: 0]\nFirst chunk\n\n" in formatted
        assert "[document_id: doc2, chunk_idx: 1]\nSecond chunk\n\n" in formatted
        assert "\n=========\n" in formatted

    def test_format_search_results_empty_list(self):
        """An empty result list formats to the empty string."""
        no_hits: List[SearchResult] = []

        assert format_search_results(no_hits) == ""

    def test_format_search_results_preserves_search_result_data(self):
        """Document id, chunk index and multi-line chunk text survive formatting unchanged."""
        hit = SearchResult(
            document_id="test_doc_123",
            chunk_idx=42,
            chunk_text="Complex text with\nmultiple lines\nand special chars!@#$%",
            similarity=0.7654321,
        )

        formatted = format_search_results([hit])

        assert "document_id: test_doc_123" in formatted
        assert "chunk_idx: 42" in formatted
        assert (
            "Complex text with\nmultiple lines\nand special chars!@#$%" in formatted
        )
        # Similarity is deliberately not asserted: it is not part of the
        # formatted output, which matches the implementation.
132
+
133
+
134
class TestRagTool:
    """Exercises RagTool construction, cached accessors, tool interface and run()."""

    @pytest.fixture
    def mock_rag_config(self):
        """Provide a RagConfig-spec'd mock carrying the ids and tool fields used below."""
        return Mock(
            spec=RagConfig,
            id="rag_config_123",
            tool_name="Test Search Tool",
            tool_description="A test search tool for RAG",
            vector_store_config_id="vector_store_456",
            embedding_config_id="embedding_789",
        )

    @pytest.fixture
    def mock_project(self):
        """Provide a Project-spec'd mock with a fixed id and path."""
        return Mock(spec=Project, id="project_123", path="/test/project/path")

    @pytest.fixture
    def mock_vector_store_config(self):
        """Provide a VectorStoreConfig-spec'd mock of the LANCE_DB_VECTOR store type."""
        return Mock(
            spec=VectorStoreConfig,
            id="vector_store_456",
            store_type=VectorStoreType.LANCE_DB_VECTOR,
        )

    @pytest.fixture
    def mock_embedding_config(self):
        """Provide an EmbeddingConfig-spec'd mock with a fixed id."""
        return Mock(spec=EmbeddingConfig, id="embedding_789")

    def test_rag_tool_init_success(self, mock_rag_config, mock_project):
        """Construction stores the tool fields and looks up the vector store config once."""
        mock_rag_config.parent_project.return_value = mock_project
        resolved_vs_config = Mock(spec=VectorStoreConfig)

        with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
            vs_config_cls.from_id_and_parent_path.return_value = resolved_vs_config

            rag_tool = RagTool("tool_123", mock_rag_config)

            assert rag_tool._id == "tool_123"
            assert rag_tool._name == "Test Search Tool"
            assert rag_tool._description == "A test search tool for RAG"
            assert rag_tool._rag_config == mock_rag_config
            assert rag_tool._vector_store_config == resolved_vs_config
            # The adapter is not created at construction time.
            assert rag_tool._vector_store_adapter is None

            # The lookup uses the config id from the RAG config and the project path.
            vs_config_cls.from_id_and_parent_path.assert_called_once_with(
                "vector_store_456", "/test/project/path"
            )

    def test_rag_tool_init_vector_store_config_not_found(
        self, mock_rag_config, mock_project
    ):
        """Construction raises ValueError when the vector store config lookup returns None."""
        mock_rag_config.parent_project.return_value = mock_project

        with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
            vs_config_cls.from_id_and_parent_path.return_value = None

            with pytest.raises(
                ValueError, match="Vector store config not found: vector_store_456"
            ):
                RagTool("tool_123", mock_rag_config)

    def test_rag_tool_project_property(self, mock_rag_config, mock_project):
        """The project property returns the parent project and resolves it only once."""
        mock_rag_config.parent_project.return_value = mock_project

        with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
            vs_config_cls.from_id_and_parent_path.return_value = Mock()

            rag_tool = RagTool("tool_123", mock_rag_config)

            # First access returns the project supplied by the RAG config.
            assert rag_tool.project == mock_project

            # Second access must be served from cache: parent_project is
            # expected to have been called exactly once overall.
            assert rag_tool.project == mock_project
            mock_rag_config.parent_project.assert_called_once()

    def test_rag_tool_project_property_no_project(self, mock_rag_config):
        """Construction raises ValueError when the RAG config has no parent project."""
        mock_rag_config.parent_project.return_value = None

        with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
            vs_config_cls.from_id_and_parent_path.return_value = Mock()

            # The failure surfaces from the constructor itself.
            with pytest.raises(
                ValueError, match="RAG config rag_config_123 has no project"
            ):
                RagTool("tool_123", mock_rag_config)

    def test_rag_tool_embedding_property(
        self, mock_rag_config, mock_project, mock_embedding_config
    ):
        """The embedding property yields (config, adapter) and builds them only once."""
        mock_rag_config.parent_project.return_value = mock_project
        expected_adapter = Mock()

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.embedding_adapter_from_type"
            ) as embed_adapter_factory,
        ):
            vs_config_cls.from_id_and_parent_path.return_value = Mock()
            embed_config_cls.from_id_and_parent_path.return_value = (
                mock_embedding_config
            )
            embed_adapter_factory.return_value = expected_adapter

            rag_tool = RagTool("tool_123", mock_rag_config)

            # First access: the pair of config and adapter.
            first_config, first_adapter = rag_tool.embedding
            assert first_config == mock_embedding_config
            assert first_adapter == expected_adapter

            # Second access: same values, served from cache.
            second_config, second_adapter = rag_tool.embedding
            assert second_config == mock_embedding_config
            assert second_adapter == expected_adapter

            # Lookup and factory were each invoked exactly once.
            embed_config_cls.from_id_and_parent_path.assert_called_once_with(
                "embedding_789", "/test/project/path"
            )
            embed_adapter_factory.assert_called_once_with(mock_embedding_config)

    def test_rag_tool_embedding_property_config_not_found(
        self, mock_rag_config, mock_project
    ):
        """Accessing embedding raises ValueError when the embedding config lookup returns None."""
        mock_rag_config.parent_project.return_value = mock_project

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
        ):
            vs_config_cls.from_id_and_parent_path.return_value = Mock()
            embed_config_cls.from_id_and_parent_path.return_value = None

            rag_tool = RagTool("tool_123", mock_rag_config)

            with pytest.raises(
                ValueError, match="Embedding config not found: embedding_789"
            ):
                _ = rag_tool.embedding

    async def test_rag_tool_vector_store_property(self, mock_rag_config, mock_project):
        """vector_store() returns the adapter from the factory and caches it across calls."""
        mock_rag_config.parent_project.return_value = mock_project
        expected_adapter = AsyncMock()
        resolved_vs_config = Mock()

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.vector_store_adapter_for_config",
                new_callable=AsyncMock,
            ) as vs_adapter_factory,
        ):
            vs_config_cls.from_id_and_parent_path.return_value = resolved_vs_config
            vs_adapter_factory.return_value = expected_adapter

            rag_tool = RagTool("tool_123", mock_rag_config)

            # First call builds the adapter through the factory.
            first_adapter = await rag_tool.vector_store()
            assert first_adapter == expected_adapter

            # Second call returns the same adapter.
            second_adapter = await rag_tool.vector_store()
            assert second_adapter == expected_adapter

            # A single factory call across both accesses demonstrates caching.
            vs_adapter_factory.assert_called_once_with(
                vector_store_config=resolved_vs_config, rag_config=mock_rag_config
            )

    async def test_rag_tool_interface_methods(self, mock_rag_config, mock_project):
        """id(), name(), description() and toolcall_definition() expose the configured values."""
        mock_rag_config.parent_project.return_value = mock_project

        with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
            vs_config_cls.from_id_and_parent_path.return_value = Mock()

            rag_tool = RagTool("tool_123", mock_rag_config)

            # Simple accessors.
            assert await rag_tool.id() == "tool_123"
            assert await rag_tool.name() == "Test Search Tool"
            tool_description = await rag_tool.description()
            assert tool_description == "A test search tool for RAG"

            # Function-call definition with a single required "query" string.
            actual_definition = await rag_tool.toolcall_definition()
            assert actual_definition == {
                "type": "function",
                "function": {
                    "name": "Test Search Tool",
                    "description": "A test search tool for RAG",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                "description": "The search query",
                            },
                        },
                        "required": ["query"],
                    },
                },
            }

    async def test_rag_tool_run_vector_store_type(self, mock_rag_config, mock_project):
        """run() with a LANCE_DB_VECTOR store embeds the query and searches with that embedding."""
        mock_rag_config.parent_project.return_value = mock_project

        # Results the mocked vector store will return.
        hits = [
            SearchResult(
                document_id="doc1",
                chunk_idx=0,
                chunk_text="Test content 1",
                similarity=0.95,
            ),
            SearchResult(
                document_id="doc2",
                chunk_idx=1,
                chunk_text="Test content 2",
                similarity=0.85,
            ),
        ]

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.embedding_adapter_from_type"
            ) as embed_adapter_factory,
            patch(
                "kiln_ai.tools.rag_tools.vector_store_adapter_for_config",
                new_callable=AsyncMock,
            ) as vs_adapter_factory,
        ):
            # Wire up the mocks.
            vs_config_cls.from_id_and_parent_path.return_value = Mock(
                store_type=VectorStoreType.LANCE_DB_VECTOR
            )
            embed_config_cls.from_id_and_parent_path.return_value = Mock()

            embedder = AsyncMock()
            embedder.generate_embeddings.return_value = Mock(
                embeddings=[Mock(vector=[0.1, 0.2, 0.3, 0.4])]
            )
            embed_adapter_factory.return_value = embedder

            store = AsyncMock()
            store.search.return_value = hits
            vs_adapter_factory.return_value = store

            rag_tool = RagTool("tool_123", mock_rag_config)

            # Execute the tool.
            output = await rag_tool.run("test query")

            # Both chunks, joined by the delimiter.
            assert output == (
                "[document_id: doc1, chunk_idx: 0]\nTest content 1\n\n"
                "\n=========\n"
                "[document_id: doc2, chunk_idx: 1]\nTest content 2\n\n"
            )

            # The query text was embedded exactly once.
            embedder.generate_embeddings.assert_called_once_with(["test query"])

            # The search received both the query string and its embedding.
            store.search.assert_called_once()
            issued_query = store.search.call_args.args[0]
            assert issued_query.query_string == "test query"
            # Embedding provided for VECTOR type
            assert issued_query.query_embedding == [0.1, 0.2, 0.3, 0.4]

    async def test_rag_tool_run_hybrid_store_type(self, mock_rag_config, mock_project):
        """run() with a LANCE_DB_HYBRID store embeds the query and searches with that embedding."""
        mock_rag_config.parent_project.return_value = mock_project

        # Embedding the mocked adapter will hand back.
        embedding_result = Mock(embeddings=[Mock(vector=[0.1, 0.2, 0.3, 0.4])])

        # Results the mocked vector store will return.
        hits = [
            SearchResult(
                document_id="doc1",
                chunk_idx=0,
                chunk_text="Hybrid search result",
                similarity=0.92,
            )
        ]

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.embedding_adapter_from_type"
            ) as embed_adapter_factory,
            patch(
                "kiln_ai.tools.rag_tools.vector_store_adapter_for_config",
                new_callable=AsyncMock,
            ) as vs_adapter_factory,
        ):
            # Wire up the mocks.
            vs_config_cls.from_id_and_parent_path.return_value = Mock(
                store_type=VectorStoreType.LANCE_DB_HYBRID
            )
            embed_config_cls.from_id_and_parent_path.return_value = Mock()

            embedder = AsyncMock()
            embedder.generate_embeddings.return_value = embedding_result
            embed_adapter_factory.return_value = embedder

            store = AsyncMock()
            store.search.return_value = hits
            vs_adapter_factory.return_value = store

            rag_tool = RagTool("tool_123", mock_rag_config)

            # Execute the tool.
            output = await rag_tool.run("hybrid query")

            # The query text was embedded exactly once.
            embedder.generate_embeddings.assert_called_once_with(["hybrid query"])

            # The search received both the query string and its embedding.
            store.search.assert_called_once()
            issued_query = store.search.call_args.args[0]
            assert issued_query.query_string == "hybrid query"
            assert issued_query.query_embedding == [0.1, 0.2, 0.3, 0.4]

            # Single chunk, so no delimiter in the output.
            assert output == (
                "[document_id: doc1, chunk_idx: 0]\nHybrid search result\n\n"
            )

    async def test_rag_tool_run_fts_store_type(self, mock_rag_config, mock_project):
        """run() with a LANCE_DB_FTS store skips embedding and searches with the text only."""
        mock_rag_config.parent_project.return_value = mock_project

        # Results the mocked vector store will return.
        hits = [
            SearchResult(
                document_id="doc_fts",
                chunk_idx=2,
                chunk_text="FTS search result",
                similarity=0.88,
            )
        ]

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.embedding_adapter_from_type"
            ) as embed_adapter_factory,
            patch(
                "kiln_ai.tools.rag_tools.vector_store_adapter_for_config",
                new_callable=AsyncMock,
            ) as vs_adapter_factory,
        ):
            # Wire up the mocks.
            vs_config_cls.from_id_and_parent_path.return_value = Mock(
                store_type=VectorStoreType.LANCE_DB_FTS
            )
            embed_config_cls.from_id_and_parent_path.return_value = Mock()

            embedder = AsyncMock()
            embed_adapter_factory.return_value = embedder

            store = AsyncMock()
            store.search.return_value = hits
            vs_adapter_factory.return_value = store

            rag_tool = RagTool("tool_123", mock_rag_config)

            # Execute the tool.
            output = await rag_tool.run("fts query")

            # Single chunk, so no delimiter in the output.
            assert output == (
                "[document_id: doc_fts, chunk_idx: 2]\nFTS search result\n\n"
            )

            # No embedding is generated for the FTS store type.
            embedder.generate_embeddings.assert_not_called()

            # The search received the query string with no embedding attached.
            store.search.assert_called_once()
            issued_query = store.search.call_args.args[0]
            assert issued_query.query_string == "fts query"
            assert issued_query.query_embedding is None  # No embedding for FTS type

    async def test_rag_tool_run_no_embeddings_generated(
        self, mock_rag_config, mock_project
    ):
        """run() raises ValueError when the embedding adapter returns no embeddings."""
        mock_rag_config.parent_project.return_value = mock_project

        # Embedding result with an empty embeddings list.
        embedding_result = Mock(embeddings=[])

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.embedding_adapter_from_type"
            ) as embed_adapter_factory,
            patch(
                "kiln_ai.tools.rag_tools.vector_store_adapter_for_config",
                new_callable=AsyncMock,
            ) as vs_adapter_factory,
        ):
            # Wire up the mocks.
            vs_config_cls.from_id_and_parent_path.return_value = Mock(
                store_type=VectorStoreType.LANCE_DB_HYBRID
            )
            embed_config_cls.from_id_and_parent_path.return_value = Mock()

            embedder = AsyncMock()
            embedder.generate_embeddings.return_value = embedding_result
            embed_adapter_factory.return_value = embedder

            vs_adapter_factory.return_value = AsyncMock()

            rag_tool = RagTool("tool_123", mock_rag_config)

            # Running the tool must fail with the expected message.
            with pytest.raises(ValueError, match="No embeddings generated"):
                await rag_tool.run("query with no embeddings")

    async def test_rag_tool_run_empty_search_results(
        self, mock_rag_config, mock_project
    ):
        """run() returns an empty string when the vector store search yields nothing."""
        mock_rag_config.parent_project.return_value = mock_project

        with (
            patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls,
            patch("kiln_ai.tools.rag_tools.EmbeddingConfig") as embed_config_cls,
            patch(
                "kiln_ai.tools.rag_tools.embedding_adapter_from_type"
            ) as embed_adapter_factory,
            patch(
                "kiln_ai.tools.rag_tools.vector_store_adapter_for_config",
                new_callable=AsyncMock,
            ) as vs_adapter_factory,
        ):
            # Wire up the mocks.
            vs_config_cls.from_id_and_parent_path.return_value = Mock(
                store_type=VectorStoreType.LANCE_DB_VECTOR
            )
            embed_config_cls.from_id_and_parent_path.return_value = Mock()

            embedder = AsyncMock()
            embedder.generate_embeddings.return_value = Mock(
                embeddings=[Mock(vector=[0.1, 0.2, 0.3, 0.4])]
            )
            embed_adapter_factory.return_value = embedder

            store = AsyncMock()
            store.search.return_value = []  # Empty results
            vs_adapter_factory.return_value = store

            rag_tool = RagTool("tool_123", mock_rag_config)

            # Execute the tool.
            output = await rag_tool.run("query with no results")

            # No hits formats to the empty string.
            assert output == ""
682
+
683
+
684
+ class TestRagToolNameAndDescription:
685
+ """Test RagTool name and description functionality with tool_name and tool_description fields."""
686
+
687
@pytest.fixture
def mock_rag_config_with_tool_fields(self):
    """Provide a RagConfig-spec'd mock with explicit tool name and description."""
    rag_config = Mock(spec=RagConfig)
    # configure_mock assigns each keyword as an attribute on the mock.
    rag_config.configure_mock(
        id="rag_config_456",
        tool_name="Advanced Document Search",
        tool_description="An advanced search tool that retrieves relevant documents from the knowledge base using semantic similarity",
        vector_store_config_id="vector_store_789",
        embedding_config_id="embedding_101",
    )
    return rag_config
697
+
698
@pytest.fixture
def mock_project_for_tool_fields(self):
    """Provide a Project-spec'd mock with a fixed id and path."""
    fake_project = Mock(spec=Project)
    # configure_mock assigns each keyword as an attribute on the mock.
    fake_project.configure_mock(id="project_456", path="/test/tool/project")
    return fake_project
705
+
706
def test_rag_tool_uses_tool_name_field(
    self, mock_rag_config_with_tool_fields, mock_project_for_tool_fields
):
    """Verify RagTool stores the config's tool_name in its ``_name`` attribute."""
    rag_config = mock_rag_config_with_tool_fields
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_456", rag_config)

        assert rag_tool._name == "Advanced Document Search"
720
+
721
def test_rag_tool_uses_tool_description_field(
    self, mock_rag_config_with_tool_fields, mock_project_for_tool_fields
):
    """Verify RagTool stores the config's tool_description in ``_description``."""
    rag_config = mock_rag_config_with_tool_fields
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    expected_description = "An advanced search tool that retrieves relevant documents from the knowledge base using semantic similarity"

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_456", rag_config)

        assert rag_tool._description == expected_description
738
+
739
async def test_rag_tool_name_method_returns_tool_name(
    self, mock_rag_config_with_tool_fields, mock_project_for_tool_fields
):
    """Verify the awaited ``name()`` call yields the config's tool_name."""
    rag_config = mock_rag_config_with_tool_fields
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_456", rag_config)

        reported_name = await rag_tool.name()
        assert reported_name == "Advanced Document Search"
754
+
755
async def test_rag_tool_description_method_returns_tool_description(
    self, mock_rag_config_with_tool_fields, mock_project_for_tool_fields
):
    """Verify the awaited ``description()`` call yields the config's tool_description."""
    rag_config = mock_rag_config_with_tool_fields
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    expected_description = "An advanced search tool that retrieves relevant documents from the knowledge base using semantic similarity"

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_456", rag_config)

        reported_description = await rag_tool.description()
        assert reported_description == expected_description
773
+
774
async def test_rag_tool_toolcall_definition_uses_tool_fields(
    self, mock_rag_config_with_tool_fields, mock_project_for_tool_fields
):
    """Verify ``toolcall_definition()`` embeds the config's tool name and description.

    The full returned dictionary is compared against the expected
    function-style definition, which takes a single required string
    parameter named ``query``.
    """
    rag_config = mock_rag_config_with_tool_fields
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    # Expected definition, assembled from the inside out.
    query_schema = {
        "type": "string",
        "description": "The search query",
    }
    parameters_schema = {
        "type": "object",
        "properties": {"query": query_schema},
        "required": ["query"],
    }
    expected = {
        "type": "function",
        "function": {
            "name": "Advanced Document Search",
            "description": "An advanced search tool that retrieves relevant documents from the knowledge base using semantic similarity",
            "parameters": parameters_schema,
        },
    }

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_456", rag_config)

        actual = await rag_tool.toolcall_definition()

        assert actual == expected
808
+
809
def test_rag_tool_with_unicode_tool_fields(self, mock_project_for_tool_fields):
    """Verify RagTool keeps non-ASCII tool_name and tool_description unchanged."""
    unicode_name = "🔍 文档搜索工具"
    unicode_description = "一个用于搜索文档的高级工具 🚀"

    rag_config = Mock(spec=RagConfig)
    # configure_mock assigns each keyword as an attribute on the mock.
    rag_config.configure_mock(
        id="rag_config_unicode",
        tool_name=unicode_name,
        tool_description=unicode_description,
        vector_store_config_id="vector_store_789",
        embedding_config_id="embedding_101",
    )
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_unicode", rag_config)
        assert rag_tool._name == "🔍 文档搜索工具"
        assert rag_tool._description == "一个用于搜索文档的高级工具 🚀"
825
+
826
def test_rag_tool_with_multiline_tool_description(
    self, mock_project_for_tool_fields
):
    """Verify RagTool keeps a multi-line tool_description unchanged."""
    # NOTE(review): continuation lines are reproduced without leading
    # whitespace; the assertion compares against this same object, so the
    # outcome does not depend on the exact indentation inside the literal.
    multiline_description = """This is a comprehensive search tool that:
- Searches through document collections
- Uses semantic similarity matching
- Returns relevant context with metadata
- Supports various document formats"""

    rag_config = Mock(spec=RagConfig)
    # configure_mock assigns each keyword as an attribute on the mock.
    rag_config.configure_mock(
        id="rag_config_multiline",
        tool_name="Comprehensive Search Tool",
        tool_description=multiline_description,
        vector_store_config_id="vector_store_789",
        embedding_config_id="embedding_101",
    )
    rag_config.parent_project.return_value = mock_project_for_tool_fields

    with patch("kiln_ai.tools.rag_tools.VectorStoreConfig") as vs_config_cls:
        # Stand-in config handed back by the patched lookup.
        vs_config_cls.from_id_and_parent_path.return_value = Mock()

        rag_tool = RagTool("tool_multiline", rag_config)
        assert rag_tool._description == multiline_description