kiln-ai 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (133) hide show
  1. kiln_ai/adapters/__init__.py +6 -0
  2. kiln_ai/adapters/adapter_registry.py +43 -226
  3. kiln_ai/adapters/chunkers/__init__.py +13 -0
  4. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  5. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  6. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  7. kiln_ai/adapters/chunkers/helpers.py +23 -0
  8. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  9. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  10. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  11. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  12. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  13. kiln_ai/adapters/embedding/__init__.py +0 -0
  14. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  15. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  16. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  17. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  18. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  19. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  20. kiln_ai/adapters/eval/eval_runner.py +6 -2
  21. kiln_ai/adapters/eval/test_base_eval.py +1 -3
  22. kiln_ai/adapters/eval/test_g_eval.py +1 -1
  23. kiln_ai/adapters/extractors/__init__.py +18 -0
  24. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  25. kiln_ai/adapters/extractors/encoding.py +20 -0
  26. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  27. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  28. kiln_ai/adapters/extractors/litellm_extractor.py +406 -0
  29. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  30. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  31. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  32. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  33. kiln_ai/adapters/extractors/test_litellm_extractor.py +1290 -0
  34. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  35. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  36. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  37. kiln_ai/adapters/ml_embedding_model_list.py +494 -0
  38. kiln_ai/adapters/ml_model_list.py +876 -18
  39. kiln_ai/adapters/model_adapters/litellm_adapter.py +40 -75
  40. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +79 -1
  41. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
  42. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
  43. kiln_ai/adapters/model_adapters/test_structured_output.py +9 -10
  44. kiln_ai/adapters/ollama_tools.py +69 -12
  45. kiln_ai/adapters/provider_tools.py +190 -46
  46. kiln_ai/adapters/rag/deduplication.py +49 -0
  47. kiln_ai/adapters/rag/progress.py +252 -0
  48. kiln_ai/adapters/rag/rag_runners.py +844 -0
  49. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  50. kiln_ai/adapters/rag/test_progress.py +785 -0
  51. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  52. kiln_ai/adapters/remote_config.py +80 -8
  53. kiln_ai/adapters/test_adapter_registry.py +579 -86
  54. kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
  55. kiln_ai/adapters/test_ml_model_list.py +202 -0
  56. kiln_ai/adapters/test_ollama_tools.py +340 -1
  57. kiln_ai/adapters/test_prompt_builders.py +1 -1
  58. kiln_ai/adapters/test_provider_tools.py +199 -8
  59. kiln_ai/adapters/test_remote_config.py +551 -56
  60. kiln_ai/adapters/vector_store/__init__.py +1 -0
  61. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  62. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  63. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  64. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  65. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  66. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  67. kiln_ai/datamodel/__init__.py +16 -13
  68. kiln_ai/datamodel/basemodel.py +201 -4
  69. kiln_ai/datamodel/chunk.py +158 -0
  70. kiln_ai/datamodel/datamodel_enums.py +27 -0
  71. kiln_ai/datamodel/embedding.py +64 -0
  72. kiln_ai/datamodel/external_tool_server.py +206 -54
  73. kiln_ai/datamodel/extraction.py +317 -0
  74. kiln_ai/datamodel/project.py +33 -1
  75. kiln_ai/datamodel/rag.py +79 -0
  76. kiln_ai/datamodel/task.py +5 -0
  77. kiln_ai/datamodel/task_output.py +41 -11
  78. kiln_ai/datamodel/test_attachment.py +649 -0
  79. kiln_ai/datamodel/test_basemodel.py +270 -14
  80. kiln_ai/datamodel/test_chunk_models.py +317 -0
  81. kiln_ai/datamodel/test_dataset_split.py +1 -1
  82. kiln_ai/datamodel/test_datasource.py +50 -0
  83. kiln_ai/datamodel/test_embedding_models.py +448 -0
  84. kiln_ai/datamodel/test_eval_model.py +6 -6
  85. kiln_ai/datamodel/test_external_tool_server.py +534 -152
  86. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  87. kiln_ai/datamodel/test_extraction_model.py +501 -0
  88. kiln_ai/datamodel/test_rag.py +641 -0
  89. kiln_ai/datamodel/test_task.py +35 -1
  90. kiln_ai/datamodel/test_tool_id.py +187 -1
  91. kiln_ai/datamodel/test_vector_store.py +320 -0
  92. kiln_ai/datamodel/tool_id.py +58 -0
  93. kiln_ai/datamodel/vector_store.py +141 -0
  94. kiln_ai/tools/base_tool.py +12 -3
  95. kiln_ai/tools/built_in_tools/math_tools.py +12 -4
  96. kiln_ai/tools/kiln_task_tool.py +158 -0
  97. kiln_ai/tools/mcp_server_tool.py +2 -2
  98. kiln_ai/tools/mcp_session_manager.py +51 -22
  99. kiln_ai/tools/rag_tools.py +164 -0
  100. kiln_ai/tools/test_kiln_task_tool.py +527 -0
  101. kiln_ai/tools/test_mcp_server_tool.py +4 -15
  102. kiln_ai/tools/test_mcp_session_manager.py +187 -227
  103. kiln_ai/tools/test_rag_tools.py +929 -0
  104. kiln_ai/tools/test_tool_registry.py +290 -7
  105. kiln_ai/tools/tool_registry.py +69 -16
  106. kiln_ai/utils/__init__.py +3 -0
  107. kiln_ai/utils/async_job_runner.py +62 -17
  108. kiln_ai/utils/config.py +2 -2
  109. kiln_ai/utils/env.py +15 -0
  110. kiln_ai/utils/filesystem.py +14 -0
  111. kiln_ai/utils/filesystem_cache.py +60 -0
  112. kiln_ai/utils/litellm.py +94 -0
  113. kiln_ai/utils/lock.py +100 -0
  114. kiln_ai/utils/mime_type.py +38 -0
  115. kiln_ai/utils/open_ai_types.py +19 -2
  116. kiln_ai/utils/pdf_utils.py +59 -0
  117. kiln_ai/utils/test_async_job_runner.py +151 -35
  118. kiln_ai/utils/test_env.py +142 -0
  119. kiln_ai/utils/test_filesystem_cache.py +316 -0
  120. kiln_ai/utils/test_litellm.py +206 -0
  121. kiln_ai/utils/test_lock.py +185 -0
  122. kiln_ai/utils/test_mime_type.py +66 -0
  123. kiln_ai/utils/test_open_ai_types.py +88 -12
  124. kiln_ai/utils/test_pdf_utils.py +86 -0
  125. kiln_ai/utils/test_uuid.py +111 -0
  126. kiln_ai/utils/test_validation.py +524 -0
  127. kiln_ai/utils/uuid.py +9 -0
  128. kiln_ai/utils/validation.py +90 -0
  129. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +9 -1
  130. kiln_ai-0.22.0.dist-info/RECORD +213 -0
  131. kiln_ai-0.20.1.dist-info/RECORD +0 -138
  132. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
  133. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -4,10 +4,13 @@ from pydantic import BaseModel, ValidationError
4
4
  from kiln_ai.datamodel.tool_id import (
5
5
  MCP_LOCAL_TOOL_ID_PREFIX,
6
6
  MCP_REMOTE_TOOL_ID_PREFIX,
7
+ RAG_TOOL_ID_PREFIX,
7
8
  KilnBuiltInToolId,
8
9
  ToolId,
9
10
  _check_tool_id,
11
+ kiln_task_server_id_from_tool_id,
10
12
  mcp_server_and_tool_name_from_id,
13
+ rag_config_id_from_id,
11
14
  )
12
15
 
13
16
 
@@ -113,6 +116,69 @@ class TestCheckToolId:
113
116
  with pytest.raises(ValueError, match="Invalid tool ID"):
114
117
  _check_tool_id("mcp::wrong::server::tool")
115
118
 
119
+ def test_valid_rag_tools(self):
120
+ """Test validation of valid RAG tools."""
121
+ valid_ids = [
122
+ "kiln_tool::rag::config1",
123
+ "kiln_tool::rag::my_rag_config",
124
+ "kiln_tool::rag::test_config_123",
125
+ ]
126
+ for tool_id in valid_ids:
127
+ result = _check_tool_id(tool_id)
128
+ assert result == tool_id
129
+
130
+ def test_invalid_rag_format(self):
131
+ """Test validation fails for invalid RAG tool formats."""
132
+ # These IDs start with the RAG prefix but have invalid formats
133
+ rag_invalid_ids = [
134
+ "kiln_tool::rag::", # Missing config ID
135
+ "kiln_tool::rag::config::extra", # Too many parts
136
+ ]
137
+
138
+ for invalid_id in rag_invalid_ids:
139
+ with pytest.raises(ValueError, match="Invalid RAG tool ID"):
140
+ _check_tool_id(invalid_id)
141
+
142
+ def test_rag_tool_empty_config_id(self):
143
+ """Test that RAG tool with empty config ID is handled properly."""
144
+ # This tests the case where rag_config_id_from_id returns empty string
145
+ # which should trigger the empty-config-ID check in _check_tool_id
146
+ with pytest.raises(ValueError, match="Invalid RAG tool ID"):
147
+ _check_tool_id("kiln_tool::rag::")
148
+
149
+ def test_valid_kiln_task_tools(self):
150
+ """Test validation of valid Kiln task tools."""
151
+ valid_ids = [
152
+ "kiln_task::server1",
153
+ "kiln_task::my_server",
154
+ "kiln_task::test_server_123",
155
+ "kiln_task::server_with_underscores",
156
+ "kiln_task::server-with-dashes",
157
+ "kiln_task::server.with.dots",
158
+ ]
159
+ for tool_id in valid_ids:
160
+ result = _check_tool_id(tool_id)
161
+ assert result == tool_id
162
+
163
+ def test_invalid_kiln_task_format(self):
164
+ """Test validation fails for invalid Kiln task tool formats."""
165
+ # These IDs start with the Kiln task prefix but have invalid formats
166
+ kiln_task_invalid_ids = [
167
+ "kiln_task::", # Missing server ID
168
+ "kiln_task::server::extra", # Too many parts
169
+ "kiln_task::server::tool::extra", # Too many parts
170
+ ]
171
+
172
+ for invalid_id in kiln_task_invalid_ids:
173
+ with pytest.raises(ValueError, match="Invalid Kiln task tool ID"):
174
+ _check_tool_id(invalid_id)
175
+
176
+ def test_kiln_task_tool_empty_server_id(self):
177
+ """Test that Kiln task tool with empty server ID is handled properly."""
178
+ # This tests the case where kiln_task_server_id_from_tool_id returns empty string which should raise an error
179
+ with pytest.raises(ValueError, match="Invalid Kiln task tool ID"):
180
+ _check_tool_id("kiln_task::")
181
+
116
182
 
117
183
  class TestMcpServerAndToolNameFromId:
118
184
  """Test the mcp_server_and_tool_name_from_id function."""
@@ -188,7 +254,7 @@ class TestToolIdPydanticType:
188
254
  model = self._ModelWithToolId(tool_id=tool_id.value)
189
255
  assert model.tool_id == tool_id.value
190
256
 
191
- def test_valid_mcp_tools(self):
257
+ def test_valid_tool_ids(self):
192
258
  """Test ToolId validates MCP (remote and local), RAG, and Kiln task tool IDs."""
193
259
  valid_ids = [
194
260
  # Remote MCP tools
@@ -197,6 +263,12 @@ class TestToolIdPydanticType:
197
263
  # Local MCP tools
198
264
  "mcp::local::server1::tool1",
199
265
  "mcp::local::my_server::my_tool",
266
+ # RAG tools
267
+ "kiln_tool::rag::config1",
268
+ "kiln_tool::rag::my_rag_config",
269
+ # Kiln task tools
270
+ "kiln_task::server1",
271
+ "kiln_task::my_server",
200
272
  ]
201
273
 
202
274
  for tool_id in valid_ids:
@@ -212,6 +284,10 @@ class TestToolIdPydanticType:
212
284
  "mcp::remote::server",
213
285
  "mcp::local::",
214
286
  "mcp::local::server",
287
+ "kiln_tool::rag::",
288
+ "kiln_tool::rag::config::extra",
289
+ "kiln_task::",
290
+ "kiln_task::server::extra",
215
291
  ]
216
292
 
217
293
  for invalid_id in invalid_ids:
@@ -237,3 +313,113 @@ class TestConstants:
237
313
  def test_mcp_local_tool_id_prefix(self):
238
314
  """Test the MCP local tool ID prefix constant."""
239
315
  assert MCP_LOCAL_TOOL_ID_PREFIX == "mcp::local::"
316
+
317
+ def test_rag_tool_id_prefix(self):
318
+ """Test the RAG tool ID prefix constant."""
319
+ assert RAG_TOOL_ID_PREFIX == "kiln_tool::rag::"
320
+
321
+
322
+ class TestRagConfigIdFromId:
323
+ """Test the rag_config_id_from_id function."""
324
+
325
+ def test_valid_rag_ids(self):
326
+ """Test parsing valid RAG tool IDs."""
327
+ test_cases = [
328
+ ("kiln_tool::rag::config1", "config1"),
329
+ ("kiln_tool::rag::my_rag_config", "my_rag_config"),
330
+ ("kiln_tool::rag::test_config_123", "test_config_123"),
331
+ ("kiln_tool::rag::a", "a"), # Minimal valid case
332
+ ]
333
+
334
+ for tool_id, expected in test_cases:
335
+ result = rag_config_id_from_id(tool_id)
336
+ assert result == expected
337
+
338
+ def test_invalid_rag_ids(self):
339
+ """Test parsing fails for invalid RAG tool IDs."""
340
+ # Test various invalid formats that should make rag_config_id_from_id raise ValueError
341
+ invalid_ids = [
342
+ "kiln_tool::rag::config::extra", # Too many parts (4 parts)
343
+ "wrong::rag::config", # Wrong prefix
344
+ "kiln_tool::wrong::config", # Wrong middle part
345
+ "rag::config", # Too few parts (2 parts)
346
+ "", # Empty string
347
+ "single_part", # Only 1 part
348
+ ]
349
+
350
+ for invalid_id in invalid_ids:
351
+ with pytest.raises(ValueError, match="Invalid RAG tool ID"):
352
+ rag_config_id_from_id(invalid_id)
353
+
354
+ def test_rag_id_with_empty_config_id(self):
355
+ """Test that RAG tool ID with empty config ID returns empty string."""
356
+ # This is actually valid according to the parser - it returns empty string
357
+ # The validation for empty config ID happens in _check_tool_id
358
+ result = rag_config_id_from_id("kiln_tool::rag::")
359
+ assert result == ""
360
+
361
+
362
+ class TestKilnTaskServerIdFromToolId:
363
+ """Test the kiln_task_server_id_from_tool_id function."""
364
+
365
+ def test_valid_kiln_task_ids(self):
366
+ """Test parsing valid Kiln task tool IDs."""
367
+ test_cases = [
368
+ ("kiln_task::server1", "server1"),
369
+ ("kiln_task::my_server", "my_server"),
370
+ ("kiln_task::test_server_123", "test_server_123"),
371
+ ("kiln_task::a", "a"), # Minimal valid case
372
+ ("kiln_task::server_with_underscores", "server_with_underscores"),
373
+ ("kiln_task::server-with-dashes", "server-with-dashes"),
374
+ ("kiln_task::server.with.dots", "server.with.dots"),
375
+ ]
376
+
377
+ for tool_id, expected in test_cases:
378
+ result = kiln_task_server_id_from_tool_id(tool_id)
379
+ assert result == expected
380
+
381
+ def test_invalid_kiln_task_ids(self):
382
+ """Test parsing fails for invalid Kiln task tool IDs."""
383
+ # Test various invalid formats
384
+ invalid_ids = [
385
+ "kiln_task::", # Empty server ID
386
+ "kiln_task::server::extra", # Too many parts (3 parts)
387
+ "kiln_task::server::tool::extra", # Too many parts (4 parts)
388
+ "wrong::server", # Wrong prefix
389
+ "kiln_wrong::server", # Wrong prefix
390
+ "task::server", # Too few parts (2 parts)
391
+ "", # Empty string
392
+ "single_part", # Only 1 part
393
+ "kiln_task", # Missing server ID
394
+ ]
395
+
396
+ for invalid_id in invalid_ids:
397
+ with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
398
+ kiln_task_server_id_from_tool_id(invalid_id)
399
+
400
+ def test_kiln_task_id_with_empty_server_id(self):
401
+ """Test that Kiln task tool ID with empty server ID raises error."""
402
+ with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
403
+ kiln_task_server_id_from_tool_id("kiln_task::")
404
+
405
+ def test_kiln_task_id_with_whitespace_server_id(self):
406
+ """Test that Kiln task tool ID with whitespace-only server ID raises error."""
407
+ with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
408
+ kiln_task_server_id_from_tool_id("kiln_task::")
409
+
410
+ def test_kiln_task_id_with_multiple_colons(self):
411
+ """Test that Kiln task tool ID with multiple colons raises error."""
412
+ with pytest.raises(ValueError, match="Invalid Kiln task tool ID format"):
413
+ kiln_task_server_id_from_tool_id("kiln_task::server::extra")
414
+
415
+ def test_kiln_task_id_case_sensitivity(self):
416
+ """Test that Kiln task tool IDs are case sensitive."""
417
+ # These should work
418
+ result1 = kiln_task_server_id_from_tool_id("kiln_task::Server")
419
+ assert result1 == "Server"
420
+
421
+ result2 = kiln_task_server_id_from_tool_id("kiln_task::SERVER")
422
+ assert result2 == "SERVER"
423
+
424
+ result3 = kiln_task_server_id_from_tool_id("kiln_task::server")
425
+ assert result3 == "server"
@@ -0,0 +1,320 @@
1
+ import pytest
2
+ from pydantic import ValidationError
3
+
4
+ from kiln_ai.datamodel.project import Project
5
+ from kiln_ai.datamodel.vector_store import (
6
+ LanceDBConfigBaseProperties,
7
+ VectorStoreConfig,
8
+ VectorStoreType,
9
+ )
10
+
11
+
12
+ @pytest.fixture
13
+ def mock_project(tmp_path):
14
+ project_path = tmp_path / "test_project" / "project.kiln"
15
+ project_path.parent.mkdir()
16
+
17
+ project = Project(name="Test Project", path=project_path)
18
+ project.save_to_file()
19
+
20
+ return project
21
+
22
+
23
+ @pytest.fixture
24
+ def mock_vector_store_fts_config_properties():
25
+ return {
26
+ "similarity_top_k": 10,
27
+ "overfetch_factor": 2,
28
+ "vector_column_name": "vector",
29
+ "text_key": "text",
30
+ "doc_id_key": "doc_id",
31
+ }
32
+
33
+
34
+ @pytest.fixture
35
+ def mock_vector_store_vector_config_properties():
36
+ return {
37
+ "similarity_top_k": 10,
38
+ "overfetch_factor": 2,
39
+ "vector_column_name": "vector",
40
+ "text_key": "text",
41
+ "doc_id_key": "doc_id",
42
+ "nprobes": 1,
43
+ }
44
+
45
+
46
+ class TestVectorStoreType:
47
+ def test_vector_store_type_values(self):
48
+ """Test that VectorStoreType enum has expected values."""
49
+ assert VectorStoreType.LANCE_DB_FTS == "lancedb_fts"
50
+ assert VectorStoreType.LANCE_DB_HYBRID == "lancedb_hybrid"
51
+ assert VectorStoreType.LANCE_DB_VECTOR == "lancedb_vector"
52
+
53
+
54
+ class TestLanceDBConfigBaseProperties:
55
+ def test_valid_lance_db_config_base_properties(self):
56
+ """Test creating valid LanceDBConfigBaseProperties."""
57
+ config = LanceDBConfigBaseProperties(
58
+ similarity_top_k=10,
59
+ overfetch_factor=2,
60
+ vector_column_name="vector",
61
+ text_key="text",
62
+ doc_id_key="doc_id",
63
+ nprobes=1,
64
+ )
65
+
66
+ assert config.similarity_top_k == 10
67
+ assert config.overfetch_factor == 2
68
+ assert config.vector_column_name == "vector"
69
+ assert config.text_key == "text"
70
+ assert config.doc_id_key == "doc_id"
71
+ assert config.nprobes == 1
72
+
73
+ def test_lance_db_config_base_properties_without_nprobes(self):
74
+ """Test creating LanceDBConfigBaseProperties without nprobes."""
75
+ config = LanceDBConfigBaseProperties(
76
+ similarity_top_k=10,
77
+ overfetch_factor=2,
78
+ vector_column_name="vector",
79
+ text_key="text",
80
+ doc_id_key="doc_id",
81
+ )
82
+
83
+ assert config.similarity_top_k == 10
84
+ assert config.nprobes is None
85
+
86
+
87
+ class TestVectorStoreConfig:
88
+ def test_invalid_store_type(self, mock_vector_store_fts_config_properties):
89
+ """Test creating VectorStoreConfig with invalid store type."""
90
+ with pytest.raises(ValidationError, match="Input should be"):
91
+ VectorStoreConfig(
92
+ name="test_store",
93
+ store_type="invalid_type", # type: ignore
94
+ properties=mock_vector_store_fts_config_properties,
95
+ )
96
+
97
+ def test_invalid_store_type_after_creation(
98
+ self, mock_vector_store_fts_config_properties
99
+ ):
100
+ """Test creating VectorStoreConfig with invalid store type after creation."""
101
+ config = VectorStoreConfig(
102
+ name="test_store",
103
+ store_type=VectorStoreType.LANCE_DB_FTS,
104
+ properties=mock_vector_store_fts_config_properties,
105
+ )
106
+ with pytest.raises(ValidationError, match="Input should be"):
107
+ config.store_type = "invalid_type" # type: ignore
108
+
109
+ def test_valid_lance_db_fts_vector_store_config(
110
+ self, mock_vector_store_fts_config_properties
111
+ ):
112
+ """Test creating valid VectorStoreConfig with LanceDB FTS."""
113
+ config = VectorStoreConfig(
114
+ name="test_store",
115
+ store_type=VectorStoreType.LANCE_DB_FTS,
116
+ properties=mock_vector_store_fts_config_properties,
117
+ )
118
+
119
+ assert config.name == "test_store"
120
+ assert config.store_type == VectorStoreType.LANCE_DB_FTS
121
+ assert config.properties["similarity_top_k"] == 10
122
+ assert config.properties["overfetch_factor"] == 2
123
+ assert config.properties["vector_column_name"] == "vector"
124
+ assert config.properties["text_key"] == "text"
125
+ assert config.properties["doc_id_key"] == "doc_id"
126
+
127
+ def test_valid_lance_db_vector_store_config(
128
+ self, mock_vector_store_vector_config_properties
129
+ ):
130
+ """Test creating valid VectorStoreConfig with LanceDB Vector."""
131
+ config = VectorStoreConfig(
132
+ name="test_store",
133
+ store_type=VectorStoreType.LANCE_DB_VECTOR,
134
+ properties=mock_vector_store_vector_config_properties,
135
+ )
136
+
137
+ assert config.name == "test_store"
138
+ assert config.store_type == VectorStoreType.LANCE_DB_VECTOR
139
+ assert config.properties["similarity_top_k"] == 10
140
+ assert config.properties["nprobes"] == 1
141
+
142
+ def test_valid_lance_db_hybrid_store_config(
143
+ self, mock_vector_store_vector_config_properties
144
+ ):
145
+ """Test creating valid VectorStoreConfig with LanceDB Hybrid."""
146
+ config = VectorStoreConfig(
147
+ name="test_store",
148
+ store_type=VectorStoreType.LANCE_DB_HYBRID,
149
+ properties=mock_vector_store_vector_config_properties,
150
+ )
151
+
152
+ assert config.name == "test_store"
153
+ assert config.store_type == VectorStoreType.LANCE_DB_HYBRID
154
+ assert config.properties["nprobes"] == 1
155
+
156
+ def test_vector_store_config_missing_required_property(
157
+ self, mock_vector_store_fts_config_properties
158
+ ):
159
+ """Test VectorStoreConfig validation fails when required property is missing."""
160
+ mock_vector_store_fts_config_properties.pop("similarity_top_k")
161
+ with pytest.raises(
162
+ ValidationError,
163
+ match=r".*similarity_top_k is a required property",
164
+ ):
165
+ VectorStoreConfig(
166
+ name="test_store",
167
+ store_type=VectorStoreType.LANCE_DB_FTS,
168
+ properties=mock_vector_store_fts_config_properties,
169
+ )
170
+
171
+ def test_vector_store_config_invalid_property_type(
172
+ self, mock_vector_store_fts_config_properties
173
+ ):
174
+ """Test VectorStoreConfig validation fails when property has wrong type."""
175
+ mock_vector_store_fts_config_properties["similarity_top_k"] = "not_an_int"
176
+ with pytest.raises(
177
+ ValidationError,
178
+ match=r".*similarity_top_k must be of type",
179
+ ):
180
+ VectorStoreConfig(
181
+ name="test_store",
182
+ store_type=VectorStoreType.LANCE_DB_FTS,
183
+ properties=mock_vector_store_fts_config_properties,
184
+ )
185
+
186
+ def test_vector_store_config_fts_missing_nprobes_is_valid(
187
+ self, mock_vector_store_fts_config_properties
188
+ ):
189
+ """Test VectorStoreConfig with FTS type doesn't require nprobes."""
190
+ config = VectorStoreConfig(
191
+ name="test_store",
192
+ store_type=VectorStoreType.LANCE_DB_FTS,
193
+ properties=mock_vector_store_fts_config_properties,
194
+ )
195
+ assert config.store_type == VectorStoreType.LANCE_DB_FTS
196
+
197
+ def test_vector_store_config_vector_missing_nprobes_fails(
198
+ self, mock_vector_store_vector_config_properties
199
+ ):
200
+ """Test VectorStoreConfig with VECTOR type requires nprobes."""
201
+ mock_vector_store_vector_config_properties.pop("nprobes")
202
+ with pytest.raises(
203
+ ValidationError,
204
+ match=r".*nprobes is a required property",
205
+ ):
206
+ VectorStoreConfig(
207
+ name="test_store",
208
+ store_type=VectorStoreType.LANCE_DB_VECTOR,
209
+ properties=mock_vector_store_vector_config_properties,
210
+ )
211
+
212
+ def test_lancedb_properties(self, mock_vector_store_vector_config_properties):
213
+ """Test lancedb_properties method returns correct LanceDBConfigBaseProperties."""
214
+ config = VectorStoreConfig(
215
+ name="test_store",
216
+ store_type=VectorStoreType.LANCE_DB_VECTOR,
217
+ properties=mock_vector_store_vector_config_properties,
218
+ )
219
+
220
+ props = config.lancedb_properties
221
+
222
+ assert isinstance(props, LanceDBConfigBaseProperties)
223
+ assert props.similarity_top_k == 10
224
+ assert props.overfetch_factor == 2
225
+ assert props.vector_column_name == "vector"
226
+ assert props.text_key == "text"
227
+ assert props.doc_id_key == "doc_id"
228
+ assert props.nprobes == 1
229
+
230
+ def test_vector_store_config_inherits_from_kiln_parented_model(
231
+ self, mock_vector_store_fts_config_properties
232
+ ):
233
+ """Test that VectorStoreConfig inherits from KilnParentedModel."""
234
+ config = VectorStoreConfig(
235
+ name="test_store",
236
+ store_type=VectorStoreType.LANCE_DB_FTS,
237
+ properties=mock_vector_store_fts_config_properties,
238
+ )
239
+
240
+ # Check that it has the expected base fields
241
+ assert hasattr(config, "id")
242
+ assert hasattr(config, "v")
243
+ assert hasattr(config, "created_at")
244
+ assert hasattr(config, "created_by")
245
+ assert hasattr(config, "parent")
246
+
247
+ @pytest.mark.parametrize(
248
+ "name",
249
+ ["valid_name", "valid name", "valid-name", "valid_name_123", "VALID_NAME"],
250
+ )
251
+ def test_vector_store_config_valid_names(
252
+ self, name, mock_vector_store_fts_config_properties
253
+ ):
254
+ """Test VectorStoreConfig accepts valid names."""
255
+ config = VectorStoreConfig(
256
+ name=name,
257
+ store_type=VectorStoreType.LANCE_DB_FTS,
258
+ properties=mock_vector_store_fts_config_properties,
259
+ )
260
+ assert config.name == name
261
+
262
+ @pytest.mark.parametrize(
263
+ "name",
264
+ [
265
+ "",
266
+ "a" * 121, # Too long
267
+ ],
268
+ )
269
+ def test_vector_store_config_invalid_names(
270
+ self, name, mock_vector_store_fts_config_properties
271
+ ):
272
+ """Test VectorStoreConfig rejects invalid names."""
273
+ with pytest.raises(ValidationError):
274
+ VectorStoreConfig(
275
+ name=name,
276
+ store_type=VectorStoreType.LANCE_DB_FTS,
277
+ properties=mock_vector_store_fts_config_properties,
278
+ )
279
+
280
+ def test_parent_project(
281
+ self, mock_project, mock_vector_store_fts_config_properties
282
+ ):
283
+ """Test that parent project is returned correctly."""
284
+ config = VectorStoreConfig(
285
+ name="test_store",
286
+ store_type=VectorStoreType.LANCE_DB_FTS,
287
+ properties=mock_vector_store_fts_config_properties,
288
+ parent=mock_project,
289
+ )
290
+
291
+ assert config.parent_project() is mock_project
292
+
293
+ def test_vector_store_config_parent_project_none(
294
+ self, mock_vector_store_fts_config_properties
295
+ ):
296
+ """Test that parent project is None if not set."""
297
+ config = VectorStoreConfig(
298
+ name="test_store",
299
+ store_type=VectorStoreType.LANCE_DB_FTS,
300
+ properties=mock_vector_store_fts_config_properties,
301
+ )
302
+
303
+ assert config.parent_project() is None
304
+
305
+ def test_project_has_vector_store_configs(
306
+ self, mock_project, mock_vector_store_fts_config_properties
307
+ ):
308
+ """Test that project has vector store configs."""
309
+ config = VectorStoreConfig(
310
+ name="test_store",
311
+ store_type=VectorStoreType.LANCE_DB_FTS,
312
+ properties=mock_vector_store_fts_config_properties,
313
+ parent=mock_project,
314
+ )
315
+ config.save_to_file()
316
+
317
+ assert len(mock_project.vector_store_configs(readonly=True)) == 1
318
+ assert config.id in [
319
+ vc.id for vc in mock_project.vector_store_configs(readonly=True)
320
+ ]
@@ -14,6 +14,7 @@ Tool IDs can be one of:
14
14
  - A kiln built-in tool name: kiln_tool::add_numbers
15
15
  - A remote MCP tool: mcp::remote::<server_id>::<tool_name>
16
16
  - A local MCP tool: mcp::local::<server_id>::<tool_name>
17
+ - A Kiln task tool: kiln_task::<server_id>
17
18
  - A RAG tool: kiln_tool::rag::<rag_config_id> (more tool types coming soon)
18
19
  """
19
20
 
@@ -26,7 +27,9 @@ class KilnBuiltInToolId(str, Enum):
26
27
 
27
28
 
28
29
  MCP_REMOTE_TOOL_ID_PREFIX = "mcp::remote::"
30
+ RAG_TOOL_ID_PREFIX = "kiln_tool::rag::"
29
31
  MCP_LOCAL_TOOL_ID_PREFIX = "mcp::local::"
32
+ KILN_TASK_TOOL_ID_PREFIX = "kiln_task::"
30
33
 
31
34
 
32
35
  def _check_tool_id(id: str) -> str:
@@ -58,6 +61,24 @@ def _check_tool_id(id: str) -> str:
58
61
  )
59
62
  return id
60
63
 
64
+ # RAG tools must have format: kiln_tool::rag::<rag_config_id>
65
+ if id.startswith(RAG_TOOL_ID_PREFIX):
66
+ rag_config_id = rag_config_id_from_id(id)
67
+ if not rag_config_id:
68
+ raise ValueError(
69
+ f"Invalid RAG tool ID: {id}. Expected format: 'kiln_tool::rag::<rag_config_id>'."
70
+ )
71
+ return id
72
+
73
+ # Kiln task tools must have format: kiln_task::<server_id>
74
+ if id.startswith(KILN_TASK_TOOL_ID_PREFIX):
75
+ server_id = kiln_task_server_id_from_tool_id(id)
76
+ if not server_id:
77
+ raise ValueError(
78
+ f"Invalid Kiln task tool ID: {id}. Expected format: 'kiln_task::<server_id>'."
79
+ )
80
+ return id
81
+
61
82
  raise ValueError(f"Invalid tool ID: {id}")
62
83
 
63
84
 
@@ -81,3 +102,40 @@ def mcp_server_and_tool_name_from_id(id: str) -> tuple[str, str]:
81
102
  f"Invalid MCP tool ID: {id}. Expected format: 'mcp::(remote|local)::<server_id>::<tool_name>'."
82
103
  )
83
104
  return parts[2], parts[3] # server_id, tool_name
105
+
106
+
107
+ def rag_config_id_from_id(id: str) -> str:
108
+ """
109
+ Get the RAG config ID from the ID.
110
+ """
111
+ parts = id.split("::")
112
+ if not id.startswith(RAG_TOOL_ID_PREFIX) or len(parts) != 3:
113
+ raise ValueError(
114
+ f"Invalid RAG tool ID: {id}. Expected format: 'kiln_tool::rag::<rag_config_id>'."
115
+ )
116
+ return parts[2]
117
+
118
+
119
+ def kiln_task_server_id_from_tool_id(tool_id: str) -> str:
120
+ """
121
+ Get the server ID from the tool ID.
122
+ """
123
+ if not tool_id.startswith(KILN_TASK_TOOL_ID_PREFIX):
124
+ raise ValueError(
125
+ f"Invalid Kiln task tool ID format: {tool_id}. Expected format: 'kiln_task::<server_id>'."
126
+ )
127
+
128
+ # Remove prefix and split on ::
129
+ remaining = tool_id[len(KILN_TASK_TOOL_ID_PREFIX) :]
130
+ if not remaining:
131
+ raise ValueError(
132
+ f"Invalid Kiln task tool ID format: {tool_id}. Expected format: 'kiln_task::<server_id>'."
133
+ )
134
+ parts = remaining.split("::")
135
+
136
+ if len(parts) != 1 or not parts[0].strip():
137
+ raise ValueError(
138
+ f"Invalid Kiln task tool ID format: {tool_id}. Expected format: 'kiln_task::<server_id>'."
139
+ )
140
+
141
+ return parts[0] # server_id