kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158) hide show
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,157 @@
1
+ from functools import cached_property
2
+ from typing import Any, Dict, List
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from kiln_ai.adapters.embedding.base_embedding_adapter import BaseEmbeddingAdapter
7
+ from kiln_ai.adapters.embedding.embedding_registry import embedding_adapter_from_type
8
+ from kiln_ai.adapters.vector_store.base_vector_store_adapter import (
9
+ BaseVectorStoreAdapter,
10
+ SearchResult,
11
+ VectorStoreQuery,
12
+ )
13
+ from kiln_ai.adapters.vector_store.vector_store_registry import (
14
+ vector_store_adapter_for_config,
15
+ )
16
+ from kiln_ai.datamodel.embedding import EmbeddingConfig
17
+ from kiln_ai.datamodel.project import Project
18
+ from kiln_ai.datamodel.rag import RagConfig
19
+ from kiln_ai.datamodel.tool_id import ToolId
20
+ from kiln_ai.datamodel.vector_store import VectorStoreConfig, VectorStoreType
21
+ from kiln_ai.tools.base_tool import KilnToolInterface
22
+ from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
23
+
24
+
25
class ChunkContext(BaseModel):
    """A chunk of text paired with the metadata rendered alongside it."""

    # Key/value pairs shown in the bracketed header line of the serialized form.
    metadata: dict
    # The chunk body, emitted on the line after the header.
    text: str

    def serialize(self) -> str:
        """Render the chunk as a ``[key: value, ...]`` header followed by its text.

        Metadata entries appear in the dict's iteration order, separated by
        ``", "``. The returned string ends with two newlines.
        """
        header = ", ".join(f"{key}: {value}" for key, value in self.metadata.items())
        return f"[{header}]\n{self.text}\n\n"
32
+
33
+
34
def format_search_results(search_results: List[SearchResult]) -> str:
    """Turn vector-store search results into a single text block.

    Each result is wrapped in a ChunkContext carrying its ``document_id`` and
    ``chunk_idx`` as metadata and its ``chunk_text`` as the body. The
    serialized chunks are joined with a ``=========`` separator line. An empty
    input yields an empty string.
    """
    serialized_chunks = [
        ChunkContext(
            metadata={
                "document_id": hit.document_id,
                "chunk_idx": hit.chunk_idx,
            },
            text=hit.chunk_text,
        ).serialize()
        for hit in search_results
    ]
    return "\n=========\n".join(serialized_chunks)
47
+
48
+
49
+ class RagTool(KilnToolInterface):
50
+ """
51
+ A tool that searches the vector store and returns the most relevant chunks.
52
+ """
53
+
54
+ def __init__(self, tool_id: str, rag_config: RagConfig):
55
+ self._id = tool_id
56
+ self._name = rag_config.tool_name
57
+ self._description = rag_config.tool_description
58
+ self._parameters_schema = {
59
+ "type": "object",
60
+ "properties": {
61
+ "query": {
62
+ "type": "string",
63
+ "description": "The search query",
64
+ },
65
+ },
66
+ "required": ["query"],
67
+ }
68
+ self._rag_config = rag_config
69
+ vector_store_config = VectorStoreConfig.from_id_and_parent_path(
70
+ str(self._rag_config.vector_store_config_id), self.project.path
71
+ )
72
+ if vector_store_config is None:
73
+ raise ValueError(
74
+ f"Vector store config not found: {self._rag_config.vector_store_config_id}"
75
+ )
76
+ self._vector_store_config = vector_store_config
77
+ self._vector_store_adapter: BaseVectorStoreAdapter | None = None
78
+
79
+ @cached_property
80
+ def project(self) -> Project:
81
+ project = self._rag_config.parent_project()
82
+ if project is None:
83
+ raise ValueError(f"RAG config {self._rag_config.id} has no project")
84
+ return project
85
+
86
+ @cached_property
87
+ def embedding(
88
+ self,
89
+ ) -> tuple[EmbeddingConfig, BaseEmbeddingAdapter]:
90
+ embedding_config = EmbeddingConfig.from_id_and_parent_path(
91
+ str(self._rag_config.embedding_config_id), self.project.path
92
+ )
93
+ if embedding_config is None:
94
+ raise ValueError(
95
+ f"Embedding config not found: {self._rag_config.embedding_config_id}"
96
+ )
97
+ return embedding_config, embedding_adapter_from_type(embedding_config)
98
+
99
+ async def vector_store(
100
+ self,
101
+ ) -> BaseVectorStoreAdapter:
102
+ if self._vector_store_adapter is None:
103
+ self._vector_store_adapter = await vector_store_adapter_for_config(
104
+ vector_store_config=self._vector_store_config,
105
+ rag_config=self._rag_config,
106
+ )
107
+ return self._vector_store_adapter
108
+
109
+ async def id(self) -> ToolId:
110
+ return self._id
111
+
112
+ async def name(self) -> str:
113
+ return self._name
114
+
115
+ async def description(self) -> str:
116
+ return self._description
117
+
118
+ async def toolcall_definition(self) -> Dict[str, Any]:
119
+ """Return the OpenAI-compatible tool definition for this tool."""
120
+ return {
121
+ "type": "function",
122
+ "function": {
123
+ "name": await self.name(),
124
+ "description": await self.description(),
125
+ "parameters": self._parameters_schema,
126
+ },
127
+ }
128
+
129
+ async def run(self, query: str) -> str:
130
+ _, embedding_adapter = self.embedding
131
+
132
+ vector_store_adapter = await self.vector_store()
133
+ store_query = VectorStoreQuery(
134
+ query_embedding=None,
135
+ query_string=query,
136
+ )
137
+
138
+ match self._vector_store_config.store_type:
139
+ case VectorStoreType.LANCE_DB_HYBRID | VectorStoreType.LANCE_DB_VECTOR:
140
+ is_vector_query = True
141
+ case VectorStoreType.LANCE_DB_FTS:
142
+ is_vector_query = False
143
+ case _:
144
+ raise_exhaustive_enum_error(self._vector_store_config.store_type)
145
+
146
+ if is_vector_query:
147
+ query_embedding_result = await embedding_adapter.generate_embeddings(
148
+ [query]
149
+ )
150
+ if len(query_embedding_result.embeddings) == 0:
151
+ raise ValueError("No embeddings generated")
152
+ store_query.query_embedding = query_embedding_result.embeddings[0].vector
153
+
154
+ search_results = await vector_store_adapter.search(store_query)
155
+ context = format_search_results(search_results)
156
+
157
+ return context
@@ -0,0 +1,199 @@
1
+ import pytest
2
+
3
+ from kiln_ai.tools.base_tool import KilnTool, KilnToolInterface
4
+
5
+
6
class TestKilnToolInterface:
    """Test the abstract KilnToolInterface."""

    def test_cannot_instantiate_abstract_class(self):
        """Instantiating KilnToolInterface directly must raise TypeError."""
        abstract_cls = KilnToolInterface
        with pytest.raises(TypeError):
            abstract_cls()  # type: ignore
13
+
14
+
15
class ConcreteTestTool(KilnTool):
    """Minimal KilnTool subclass used by the tests in this module."""

    def run(self, **kwargs) -> str:
        """Echo the received keyword arguments back in a fixed-format string."""
        echoed = f"{kwargs}"
        return f"test_result: {echoed}"
20
+
21
+
22
class TestKilnTool:
    """Test the KilnTool base class."""

    # Pattern the invalid-schema tests expect in the raised ValueError.
    _SCHEMA_ERROR = "JSON schema must be an object with properties"

    @staticmethod
    def _make_tool(
        parameters_schema,
        tool_id="test_tool",
        name="test_tool",
        description="A test tool",
    ):
        """Construct a ConcreteTestTool, defaulting the fields most tests share."""
        return ConcreteTestTool(
            tool_id=tool_id,
            name=name,
            description=description,
            parameters_schema=parameters_schema,
        )

    async def test_init_with_valid_schema(self):
        """A valid object schema is accepted and the fields are exposed."""
        valid_schema = {
            "type": "object",
            "properties": {
                "param1": {"type": "string", "description": "Test parameter"}
            },
            "required": ["param1"],
        }

        tool = self._make_tool(valid_schema, tool_id="test_tool_id")

        assert await tool.id() == "test_tool_id"
        assert await tool.name() == "test_tool"
        assert await tool.description() == "A test tool"
        assert tool._parameters_schema == valid_schema

    async def test_init_with_invalid_schema_missing_type(self):
        """A schema without a "type" key is rejected."""
        schema_without_type = {"properties": {"param1": {"type": "string"}}}

        with pytest.raises(ValueError, match=self._SCHEMA_ERROR):
            self._make_tool(schema_without_type)

    def test_init_with_invalid_schema_missing_properties(self):
        """A schema without a "properties" key is rejected."""
        schema_without_properties = {"type": "object"}

        with pytest.raises(ValueError, match=self._SCHEMA_ERROR):
            self._make_tool(schema_without_properties)

    def test_init_with_invalid_schema_wrong_type(self):
        """A schema whose "type" is not "object" is rejected."""
        array_schema = {"type": "array", "properties": {"param1": {"type": "string"}}}

        with pytest.raises(ValueError, match=self._SCHEMA_ERROR):
            self._make_tool(array_schema)

    async def test_toolcall_definition(self):
        """toolcall_definition returns the OpenAI-compatible function format."""
        schema = {
            "type": "object",
            "properties": {
                "param1": {"type": "string", "description": "Test parameter"},
                "param2": {"type": "integer", "description": "Another parameter"},
            },
            "required": ["param1"],
        }

        tool = self._make_tool(
            schema,
            tool_id="test_tool_id",
            name="test_function",
            description="A test function tool",
        )

        actual = await tool.toolcall_definition()

        assert actual == {
            "type": "function",
            "function": {
                "name": "test_function",
                "description": "A test function tool",
                "parameters": schema,
            },
        }

    def test_run_method_implemented_by_subclass(self):
        """The subclass's run implementation is what gets called."""
        schema = {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        }

        tool = self._make_tool(schema)

        output = tool.run(message="hello", extra_param=42)
        assert output == "test_result: {'message': 'hello', 'extra_param': 42}"

    def test_cannot_instantiate_abstract_kiln_tool_directly(self):
        """KilnTool itself cannot be instantiated; TypeError is expected."""
        schema = {
            "type": "object",
            "properties": {"param": {"type": "string"}},
            "required": ["param"],
        }

        with pytest.raises(TypeError):
            KilnTool(
                tool_id="test",
                name="test",
                description="test",
                parameters_schema=schema,
            )  # type: ignore
153
+
154
+
155
class TestValidationEdgeCases:
    """Test edge cases and validation scenarios."""

    def test_empty_properties_schema(self):
        """An object schema with an empty "properties" dict is accepted."""
        empty_schema = {"type": "object", "properties": {}}

        tool = ConcreteTestTool(
            tool_id="test_tool",
            name="test_tool",
            description="A test tool",
            parameters_schema=empty_schema,
        )

        assert tool._parameters_schema == empty_schema

    async def test_complex_nested_schema(self):
        """A nested schema is stored as given and surfaced in the tool definition."""
        config_schema = {
            "type": "object",
            "properties": {
                "timeout": {"type": "integer", "minimum": 0},
                "retries": {"type": "integer", "minimum": 1},
            },
            "required": ["timeout"],
        }
        nested_schema = {
            "type": "object",
            "properties": {
                "config": config_schema,
                "items": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["config"],
        }

        tool = ConcreteTestTool(
            tool_id="complex_tool",
            name="complex_tool",
            description="A complex test tool",
            parameters_schema=nested_schema,
        )

        assert tool._parameters_schema == nested_schema

        tool_definition = await tool.toolcall_definition()
        assert tool_definition["function"]["parameters"] == nested_schema