kiln-ai: kiln_ai-0.19.0-py3-none-any.whl → kiln_ai-0.21.0-py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158)
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -3,28 +3,53 @@ import json
3
3
  import logging
4
4
  import os
5
5
  import threading
6
+ from dataclasses import dataclass
6
7
  from pathlib import Path
7
8
  from typing import Any, List
8
9
 
9
10
  import requests
10
11
  from pydantic import ValidationError
11
12
 
13
+ from kiln_ai.adapters.ml_embedding_model_list import (
14
+ KilnEmbeddingModel,
15
+ KilnEmbeddingModelProvider,
16
+ built_in_embedding_models,
17
+ )
18
+ from kiln_ai.datamodel.datamodel_enums import KilnMimeType
19
+
12
20
  from .ml_model_list import KilnModel, KilnModelProvider, built_in_models
13
21
 
14
22
  logger = logging.getLogger(__name__)
15
23
 
16
24
 
17
- def serialize_config(models: List[KilnModel], path: str | Path) -> None:
18
- data = {"model_list": [m.model_dump(mode="json") for m in models]}
25
+ @dataclass
26
+ class KilnRemoteConfig:
27
+ model_list: List[KilnModel]
28
+ embedding_model_list: List[KilnEmbeddingModel]
29
+
30
+
31
+ def serialize_config(
32
+ models: List[KilnModel],
33
+ embedding_models: List[KilnEmbeddingModel],
34
+ path: str | Path,
35
+ ) -> None:
36
+ data = {
37
+ "model_list": [m.model_dump(mode="json") for m in models],
38
+ "embedding_model_list": [m.model_dump(mode="json") for m in embedding_models],
39
+ }
19
40
  Path(path).write_text(json.dumps(data, indent=2, sort_keys=True))
20
41
 
21
42
 
22
- def deserialize_config_at_path(path: str | Path) -> List[KilnModel]:
43
+ def deserialize_config_at_path(
44
+ path: str | Path,
45
+ ) -> KilnRemoteConfig:
23
46
  raw = json.loads(Path(path).read_text())
24
47
  return deserialize_config_data(raw)
25
48
 
26
49
 
27
- def deserialize_config_data(config_data: Any) -> List[KilnModel]:
50
+ def deserialize_config_data(
51
+ config_data: Any,
52
+ ) -> KilnRemoteConfig:
28
53
  if not isinstance(config_data, dict):
29
54
  raise ValueError(f"Remote config expected dict, got {type(config_data)}")
30
55
 
@@ -34,6 +59,12 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
34
59
  f"Remote config expected list of models, got {type(model_list)}"
35
60
  )
36
61
 
62
+ embedding_model_data = config_data.get("embedding_model_list", [])
63
+ if not isinstance(embedding_model_data, list):
64
+ raise ValueError(
65
+ f"Remote config expected list of embedding models, got {type(embedding_model_data)}"
66
+ )
67
+
37
68
  # We must be careful here, because some of the JSON data may be generated from a forward
38
69
  # version of the code that has newer fields / versions of the fields, that may cause
39
70
  # the current client this code is running on to fail to validate the item into a KilnModel.
@@ -48,6 +79,14 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
48
79
  providers = []
49
80
  for provider_data in providers_list:
50
81
  try:
82
+ # we filter out the mime types that we don't support
83
+ mime_types = provider_data.get("multimodal_mime_types")
84
+ if mime_types is not None:
85
+ provider_data["multimodal_mime_types"] = [
86
+ mime_type
87
+ for mime_type in mime_types
88
+ if mime_type in list(KilnMimeType)
89
+ ]
51
90
  provider = KilnModelProvider.model_validate(provider_data)
52
91
  providers.append(provider)
53
92
  except ValidationError as e:
@@ -72,10 +111,38 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
72
111
  model_data,
73
112
  e,
74
113
  )
75
- return models
114
+
115
+ embedding_models = []
116
+ for embedding_model_data in embedding_model_data:
117
+ try:
118
+ provider_list = embedding_model_data.get("providers", [])
119
+ providers = []
120
+ for provider_data in provider_list:
121
+ try:
122
+ provider = KilnEmbeddingModelProvider.model_validate(provider_data)
123
+ providers.append(provider)
124
+ except ValidationError as e:
125
+ logger.warning(
126
+ "Failed to validate an embedding model provider from remote config. Upgrade Kiln to use this model. Details %s: %s",
127
+ provider_data,
128
+ e,
129
+ )
130
+
131
+ embedding_model_data["providers"] = []
132
+ embedding_model = KilnEmbeddingModel.model_validate(embedding_model_data)
133
+ embedding_model.providers = providers
134
+ embedding_models.append(embedding_model)
135
+ except ValidationError as e:
136
+ logger.warning(
137
+ "Failed to validate an embedding model from remote config. Upgrade Kiln to use this model. Details %s: %s",
138
+ embedding_model_data,
139
+ e,
140
+ )
141
+
142
+ return KilnRemoteConfig(model_list=models, embedding_model_list=embedding_models)
76
143
 
77
144
 
78
- def load_from_url(url: str) -> List[KilnModel]:
145
+ def load_from_url(url: str) -> KilnRemoteConfig:
79
146
  response = requests.get(url, timeout=10)
80
147
  response.raise_for_status()
81
148
  data = response.json()
@@ -83,7 +150,11 @@ def load_from_url(url: str) -> List[KilnModel]:
83
150
 
84
151
 
85
152
  def dump_builtin_config(path: str | Path) -> None:
86
- serialize_config(built_in_models, path)
153
+ serialize_config(
154
+ models=built_in_models,
155
+ embedding_models=built_in_embedding_models,
156
+ path=path,
157
+ )
87
158
 
88
159
 
89
160
  def load_remote_models(url: str) -> None:
@@ -93,7 +164,8 @@ def load_remote_models(url: str) -> None:
93
164
  def fetch_and_replace() -> None:
94
165
  try:
95
166
  models = load_from_url(url)
96
- built_in_models[:] = models
167
+ built_in_models[:] = models.model_list
168
+ built_in_embedding_models[:] = models.embedding_model_list
97
169
  except Exception as exc:
98
170
  # Do not crash startup, but surface the issue
99
171
  logger.warning("Failed to fetch remote model list from %s: %s", url, exc)
@@ -229,21 +229,20 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
229
229
  "rating": 8,
230
230
  }
231
231
 
232
+ run_config = RunConfigProperties(
233
+ model_name="llama_3_1_8b",
234
+ model_provider_name="ollama",
235
+ prompt_id="simple_prompt_builder",
236
+ structured_output_mode="json_schema",
237
+ )
238
+
232
239
  with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
233
240
  mock_run.return_value = (
234
241
  RunOutput(output=mocked_output, intermediate_outputs=None),
235
242
  None,
236
243
  )
237
244
 
238
- adapter = adapter_for_task(
239
- repair_task,
240
- RunConfigProperties(
241
- model_name="llama_3_1_8b",
242
- model_provider_name="ollama",
243
- prompt_id="simple_prompt_builder",
244
- structured_output_mode="json_schema",
245
- ),
246
- )
245
+ adapter = adapter_for_task(repair_task, run_config)
247
246
 
248
247
  run = await adapter.invoke(repair_task_input.model_dump())
249
248
 
@@ -264,6 +263,10 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
264
263
  }
265
264
  assert run.input_source.type == DataSourceType.human
266
265
  assert "created_by" in run.input_source.properties
266
+ assert run.output.source is not None
267
+ assert run.output.source.run_config is not None
268
+ saved_run_config = run.output.source.run_config.model_dump()
269
+ assert saved_run_config == run_config.model_dump()
267
270
 
268
271
  # Verify that the mock was called
269
272
  mock_run.assert_called_once()
@@ -3,9 +3,12 @@ from typing import Dict
3
3
 
4
4
  from litellm.types.utils import ChoiceLogprobs
5
5
 
6
+ from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
7
+
6
8
 
7
9
  @dataclass
8
10
  class RunOutput:
9
11
  output: Dict | str
10
12
  intermediate_outputs: Dict[str, str] | None
11
13
  output_logprobs: ChoiceLogprobs | None = None
14
+ trace: list[ChatCompletionMessageParam] | None = None