kiln-ai 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +6 -0
- kiln_ai/adapters/adapter_registry.py +43 -226
- kiln_ai/adapters/chunkers/__init__.py +13 -0
- kiln_ai/adapters/chunkers/base_chunker.py +42 -0
- kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
- kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
- kiln_ai/adapters/chunkers/helpers.py +23 -0
- kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
- kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
- kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
- kiln_ai/adapters/chunkers/test_helpers.py +75 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
- kiln_ai/adapters/embedding/__init__.py +0 -0
- kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
- kiln_ai/adapters/embedding/embedding_registry.py +32 -0
- kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
- kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
- kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
- kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
- kiln_ai/adapters/eval/eval_runner.py +6 -2
- kiln_ai/adapters/eval/test_base_eval.py +1 -3
- kiln_ai/adapters/eval/test_g_eval.py +1 -1
- kiln_ai/adapters/extractors/__init__.py +18 -0
- kiln_ai/adapters/extractors/base_extractor.py +72 -0
- kiln_ai/adapters/extractors/encoding.py +20 -0
- kiln_ai/adapters/extractors/extractor_registry.py +44 -0
- kiln_ai/adapters/extractors/extractor_runner.py +112 -0
- kiln_ai/adapters/extractors/litellm_extractor.py +406 -0
- kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
- kiln_ai/adapters/extractors/test_encoding.py +54 -0
- kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
- kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
- kiln_ai/adapters/extractors/test_litellm_extractor.py +1290 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
- kiln_ai/adapters/ml_embedding_model_list.py +494 -0
- kiln_ai/adapters/ml_model_list.py +876 -18
- kiln_ai/adapters/model_adapters/litellm_adapter.py +40 -75
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +79 -1
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
- kiln_ai/adapters/model_adapters/test_structured_output.py +9 -10
- kiln_ai/adapters/ollama_tools.py +69 -12
- kiln_ai/adapters/provider_tools.py +190 -46
- kiln_ai/adapters/rag/deduplication.py +49 -0
- kiln_ai/adapters/rag/progress.py +252 -0
- kiln_ai/adapters/rag/rag_runners.py +844 -0
- kiln_ai/adapters/rag/test_deduplication.py +195 -0
- kiln_ai/adapters/rag/test_progress.py +785 -0
- kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
- kiln_ai/adapters/remote_config.py +80 -8
- kiln_ai/adapters/test_adapter_registry.py +579 -86
- kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
- kiln_ai/adapters/test_ml_model_list.py +202 -0
- kiln_ai/adapters/test_ollama_tools.py +340 -1
- kiln_ai/adapters/test_prompt_builders.py +1 -1
- kiln_ai/adapters/test_provider_tools.py +199 -8
- kiln_ai/adapters/test_remote_config.py +551 -56
- kiln_ai/adapters/vector_store/__init__.py +1 -0
- kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
- kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
- kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
- kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
- kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
- kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
- kiln_ai/datamodel/__init__.py +16 -13
- kiln_ai/datamodel/basemodel.py +201 -4
- kiln_ai/datamodel/chunk.py +158 -0
- kiln_ai/datamodel/datamodel_enums.py +27 -0
- kiln_ai/datamodel/embedding.py +64 -0
- kiln_ai/datamodel/external_tool_server.py +206 -54
- kiln_ai/datamodel/extraction.py +317 -0
- kiln_ai/datamodel/project.py +33 -1
- kiln_ai/datamodel/rag.py +79 -0
- kiln_ai/datamodel/task.py +5 -0
- kiln_ai/datamodel/task_output.py +41 -11
- kiln_ai/datamodel/test_attachment.py +649 -0
- kiln_ai/datamodel/test_basemodel.py +270 -14
- kiln_ai/datamodel/test_chunk_models.py +317 -0
- kiln_ai/datamodel/test_dataset_split.py +1 -1
- kiln_ai/datamodel/test_datasource.py +50 -0
- kiln_ai/datamodel/test_embedding_models.py +448 -0
- kiln_ai/datamodel/test_eval_model.py +6 -6
- kiln_ai/datamodel/test_external_tool_server.py +534 -152
- kiln_ai/datamodel/test_extraction_chunk.py +206 -0
- kiln_ai/datamodel/test_extraction_model.py +501 -0
- kiln_ai/datamodel/test_rag.py +641 -0
- kiln_ai/datamodel/test_task.py +35 -1
- kiln_ai/datamodel/test_tool_id.py +187 -1
- kiln_ai/datamodel/test_vector_store.py +320 -0
- kiln_ai/datamodel/tool_id.py +58 -0
- kiln_ai/datamodel/vector_store.py +141 -0
- kiln_ai/tools/base_tool.py +12 -3
- kiln_ai/tools/built_in_tools/math_tools.py +12 -4
- kiln_ai/tools/kiln_task_tool.py +158 -0
- kiln_ai/tools/mcp_server_tool.py +2 -2
- kiln_ai/tools/mcp_session_manager.py +51 -22
- kiln_ai/tools/rag_tools.py +164 -0
- kiln_ai/tools/test_kiln_task_tool.py +527 -0
- kiln_ai/tools/test_mcp_server_tool.py +4 -15
- kiln_ai/tools/test_mcp_session_manager.py +187 -227
- kiln_ai/tools/test_rag_tools.py +929 -0
- kiln_ai/tools/test_tool_registry.py +290 -7
- kiln_ai/tools/tool_registry.py +69 -16
- kiln_ai/utils/__init__.py +3 -0
- kiln_ai/utils/async_job_runner.py +62 -17
- kiln_ai/utils/config.py +2 -2
- kiln_ai/utils/env.py +15 -0
- kiln_ai/utils/filesystem.py +14 -0
- kiln_ai/utils/filesystem_cache.py +60 -0
- kiln_ai/utils/litellm.py +94 -0
- kiln_ai/utils/lock.py +100 -0
- kiln_ai/utils/mime_type.py +38 -0
- kiln_ai/utils/open_ai_types.py +19 -2
- kiln_ai/utils/pdf_utils.py +59 -0
- kiln_ai/utils/test_async_job_runner.py +151 -35
- kiln_ai/utils/test_env.py +142 -0
- kiln_ai/utils/test_filesystem_cache.py +316 -0
- kiln_ai/utils/test_litellm.py +206 -0
- kiln_ai/utils/test_lock.py +185 -0
- kiln_ai/utils/test_mime_type.py +66 -0
- kiln_ai/utils/test_open_ai_types.py +88 -12
- kiln_ai/utils/test_pdf_utils.py +86 -0
- kiln_ai/utils/test_uuid.py +111 -0
- kiln_ai/utils/test_validation.py +524 -0
- kiln_ai/utils/uuid.py +9 -0
- kiln_ai/utils/validation.py +90 -0
- {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +9 -1
- kiln_ai-0.22.0.dist-info/RECORD +213 -0
- kiln_ai-0.20.1.dist-info/RECORD +0 -138
- {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -3,28 +3,53 @@ import json
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
import threading
|
|
6
|
+
from dataclasses import dataclass
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Any, List
|
|
8
9
|
|
|
9
10
|
import requests
|
|
10
11
|
from pydantic import ValidationError
|
|
11
12
|
|
|
13
|
+
from kiln_ai.adapters.ml_embedding_model_list import (
|
|
14
|
+
KilnEmbeddingModel,
|
|
15
|
+
KilnEmbeddingModelProvider,
|
|
16
|
+
built_in_embedding_models,
|
|
17
|
+
)
|
|
18
|
+
from kiln_ai.datamodel.datamodel_enums import KilnMimeType
|
|
19
|
+
|
|
12
20
|
from .ml_model_list import KilnModel, KilnModelProvider, built_in_models
|
|
13
21
|
|
|
14
22
|
logger = logging.getLogger(__name__)
|
|
15
23
|
|
|
16
24
|
|
|
17
|
-
|
|
18
|
-
|
|
25
|
+
@dataclass
|
|
26
|
+
class KilnRemoteConfig:
|
|
27
|
+
model_list: List[KilnModel]
|
|
28
|
+
embedding_model_list: List[KilnEmbeddingModel]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def serialize_config(
|
|
32
|
+
models: List[KilnModel],
|
|
33
|
+
embedding_models: List[KilnEmbeddingModel],
|
|
34
|
+
path: str | Path,
|
|
35
|
+
) -> None:
|
|
36
|
+
data = {
|
|
37
|
+
"model_list": [m.model_dump(mode="json") for m in models],
|
|
38
|
+
"embedding_model_list": [m.model_dump(mode="json") for m in embedding_models],
|
|
39
|
+
}
|
|
19
40
|
Path(path).write_text(json.dumps(data, indent=2, sort_keys=True))
|
|
20
41
|
|
|
21
42
|
|
|
22
|
-
def deserialize_config_at_path(path: str | Path) -> List[KilnModel]:
|
|
43
|
+
def deserialize_config_at_path(
|
|
44
|
+
path: str | Path,
|
|
45
|
+
) -> KilnRemoteConfig:
|
|
23
46
|
raw = json.loads(Path(path).read_text())
|
|
24
47
|
return deserialize_config_data(raw)
|
|
25
48
|
|
|
26
49
|
|
|
27
|
-
def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
50
|
+
def deserialize_config_data(
|
|
51
|
+
config_data: Any,
|
|
52
|
+
) -> KilnRemoteConfig:
|
|
28
53
|
if not isinstance(config_data, dict):
|
|
29
54
|
raise ValueError(f"Remote config expected dict, got {type(config_data)}")
|
|
30
55
|
|
|
@@ -34,6 +59,12 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
|
34
59
|
f"Remote config expected list of models, got {type(model_list)}"
|
|
35
60
|
)
|
|
36
61
|
|
|
62
|
+
embedding_model_data = config_data.get("embedding_model_list", [])
|
|
63
|
+
if not isinstance(embedding_model_data, list):
|
|
64
|
+
raise ValueError(
|
|
65
|
+
f"Remote config expected list of embedding models, got {type(embedding_model_data)}"
|
|
66
|
+
)
|
|
67
|
+
|
|
37
68
|
# We must be careful here, because some of the JSON data may be generated from a forward
|
|
38
69
|
# version of the code that has newer fields / versions of the fields, that may cause
|
|
39
70
|
# the current client this code is running on to fail to validate the item into a KilnModel.
|
|
@@ -48,6 +79,14 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
|
48
79
|
providers = []
|
|
49
80
|
for provider_data in providers_list:
|
|
50
81
|
try:
|
|
82
|
+
# we filter out the mime types that we don't support
|
|
83
|
+
mime_types = provider_data.get("multimodal_mime_types")
|
|
84
|
+
if mime_types is not None:
|
|
85
|
+
provider_data["multimodal_mime_types"] = [
|
|
86
|
+
mime_type
|
|
87
|
+
for mime_type in mime_types
|
|
88
|
+
if mime_type in list(KilnMimeType)
|
|
89
|
+
]
|
|
51
90
|
provider = KilnModelProvider.model_validate(provider_data)
|
|
52
91
|
providers.append(provider)
|
|
53
92
|
except ValidationError as e:
|
|
@@ -72,10 +111,38 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
|
72
111
|
model_data,
|
|
73
112
|
e,
|
|
74
113
|
)
|
|
75
|
-
|
|
114
|
+
|
|
115
|
+
embedding_models = []
|
|
116
|
+
for embedding_model_data in embedding_model_data:
|
|
117
|
+
try:
|
|
118
|
+
provider_list = embedding_model_data.get("providers", [])
|
|
119
|
+
providers = []
|
|
120
|
+
for provider_data in provider_list:
|
|
121
|
+
try:
|
|
122
|
+
provider = KilnEmbeddingModelProvider.model_validate(provider_data)
|
|
123
|
+
providers.append(provider)
|
|
124
|
+
except ValidationError as e:
|
|
125
|
+
logger.warning(
|
|
126
|
+
"Failed to validate an embedding model provider from remote config. Upgrade Kiln to use this model. Details %s: %s",
|
|
127
|
+
provider_data,
|
|
128
|
+
e,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
embedding_model_data["providers"] = []
|
|
132
|
+
embedding_model = KilnEmbeddingModel.model_validate(embedding_model_data)
|
|
133
|
+
embedding_model.providers = providers
|
|
134
|
+
embedding_models.append(embedding_model)
|
|
135
|
+
except ValidationError as e:
|
|
136
|
+
logger.warning(
|
|
137
|
+
"Failed to validate an embedding model from remote config. Upgrade Kiln to use this model. Details %s: %s",
|
|
138
|
+
embedding_model_data,
|
|
139
|
+
e,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
return KilnRemoteConfig(model_list=models, embedding_model_list=embedding_models)
|
|
76
143
|
|
|
77
144
|
|
|
78
|
-
def load_from_url(url: str) -> List[KilnModel]:
|
|
145
|
+
def load_from_url(url: str) -> KilnRemoteConfig:
|
|
79
146
|
response = requests.get(url, timeout=10)
|
|
80
147
|
response.raise_for_status()
|
|
81
148
|
data = response.json()
|
|
@@ -83,7 +150,11 @@ def load_from_url(url: str) -> List[KilnModel]:
|
|
|
83
150
|
|
|
84
151
|
|
|
85
152
|
def dump_builtin_config(path: str | Path) -> None:
|
|
86
|
-
serialize_config(
|
|
153
|
+
serialize_config(
|
|
154
|
+
models=built_in_models,
|
|
155
|
+
embedding_models=built_in_embedding_models,
|
|
156
|
+
path=path,
|
|
157
|
+
)
|
|
87
158
|
|
|
88
159
|
|
|
89
160
|
def load_remote_models(url: str) -> None:
|
|
@@ -93,7 +164,8 @@ def load_remote_models(url: str) -> None:
|
|
|
93
164
|
def fetch_and_replace() -> None:
|
|
94
165
|
try:
|
|
95
166
|
models = load_from_url(url)
|
|
96
|
-
built_in_models[:] = models
|
|
167
|
+
built_in_models[:] = models.model_list
|
|
168
|
+
built_in_embedding_models[:] = models.embedding_model_list
|
|
97
169
|
except Exception as exc:
|
|
98
170
|
# Do not crash startup, but surface the issue
|
|
99
171
|
logger.warning("Failed to fetch remote model list from %s: %s", url, exc)
|