kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +8 -2
- kiln_ai/adapters/adapter_registry.py +43 -208
- kiln_ai/adapters/chat/chat_formatter.py +8 -12
- kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
- kiln_ai/adapters/chunkers/__init__.py +13 -0
- kiln_ai/adapters/chunkers/base_chunker.py +42 -0
- kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
- kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
- kiln_ai/adapters/chunkers/helpers.py +23 -0
- kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
- kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
- kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
- kiln_ai/adapters/chunkers/test_helpers.py +75 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
- kiln_ai/adapters/docker_model_runner_tools.py +119 -0
- kiln_ai/adapters/embedding/__init__.py +0 -0
- kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
- kiln_ai/adapters/embedding/embedding_registry.py +32 -0
- kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
- kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
- kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
- kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
- kiln_ai/adapters/eval/base_eval.py +2 -2
- kiln_ai/adapters/eval/eval_runner.py +9 -3
- kiln_ai/adapters/eval/g_eval.py +2 -2
- kiln_ai/adapters/eval/test_base_eval.py +2 -4
- kiln_ai/adapters/eval/test_g_eval.py +4 -5
- kiln_ai/adapters/extractors/__init__.py +18 -0
- kiln_ai/adapters/extractors/base_extractor.py +72 -0
- kiln_ai/adapters/extractors/encoding.py +20 -0
- kiln_ai/adapters/extractors/extractor_registry.py +44 -0
- kiln_ai/adapters/extractors/extractor_runner.py +112 -0
- kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
- kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
- kiln_ai/adapters/extractors/test_encoding.py +54 -0
- kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
- kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
- kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
- kiln_ai/adapters/fine_tune/__init__.py +1 -1
- kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
- kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
- kiln_ai/adapters/ml_embedding_model_list.py +192 -0
- kiln_ai/adapters/ml_model_list.py +761 -37
- kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
- kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
- kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
- kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
- kiln_ai/adapters/ollama_tools.py +69 -12
- kiln_ai/adapters/parsers/__init__.py +1 -1
- kiln_ai/adapters/provider_tools.py +205 -47
- kiln_ai/adapters/rag/deduplication.py +49 -0
- kiln_ai/adapters/rag/progress.py +252 -0
- kiln_ai/adapters/rag/rag_runners.py +844 -0
- kiln_ai/adapters/rag/test_deduplication.py +195 -0
- kiln_ai/adapters/rag/test_progress.py +785 -0
- kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
- kiln_ai/adapters/remote_config.py +80 -8
- kiln_ai/adapters/repair/test_repair_task.py +12 -9
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +657 -85
- kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
- kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
- kiln_ai/adapters/test_ml_model_list.py +251 -1
- kiln_ai/adapters/test_ollama_tools.py +340 -1
- kiln_ai/adapters/test_prompt_adaptors.py +13 -6
- kiln_ai/adapters/test_prompt_builders.py +1 -1
- kiln_ai/adapters/test_provider_tools.py +254 -8
- kiln_ai/adapters/test_remote_config.py +651 -58
- kiln_ai/adapters/vector_store/__init__.py +1 -0
- kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
- kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
- kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
- kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
- kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
- kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
- kiln_ai/datamodel/__init__.py +39 -34
- kiln_ai/datamodel/basemodel.py +170 -1
- kiln_ai/datamodel/chunk.py +158 -0
- kiln_ai/datamodel/datamodel_enums.py +28 -0
- kiln_ai/datamodel/embedding.py +64 -0
- kiln_ai/datamodel/eval.py +1 -1
- kiln_ai/datamodel/external_tool_server.py +298 -0
- kiln_ai/datamodel/extraction.py +303 -0
- kiln_ai/datamodel/json_schema.py +25 -10
- kiln_ai/datamodel/project.py +40 -1
- kiln_ai/datamodel/rag.py +79 -0
- kiln_ai/datamodel/registry.py +0 -15
- kiln_ai/datamodel/run_config.py +62 -0
- kiln_ai/datamodel/task.py +2 -77
- kiln_ai/datamodel/task_output.py +6 -1
- kiln_ai/datamodel/task_run.py +41 -0
- kiln_ai/datamodel/test_attachment.py +649 -0
- kiln_ai/datamodel/test_basemodel.py +4 -4
- kiln_ai/datamodel/test_chunk_models.py +317 -0
- kiln_ai/datamodel/test_dataset_split.py +1 -1
- kiln_ai/datamodel/test_embedding_models.py +448 -0
- kiln_ai/datamodel/test_eval_model.py +6 -6
- kiln_ai/datamodel/test_example_models.py +175 -0
- kiln_ai/datamodel/test_external_tool_server.py +691 -0
- kiln_ai/datamodel/test_extraction_chunk.py +206 -0
- kiln_ai/datamodel/test_extraction_model.py +470 -0
- kiln_ai/datamodel/test_rag.py +641 -0
- kiln_ai/datamodel/test_registry.py +8 -3
- kiln_ai/datamodel/test_task.py +15 -47
- kiln_ai/datamodel/test_tool_id.py +320 -0
- kiln_ai/datamodel/test_vector_store.py +320 -0
- kiln_ai/datamodel/tool_id.py +105 -0
- kiln_ai/datamodel/vector_store.py +141 -0
- kiln_ai/tools/__init__.py +8 -0
- kiln_ai/tools/base_tool.py +82 -0
- kiln_ai/tools/built_in_tools/__init__.py +13 -0
- kiln_ai/tools/built_in_tools/math_tools.py +124 -0
- kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
- kiln_ai/tools/mcp_server_tool.py +95 -0
- kiln_ai/tools/mcp_session_manager.py +246 -0
- kiln_ai/tools/rag_tools.py +157 -0
- kiln_ai/tools/test_base_tools.py +199 -0
- kiln_ai/tools/test_mcp_server_tool.py +457 -0
- kiln_ai/tools/test_mcp_session_manager.py +1585 -0
- kiln_ai/tools/test_rag_tools.py +848 -0
- kiln_ai/tools/test_tool_registry.py +562 -0
- kiln_ai/tools/tool_registry.py +85 -0
- kiln_ai/utils/__init__.py +3 -0
- kiln_ai/utils/async_job_runner.py +62 -17
- kiln_ai/utils/config.py +24 -2
- kiln_ai/utils/env.py +15 -0
- kiln_ai/utils/filesystem.py +14 -0
- kiln_ai/utils/filesystem_cache.py +60 -0
- kiln_ai/utils/litellm.py +94 -0
- kiln_ai/utils/lock.py +100 -0
- kiln_ai/utils/mime_type.py +38 -0
- kiln_ai/utils/open_ai_types.py +94 -0
- kiln_ai/utils/pdf_utils.py +38 -0
- kiln_ai/utils/project_utils.py +17 -0
- kiln_ai/utils/test_async_job_runner.py +151 -35
- kiln_ai/utils/test_config.py +138 -1
- kiln_ai/utils/test_env.py +142 -0
- kiln_ai/utils/test_filesystem_cache.py +316 -0
- kiln_ai/utils/test_litellm.py +206 -0
- kiln_ai/utils/test_lock.py +185 -0
- kiln_ai/utils/test_mime_type.py +66 -0
- kiln_ai/utils/test_open_ai_types.py +131 -0
- kiln_ai/utils/test_pdf_utils.py +73 -0
- kiln_ai/utils/test_uuid.py +111 -0
- kiln_ai/utils/test_validation.py +524 -0
- kiln_ai/utils/uuid.py +9 -0
- kiln_ai/utils/validation.py +90 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
- kiln_ai-0.21.0.dist-info/RECORD +211 -0
- kiln_ai-0.19.0.dist-info/RECORD +0 -115
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/remote_config.py
CHANGED
|
@@ -3,28 +3,53 @@ import json
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
import threading
|
|
6
|
+
from dataclasses import dataclass
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Any, List
|
|
8
9
|
|
|
9
10
|
import requests
|
|
10
11
|
from pydantic import ValidationError
|
|
11
12
|
|
|
13
|
+
from kiln_ai.adapters.ml_embedding_model_list import (
|
|
14
|
+
KilnEmbeddingModel,
|
|
15
|
+
KilnEmbeddingModelProvider,
|
|
16
|
+
built_in_embedding_models,
|
|
17
|
+
)
|
|
18
|
+
from kiln_ai.datamodel.datamodel_enums import KilnMimeType
|
|
19
|
+
|
|
12
20
|
from .ml_model_list import KilnModel, KilnModelProvider, built_in_models
|
|
13
21
|
|
|
14
22
|
logger = logging.getLogger(__name__)
|
|
15
23
|
|
|
16
24
|
|
|
17
|
-
|
|
18
|
-
|
|
25
|
+
@dataclass
|
|
26
|
+
class KilnRemoteConfig:
|
|
27
|
+
model_list: List[KilnModel]
|
|
28
|
+
embedding_model_list: List[KilnEmbeddingModel]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def serialize_config(
|
|
32
|
+
models: List[KilnModel],
|
|
33
|
+
embedding_models: List[KilnEmbeddingModel],
|
|
34
|
+
path: str | Path,
|
|
35
|
+
) -> None:
|
|
36
|
+
data = {
|
|
37
|
+
"model_list": [m.model_dump(mode="json") for m in models],
|
|
38
|
+
"embedding_model_list": [m.model_dump(mode="json") for m in embedding_models],
|
|
39
|
+
}
|
|
19
40
|
Path(path).write_text(json.dumps(data, indent=2, sort_keys=True))
|
|
20
41
|
|
|
21
42
|
|
|
22
|
-
def deserialize_config_at_path(path: str | Path) -> List[KilnModel]:
|
|
43
|
+
def deserialize_config_at_path(
|
|
44
|
+
path: str | Path,
|
|
45
|
+
) -> KilnRemoteConfig:
|
|
23
46
|
raw = json.loads(Path(path).read_text())
|
|
24
47
|
return deserialize_config_data(raw)
|
|
25
48
|
|
|
26
49
|
|
|
27
|
-
def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
50
|
+
def deserialize_config_data(
|
|
51
|
+
config_data: Any,
|
|
52
|
+
) -> KilnRemoteConfig:
|
|
28
53
|
if not isinstance(config_data, dict):
|
|
29
54
|
raise ValueError(f"Remote config expected dict, got {type(config_data)}")
|
|
30
55
|
|
|
@@ -34,6 +59,12 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
|
34
59
|
f"Remote config expected list of models, got {type(model_list)}"
|
|
35
60
|
)
|
|
36
61
|
|
|
62
|
+
embedding_model_data = config_data.get("embedding_model_list", [])
|
|
63
|
+
if not isinstance(embedding_model_data, list):
|
|
64
|
+
raise ValueError(
|
|
65
|
+
f"Remote config expected list of embedding models, got {type(embedding_model_data)}"
|
|
66
|
+
)
|
|
67
|
+
|
|
37
68
|
# We must be careful here, because some of the JSON data may be generated from a forward
|
|
38
69
|
# version of the code that has newer fields / versions of the fields, that may cause
|
|
39
70
|
# the current client this code is running on to fail to validate the item into a KilnModel.
|
|
@@ -48,6 +79,14 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
|
48
79
|
providers = []
|
|
49
80
|
for provider_data in providers_list:
|
|
50
81
|
try:
|
|
82
|
+
# we filter out the mime types that we don't support
|
|
83
|
+
mime_types = provider_data.get("multimodal_mime_types")
|
|
84
|
+
if mime_types is not None:
|
|
85
|
+
provider_data["multimodal_mime_types"] = [
|
|
86
|
+
mime_type
|
|
87
|
+
for mime_type in mime_types
|
|
88
|
+
if mime_type in list(KilnMimeType)
|
|
89
|
+
]
|
|
51
90
|
provider = KilnModelProvider.model_validate(provider_data)
|
|
52
91
|
providers.append(provider)
|
|
53
92
|
except ValidationError as e:
|
|
@@ -72,10 +111,38 @@ def deserialize_config_data(config_data: Any) -> List[KilnModel]:
|
|
|
72
111
|
model_data,
|
|
73
112
|
e,
|
|
74
113
|
)
|
|
75
|
-
|
|
114
|
+
|
|
115
|
+
embedding_models = []
|
|
116
|
+
for embedding_model_data in embedding_model_data:
|
|
117
|
+
try:
|
|
118
|
+
provider_list = embedding_model_data.get("providers", [])
|
|
119
|
+
providers = []
|
|
120
|
+
for provider_data in provider_list:
|
|
121
|
+
try:
|
|
122
|
+
provider = KilnEmbeddingModelProvider.model_validate(provider_data)
|
|
123
|
+
providers.append(provider)
|
|
124
|
+
except ValidationError as e:
|
|
125
|
+
logger.warning(
|
|
126
|
+
"Failed to validate an embedding model provider from remote config. Upgrade Kiln to use this model. Details %s: %s",
|
|
127
|
+
provider_data,
|
|
128
|
+
e,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
embedding_model_data["providers"] = []
|
|
132
|
+
embedding_model = KilnEmbeddingModel.model_validate(embedding_model_data)
|
|
133
|
+
embedding_model.providers = providers
|
|
134
|
+
embedding_models.append(embedding_model)
|
|
135
|
+
except ValidationError as e:
|
|
136
|
+
logger.warning(
|
|
137
|
+
"Failed to validate an embedding model from remote config. Upgrade Kiln to use this model. Details %s: %s",
|
|
138
|
+
embedding_model_data,
|
|
139
|
+
e,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
return KilnRemoteConfig(model_list=models, embedding_model_list=embedding_models)
|
|
76
143
|
|
|
77
144
|
|
|
78
|
-
def load_from_url(url: str) -> List[KilnModel]:
|
|
145
|
+
def load_from_url(url: str) -> KilnRemoteConfig:
|
|
79
146
|
response = requests.get(url, timeout=10)
|
|
80
147
|
response.raise_for_status()
|
|
81
148
|
data = response.json()
|
|
@@ -83,7 +150,11 @@ def load_from_url(url: str) -> List[KilnModel]:
|
|
|
83
150
|
|
|
84
151
|
|
|
85
152
|
def dump_builtin_config(path: str | Path) -> None:
|
|
86
|
-
serialize_config(
|
|
153
|
+
serialize_config(
|
|
154
|
+
models=built_in_models,
|
|
155
|
+
embedding_models=built_in_embedding_models,
|
|
156
|
+
path=path,
|
|
157
|
+
)
|
|
87
158
|
|
|
88
159
|
|
|
89
160
|
def load_remote_models(url: str) -> None:
|
|
@@ -93,7 +164,8 @@ def load_remote_models(url: str) -> None:
|
|
|
93
164
|
def fetch_and_replace() -> None:
|
|
94
165
|
try:
|
|
95
166
|
models = load_from_url(url)
|
|
96
|
-
built_in_models[:] = models
|
|
167
|
+
built_in_models[:] = models.model_list
|
|
168
|
+
built_in_embedding_models[:] = models.embedding_model_list
|
|
97
169
|
except Exception as exc:
|
|
98
170
|
# Do not crash startup, but surface the issue
|
|
99
171
|
logger.warning("Failed to fetch remote model list from %s: %s", url, exc)
|
kiln_ai/adapters/repair/test_repair_task.py
CHANGED
|
@@ -229,21 +229,20 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
|
|
|
229
229
|
"rating": 8,
|
|
230
230
|
}
|
|
231
231
|
|
|
232
|
+
run_config = RunConfigProperties(
|
|
233
|
+
model_name="llama_3_1_8b",
|
|
234
|
+
model_provider_name="ollama",
|
|
235
|
+
prompt_id="simple_prompt_builder",
|
|
236
|
+
structured_output_mode="json_schema",
|
|
237
|
+
)
|
|
238
|
+
|
|
232
239
|
with patch.object(LiteLlmAdapter, "_run", new_callable=AsyncMock) as mock_run:
|
|
233
240
|
mock_run.return_value = (
|
|
234
241
|
RunOutput(output=mocked_output, intermediate_outputs=None),
|
|
235
242
|
None,
|
|
236
243
|
)
|
|
237
244
|
|
|
238
|
-
adapter = adapter_for_task(
|
|
239
|
-
repair_task,
|
|
240
|
-
RunConfigProperties(
|
|
241
|
-
model_name="llama_3_1_8b",
|
|
242
|
-
model_provider_name="ollama",
|
|
243
|
-
prompt_id="simple_prompt_builder",
|
|
244
|
-
structured_output_mode="json_schema",
|
|
245
|
-
),
|
|
246
|
-
)
|
|
245
|
+
adapter = adapter_for_task(repair_task, run_config)
|
|
247
246
|
|
|
248
247
|
run = await adapter.invoke(repair_task_input.model_dump())
|
|
249
248
|
|
|
@@ -264,6 +263,10 @@ async def test_mocked_repair_task_run(sample_task, sample_task_run, sample_repai
|
|
|
264
263
|
}
|
|
265
264
|
assert run.input_source.type == DataSourceType.human
|
|
266
265
|
assert "created_by" in run.input_source.properties
|
|
266
|
+
assert run.output.source is not None
|
|
267
|
+
assert run.output.source.run_config is not None
|
|
268
|
+
saved_run_config = run.output.source.run_config.model_dump()
|
|
269
|
+
assert saved_run_config == run_config.model_dump()
|
|
267
270
|
|
|
268
271
|
# Verify that the mock was called
|
|
269
272
|
mock_run.assert_called_once()
|
kiln_ai/adapters/run_output.py
CHANGED
|
@@ -3,9 +3,12 @@ from typing import Dict
|
|
|
3
3
|
|
|
4
4
|
from litellm.types.utils import ChoiceLogprobs
|
|
5
5
|
|
|
6
|
+
from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
@dataclass
|
|
8
10
|
class RunOutput:
|
|
9
11
|
output: Dict | str
|
|
10
12
|
intermediate_outputs: Dict[str, str] | None
|
|
11
13
|
output_logprobs: ChoiceLogprobs | None = None
|
|
14
|
+
trace: list[ChatCompletionMessageParam] | None = None
|