kiln-ai 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kiln-ai might be problematic.
Files changed (47)
  1. kiln_ai/adapters/__init__.py +4 -0
  2. kiln_ai/adapters/adapter_registry.py +153 -28
  3. kiln_ai/adapters/eval/__init__.py +28 -0
  4. kiln_ai/adapters/eval/eval_runner.py +4 -1
  5. kiln_ai/adapters/eval/g_eval.py +2 -1
  6. kiln_ai/adapters/eval/test_base_eval.py +1 -0
  7. kiln_ai/adapters/eval/test_eval_runner.py +1 -0
  8. kiln_ai/adapters/eval/test_g_eval.py +1 -0
  9. kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
  10. kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
  11. kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
  12. kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
  13. kiln_ai/adapters/ml_model_list.py +638 -155
  14. kiln_ai/adapters/model_adapters/__init__.py +2 -4
  15. kiln_ai/adapters/model_adapters/base_adapter.py +14 -11
  16. kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
  17. kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
  18. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
  19. kiln_ai/adapters/model_adapters/test_structured_output.py +23 -5
  20. kiln_ai/adapters/ollama_tools.py +3 -2
  21. kiln_ai/adapters/parsers/r1_parser.py +19 -14
  22. kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
  23. kiln_ai/adapters/provider_tools.py +50 -58
  24. kiln_ai/adapters/repair/test_repair_task.py +3 -3
  25. kiln_ai/adapters/run_output.py +1 -1
  26. kiln_ai/adapters/test_adapter_registry.py +17 -20
  27. kiln_ai/adapters/test_generate_docs.py +2 -2
  28. kiln_ai/adapters/test_prompt_adaptors.py +30 -19
  29. kiln_ai/adapters/test_provider_tools.py +26 -81
  30. kiln_ai/datamodel/basemodel.py +2 -0
  31. kiln_ai/datamodel/datamodel_enums.py +2 -0
  32. kiln_ai/datamodel/json_schema.py +1 -1
  33. kiln_ai/datamodel/task_output.py +13 -6
  34. kiln_ai/datamodel/test_basemodel.py +9 -0
  35. kiln_ai/datamodel/test_datasource.py +19 -0
  36. kiln_ai/utils/config.py +37 -0
  37. kiln_ai/utils/dataset_import.py +232 -0
  38. kiln_ai/utils/test_dataset_import.py +596 -0
  39. {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +51 -7
  40. {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/RECORD +42 -39
  41. kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -309
  42. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -10
  43. kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -289
  44. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -343
  45. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -216
  46. {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
  47. {kiln_ai-0.12.0.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/__init__.py
@@ -12,10 +12,13 @@ The prompt_builders submodule contains classes that build prompts for use with t
 The repair submodule contains an adapter for the repair task.
 
 The parser submodule contains parsers for the output of the AI models.
+
+The eval submodule contains the code for evaluating the performance of a model.
 """
 
 from . import (
     data_gen,
+    eval,
     fine_tune,
     ml_model_list,
     model_adapters,
@@ -30,4 +33,5 @@ __all__ = [
     "ml_model_list",
     "prompt_builders",
     "repair",
+    "eval",
 ]
kiln_ai/adapters/adapter_registry.py
@@ -3,12 +3,11 @@ from os import getenv
 from kiln_ai import datamodel
 from kiln_ai.adapters.ml_model_list import ModelProviderName
 from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig, BaseAdapter
-from kiln_ai.adapters.model_adapters.langchain_adapters import LangchainAdapter
-from kiln_ai.adapters.model_adapters.openai_model_adapter import (
-    OpenAICompatibleAdapter,
-    OpenAICompatibleConfig,
+from kiln_ai.adapters.model_adapters.litellm_adapter import (
+    LiteLlmAdapter,
+    LiteLlmConfig,
 )
-from kiln_ai.adapters.provider_tools import core_provider, openai_compatible_config
+from kiln_ai.adapters.provider_tools import core_provider, lite_llm_config
 from kiln_ai.datamodel import PromptId
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
@@ -26,50 +25,185 @@ def adapter_for_task(
 
     match core_provider_name:
         case ModelProviderName.openrouter:
-            return OpenAICompatibleAdapter(
+            return LiteLlmAdapter(
                 kiln_task=kiln_task,
-                config=OpenAICompatibleConfig(
+                config=LiteLlmConfig(
+                    model_name=model_name,
                     base_url=getenv("OPENROUTER_BASE_URL")
                     or "https://openrouter.ai/api/v1",
-                    api_key=Config.shared().open_router_api_key,
-                    model_name=model_name,
                     provider_name=provider,
                     default_headers={
                         "HTTP-Referer": "https://getkiln.ai/openrouter",
                         "X-Title": "KilnAI",
                     },
+                    additional_body_options={
+                        "api_key": Config.shared().open_router_api_key,
+                    },
                 ),
                 prompt_id=prompt_id,
                 base_adapter_config=base_adapter_config,
             )
         case ModelProviderName.openai:
-            return OpenAICompatibleAdapter(
+            return LiteLlmAdapter(
                 kiln_task=kiln_task,
-                config=OpenAICompatibleConfig(
-                    api_key=Config.shared().open_ai_api_key,
+                config=LiteLlmConfig(
                     model_name=model_name,
                     provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().open_ai_api_key,
+                    },
                 ),
                 prompt_id=prompt_id,
                 base_adapter_config=base_adapter_config,
             )
         case ModelProviderName.openai_compatible:
-            config = openai_compatible_config(model_name)
-            return OpenAICompatibleAdapter(
+            config = lite_llm_config(model_name)
+            return LiteLlmAdapter(
                 kiln_task=kiln_task,
                 config=config,
                 prompt_id=prompt_id,
                 base_adapter_config=base_adapter_config,
             )
-        # Use LangchainAdapter for the rest
         case ModelProviderName.groq:
-            pass
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().groq_api_key,
+                    },
+                ),
+            )
         case ModelProviderName.amazon_bedrock:
-            pass
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "aws_access_key_id": Config.shared().bedrock_access_key,
+                        "aws_secret_access_key": Config.shared().bedrock_secret_key,
+                        # The only region that's widely supported for bedrock
+                        "aws_region_name": "us-west-2",
+                    },
+                ),
+            )
         case ModelProviderName.ollama:
-            pass
+            ollama_base_url = (
+                Config.shared().ollama_base_url or "http://localhost:11434"
+            )
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    # Set the Ollama base URL for 2 reasons:
+                    # 1. To use the correct base URL
+                    # 2. We use Ollama's OpenAI compatible API (/v1), and don't just let litellm use the Ollama API. We use more advanced features like json_schema.
+                    base_url=ollama_base_url + "/v1",
+                ),
+            )
         case ModelProviderName.fireworks_ai:
-            pass
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().fireworks_api_key,
+                    },
+                ),
+            )
+        case ModelProviderName.anthropic:
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().anthropic_api_key,
+                    },
+                ),
+            )
+        case ModelProviderName.gemini_api:
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().gemini_api_key,
+                    },
+                ),
+            )
+        case ModelProviderName.vertex:
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "vertex_project": Config.shared().vertex_project_id,
+                        "vertex_location": Config.shared().vertex_location,
+                    },
+                ),
+            )
+        case ModelProviderName.together_ai:
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().together_api_key,
+                    },
+                ),
+            )
+        case ModelProviderName.azure_openai:
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    base_url=Config.shared().azure_openai_endpoint,
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().azure_openai_api_key,
+                        "api_version": "2025-02-01-preview",
+                    },
+                ),
+            )
+        case ModelProviderName.huggingface:
+            return LiteLlmAdapter(
+                kiln_task=kiln_task,
+                prompt_id=prompt_id,
+                base_adapter_config=base_adapter_config,
+                config=LiteLlmConfig(
+                    model_name=model_name,
+                    provider_name=provider,
+                    additional_body_options={
+                        "api_key": Config.shared().huggingface_api_key,
+                    },
+                ),
+            )
         # These are virtual providers that should have mapped to an actual provider in core_provider
         case ModelProviderName.kiln_fine_tune:
             raise ValueError(
@@ -81,12 +215,3 @@ def adapter_for_task(
             )
         case _:
             raise_exhaustive_enum_error(core_provider_name)
-
-    # We use langchain for all others right now, but moving off it as we touch anything.
-    return LangchainAdapter(
-        kiln_task,
-        model_name=model_name,
-        provider=provider,
-        prompt_id=prompt_id,
-        base_adapter_config=base_adapter_config,
-    )
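
Net effect of this change: every concrete provider now returns a LiteLlmAdapter, with per-provider differences reduced to a LiteLlmConfig (base URL, default headers, and additional_body_options such as API keys), and the trailing LangchainAdapter fallback is gone. A hedged sketch of calling the registry; the keyword names come from the diff above, while the task value and model id are hypothetical placeholders:

    # Sketch only: kiln_task and the model id are placeholders, not real values.
    from kiln_ai.adapters.adapter_registry import adapter_for_task
    from kiln_ai.adapters.ml_model_list import ModelProviderName

    adapter = adapter_for_task(
        kiln_task=task,                    # a kiln_ai.datamodel Task instance (assumed)
        model_name="llama_3_1_8b",         # hypothetical model id
        provider=ModelProviderName.groq,   # any concrete case above resolves the same way
        prompt_id=prompt_id,               # a PromptId, per the imports above
    )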
kiln_ai/adapters/eval/__init__.py
@@ -0,0 +1,28 @@
+"""
+# Evals
+
+This module contains the code for evaluating the performance of a model.
+
+The submodules contain:
+
+- BaseEval: each eval technique implements this interface.
+- G-Eval: an eval implementation that implements G-Eval and LLM as Judge.
+- EvalRunner: a class that runs a full evaluation (many smaller eval jobs). Includes async parallel processing, and the ability to restart where it left off.
+- EvalRegistry: a registry for all eval implementations.
+
+The datamodel for Evals is in the `kiln_ai.datamodel.eval` module.
+"""
+
+from . import (
+    base_eval,
+    eval_runner,
+    g_eval,
+    registry,
+)
+
+__all__ = [
+    "base_eval",
+    "eval_runner",
+    "g_eval",
+    "registry",
+]
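
The new package is import-only at this level: it re-exports its four submodules, and the concrete entry points live inside them. A hedged sketch of that surface; the registry helper named in the comment is hypothetical, not confirmed by this diff:

    from kiln_ai.adapters.eval import base_eval, eval_runner, g_eval, registry

    # A concrete evaluator would be resolved via the registry submodule, e.g.
    # (hypothetical helper name):
    # evaluator = registry.eval_adapter_from_type(config_type)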
kiln_ai/adapters/eval/eval_runner.py
@@ -139,7 +139,10 @@ class EvalRunner:
         for run_config in self.run_configs or []:
             already_run[eval_config.id][run_config.id] = set()
         for run in eval_config.runs(readonly=True):
-            if run.task_run_config_id is not None:
+            if (
+                run.task_run_config_id is not None
+                and run.task_run_config_id in already_run[eval_config.id]
+            ):
                 already_run[eval_config.id][run.task_run_config_id].add(
                     run.dataset_id
                 )
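
Without the added membership test, a persisted eval run that referenced a task_run_config_id outside the currently selected run configs would index a key that was never initialized and raise a KeyError. A standalone illustration of the pattern (not Kiln code):

    # already_run is keyed by the run configs selected for this pass only.
    already_run = {"eval_cfg_1": {"run_cfg_a": set()}}

    run_config_id = "run_cfg_b"  # referenced by a saved run, but not selected this pass
    # Old behavior: already_run["eval_cfg_1"][run_config_id].add(...) -> KeyError
    # New behavior: the membership test skips ids that are not being evaluated.
    if run_config_id is not None and run_config_id in already_run["eval_cfg_1"]:
        already_run["eval_cfg_1"][run_config_id].add("dataset_123")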
kiln_ai/adapters/eval/g_eval.py
@@ -1,6 +1,8 @@
 import math
 from typing import Dict, List, Tuple
 
+from litellm.types.utils import ChatCompletionTokenLogprob
+
 from kiln_ai.adapters.adapter_registry import adapter_for_task
 from kiln_ai.adapters.eval.base_eval import BaseEval
 from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig, RunOutput
@@ -8,7 +10,6 @@ from kiln_ai.adapters.prompt_builders import PromptGenerators
 from kiln_ai.datamodel import Project, Task, TaskRun
 from kiln_ai.datamodel.eval import EvalConfig, EvalConfigType, EvalScores
 from kiln_ai.datamodel.task import RunConfig
-from openai.types.chat import ChatCompletionTokenLogprob
 
 # all the tokens we score for, and their float scores.
 TOKEN_TO_SCORE_MAP: Dict[str, float] = {
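
The only change here is swapping the logprob type from the OpenAI client to LiteLLM's, matching the adapter migration above; the scoring logic that consumes TOKEN_TO_SCORE_MAP is untouched. For background, G-Eval-style scoring weights each candidate rating token by its probability. An illustrative sketch with a made-up score map, not Kiln's real one:

    import math
    from typing import Dict

    # Made-up example map; Kiln's real tokens and weighting live in g_eval.py.
    SCORE_MAP: Dict[str, float] = {"1": 1.0, "2": 2.0, "3": 3.0, "4": 4.0, "5": 5.0}

    def weighted_score(top_logprobs: Dict[str, float]) -> float:
        # Weight each candidate rating token by its probability (exp of its logprob).
        total_prob = 0.0
        weighted = 0.0
        for token, logprob in top_logprobs.items():
            score = SCORE_MAP.get(token)
            if score is None:
                continue  # ignore tokens that are not ratings
            p = math.exp(logprob)
            weighted += p * score
            total_prob += p
        return weighted / total_prob if total_prob else 0.0

    print(weighted_score({"4": -0.22, "5": -1.61}))  # ~4.2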
kiln_ai/adapters/eval/test_base_eval.py
@@ -1,6 +1,7 @@
 import json
 
 import pytest
+
 from kiln_ai.adapters.eval.base_eval import BaseEval
 from kiln_ai.datamodel import BasePrompt, DataSource, DataSourceType
 from kiln_ai.datamodel.eval import Eval, EvalConfig, EvalOutputScore
kiln_ai/adapters/eval/test_eval_runner.py
@@ -2,6 +2,7 @@ from typing import Dict
 from unittest.mock import AsyncMock, patch
 
 import pytest
+
 from kiln_ai.adapters.eval.base_eval import BaseEval
 from kiln_ai.adapters.eval.eval_runner import EvalJob, EvalRunner
 from kiln_ai.datamodel import (
kiln_ai/adapters/eval/test_g_eval.py
@@ -2,6 +2,7 @@ import math
 import pickle
 
 import pytest
+
 from kiln_ai.adapters.eval.g_eval import TOKEN_TO_SCORE_MAP, GEval, GEvalTask
 from kiln_ai.adapters.eval.test_g_eval_data import serialized_run_output
 from kiln_ai.adapters.ml_model_list import built_in_models
kiln_ai/adapters/fine_tune/base_finetune.py
@@ -4,7 +4,12 @@ from typing import Literal
 from pydantic import BaseModel
 
 from kiln_ai.adapters.ml_model_list import built_in_models
-from kiln_ai.datamodel import DatasetSplit, FinetuneDataStrategy, FineTuneStatusType
+from kiln_ai.datamodel import (
+    DatasetSplit,
+    FinetuneDataStrategy,
+    FineTuneStatusType,
+    Task,
+)
 from kiln_ai.datamodel import Finetune as FinetuneModel
 from kiln_ai.utils.name_generator import generate_memorable_name
 
@@ -101,7 +106,7 @@ class BaseFinetuneAdapter(ABC):
             train_split_name=train_split_name,
             validation_split_name=validation_split_name,
             parameters=parameters,
-            system_message=system_message,
+            system_message=cls.augment_system_message(system_message, parent_task),
             thinking_instructions=thinking_instructions,
             parent=parent_task,
             data_strategy=data_strategy,
@@ -114,6 +119,15 @@ class BaseFinetuneAdapter(ABC):
 
         return adapter, datamodel
 
+    @classmethod
+    def augment_system_message(cls, system_message: str, task: Task) -> str:
+        """
+        Augment the system message with additional instructions, such as JSON instructions.
+        """
+
+        # Base implementation does nothing, can be overridden by subclasses
+        return system_message
+
     @abstractmethod
     async def _start(self, dataset: DatasetSplit) -> None:
         """
kiln_ai/adapters/fine_tune/finetune_registry.py
@@ -3,9 +3,11 @@ from typing import Type
 from kiln_ai.adapters.fine_tune.base_finetune import BaseFinetuneAdapter
 from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune
 from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
+from kiln_ai.adapters.fine_tune.together_finetune import TogetherFinetune
 from kiln_ai.adapters.ml_model_list import ModelProviderName
 
 finetune_registry: dict[ModelProviderName, Type[BaseFinetuneAdapter]] = {
     ModelProviderName.openai: OpenAIFinetune,
     ModelProviderName.fireworks_ai: FireworksFinetune,
+    ModelProviderName.together_ai: TogetherFinetune,
 }
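
With this entry, Together fine-tunes dispatch through the same registry as OpenAI and Fireworks. A small usage sketch of the lookup; the commented entry point is deliberately unnamed because it is not shown in this diff:

    from kiln_ai.adapters.fine_tune.finetune_registry import finetune_registry
    from kiln_ai.adapters.ml_model_list import ModelProviderName

    adapter_class = finetune_registry[ModelProviderName.together_ai]  # TogetherFinetune
    # The creation classmethod from base_finetune.py would then be called on
    # adapter_class; its exact name is not visible in this diff.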