kiln-ai 0.13.0__tar.gz → 0.13.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/PKG-INFO +2 -2
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/adapter_registry.py +4 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/g_eval.py +17 -2
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_g_eval.py +12 -7
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +8 -1
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +19 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/together_finetune.py +2 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/provider_tools.py +2 -2
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_provider_tools.py +2 -2
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/config.py +9 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/pyproject.toml +6 -2
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/.gitignore +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/.python-version +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/LICENSE.txt +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/README.md +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/index.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai.html +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/search.js +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/base_eval.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/eval_runner.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/ml_model_list.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/litellm_adapter.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_structured_output.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/ollama_tools.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/base_parser.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/json_parser.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/prompt_builders.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/repair/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/repair/repair_task.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/run_output.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_adapter_registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_generate_docs.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_ollama_tools.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_prompt_builders.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/basemodel.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/datamodel_enums.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/dataset_filters.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/dataset_split.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/eval.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/finetune.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/json_schema.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/model_cache.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/project.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/prompt.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/prompt_id.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/strict_mode.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/task.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/task_output.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/task_run.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_basemodel.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_dataset_split.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_datasource.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_eval_model.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_example_models.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_json_schema.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_model_cache.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_model_perf.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_models.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_nested_save.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_output_rating.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_prompt_id.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_registry.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_task.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/__init__.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/dataset_import.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/exhaustive_error.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/formatting.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/name_generator.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/test_config.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/test_dataset_import.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/test_name_geneator.py +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/setup.cfg +0 -0
- {kiln_ai-0.13.0 → kiln_ai-0.13.2}/uv.lock +0 -0
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.13.0
+Version: 0.13.2
 Summary: Kiln AI
 Project-URL: Homepage, https://getkiln.ai
 Project-URL: Repository, https://github.com/Kiln-AI/kiln

@@ -26,7 +26,7 @@ Requires-Dist: pydantic>=2.9.2
 Requires-Dist: pytest-benchmark>=5.1.0
 Requires-Dist: pytest-cov>=6.0.0
 Requires-Dist: pyyaml>=6.0.2
-Requires-Dist: together
+Requires-Dist: together
 Requires-Dist: typing-extensions>=4.12.2
 Description-Content-Type: text/markdown
 
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/adapter_registry.py

@@ -108,6 +108,10 @@ def adapter_for_task(
                     # 1. To use the correct base URL
                     # 2. We use Ollama's OpenAI compatible API (/v1), and don't just let litellm use the Ollama API. We use more advanced features like json_schema.
                     base_url=ollama_base_url + "/v1",
+                    additional_body_options={
+                        # LiteLLM errors without an api_key, even though Ollama doesn't support one.
+                        "api_key": "NA",
+                    },
                 ),
             )
         case ModelProviderName.fireworks_ai:
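The added `additional_body_options` entry works around LiteLLM requiring an `api_key` even for providers that ignore it, such as Ollama. A minimal standalone sketch of the same idea (the model name and local URL are assumptions for illustration, not taken from this package):

    import litellm

    # Ollama ignores the key, but LiteLLM raises an auth error if none is supplied,
    # so a placeholder value like "NA" is passed through.
    response = litellm.completion(
        model="openai/llama3.1",               # assumed local model served via Ollama's OpenAI-compatible API
        api_base="http://localhost:11434/v1",  # default Ollama base URL plus /v1
        api_key="NA",                          # placeholder; Ollama does not check it
        messages=[{"role": "user", "content": "Say hello"}],
    )
    print(response.choices[0].message.content)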
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/g_eval.py

@@ -297,9 +297,12 @@ The model produced the following output for the task:
 
         total_score = 0.0
         total_probability = 0.0
+        top_logprobs_contains_primary_token = False
 
-        # Process all valid scoring tokens
+        # Process all valid scoring tokens from alternatives
         for top_logprob in token_logprob.top_logprobs:
+            if top_logprob.token == token_logprob.token:
+                top_logprobs_contains_primary_token = True
             token_score = self.score_from_token_string(top_logprob.token)
             if token_score is not None:
                 # Convert logprob to probability

@@ -307,9 +310,21 @@ The model produced the following output for the task:
                 total_score += token_score * probability
                 total_probability += probability
 
+        # Weird OpenAI 4o bug - sometimes the primary token is included in the top logprobs, sometimes not.
+        # Add the primary token back in if excluded
+        if not top_logprobs_contains_primary_token:
+            if token_logprob.logprob == -9999.0:
+                # Another "bug" - sometimes the logprob is -9999.0. This seems to happen when the rest of the logprobs are tiny probability.
+                total_score += primary_token_score * 1.0
+                total_probability += 1.0
+            else:
+                probability = math.exp(token_logprob.logprob)
+                total_score += primary_token_score * probability
+                total_probability += probability
+
         if total_probability <= 0.0:
             raise RuntimeError(
-                f"No valid scoring tokens found for {token_logprob.token}. This should never happen. Please file a bug if you see this."
+                f"No valid scoring tokens found for {token_logprob.token}. This should never happen as the token has a valid score (so it must be excluded from top logprobs). Please file a bug if you see this."
             )
 
         # Normalize by total probability of valid tokens (LLM may have wanted to generate other non-rating tokens, these shouldn't lower score of rating tokens)
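For context, the change above adjusts how a G-Eval-style rating is computed as a probability-weighted average over the model's candidate rating tokens. A self-contained sketch of that weighting (hypothetical names; not the kiln_ai implementation) looks like this:

    import math

    def weighted_rating(candidates: list[tuple[str, float]]) -> float | None:
        """candidates: (token, logprob) pairs from top_logprobs.
        Returns the probability-weighted average of numeric rating tokens,
        normalized so non-rating tokens don't drag the score down."""
        total_score = 0.0
        total_probability = 0.0
        for token, logprob in candidates:
            try:
                score = float(token.strip())
            except ValueError:
                continue  # skip non-rating tokens like ":" or whitespace
            probability = math.exp(logprob)
            total_score += score * probability
            total_probability += probability
        if total_probability <= 0.0:
            return None
        return total_score / total_probability

    # Example: "4" with p=0.6 and "5" with p=0.4 -> 4.4
    print(weighted_rating([("4", math.log(0.6)), ("5", math.log(0.4))]))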
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_g_eval.py

@@ -393,12 +393,13 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
             self.logprob = logprob
 
     class MockTokenLogprob:
-        def __init__(self, token, top_logprobs):
+        def __init__(self, token, top_logprobs, logprob):
             self.token = token
             self.top_logprobs = [MockTopLogprob(t, lp) for t, lp in top_logprobs]
+            self.logprob = logprob
 
     # Test single token case
-    token_logprob = MockTokenLogprob("5", [("5", 0.0)])  # log(1) = 0
+    token_logprob = MockTokenLogprob("5", [("5", 0.0)], logprob=1e-8)  # log(1) = 0
     score = g_eval.rating_token_to_score(token_logprob)
     assert score == 5.0
 

@@ -409,18 +410,22 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
             ("4", math.log(0.6)),  # 60% probability
             ("5", math.log(0.4)),  # 40% probability
         ],
+        logprob=math.log(0.6),
     )
     score = g_eval.rating_token_to_score(token_logprob)
     assert pytest.approx(score) == 4.4  # (4 * 0.6 + 5 * 0.4)
 
     # Test invalid token
-    token_logprob = MockTokenLogprob(":", [(":", 0.0)])
+    token_logprob = MockTokenLogprob(":", [(":", 0.0)], logprob=1e-8)
     assert g_eval.rating_token_to_score(token_logprob) is None
 
-    # Test
-    token_logprob = MockTokenLogprob("5", [])
-
-
+    # Test missing from top logprobs
+    token_logprob = MockTokenLogprob("5", [], logprob=1e-8)
+    assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
+
+    # Test missing from top logprobs, with special case logprob
+    token_logprob = MockTokenLogprob("5", [], logprob=-9999)
+    assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
 
 
 def test_g_eval_system_instruction():
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/fireworks_finetune.py

@@ -132,11 +132,18 @@ class FireworksFinetune(BaseFinetuneAdapter):
                 :60
             ]
         )
-        payload = {
+        payload: dict[str, str | dict[str, str | bool]] = {
             "dataset": f"accounts/{account_id}/datasets/{train_file_id}",
             "displayName": display_name,
             "baseModel": self.datamodel.base_model_id,
         }
+        # Add W&B config if API key is set
+        if Config.shared().wandb_api_key:
+            payload["wandbConfig"] = {
+                "enabled": True,
+                "project": "Kiln_AI",
+                "apiKey": Config.shared().wandb_api_key,
+            }
         hyperparameters = self.create_payload_parameters(self.datamodel.parameters)
         payload.update(hyperparameters)
         headers = {
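With a W&B key configured, the job submission body built above would look roughly like the example below; the account, dataset, display name, model, and key values are placeholders for illustration only:

    # Hypothetical example of the request body; all identifiers are placeholders.
    payload = {
        "dataset": "accounts/my-account/datasets/ds-123",
        "displayName": "Kiln AI fine-tuning [ID:123][name:example]",
        "baseModel": "llama-v2-7b",
        "wandbConfig": {
            "enabled": True,
            "project": "Kiln_AI",
            "apiKey": "wandb-key-placeholder",
        },
        # plus any hyperparameters merged in via payload.update(...)
    }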
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_fireworks_finetune.py

@@ -340,6 +340,7 @@ async def test_start_success(
     expected_mode,
     expected_format,
 ):
+    Config.shared().wandb_api_key = "test-api-key"
     mock_task.output_json_schema = output_schema
 
     fireworks_finetune.datamodel.parent = mock_task

@@ -378,6 +379,24 @@ async def test_start_success(
     assert fireworks_finetune.datamodel.structured_output_mode == expected_mode
     assert fireworks_finetune.datamodel.properties["endpoint_version"] == "v2"
 
+    # check mock_client.post call values
+    assert mock_client.post.call_count == 1
+    submit_call_values = mock_client.post.call_args[1]
+    assert submit_call_values["json"]["wandbConfig"] == {
+        "enabled": True,
+        "project": "Kiln_AI",
+        "apiKey": "test-api-key",
+    }
+    assert submit_call_values["json"]["baseModel"] == "llama-v2-7b"
+    assert (
+        submit_call_values["json"]["dataset"]
+        == f"accounts/{Config.shared().fireworks_account_id}/datasets/{mock_dataset_id}"
+    )
+    assert (
+        submit_call_values["json"]["displayName"]
+        == f"Kiln AI fine-tuning [ID:{fireworks_finetune.datamodel.id}][name:{fireworks_finetune.datamodel.name}]"
+    )
+
 
 async def test_start_api_error(
     fireworks_finetune, mock_dataset, mock_task, mock_api_key
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_together_finetune.py

@@ -356,6 +356,8 @@ async def test_start_success(
         model=together_finetune.datamodel.base_model_id,
         lora=True,
         suffix=f"kiln_ai_{together_finetune.datamodel.id}"[:40],
+        wandb_api_key=Config.shared().wandb_api_key,
+        wandb_project_name="Kiln_AI",
     )
 
     # Check that datamodel was updated correctly
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/together_finetune.py

@@ -130,6 +130,8 @@ class TogetherFinetune(BaseFinetuneAdapter):
             training_file=train_file_id,
             validation_file=validation_file_id,
             model=self.datamodel.base_model_id,
+            wandb_api_key=Config.shared().wandb_api_key,
+            wandb_project_name="Kiln_AI" if Config.shared().wandb_api_key else None,
             **self._build_finetune_parameters(),
         )
 
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/provider_tools.py

@@ -197,8 +197,8 @@ def lite_llm_config(
     if provider is None:
         raise ValueError(f"OpenAI compatible provider {openai_provider_name} not found")
 
-    # API key optional some providers don't use it
-    api_key = provider.get("api_key")
+    # API key optional - some providers like Ollama don't use it, but LiteLLM errors without one
+    api_key = provider.get("api_key") or "NA"
     base_url = provider.get("base_url")
     if base_url is None:
         raise ValueError(
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_provider_tools.py

@@ -550,14 +550,14 @@ def test_litellm_provider_model_success(mock_shared_config):
 
 
 def test_lite_llm_config_no_api_key(mock_shared_config):
-    """Test provider creation without API key (should work as some providers don't require it)"""
+    """Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
     model_id = "no_key_provider::gpt-4"
 
     config = lite_llm_config(model_id)
 
     assert config.provider_name == ModelProviderName.openai_compatible
     assert config.model_name == "gpt-4"
-    assert config.additional_body_options == {"api_key":
+    assert config.additional_body_options == {"api_key": "NA"}
     assert config.base_url == "https://api.nokey.com"
 
 
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/config.py

@@ -119,6 +119,15 @@ class Config:
                 env_var="TOGETHERAI_API_KEY",
                 sensitive=True,
             ),
+            "wandb_api_key": ConfigProperty(
+                str,
+                env_var="WANDB_API_KEY",
+                sensitive=True,
+            ),
+            "wandb_base_url": ConfigProperty(
+                str,
+                env_var="WANDB_BASE_URL",
+            ),
             "custom_models": ConfigProperty(
                 list,
                 default_lambda=lambda: [],
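As a usage note, these new properties should pick up credentials from the environment the same way the existing keys do. A small sketch, assuming the env-var fallback behavior of ConfigProperty works as it does for the other keys (not verified against this release):

    import os

    # Assumed usage: with no value saved in the Kiln config file,
    # the property falls back to the environment variable.
    os.environ["WANDB_API_KEY"] = "example-key"  # placeholder value

    from kiln_ai.utils.config import Config

    print(Config.shared().wandb_api_key)   # -> "example-key" (assumed fallback)
    print(Config.shared().wandb_base_url)  # -> unset unless WANDB_BASE_URL is defined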
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "kiln-ai"
-version = "0.13.0"
+version = "0.13.2"
 requires-python = ">=3.10"
 readme = "README.md"
 description = 'Kiln AI'

@@ -29,7 +29,7 @@ dependencies = [
     "pytest-benchmark>=5.1.0",
     "pytest-cov>=6.0.0",
     "pyyaml>=6.0.2",
-    "together
+    "together",
     "typing-extensions>=4.12.2",
 ]
 

@@ -50,9 +50,13 @@ build-backend = "hatchling.build"
 [tool.hatch.metadata]
 requires-python = ">=3.10"
 
+[tool.uv.sources]
+together = { git = "https://github.com/scosman/together-python" }
+
 
 [project.urls]
 Homepage = "https://getkiln.ai"
 Repository = "https://github.com/Kiln-AI/kiln"
 Documentation = "https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html"
 Issues = "https://github.com/Kiln-AI/kiln/issues"
+