kiln-ai 0.13.0.tar.gz → 0.13.2.tar.gz

This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registry.

Potentially problematic release: this version of kiln-ai might be problematic.

Files changed (137)
  1. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/PKG-INFO +2 -2
  2. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/adapter_registry.py +4 -0
  3. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/g_eval.py +17 -2
  4. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_g_eval.py +12 -7
  5. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +8 -1
  6. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +19 -0
  7. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -0
  8. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/together_finetune.py +2 -0
  9. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/provider_tools.py +2 -2
  10. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_provider_tools.py +2 -2
  11. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/config.py +9 -0
  12. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/pyproject.toml +6 -2
  13. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/.gitignore +0 -0
  14. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/.python-version +0 -0
  15. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/LICENSE.txt +0 -0
  16. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/README.md +0 -0
  17. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/index.html +0 -0
  18. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
  19. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
  20. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
  21. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
  22. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
  23. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
  24. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
  25. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
  26. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
  27. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
  28. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
  29. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
  30. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
  31. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
  32. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
  33. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
  34. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
  35. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
  36. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
  37. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
  38. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
  39. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
  40. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
  41. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
  42. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
  43. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
  44. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
  45. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/kiln_ai.html +0 -0
  46. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/docs/kiln_core_docs/search.js +0 -0
  47. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/__init__.py +0 -0
  48. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/__init__.py +0 -0
  49. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/__init__.py +0 -0
  50. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
  51. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
  52. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
  53. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/__init__.py +0 -0
  54. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/base_eval.py +0 -0
  55. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/eval_runner.py +0 -0
  56. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/registry.py +0 -0
  57. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
  58. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
  59. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
  60. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
  61. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
  62. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
  63. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
  64. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
  65. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
  66. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
  67. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
  68. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/ml_model_list.py +0 -0
  69. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
  70. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
  71. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/litellm_adapter.py +0 -0
  72. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
  73. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
  74. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +0 -0
  75. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +0 -0
  76. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/model_adapters/test_structured_output.py +0 -0
  77. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/ollama_tools.py +0 -0
  78. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/__init__.py +0 -0
  79. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/base_parser.py +0 -0
  80. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/json_parser.py +0 -0
  81. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
  82. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
  83. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
  84. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
  85. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
  86. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/prompt_builders.py +0 -0
  87. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/repair/__init__.py +0 -0
  88. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/repair/repair_task.py +0 -0
  89. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
  90. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/run_output.py +0 -0
  91. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_adapter_registry.py +0 -0
  92. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_generate_docs.py +0 -0
  93. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_ollama_tools.py +0 -0
  94. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
  95. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_prompt_builders.py +0 -0
  96. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/__init__.py +0 -0
  97. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/basemodel.py +0 -0
  98. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/datamodel_enums.py +0 -0
  99. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/dataset_filters.py +0 -0
  100. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/dataset_split.py +0 -0
  101. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/eval.py +0 -0
  102. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/finetune.py +0 -0
  103. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/json_schema.py +0 -0
  104. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/model_cache.py +0 -0
  105. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/project.py +0 -0
  106. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/prompt.py +0 -0
  107. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/prompt_id.py +0 -0
  108. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/registry.py +0 -0
  109. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/strict_mode.py +0 -0
  110. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/task.py +0 -0
  111. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/task_output.py +0 -0
  112. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/task_run.py +0 -0
  113. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_basemodel.py +0 -0
  114. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
  115. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_dataset_split.py +0 -0
  116. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_datasource.py +0 -0
  117. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_eval_model.py +0 -0
  118. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_example_models.py +0 -0
  119. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_json_schema.py +0 -0
  120. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_model_cache.py +0 -0
  121. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_model_perf.py +0 -0
  122. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_models.py +0 -0
  123. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_nested_save.py +0 -0
  124. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_output_rating.py +0 -0
  125. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_prompt_id.py +0 -0
  126. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_registry.py +0 -0
  127. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/datamodel/test_task.py +0 -0
  128. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/__init__.py +0 -0
  129. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/dataset_import.py +0 -0
  130. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/exhaustive_error.py +0 -0
  131. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/formatting.py +0 -0
  132. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/name_generator.py +0 -0
  133. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/test_config.py +0 -0
  134. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/test_dataset_import.py +0 -0
  135. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/test_name_geneator.py +0 -0
  136. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/setup.cfg +0 -0
  137. {kiln_ai-0.13.0 → kiln_ai-0.13.2}/uv.lock +0 -0
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.13.0
+Version: 0.13.2
 Summary: Kiln AI
 Project-URL: Homepage, https://getkiln.ai
 Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -26,7 +26,7 @@ Requires-Dist: pydantic>=2.9.2
 Requires-Dist: pytest-benchmark>=5.1.0
 Requires-Dist: pytest-cov>=6.0.0
 Requires-Dist: pyyaml>=6.0.2
-Requires-Dist: together>=1.4.6
+Requires-Dist: together
 Requires-Dist: typing-extensions>=4.12.2
 Description-Content-Type: text/markdown
 
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/adapter_registry.py

@@ -108,6 +108,10 @@ def adapter_for_task(
                     # 1. To use the correct base URL
                     # 2. We use Ollama's OpenAI compatible API (/v1), and don't just let litellm use the Ollama API. We use more advanced features like json_schema.
                     base_url=ollama_base_url + "/v1",
+                    additional_body_options={
+                        # LiteLLM errors without an api_key, even though Ollama doesn't support one.
+                        "api_key": "NA",
+                    },
                 ),
             )
         case ModelProviderName.fireworks_ai:
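
Context for the Ollama hunk above: LiteLLM's OpenAI-compatible route rejects requests that carry no API key at all, while Ollama simply ignores whatever key it receives, so a placeholder value keeps both sides happy. A minimal standalone sketch of the same workaround (the model name and local URL are illustrative assumptions, not Kiln code):

```python
# Minimal sketch, assuming a local Ollama server. Not Kiln's adapter code;
# the model name and URL are placeholders.
import litellm

response = litellm.completion(
    model="openai/llama3.1",               # route via LiteLLM's OpenAI-compatible path
    base_url="http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
    api_key="NA",                          # placeholder: Ollama ignores it, LiteLLM requires one
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)
```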
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/g_eval.py

@@ -297,9 +297,12 @@ The model produced the following output for the task:
 
         total_score = 0.0
         total_probability = 0.0
+        top_logprobs_contains_primary_token = False
 
-        # Process all valid scoring tokens
+        # Process all valid scoring tokens from alternatives
         for top_logprob in token_logprob.top_logprobs:
+            if top_logprob.token == token_logprob.token:
+                top_logprobs_contains_primary_token = True
             token_score = self.score_from_token_string(top_logprob.token)
             if token_score is not None:
                 # Convert logprob to probability
@@ -307,9 +310,21 @@ The model produced the following output for the task:
                 total_score += token_score * probability
                 total_probability += probability
 
+        # Weird OpenAI 4o bug - sometimes the primary token is included in the top logprobs, sometimes not.
+        # Add the primary token back in if excluded
+        if not top_logprobs_contains_primary_token:
+            if token_logprob.logprob == -9999.0:
+                # Another "bug" - sometimes the logprob is -9999.0. This seems to happen when the rest of the logprobs are tiny probability.
+                total_score += primary_token_score * 1.0
+                total_probability += 1.0
+            else:
+                probability = math.exp(token_logprob.logprob)
+                total_score += primary_token_score * probability
+                total_probability += probability
+
         if total_probability <= 0.0:
             raise RuntimeError(
-                f"No valid scoring tokens found for {token_logprob.token}. This should never happen. Please file a bug if you see this."
+                f"No valid scoring tokens found for {token_logprob.token}. This should never happen as the token has a valid score (so it must be excluded from top logprobs). Please file a bug if you see this."
             )
 
         # Normalize by total probability of valid tokens (LLM may have wanted to generate other non-rating tokens, these shouldn't lower score of rating tokens)
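
To make the scoring change easier to follow outside the diff: the routine computes a probability-weighted average of the rating tokens in top_logprobs, then re-adds the sampled (primary) token when the provider omitted it, including the -9999.0 sentinel case. A condensed, self-contained sketch under those assumptions (score_from_token is an illustrative stand-in for Kiln's score_from_token_string; the input mirrors the OpenAI logprobs shape):

```python
import math
from dataclasses import dataclass

@dataclass
class TopLogprob:
    token: str
    logprob: float

@dataclass
class TokenLogprob:
    token: str
    logprob: float
    top_logprobs: list[TopLogprob]

def score_from_token(token: str) -> float | None:
    # Illustrative stand-in: map rating tokens like "4" or "4.5" to scores.
    try:
        return float(token)
    except ValueError:
        return None

def rating_token_to_score(tl: TokenLogprob) -> float | None:
    primary_score = score_from_token(tl.token)
    if primary_score is None:
        return None
    total_score, total_probability = 0.0, 0.0
    saw_primary = False
    # Weight each alternative's score by its probability (exp of its logprob).
    for top in tl.top_logprobs:
        if top.token == tl.token:
            saw_primary = True
        s = score_from_token(top.token)
        if s is not None:
            p = math.exp(top.logprob)
            total_score += s * p
            total_probability += p
    # Re-add the sampled token when the provider left it out of top_logprobs.
    if not saw_primary:
        # -9999.0 is the sentinel logprob noted in the diff; treat it as probability 1.0.
        p = 1.0 if tl.logprob == -9999.0 else math.exp(tl.logprob)
        total_score += primary_score * p
        total_probability += p
    # Normalize so non-rating alternatives don't dilute the rating.
    return total_score / total_probability

# Primary "4" at 60% alongside "5" at 40% averages to 4.4, matching the tests.
demo = TokenLogprob("4", math.log(0.6),
                    [TopLogprob("4", math.log(0.6)), TopLogprob("5", math.log(0.4))])
assert abs(rating_token_to_score(demo) - 4.4) < 1e-9
```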
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/eval/test_g_eval.py

@@ -393,12 +393,13 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
             self.logprob = logprob
 
     class MockTokenLogprob:
-        def __init__(self, token, top_logprobs):
+        def __init__(self, token, top_logprobs, logprob):
             self.token = token
             self.top_logprobs = [MockTopLogprob(t, lp) for t, lp in top_logprobs]
+            self.logprob = logprob
 
     # Test single token case
-    token_logprob = MockTokenLogprob("5", [("5", 0.0)])  # log(1) = 0
+    token_logprob = MockTokenLogprob("5", [("5", 0.0)], logprob=1e-8)  # log(1) = 0
     score = g_eval.rating_token_to_score(token_logprob)
     assert score == 5.0
 
@@ -409,18 +410,22 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
             ("4", math.log(0.6)),  # 60% probability
             ("5", math.log(0.4)),  # 40% probability
         ],
+        logprob=math.log(0.6),
     )
     score = g_eval.rating_token_to_score(token_logprob)
     assert pytest.approx(score) == 4.4  # (4 * 0.6 + 5 * 0.4)
 
     # Test invalid token
-    token_logprob = MockTokenLogprob(":", [(":", 0.0)])
+    token_logprob = MockTokenLogprob(":", [(":", 0.0)], logprob=1e-8)
     assert g_eval.rating_token_to_score(token_logprob) is None
 
-    # Test no valid scoring tokens
-    token_logprob = MockTokenLogprob("5", [])
-    with pytest.raises(RuntimeError, match="No valid scoring tokens found"):
-        g_eval.rating_token_to_score(token_logprob)
+    # Test missing from top logprobs
+    token_logprob = MockTokenLogprob("5", [], logprob=1e-8)
+    assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
+
+    # Test missing from top logprobs, with special case logprob
+    token_logprob = MockTokenLogprob("5", [], logprob=-9999)
+    assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
 
 
 def test_g_eval_system_instruction():
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/fireworks_finetune.py

@@ -132,11 +132,18 @@ class FireworksFinetune(BaseFinetuneAdapter):
                 :60
             ]
         )
-        payload = {
+        payload: dict[str, str | dict[str, str | bool]] = {
             "dataset": f"accounts/{account_id}/datasets/{train_file_id}",
             "displayName": display_name,
             "baseModel": self.datamodel.base_model_id,
         }
+        # Add W&B config if API key is set
+        if Config.shared().wandb_api_key:
+            payload["wandbConfig"] = {
+                "enabled": True,
+                "project": "Kiln_AI",
+                "apiKey": Config.shared().wandb_api_key,
+            }
         hyperparameters = self.create_payload_parameters(self.datamodel.parameters)
         payload.update(hyperparameters)
         headers = {
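
The payload change reads cleanly in isolation: the W&B block is attached only when an API key is configured, and (per the updated tests below) the whole payload is then sent as the JSON body of the POST that creates the fine-tuning job. A minimal sketch of the assembly, with placeholder values:

```python
# Sketch of the conditional payload assembly. All values are placeholders;
# wandb_api_key stands in for Config.shared().wandb_api_key.
wandb_api_key: str | None = "wb-example-key"  # None when unset

payload: dict[str, str | dict[str, str | bool]] = {
    "dataset": "accounts/example-account/datasets/example-dataset",
    "displayName": "Kiln AI fine-tuning [ID:123][name:example]",
    "baseModel": "llama-v2-7b",
}
if wandb_api_key:
    # Fireworks forwards these settings to Weights & Biases for run tracking.
    payload["wandbConfig"] = {
        "enabled": True,
        "project": "Kiln_AI",
        "apiKey": wandb_api_key,
    }
```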
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_fireworks_finetune.py

@@ -340,6 +340,7 @@ async def test_start_success(
     expected_mode,
     expected_format,
 ):
+    Config.shared().wandb_api_key = "test-api-key"
     mock_task.output_json_schema = output_schema
 
     fireworks_finetune.datamodel.parent = mock_task
@@ -378,6 +379,24 @@ async def test_start_success(
     assert fireworks_finetune.datamodel.structured_output_mode == expected_mode
     assert fireworks_finetune.datamodel.properties["endpoint_version"] == "v2"
 
+    # Check mock_client.post call values
+    assert mock_client.post.call_count == 1
+    submit_call_values = mock_client.post.call_args[1]
+    assert submit_call_values["json"]["wandbConfig"] == {
+        "enabled": True,
+        "project": "Kiln_AI",
+        "apiKey": "test-api-key",
+    }
+    assert submit_call_values["json"]["baseModel"] == "llama-v2-7b"
+    assert (
+        submit_call_values["json"]["dataset"]
+        == f"accounts/{Config.shared().fireworks_account_id}/datasets/{mock_dataset_id}"
+    )
+    assert (
+        submit_call_values["json"]["displayName"]
+        == f"Kiln AI fine-tuning [ID:{fireworks_finetune.datamodel.id}][name:{fireworks_finetune.datamodel.name}]"
+    )
+
 
 async def test_start_api_error(
     fireworks_finetune, mock_dataset, mock_task, mock_api_key
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/test_together_finetune.py

@@ -356,6 +356,8 @@ async def test_start_success(
         model=together_finetune.datamodel.base_model_id,
         lora=True,
         suffix=f"kiln_ai_{together_finetune.datamodel.id}"[:40],
+        wandb_api_key=Config.shared().wandb_api_key,
+        wandb_project_name="Kiln_AI",
     )
 
     # Check that datamodel was updated correctly
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/fine_tune/together_finetune.py

@@ -130,6 +130,8 @@ class TogetherFinetune(BaseFinetuneAdapter):
             training_file=train_file_id,
             validation_file=validation_file_id,
             model=self.datamodel.base_model_id,
+            wandb_api_key=Config.shared().wandb_api_key,
+            wandb_project_name="Kiln_AI" if Config.shared().wandb_api_key else None,
             **self._build_finetune_parameters(),
         )
 
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/provider_tools.py

@@ -197,8 +197,8 @@ def lite_llm_config(
     if provider is None:
         raise ValueError(f"OpenAI compatible provider {openai_provider_name} not found")
 
-    # API key optional some providers don't use it
-    api_key = provider.get("api_key")
+    # API key optional - some providers like Ollama don't use it, but LiteLLM errors without one
+    api_key = provider.get("api_key") or "NA"
     base_url = provider.get("base_url")
     if base_url is None:
         raise ValueError(
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/adapters/test_provider_tools.py

@@ -550,14 +550,14 @@ def test_litellm_provider_model_success(mock_shared_config):
 
 
 def test_lite_llm_config_no_api_key(mock_shared_config):
-    """Test provider creation without API key (should work as some providers don't require it)"""
+    """Test provider creation without API key (should work as some providers don't require it, but should pass NA to LiteLLM as it requires one)"""
     model_id = "no_key_provider::gpt-4"
 
     config = lite_llm_config(model_id)
 
     assert config.provider_name == ModelProviderName.openai_compatible
     assert config.model_name == "gpt-4"
-    assert config.additional_body_options == {"api_key": None}
+    assert config.additional_body_options == {"api_key": "NA"}
     assert config.base_url == "https://api.nokey.com"
 
 
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/kiln_ai/utils/config.py

@@ -119,6 +119,15 @@ class Config:
             env_var="TOGETHERAI_API_KEY",
             sensitive=True,
         ),
+        "wandb_api_key": ConfigProperty(
+            str,
+            env_var="WANDB_API_KEY",
+            sensitive=True,
+        ),
+        "wandb_base_url": ConfigProperty(
+            str,
+            env_var="WANDB_BASE_URL",
+        ),
         "custom_models": ConfigProperty(
             list,
             default_lambda=lambda: [],
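
The two new properties follow the existing ConfigProperty pattern: a stored value with an environment-variable fallback, with the API key flagged sensitive. Assuming the fallback resolves like the neighboring properties, the lookup reduces to roughly this (a hedged sketch, not Kiln's Config implementation):

```python
import os

# Hedged sketch of the assumed lookup order: a value saved in Kiln's config
# file wins, otherwise the environment variable is used. Not Kiln's actual code.
def resolve_wandb_api_key(saved_value: str | None) -> str | None:
    return saved_value or os.environ.get("WANDB_API_KEY")

def resolve_wandb_base_url(saved_value: str | None) -> str | None:
    return saved_value or os.environ.get("WANDB_BASE_URL")
```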
{kiln_ai-0.13.0 → kiln_ai-0.13.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "kiln-ai"
-version = "0.13.0"
+version = "0.13.2"
 requires-python = ">=3.10"
 readme = "README.md"
 description = 'Kiln AI'
@@ -29,7 +29,7 @@ dependencies = [
     "pytest-benchmark>=5.1.0",
     "pytest-cov>=6.0.0",
     "pyyaml>=6.0.2",
-    "together>=1.4.6",
+    "together",
     "typing-extensions>=4.12.2",
 ]
 
@@ -50,9 +50,13 @@ build-backend = "hatchling.build"
 [tool.hatch.metadata]
 requires-python = ">=3.10"
 
+[tool.uv.sources]
+together = { git = "https://github.com/scosman/together-python" }
+
 
 [project.urls]
 Homepage = "https://getkiln.ai"
 Repository = "https://github.com/Kiln-AI/kiln"
 Documentation = "https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html"
 Issues = "https://github.com/Kiln-AI/kiln/issues"
+