kiln-ai 0.13.0__tar.gz → 0.14.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: see the package registry for details.

Files changed (138)
  1. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/PKG-INFO +2 -2
  2. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/adapter_registry.py +4 -0
  3. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/g_eval.py +17 -2
  4. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/test_g_eval.py +12 -7
  5. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -20
  6. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +169 -15
  7. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -9
  8. kiln_ai-0.14.0/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +1052 -0
  9. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -0
  10. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/together_finetune.py +2 -0
  11. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/ml_model_list.py +1 -6
  12. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/provider_tools.py +2 -2
  13. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/test_provider_tools.py +2 -2
  14. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/config.py +9 -0
  15. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/pyproject.toml +6 -2
  16. kiln_ai-0.13.0/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +0 -547
  17. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/.gitignore +0 -0
  18. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/.python-version +0 -0
  19. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/LICENSE.txt +0 -0
  20. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/README.md +0 -0
  21. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/index.html +0 -0
  22. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
  23. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
  24. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
  25. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
  26. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
  27. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
  28. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
  29. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
  30. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
  31. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
  32. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
  33. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
  34. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
  35. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
  36. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
  37. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
  38. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
  39. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
  40. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
  41. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
  42. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
  43. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
  44. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
  45. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
  46. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
  47. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
  48. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
  49. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/kiln_ai.html +0 -0
  50. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/docs/kiln_core_docs/search.js +0 -0
  51. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/__init__.py +0 -0
  52. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/__init__.py +0 -0
  53. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/data_gen/__init__.py +0 -0
  54. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
  55. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
  56. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
  57. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/__init__.py +0 -0
  58. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/base_eval.py +0 -0
  59. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/eval_runner.py +0 -0
  60. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/registry.py +0 -0
  61. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
  62. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
  63. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
  64. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
  65. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
  66. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
  67. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
  68. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
  69. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
  70. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
  71. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
  72. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/litellm_adapter.py +0 -0
  73. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
  74. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
  75. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +0 -0
  76. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +0 -0
  77. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/model_adapters/test_structured_output.py +0 -0
  78. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/ollama_tools.py +0 -0
  79. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/__init__.py +0 -0
  80. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/base_parser.py +0 -0
  81. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/json_parser.py +0 -0
  82. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
  83. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
  84. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
  85. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
  86. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
  87. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/prompt_builders.py +0 -0
  88. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/repair/__init__.py +0 -0
  89. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/repair/repair_task.py +0 -0
  90. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
  91. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/run_output.py +0 -0
  92. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/test_adapter_registry.py +0 -0
  93. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/test_generate_docs.py +0 -0
  94. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/test_ollama_tools.py +0 -0
  95. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
  96. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/adapters/test_prompt_builders.py +0 -0
  97. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/__init__.py +0 -0
  98. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/basemodel.py +0 -0
  99. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/datamodel_enums.py +0 -0
  100. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/dataset_filters.py +0 -0
  101. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/dataset_split.py +0 -0
  102. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/eval.py +0 -0
  103. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/finetune.py +0 -0
  104. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/json_schema.py +0 -0
  105. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/model_cache.py +0 -0
  106. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/project.py +0 -0
  107. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/prompt.py +0 -0
  108. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/prompt_id.py +0 -0
  109. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/registry.py +0 -0
  110. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/strict_mode.py +0 -0
  111. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/task.py +0 -0
  112. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/task_output.py +0 -0
  113. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/task_run.py +0 -0
  114. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_basemodel.py +0 -0
  115. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
  116. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_dataset_split.py +0 -0
  117. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_datasource.py +0 -0
  118. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_eval_model.py +0 -0
  119. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_example_models.py +0 -0
  120. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_json_schema.py +0 -0
  121. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_model_cache.py +0 -0
  122. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_model_perf.py +0 -0
  123. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_models.py +0 -0
  124. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_nested_save.py +0 -0
  125. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_output_rating.py +0 -0
  126. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_prompt_id.py +0 -0
  127. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_registry.py +0 -0
  128. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/datamodel/test_task.py +0 -0
  129. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/__init__.py +0 -0
  130. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/dataset_import.py +0 -0
  131. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/exhaustive_error.py +0 -0
  132. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/formatting.py +0 -0
  133. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/name_generator.py +0 -0
  134. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/test_config.py +0 -0
  135. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/test_dataset_import.py +0 -0
  136. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/kiln_ai/utils/test_name_geneator.py +0 -0
  137. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/setup.cfg +0 -0
  138. {kiln_ai-0.13.0 → kiln_ai-0.14.0}/uv.lock +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.13.0
+Version: 0.14.0
 Summary: Kiln AI
 Project-URL: Homepage, https://getkiln.ai
 Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -26,7 +26,7 @@ Requires-Dist: pydantic>=2.9.2
 Requires-Dist: pytest-benchmark>=5.1.0
 Requires-Dist: pytest-cov>=6.0.0
 Requires-Dist: pyyaml>=6.0.2
-Requires-Dist: together>=1.4.6
+Requires-Dist: together
 Requires-Dist: typing-extensions>=4.12.2
 Description-Content-Type: text/markdown
 
kiln_ai/adapters/adapter_registry.py

@@ -108,6 +108,10 @@ def adapter_for_task(
                     # 1. To use the correct base URL
                     # 2. We use Ollama's OpenAI compatible API (/v1), and don't just let litellm use the Ollama API. We use more advanced features like json_schema.
                     base_url=ollama_base_url + "/v1",
+                    additional_body_options={
+                        # LiteLLM errors without an api_key, even though Ollama doesn't support one.
+                        "api_key": "NA",
+                    },
                 ),
             )
         case ModelProviderName.fireworks_ai:
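The new additional_body_options entry works around LiteLLM refusing to call an OpenAI-compatible endpoint without an api_key, even though Ollama ignores the value. As a rough illustration of the same idea outside Kiln (a minimal sketch, assuming a local Ollama install; the model name is a placeholder, not something this package pins):

# Minimal sketch: call Ollama's OpenAI-compatible /v1 API through LiteLLM.
# LiteLLM expects an api_key even though Ollama never checks it,
# so a placeholder value such as "NA" is passed.
import litellm

response = litellm.completion(
    model="openai/llama3.1",  # "openai/" prefix = generic OpenAI-compatible route; model name is a placeholder
    api_base="http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
    api_key="NA",  # placeholder; Ollama does not validate it
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)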
kiln_ai/adapters/eval/g_eval.py

@@ -297,9 +297,12 @@ The model produced the following output for the task:
 
         total_score = 0.0
         total_probability = 0.0
+        top_logprobs_contains_primary_token = False
 
-        # Process all valid scoring tokens
+        # Process all valid scoring tokens from alternatives
         for top_logprob in token_logprob.top_logprobs:
+            if top_logprob.token == token_logprob.token:
+                top_logprobs_contains_primary_token = True
             token_score = self.score_from_token_string(top_logprob.token)
             if token_score is not None:
                 # Convert logprob to probability
@@ -307,9 +310,21 @@ The model produced the following output for the task:
                 total_score += token_score * probability
                 total_probability += probability
 
+        # Weird OpenAI 4o bug - sometimes the primary token is included in the top logprobs, sometimes not.
+        # Add the primary token back in if excluded
+        if not top_logprobs_contains_primary_token:
+            if token_logprob.logprob == -9999.0:
+                # Another "bug" - sometimes the logprob is -9999.0. This seems to happen when the rest of the logprobs are tiny probability.
+                total_score += primary_token_score * 1.0
+                total_probability += 1.0
+            else:
+                probability = math.exp(token_logprob.logprob)
+                total_score += primary_token_score * probability
+                total_probability += probability
+
         if total_probability <= 0.0:
             raise RuntimeError(
-                f"No valid scoring tokens found for {token_logprob.token}. This should never happen. Please file a bug if you see this."
+                f"No valid scoring tokens found for {token_logprob.token}. This should never happen as the token has a valid score (so it must be excluded from top logprobs). Please file a bug if you see this."
             )
 
         # Normalize by total probability of valid tokens (LLM may have wanted to generate other non-rating tokens, these shouldn't lower score of rating tokens)
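For reference, the arithmetic this hunk implements can be checked in isolation. A minimal sketch with toy values (score_from_token stands in for G-Eval's score_from_token_string, and the sampled token is assumed to be a valid rating token):

import math

# Alternatives reported for one sampled rating token: (token, logprob)
top_logprobs = [("4", math.log(0.6)), ("5", math.log(0.4))]
primary_token, primary_logprob = "4", math.log(0.6)

def score_from_token(token: str) -> float | None:
    # Only digit tokens count as rating scores in this sketch
    return float(token) if token.isdigit() else None

total_score = 0.0
total_probability = 0.0
saw_primary = False

for token, logprob in top_logprobs:
    if token == primary_token:
        saw_primary = True
    score = score_from_token(token)
    if score is not None:
        p = math.exp(logprob)  # convert logprob to probability
        total_score += score * p
        total_probability += p

# Mirror the hunk: if the sampled token was missing from top_logprobs, add it back,
# treating the special-case logprob of -9999.0 as probability 1.0.
if not saw_primary:
    p = 1.0 if primary_logprob == -9999.0 else math.exp(primary_logprob)
    total_score += score_from_token(primary_token) * p
    total_probability += p

print(total_score / total_probability)  # approximately 4.4, i.e. 4 * 0.6 + 5 * 0.4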
kiln_ai/adapters/eval/test_g_eval.py

@@ -393,12 +393,13 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
             self.logprob = logprob
 
     class MockTokenLogprob:
-        def __init__(self, token, top_logprobs):
+        def __init__(self, token, top_logprobs, logprob):
             self.token = token
             self.top_logprobs = [MockTopLogprob(t, lp) for t, lp in top_logprobs]
+            self.logprob = logprob
 
     # Test single token case
-    token_logprob = MockTokenLogprob("5", [("5", 0.0)])  # log(1) = 0
+    token_logprob = MockTokenLogprob("5", [("5", 0.0)], logprob=1e-8)  # log(1) = 0
    score = g_eval.rating_token_to_score(token_logprob)
    assert score == 5.0
 
@@ -409,18 +410,22 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
             ("4", math.log(0.6)),  # 60% probability
             ("5", math.log(0.4)),  # 40% probability
         ],
+        logprob=math.log(0.6),
     )
     score = g_eval.rating_token_to_score(token_logprob)
     assert pytest.approx(score) == 4.4  # (4 * 0.6 + 5 * 0.4)
 
     # Test invalid token
-    token_logprob = MockTokenLogprob(":", [(":", 0.0)])
+    token_logprob = MockTokenLogprob(":", [(":", 0.0)], logprob=1e-8)
     assert g_eval.rating_token_to_score(token_logprob) is None
 
-    # Test no valid scoring tokens
-    token_logprob = MockTokenLogprob("5", [])
-    with pytest.raises(RuntimeError, match="No valid scoring tokens found"):
-        g_eval.rating_token_to_score(token_logprob)
+    # Test missing from top logprobs
+    token_logprob = MockTokenLogprob("5", [], logprob=1e-8)
+    assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
+
+    # Test missing from top logprobs, with special case logprob
+    token_logprob = MockTokenLogprob("5", [], logprob=-9999)
+    assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
 
 
 def test_g_eval_system_instruction():
kiln_ai/adapters/fine_tune/base_finetune.py

@@ -72,8 +72,6 @@ class BaseFinetuneAdapter(ABC):
         Create and start a fine-tune.
         """
 
-        cls.check_valid_provider_model(provider_id, provider_base_model_id)
-
         if not dataset.id:
             raise ValueError("Dataset must have an id")
 
@@ -184,21 +182,3 @@ class BaseFinetuneAdapter(ABC):
         for parameter_key in parameters:
             if parameter_key not in allowed_parameters:
                 raise ValueError(f"Parameter {parameter_key} is not available")
-
-    @classmethod
-    def check_valid_provider_model(
-        cls, provider_id: str, provider_base_model_id: str
-    ) -> None:
-        """
-        Check if the provider and base model are valid.
-        """
-        for model in built_in_models:
-            for provider in model.providers:
-                if (
-                    provider.name == provider_id
-                    and provider.provider_finetune_id == provider_base_model_id
-                ):
-                    return
-        raise ValueError(
-            f"Provider {provider_id} with base model {provider_base_model_id} is not available"
-        )
kiln_ai/adapters/fine_tune/fireworks_finetune.py

@@ -1,4 +1,5 @@
-from typing import Tuple
+import logging
+from typing import List, Tuple
 from uuid import uuid4
 
 import httpx
@@ -13,6 +14,14 @@ from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetF
 from kiln_ai.datamodel import DatasetSplit, StructuredOutputMode, Task
 from kiln_ai.utils.config import Config
 
+logger = logging.getLogger(__name__)
+
+# https://docs.fireworks.ai/fine-tuning/fine-tuning-models#supported-base-models-loras-on-serverless
+serverless_models = [
+    "accounts/fireworks/models/llama-v3p1-8b-instruct",
+    "accounts/fireworks/models/llama-v3p1-70b-instruct",
+]
+
 
 class FireworksFinetune(BaseFinetuneAdapter):
     """
@@ -132,11 +141,18 @@ class FireworksFinetune(BaseFinetuneAdapter):
                 :60
             ]
         )
-        payload = {
+        payload: dict[str, str | dict[str, str | bool]] = {
             "dataset": f"accounts/{account_id}/datasets/{train_file_id}",
             "displayName": display_name,
             "baseModel": self.datamodel.base_model_id,
         }
+        # Add W&B config if API key is set
+        if Config.shared().wandb_api_key:
+            payload["wandbConfig"] = {
+                "enabled": True,
+                "project": "Kiln_AI",
+                "apiKey": Config.shared().wandb_api_key,
+            }
         hyperparameters = self.create_payload_parameters(self.datamodel.parameters)
         payload.update(hyperparameters)
         headers = {
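The W&B integration added above is just a conditional extra key on the fine-tune job payload. A standalone sketch of that pattern (the wandbConfig field names are copied from the hunk; build_payload and its arguments are hypothetical stand-ins, not Kiln's code):

# Sketch: attach Weights & Biases settings to a Fireworks fine-tune payload
# only when an API key is configured.
def build_payload(
    account_id: str,
    train_file_id: str,
    base_model: str,
    display_name: str,
    wandb_api_key: str | None,
) -> dict:
    payload: dict[str, str | dict[str, str | bool]] = {
        "dataset": f"accounts/{account_id}/datasets/{train_file_id}",
        "displayName": display_name,
        "baseModel": base_model,
    }
    if wandb_api_key:
        payload["wandbConfig"] = {
            "enabled": True,
            "project": "Kiln_AI",
            "apiKey": wandb_api_key,
        }
    return payload

# Without a W&B key the payload is unchanged:
print(build_payload("acct", "file-123", "accounts/fireworks/models/llama-v3p1-8b-instruct", "demo", None))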
@@ -276,32 +292,54 @@ class FireworksFinetune(BaseFinetuneAdapter):
         return {k: v for k, v in payload.items() if v is not None}
 
     async def _deploy(self) -> bool:
-        # Now we "deploy" the model using PEFT serverless.
-        # A bit complicated: most fireworks deploys are server based.
-        # However, a Lora can be serverless (PEFT).
-        # By calling the deploy endpoint WITHOUT first creating a deployment ID, it will only deploy if it can be done serverless.
-        # https://docs.fireworks.ai/models/deploying#deploying-to-serverless
-        # This endpoint will return 400 if already deployed with code 9, so we consider that a success.
+        if self.datamodel.base_model_id in serverless_models:
+            return await self._deploy_serverless()
+        else:
+            return await self._check_or_deploy_server()
 
+    def api_key_and_account_id(self) -> Tuple[str, str]:
         api_key = Config.shared().fireworks_api_key
         account_id = Config.shared().fireworks_account_id
         if not api_key or not account_id:
             raise ValueError("Fireworks API key or account ID not set")
+        return api_key, account_id
+
+    def deployment_display_name(self) -> str:
+        # Limit the display name to 60 characters
+        display_name = f"Kiln AI fine-tuned model [ID:{self.datamodel.id}][name:{self.datamodel.name}]"[
+            :60
+        ]
+        return display_name
 
+    async def model_id_checking_status(self) -> str | None:
         # Model ID != fine tune ID on Fireworks. Model is the result of the tune job. Call status to get it.
         status, model_id = await self._status()
         if status.status != FineTuneStatusType.completed:
-            return False
+            return None
         if not model_id or not isinstance(model_id, str):
-            return False
+            return None
+        return model_id
+
+    async def _deploy_serverless(self) -> bool:
+        # Now we "deploy" the model using PEFT serverless.
+        # A bit complicated: most fireworks deploys are server based.
+        # However, a Lora can be serverless (PEFT).
+        # By calling the deploy endpoint WITHOUT first creating a deployment ID, it will only deploy if it can be done serverless.
+        # https://docs.fireworks.ai/models/deploying#deploying-to-serverless
+        # This endpoint will return 400 if already deployed with code 9, so we consider that a success.
+
+        api_key, account_id = self.api_key_and_account_id()
 
         url = f"https://api.fireworks.ai/v1/accounts/{account_id}/deployedModels"
-        # Limit the display name to 60 characters
-        display_name = f"Kiln AI fine-tuned model [ID:{self.datamodel.id}][name:{self.datamodel.name}]"[
-            :60
-        ]
+        model_id = await self.model_id_checking_status()
+        if not model_id:
+            logger.error(
+                "Model ID not found - can't deploy model to Fireworks serverless"
+            )
+            return False
+
         payload = {
-            "displayName": display_name,
+            "displayName": self.deployment_display_name(),
             "model": model_id,
         }
         headers = {
@@ -320,4 +358,120 @@ class FireworksFinetune(BaseFinetuneAdapter):
                 self.datamodel.save_to_file()
             return True
 
+        logger.error(
+            f"Failed to deploy model to Fireworks serverless: [{response.status_code}] {response.text}"
+        )
         return False
+
+    async def _check_or_deploy_server(self) -> bool:
+        """
+        Check if the model is already deployed. If not, deploy it to a dedicated server.
+        """
+
+        # Check if the model is already deployed
+        # If it's fine_tune_model_id is set, it might be deployed. However, Fireworks deletes them over time so we need to check.
+        if self.datamodel.fine_tune_model_id:
+            deployments = await self._fetch_all_deployments()
+            for deployment in deployments:
+                if deployment[
+                    "baseModel"
+                ] == self.datamodel.fine_tune_model_id and deployment["state"] in [
+                    "READY",
+                    "CREATING",
+                ]:
+                    return True
+
+        # If the model is not deployed, deploy it
+        return await self._deploy_server()
+
+    async def _deploy_server(self) -> bool:
+        # For models that are not serverless, we just need to deploy the model to a server.
+        # We use a scale-to-zero on-demand deployment. If you stop using it, it
+        # will scale to zero and charges will stop.
+        model_id = await self.model_id_checking_status()
+        if not model_id:
+            logger.error("Model ID not found - can't deploy model to Fireworks server")
+            return False
+
+        api_key, account_id = self.api_key_and_account_id()
+        url = f"https://api.fireworks.ai/v1/accounts/{account_id}/deployments"
+
+        payload = {
+            "displayName": self.deployment_display_name(),
+            "description": "Deployed by Kiln AI",
+            # Allow scale to zero
+            "minReplicaCount": 0,
+            "autoscalingPolicy": {
+                "scaleUpWindow": "30s",
+                "scaleDownWindow": "300s",
+                # Scale to zero after 5 minutes of inactivity - this is the minimum allowed
+                "scaleToZeroWindow": "300s",
+            },
+            "baseModel": model_id,
+        }
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        }
+
+        async with httpx.AsyncClient() as client:
+            response = await client.post(url, json=payload, headers=headers)
+
+        if response.status_code == 200:
+            basemodel = response.json().get("baseModel")
+            if basemodel is not None and isinstance(basemodel, str):
+                self.datamodel.fine_tune_model_id = basemodel
+                if self.datamodel.path:
+                    self.datamodel.save_to_file()
+                return True
+
+        logger.error(
+            f"Failed to deploy model to Fireworks server: [{response.status_code}] {response.text}"
+        )
+        return False
+
+    async def _fetch_all_deployments(self) -> List[dict]:
+        """
+        Fetch all deployments for an account.
+        """
+        api_key, account_id = self.api_key_and_account_id()
+
+        url = f"https://api.fireworks.ai/v1/accounts/{account_id}/deployments"
+
+        params = {
+            # Note: filter param does not work for baseModel, which would have been ideal, and ideally would have been documented. Instead we'll fetch all and filter.
+            # Max page size
+            "pageSize": 200,
+        }
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+        }
+
+        deployments = []
+
+        # Paginate through all deployments
+        async with httpx.AsyncClient() as client:
+            while True:
+                response = await client.get(url, params=params, headers=headers)
+                json = response.json()
+                if "deployments" not in json or not isinstance(
+                    json["deployments"], list
+                ):
+                    raise ValueError(
+                        f"Invalid response from Fireworks. Expected list of deployments in 'deployments' key: [{response.status_code}] {response.text}"
+                    )
+                deployments.extend(json["deployments"])
+                next_page_token = json.get("nextPageToken")
+                if (
+                    next_page_token
+                    and isinstance(next_page_token, str)
+                    and len(next_page_token) > 0
+                ):
+                    params = {
+                        "pageSize": 200,
+                        "pageToken": next_page_token,
+                    }
+                else:
+                    break
+
+        return deployments
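The pagination loop in _fetch_all_deployments could also be expressed as an async generator, which streams results instead of holding every page in a list. A rough sketch under the same assumptions as the hunk (same endpoint, pageSize/pageToken parameters, and deployments/nextPageToken response keys; error handling omitted, and iter_deployments is an illustrative name, not Kiln's code):

# Sketch: nextPageToken-style pagination over Fireworks deployments as an async generator.
from typing import AsyncIterator

import httpx

async def iter_deployments(account_id: str, api_key: str) -> AsyncIterator[dict]:
    url = f"https://api.fireworks.ai/v1/accounts/{account_id}/deployments"
    headers = {"Authorization": f"Bearer {api_key}"}
    params: dict[str, str | int] = {"pageSize": 200}
    async with httpx.AsyncClient() as client:
        while True:
            data = (await client.get(url, params=params, headers=headers)).json()
            for deployment in data.get("deployments", []):
                yield deployment  # stream one page at a time
            token = data.get("nextPageToken")
            if not token:
                return
            params = {"pageSize": 200, "pageToken": token}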
kiln_ai/adapters/fine_tune/test_base_finetune.py

@@ -261,15 +261,6 @@ async def test_create_and_start_no_parent_task_path():
     )
 
 
-def test_check_valid_provider_model():
-    MockFinetune.check_valid_provider_model("openai", "gpt-4o-mini-2024-07-18")
-
-    with pytest.raises(
-        ValueError, match="Provider openai with base model gpt-99 is not available"
-    ):
-        MockFinetune.check_valid_provider_model("openai", "gpt-99")
-
-
 async def test_create_and_start_invalid_train_split(mock_dataset):
     # Test with an invalid train split name
     mock_dataset.split_contents = {"valid_train": [], "valid_test": []}