kiln-ai: kiln_ai-0.19.0-py3-none-any.whl → kiln_ai-0.20.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (70):
  1. kiln_ai/adapters/__init__.py +2 -2
  2. kiln_ai/adapters/adapter_registry.py +19 -1
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  6. kiln_ai/adapters/eval/base_eval.py +2 -2
  7. kiln_ai/adapters/eval/eval_runner.py +3 -1
  8. kiln_ai/adapters/eval/g_eval.py +2 -2
  9. kiln_ai/adapters/eval/test_base_eval.py +1 -1
  10. kiln_ai/adapters/eval/test_g_eval.py +3 -4
  11. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  12. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  13. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  14. kiln_ai/adapters/ml_model_list.py +380 -34
  15. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  16. kiln_ai/adapters/model_adapters/litellm_adapter.py +383 -79
  17. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  18. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +406 -1
  19. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  20. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  21. kiln_ai/adapters/model_adapters/test_structured_output.py +110 -4
  22. kiln_ai/adapters/parsers/__init__.py +1 -1
  23. kiln_ai/adapters/provider_tools.py +15 -1
  24. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  25. kiln_ai/adapters/run_output.py +3 -0
  26. kiln_ai/adapters/test_adapter_registry.py +80 -1
  27. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  28. kiln_ai/adapters/test_ml_model_list.py +39 -1
  29. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  30. kiln_ai/adapters/test_provider_tools.py +55 -0
  31. kiln_ai/adapters/test_remote_config.py +98 -0
  32. kiln_ai/datamodel/__init__.py +23 -21
  33. kiln_ai/datamodel/datamodel_enums.py +1 -0
  34. kiln_ai/datamodel/eval.py +1 -1
  35. kiln_ai/datamodel/external_tool_server.py +298 -0
  36. kiln_ai/datamodel/json_schema.py +25 -10
  37. kiln_ai/datamodel/project.py +8 -1
  38. kiln_ai/datamodel/registry.py +0 -15
  39. kiln_ai/datamodel/run_config.py +62 -0
  40. kiln_ai/datamodel/task.py +2 -77
  41. kiln_ai/datamodel/task_output.py +6 -1
  42. kiln_ai/datamodel/task_run.py +41 -0
  43. kiln_ai/datamodel/test_basemodel.py +3 -3
  44. kiln_ai/datamodel/test_example_models.py +175 -0
  45. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  46. kiln_ai/datamodel/test_registry.py +8 -3
  47. kiln_ai/datamodel/test_task.py +15 -47
  48. kiln_ai/datamodel/test_tool_id.py +239 -0
  49. kiln_ai/datamodel/tool_id.py +83 -0
  50. kiln_ai/tools/__init__.py +8 -0
  51. kiln_ai/tools/base_tool.py +82 -0
  52. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  53. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  54. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  55. kiln_ai/tools/mcp_server_tool.py +95 -0
  56. kiln_ai/tools/mcp_session_manager.py +243 -0
  57. kiln_ai/tools/test_base_tools.py +199 -0
  58. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  59. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  60. kiln_ai/tools/test_tool_registry.py +473 -0
  61. kiln_ai/tools/tool_registry.py +64 -0
  62. kiln_ai/utils/config.py +22 -0
  63. kiln_ai/utils/open_ai_types.py +94 -0
  64. kiln_ai/utils/project_utils.py +17 -0
  65. kiln_ai/utils/test_config.py +138 -1
  66. kiln_ai/utils/test_open_ai_types.py +131 -0
  67. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +6 -5
  68. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/RECORD +70 -47
  69. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
  70. {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
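File: kiln_ai/datamodel/test_example_models.py (inferred — the only listed file with +175 -0, which matches the 175 lines this hunk adds)
@@ -797,3 +797,178 @@ def test_usage_model_in_task_run(valid_task_run):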
797
797
  assert task_run.usage.output_tokens == 50
798
798
  assert task_run.usage.total_tokens == 150
799
799
  assert task_run.usage.cost == 0.002
800
+
801
+
802
+ @pytest.mark.parametrize(
803
+ "usage1_data,usage2_data,expected_data",
804
+ [
805
+ # None + None = None
806
+ (
807
+ {
808
+ "input_tokens": None,
809
+ "output_tokens": None,
810
+ "total_tokens": None,
811
+ "cost": None,
812
+ },
813
+ {
814
+ "input_tokens": None,
815
+ "output_tokens": None,
816
+ "total_tokens": None,
817
+ "cost": None,
818
+ },
819
+ {
820
+ "input_tokens": None,
821
+ "output_tokens": None,
822
+ "total_tokens": None,
823
+ "cost": None,
824
+ },
825
+ ),
826
+ # None + value = value
827
+ (
828
+ {
829
+ "input_tokens": None,
830
+ "output_tokens": None,
831
+ "total_tokens": None,
832
+ "cost": None,
833
+ },
834
+ {
835
+ "input_tokens": 100,
836
+ "output_tokens": 50,
837
+ "total_tokens": 150,
838
+ "cost": 0.005,
839
+ },
840
+ {
841
+ "input_tokens": 100,
842
+ "output_tokens": 50,
843
+ "total_tokens": 150,
844
+ "cost": 0.005,
845
+ },
846
+ ),
847
+ # value + None = value
848
+ (
849
+ {
850
+ "input_tokens": 100,
851
+ "output_tokens": 50,
852
+ "total_tokens": 150,
853
+ "cost": 0.005,
854
+ },
855
+ {
856
+ "input_tokens": None,
857
+ "output_tokens": None,
858
+ "total_tokens": None,
859
+ "cost": None,
860
+ },
861
+ {
862
+ "input_tokens": 100,
863
+ "output_tokens": 50,
864
+ "total_tokens": 150,
865
+ "cost": 0.005,
866
+ },
867
+ ),
868
+ # value1 + value2 = value1 + value2
869
+ (
870
+ {
871
+ "input_tokens": 100,
872
+ "output_tokens": 50,
873
+ "total_tokens": 150,
874
+ "cost": 0.005,
875
+ },
876
+ {
877
+ "input_tokens": 200,
878
+ "output_tokens": 75,
879
+ "total_tokens": 275,
880
+ "cost": 0.010,
881
+ },
882
+ {
883
+ "input_tokens": 300,
884
+ "output_tokens": 125,
885
+ "total_tokens": 425,
886
+ "cost": 0.015,
887
+ },
888
+ ),
889
+ # Mixed scenarios
890
+ (
891
+ {
892
+ "input_tokens": 100,
893
+ "output_tokens": None,
894
+ "total_tokens": 150,
895
+ "cost": None,
896
+ },
897
+ {
898
+ "input_tokens": None,
899
+ "output_tokens": 75,
900
+ "total_tokens": None,
901
+ "cost": 0.010,
902
+ },
903
+ {
904
+ "input_tokens": 100,
905
+ "output_tokens": 75,
906
+ "total_tokens": 150,
907
+ "cost": 0.010,
908
+ },
909
+ ),
910
+ # Edge case: zeros
911
+ (
912
+ {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0, "cost": 0.0},
913
+ {
914
+ "input_tokens": 100,
915
+ "output_tokens": 50,
916
+ "total_tokens": 150,
917
+ "cost": 0.005,
918
+ },
919
+ {
920
+ "input_tokens": 100,
921
+ "output_tokens": 50,
922
+ "total_tokens": 150,
923
+ "cost": 0.005,
924
+ },
925
+ ),
926
+ ],
927
+ )
928
+ def test_usage_addition(usage1_data, usage2_data, expected_data):
929
+ """Test Usage addition with various combinations of None and numeric values."""
930
+ usage1 = Usage(**usage1_data)
931
+ usage2 = Usage(**usage2_data)
932
+ result = usage1 + usage2
933
+
934
+ assert result.input_tokens == expected_data["input_tokens"]
935
+ assert result.output_tokens == expected_data["output_tokens"]
936
+ assert result.total_tokens == expected_data["total_tokens"]
937
+ assert result.cost == expected_data["cost"]
938
+
939
+
940
+ def test_usage_addition_type_error():
941
+ """Test that adding Usage to non-Usage raises TypeError."""
942
+ usage = Usage(input_tokens=100, output_tokens=50, total_tokens=150, cost=0.005)
943
+
944
+ with pytest.raises(TypeError, match="Cannot add Usage with"):
945
+ usage + "not_a_usage" # type: ignore
946
+
947
+ with pytest.raises(TypeError, match="Cannot add Usage with"):
948
+ usage + 42 # type: ignore
949
+
950
+ with pytest.raises(TypeError, match="Cannot add Usage with"):
951
+ usage + {"input_tokens": 100} # type: ignore
952
+
953
+
954
+ def test_usage_addition_immutability():
955
+ """Test that addition creates new Usage objects and doesn't mutate originals."""
956
+ usage1 = Usage(input_tokens=100, output_tokens=50, total_tokens=150, cost=0.005)
957
+ usage2 = Usage(input_tokens=200, output_tokens=75, total_tokens=275, cost=0.010)
958
+
959
+ original_usage1_data = usage1.model_dump()
960
+ original_usage2_data = usage2.model_dump()
961
+
962
+ result = usage1 + usage2
963
+
964
+ # Original objects should be unchanged
965
+ assert usage1.model_dump() == original_usage1_data
966
+ assert usage2.model_dump() == original_usage2_data
967
+
968
+ # Result should be a new object
969
+ assert result is not usage1
970
+ assert result is not usage2
971
+ assert result.input_tokens == 300
972
+ assert result.output_tokens == 125
973
+ assert result.total_tokens == 425
974
+ assert result.cost == 0.015