kiln-ai: kiln_ai-0.19.0-py3-none-any.whl → kiln_ai-0.20.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/__init__.py +2 -2
- kiln_ai/adapters/adapter_registry.py +19 -1
- kiln_ai/adapters/chat/chat_formatter.py +8 -12
- kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
- kiln_ai/adapters/docker_model_runner_tools.py +119 -0
- kiln_ai/adapters/eval/base_eval.py +2 -2
- kiln_ai/adapters/eval/eval_runner.py +3 -1
- kiln_ai/adapters/eval/g_eval.py +2 -2
- kiln_ai/adapters/eval/test_base_eval.py +1 -1
- kiln_ai/adapters/eval/test_g_eval.py +3 -4
- kiln_ai/adapters/fine_tune/__init__.py +1 -1
- kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
- kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
- kiln_ai/adapters/ml_model_list.py +380 -34
- kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
- kiln_ai/adapters/model_adapters/litellm_adapter.py +383 -79
- kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +406 -1
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
- kiln_ai/adapters/model_adapters/test_structured_output.py +110 -4
- kiln_ai/adapters/parsers/__init__.py +1 -1
- kiln_ai/adapters/provider_tools.py +15 -1
- kiln_ai/adapters/repair/test_repair_task.py +12 -9
- kiln_ai/adapters/run_output.py +3 -0
- kiln_ai/adapters/test_adapter_registry.py +80 -1
- kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
- kiln_ai/adapters/test_ml_model_list.py +39 -1
- kiln_ai/adapters/test_prompt_adaptors.py +13 -6
- kiln_ai/adapters/test_provider_tools.py +55 -0
- kiln_ai/adapters/test_remote_config.py +98 -0
- kiln_ai/datamodel/__init__.py +23 -21
- kiln_ai/datamodel/datamodel_enums.py +1 -0
- kiln_ai/datamodel/eval.py +1 -1
- kiln_ai/datamodel/external_tool_server.py +298 -0
- kiln_ai/datamodel/json_schema.py +25 -10
- kiln_ai/datamodel/project.py +8 -1
- kiln_ai/datamodel/registry.py +0 -15
- kiln_ai/datamodel/run_config.py +62 -0
- kiln_ai/datamodel/task.py +2 -77
- kiln_ai/datamodel/task_output.py +6 -1
- kiln_ai/datamodel/task_run.py +41 -0
- kiln_ai/datamodel/test_basemodel.py +3 -3
- kiln_ai/datamodel/test_example_models.py +175 -0
- kiln_ai/datamodel/test_external_tool_server.py +691 -0
- kiln_ai/datamodel/test_registry.py +8 -3
- kiln_ai/datamodel/test_task.py +15 -47
- kiln_ai/datamodel/test_tool_id.py +239 -0
- kiln_ai/datamodel/tool_id.py +83 -0
- kiln_ai/tools/__init__.py +8 -0
- kiln_ai/tools/base_tool.py +82 -0
- kiln_ai/tools/built_in_tools/__init__.py +13 -0
- kiln_ai/tools/built_in_tools/math_tools.py +124 -0
- kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
- kiln_ai/tools/mcp_server_tool.py +95 -0
- kiln_ai/tools/mcp_session_manager.py +243 -0
- kiln_ai/tools/test_base_tools.py +199 -0
- kiln_ai/tools/test_mcp_server_tool.py +457 -0
- kiln_ai/tools/test_mcp_session_manager.py +1585 -0
- kiln_ai/tools/test_tool_registry.py +473 -0
- kiln_ai/tools/tool_registry.py +64 -0
- kiln_ai/utils/config.py +22 -0
- kiln_ai/utils/open_ai_types.py +94 -0
- kiln_ai/utils/project_utils.py +17 -0
- kiln_ai/utils/test_config.py +138 -1
- kiln_ai/utils/test_open_ai_types.py +131 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/METADATA +6 -5
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/RECORD +70 -47
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/WHEEL +0 -0
- {kiln_ai-0.19.0.dist-info → kiln_ai-0.20.1.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -797,3 +797,178 @@ def test_usage_model_in_task_run(valid_task_run):
|
|
|
797
797
|
assert task_run.usage.output_tokens == 50
|
|
798
798
|
assert task_run.usage.total_tokens == 150
|
|
799
799
|
assert task_run.usage.cost == 0.002
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
@pytest.mark.parametrize(
|
|
803
|
+
"usage1_data,usage2_data,expected_data",
|
|
804
|
+
[
|
|
805
|
+
# None + None = None
|
|
806
|
+
(
|
|
807
|
+
{
|
|
808
|
+
"input_tokens": None,
|
|
809
|
+
"output_tokens": None,
|
|
810
|
+
"total_tokens": None,
|
|
811
|
+
"cost": None,
|
|
812
|
+
},
|
|
813
|
+
{
|
|
814
|
+
"input_tokens": None,
|
|
815
|
+
"output_tokens": None,
|
|
816
|
+
"total_tokens": None,
|
|
817
|
+
"cost": None,
|
|
818
|
+
},
|
|
819
|
+
{
|
|
820
|
+
"input_tokens": None,
|
|
821
|
+
"output_tokens": None,
|
|
822
|
+
"total_tokens": None,
|
|
823
|
+
"cost": None,
|
|
824
|
+
},
|
|
825
|
+
),
|
|
826
|
+
# None + value = value
|
|
827
|
+
(
|
|
828
|
+
{
|
|
829
|
+
"input_tokens": None,
|
|
830
|
+
"output_tokens": None,
|
|
831
|
+
"total_tokens": None,
|
|
832
|
+
"cost": None,
|
|
833
|
+
},
|
|
834
|
+
{
|
|
835
|
+
"input_tokens": 100,
|
|
836
|
+
"output_tokens": 50,
|
|
837
|
+
"total_tokens": 150,
|
|
838
|
+
"cost": 0.005,
|
|
839
|
+
},
|
|
840
|
+
{
|
|
841
|
+
"input_tokens": 100,
|
|
842
|
+
"output_tokens": 50,
|
|
843
|
+
"total_tokens": 150,
|
|
844
|
+
"cost": 0.005,
|
|
845
|
+
},
|
|
846
|
+
),
|
|
847
|
+
# value + None = value
|
|
848
|
+
(
|
|
849
|
+
{
|
|
850
|
+
"input_tokens": 100,
|
|
851
|
+
"output_tokens": 50,
|
|
852
|
+
"total_tokens": 150,
|
|
853
|
+
"cost": 0.005,
|
|
854
|
+
},
|
|
855
|
+
{
|
|
856
|
+
"input_tokens": None,
|
|
857
|
+
"output_tokens": None,
|
|
858
|
+
"total_tokens": None,
|
|
859
|
+
"cost": None,
|
|
860
|
+
},
|
|
861
|
+
{
|
|
862
|
+
"input_tokens": 100,
|
|
863
|
+
"output_tokens": 50,
|
|
864
|
+
"total_tokens": 150,
|
|
865
|
+
"cost": 0.005,
|
|
866
|
+
},
|
|
867
|
+
),
|
|
868
|
+
# value1 + value2 = value1 + value2
|
|
869
|
+
(
|
|
870
|
+
{
|
|
871
|
+
"input_tokens": 100,
|
|
872
|
+
"output_tokens": 50,
|
|
873
|
+
"total_tokens": 150,
|
|
874
|
+
"cost": 0.005,
|
|
875
|
+
},
|
|
876
|
+
{
|
|
877
|
+
"input_tokens": 200,
|
|
878
|
+
"output_tokens": 75,
|
|
879
|
+
"total_tokens": 275,
|
|
880
|
+
"cost": 0.010,
|
|
881
|
+
},
|
|
882
|
+
{
|
|
883
|
+
"input_tokens": 300,
|
|
884
|
+
"output_tokens": 125,
|
|
885
|
+
"total_tokens": 425,
|
|
886
|
+
"cost": 0.015,
|
|
887
|
+
},
|
|
888
|
+
),
|
|
889
|
+
# Mixed scenarios
|
|
890
|
+
(
|
|
891
|
+
{
|
|
892
|
+
"input_tokens": 100,
|
|
893
|
+
"output_tokens": None,
|
|
894
|
+
"total_tokens": 150,
|
|
895
|
+
"cost": None,
|
|
896
|
+
},
|
|
897
|
+
{
|
|
898
|
+
"input_tokens": None,
|
|
899
|
+
"output_tokens": 75,
|
|
900
|
+
"total_tokens": None,
|
|
901
|
+
"cost": 0.010,
|
|
902
|
+
},
|
|
903
|
+
{
|
|
904
|
+
"input_tokens": 100,
|
|
905
|
+
"output_tokens": 75,
|
|
906
|
+
"total_tokens": 150,
|
|
907
|
+
"cost": 0.010,
|
|
908
|
+
},
|
|
909
|
+
),
|
|
910
|
+
# Edge case: zeros
|
|
911
|
+
(
|
|
912
|
+
{"input_tokens": 0, "output_tokens": 0, "total_tokens": 0, "cost": 0.0},
|
|
913
|
+
{
|
|
914
|
+
"input_tokens": 100,
|
|
915
|
+
"output_tokens": 50,
|
|
916
|
+
"total_tokens": 150,
|
|
917
|
+
"cost": 0.005,
|
|
918
|
+
},
|
|
919
|
+
{
|
|
920
|
+
"input_tokens": 100,
|
|
921
|
+
"output_tokens": 50,
|
|
922
|
+
"total_tokens": 150,
|
|
923
|
+
"cost": 0.005,
|
|
924
|
+
},
|
|
925
|
+
),
|
|
926
|
+
],
|
|
927
|
+
)
|
|
928
|
+
def test_usage_addition(usage1_data, usage2_data, expected_data):
|
|
929
|
+
"""Test Usage addition with various combinations of None and numeric values."""
|
|
930
|
+
usage1 = Usage(**usage1_data)
|
|
931
|
+
usage2 = Usage(**usage2_data)
|
|
932
|
+
result = usage1 + usage2
|
|
933
|
+
|
|
934
|
+
assert result.input_tokens == expected_data["input_tokens"]
|
|
935
|
+
assert result.output_tokens == expected_data["output_tokens"]
|
|
936
|
+
assert result.total_tokens == expected_data["total_tokens"]
|
|
937
|
+
assert result.cost == expected_data["cost"]
|
|
938
|
+
|
|
939
|
+
|
|
940
|
+
def test_usage_addition_type_error():
|
|
941
|
+
"""Test that adding Usage to non-Usage raises TypeError."""
|
|
942
|
+
usage = Usage(input_tokens=100, output_tokens=50, total_tokens=150, cost=0.005)
|
|
943
|
+
|
|
944
|
+
with pytest.raises(TypeError, match="Cannot add Usage with"):
|
|
945
|
+
usage + "not_a_usage" # type: ignore
|
|
946
|
+
|
|
947
|
+
with pytest.raises(TypeError, match="Cannot add Usage with"):
|
|
948
|
+
usage + 42 # type: ignore
|
|
949
|
+
|
|
950
|
+
with pytest.raises(TypeError, match="Cannot add Usage with"):
|
|
951
|
+
usage + {"input_tokens": 100} # type: ignore
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def test_usage_addition_immutability():
|
|
955
|
+
"""Test that addition creates new Usage objects and doesn't mutate originals."""
|
|
956
|
+
usage1 = Usage(input_tokens=100, output_tokens=50, total_tokens=150, cost=0.005)
|
|
957
|
+
usage2 = Usage(input_tokens=200, output_tokens=75, total_tokens=275, cost=0.010)
|
|
958
|
+
|
|
959
|
+
original_usage1_data = usage1.model_dump()
|
|
960
|
+
original_usage2_data = usage2.model_dump()
|
|
961
|
+
|
|
962
|
+
result = usage1 + usage2
|
|
963
|
+
|
|
964
|
+
# Original objects should be unchanged
|
|
965
|
+
assert usage1.model_dump() == original_usage1_data
|
|
966
|
+
assert usage2.model_dump() == original_usage2_data
|
|
967
|
+
|
|
968
|
+
# Result should be a new object
|
|
969
|
+
assert result is not usage1
|
|
970
|
+
assert result is not usage2
|
|
971
|
+
assert result.input_tokens == 300
|
|
972
|
+
assert result.output_tokens == 125
|
|
973
|
+
assert result.total_tokens == 425
|
|
974
|
+
assert result.cost == 0.015
|