PyPI - ibm-watsonx-orchestrate-evaluation-framework - Versions diffs - 1.1.3__py3-none-any.whl → 1.1.8b0__py3-none-any.whl - Mend

ibm-watsonx-orchestrate-evaluation-framework 1.1.3py3-none-any.whl → 1.1.8b0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

{ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/METADATA +19 -1
ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/RECORD +146 -0
wxo_agentic_evaluation/analytics/tools/analyzer.py +4 -2
wxo_agentic_evaluation/analyze_run.py +1025 -220
wxo_agentic_evaluation/annotate.py +2 -2
wxo_agentic_evaluation/arg_configs.py +60 -2
wxo_agentic_evaluation/base_user.py +25 -0
wxo_agentic_evaluation/batch_annotate.py +19 -2
wxo_agentic_evaluation/clients.py +103 -0
wxo_agentic_evaluation/compare_runs/__init__.py +0 -0
wxo_agentic_evaluation/compare_runs/compare_2_runs.py +74 -0
wxo_agentic_evaluation/compare_runs/diff.py +554 -0
wxo_agentic_evaluation/compare_runs/model.py +193 -0
wxo_agentic_evaluation/data_annotator.py +25 -7
wxo_agentic_evaluation/description_quality_checker.py +29 -6
wxo_agentic_evaluation/evaluation.py +16 -8
wxo_agentic_evaluation/evaluation_controller/evaluation_controller.py +303 -0
wxo_agentic_evaluation/evaluation_package.py +414 -69
wxo_agentic_evaluation/external_agent/__init__.py +1 -1
wxo_agentic_evaluation/external_agent/external_validate.py +7 -5
wxo_agentic_evaluation/external_agent/types.py +3 -9
wxo_agentic_evaluation/extractors/__init__.py +3 -0
wxo_agentic_evaluation/extractors/extractor_base.py +21 -0
wxo_agentic_evaluation/extractors/labeled_messages.py +47 -0
wxo_agentic_evaluation/hr_agent_langgraph.py +68 -0
wxo_agentic_evaluation/langfuse_collection.py +60 -0
wxo_agentic_evaluation/langfuse_evaluation_package.py +192 -0
wxo_agentic_evaluation/llm_matching.py +104 -2
wxo_agentic_evaluation/llm_safety_eval.py +64 -0
wxo_agentic_evaluation/llm_user.py +5 -4
wxo_agentic_evaluation/llm_user_v2.py +114 -0
wxo_agentic_evaluation/main.py +112 -343
wxo_agentic_evaluation/metrics/__init__.py +15 -0
wxo_agentic_evaluation/metrics/dummy_metric.py +16 -0
wxo_agentic_evaluation/metrics/evaluations.py +107 -0
wxo_agentic_evaluation/metrics/journey_success.py +137 -0
wxo_agentic_evaluation/metrics/llm_as_judge.py +26 -0
wxo_agentic_evaluation/metrics/metrics.py +276 -8
wxo_agentic_evaluation/metrics/tool_calling.py +93 -0
wxo_agentic_evaluation/otel_parser/__init__.py +1 -0
wxo_agentic_evaluation/otel_parser/langflow_parser.py +86 -0
wxo_agentic_evaluation/otel_parser/langgraph_parser.py +61 -0
wxo_agentic_evaluation/otel_parser/parser.py +163 -0
wxo_agentic_evaluation/otel_parser/parser_types.py +38 -0
wxo_agentic_evaluation/otel_parser/pydantic_parser.py +50 -0
wxo_agentic_evaluation/otel_parser/utils.py +15 -0
wxo_agentic_evaluation/otel_parser/wxo_parser.py +39 -0
wxo_agentic_evaluation/otel_support/evaluate_tau.py +44 -10
wxo_agentic_evaluation/otel_support/otel_message_conversion.py +12 -4
wxo_agentic_evaluation/otel_support/tasks_test.py +456 -116
wxo_agentic_evaluation/prompt/derailment_prompt.jinja2 +55 -0
wxo_agentic_evaluation/prompt/llama_user_prompt.jinja2 +50 -4
wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2 +15 -0
wxo_agentic_evaluation/prompt/off_policy_attack_generation_prompt.jinja2 +1 -1
wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2 +41 -9
wxo_agentic_evaluation/prompt/template_render.py +103 -4
wxo_agentic_evaluation/prompt/unsafe_topic_prompt.jinja2 +65 -0
wxo_agentic_evaluation/quick_eval.py +33 -17
wxo_agentic_evaluation/record_chat.py +38 -32
wxo_agentic_evaluation/red_teaming/attack_evaluator.py +211 -62
wxo_agentic_evaluation/red_teaming/attack_generator.py +63 -40
wxo_agentic_evaluation/red_teaming/attack_list.py +95 -7
wxo_agentic_evaluation/red_teaming/attack_runner.py +77 -17
wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json +783 -0
wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json +600 -0
wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +10 -10
wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +105 -39
wxo_agentic_evaluation/resource_map.py +3 -1
wxo_agentic_evaluation/runner.py +329 -0
wxo_agentic_evaluation/runtime_adapter/a2a_runtime_adapter.py +0 -0
wxo_agentic_evaluation/runtime_adapter/runtime_adapter.py +14 -0
wxo_agentic_evaluation/{inference_backend.py → runtime_adapter/wxo_runtime_adapter.py} +24 -293
wxo_agentic_evaluation/scheduler.py +247 -0
wxo_agentic_evaluation/service_instance.py +26 -17
wxo_agentic_evaluation/service_provider/__init__.py +145 -9
wxo_agentic_evaluation/service_provider/gateway_provider.py +707 -0
wxo_agentic_evaluation/service_provider/model_proxy_provider.py +417 -17
wxo_agentic_evaluation/service_provider/ollama_provider.py +393 -22
wxo_agentic_evaluation/service_provider/portkey_provider.py +229 -0
wxo_agentic_evaluation/service_provider/provider.py +130 -10
wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +52 -0
wxo_agentic_evaluation/service_provider/watsonx_provider.py +481 -53
wxo_agentic_evaluation/simluation_runner.py +125 -0
wxo_agentic_evaluation/test_prompt.py +4 -4
wxo_agentic_evaluation/type.py +185 -16
wxo_agentic_evaluation/user_simulator/demo_usage_llm_user.py +100 -0
wxo_agentic_evaluation/utils/__init__.py +44 -3
wxo_agentic_evaluation/utils/evaluation_discovery.py +47 -0
wxo_agentic_evaluation/utils/gateway_provider_utils.py +39 -0
wxo_agentic_evaluation/utils/messages_parser.py +30 -0
wxo_agentic_evaluation/utils/parsers.py +71 -0
wxo_agentic_evaluation/utils/utils.py +313 -9
wxo_agentic_evaluation/wxo_client.py +81 -0
ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info/RECORD +0 -102
wxo_agentic_evaluation/otel_support/evaluate_tau_traces.py +0 -176
{ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/WHEEL +0 -0
{ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/top_level.txt +0 -0

{ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ibm-watsonx-orchestrate-evaluation-framework
-Version: 1.1.3
+Version: 1.1.8b0
 Summary: The WxO evaluation framework
 Author-email: Haode Qi <Haode.Qi@ibm.com>
 License: MIT
@@ -14,6 +14,22 @@ Requires-Dist: dataclasses-json~=0.6.7
 Requires-Dist: jsonargparse~=4.37.0
 Requires-Dist: jsonschema~=4.23.0
 Requires-Dist: requests~=2.32.5
+Requires-Dist: fuzzywuzzy~=0.18.0
+Requires-Dist: python-dateutil~=2.9.0
+Requires-Dist: langchain==1.0.3
+Requires-Dist: langchain-core==1.0.3
+Requires-Dist: langchain-openai==1.0.2
+Requires-Dist: openlit
+Requires-Dist: openinference-instrumentation>=0.1.42
+Requires-Dist: openinference-instrumentation-langchain>=0.1.54
+Requires-Dist: openinference-instrumentation-litellm>=0.1.28
+Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.9
+Requires-Dist: openinference-semantic-conventions>=0.1.25
+Requires-Dist: arize-phoenix-otel>=0.13.1
+Requires-Dist: langfuse>=3.9.0
+Requires-Dist: portkey-ai~=2.0.2
+Requires-Dist: openinference-instrumentation-langchain==0.1.54
+Requires-Dist: litellm>=1.79.3
 Provides-Extra: dev
 Requires-Dist: setuptools~=70.3.0; extra == "dev"
 Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
@@ -24,6 +40,8 @@ Requires-Dist: coverage[toml]>=6.5; extra == "dev"
 Requires-Dist: black~=24.8.0; extra == "dev"
 Requires-Dist: pylint~=3.3.8; extra == "dev"
 Requires-Dist: isort~=5.13.2; extra == "dev"
+Requires-Dist: coverage; extra == "dev"
+Requires-Dist: commitizen>=4.9.1; extra == "dev"
 Provides-Extra: rag-eval
 Requires-Dist: tqdm~=4.67.1; extra == "rag-eval"
 Requires-Dist: sentence-transformers~=3.3.1; extra == "rag-eval"

ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,146 @@
+wxo_agentic_evaluation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/analyze_run.py,sha256=_3PHCIz_7wihGx7AQLnyjJxVaknLiWO_DrAQL14vgq0,45483
+wxo_agentic_evaluation/annotate.py,sha256=l6a8hYETN3oaw4-OfpNA_k9S_XX5DqZzVcNXzpT0y28,1238
+wxo_agentic_evaluation/arg_configs.py,sha256=EYJiiPrk-oXh6LDk_h0DOwfPpIqUQicFHRZyxZDDFzk,4677
+wxo_agentic_evaluation/base_user.py,sha256=RFsn17Z51O41_YQyEymYPdiyJPPTQmATzUBowfuFVt8,753
+wxo_agentic_evaluation/batch_annotate.py,sha256=ieXLWZMJQqFvj7Xe-MUEKflLHDPmF7A5J6PyFK4ZHW4,7485
+wxo_agentic_evaluation/clients.py,sha256=CMdN8eKhcjk--rrwuGoeupp_Ttw9IBMfEq5AMYm3nVw,3329
+wxo_agentic_evaluation/data_annotator.py,sha256=pGM5M5KlgESma5W1IhKB5wamJAr9S5aPW7-qmwMoU4s,8897
+wxo_agentic_evaluation/description_quality_checker.py,sha256=ppyLmgM75sJ9r8FY0YWZYRIDnq7bM-fDa5hmiUhEzJg,6796
+wxo_agentic_evaluation/evaluation.py,sha256=g_EpTN7UkVDiLyEAS41XPQvL3D60hL6gKegtBR5JmF4,1123
+wxo_agentic_evaluation/evaluation_package.py,sha256=KNoRNN1Igi6OboEQ-0ThMK2IFA9gb_zF4Y3jUGegqQc,37607
+wxo_agentic_evaluation/hr_agent_langgraph.py,sha256=LNmPDu5vI53JimtIR5uJK9xDPQOKwf6riVZcIOq-rjg,2215
+wxo_agentic_evaluation/langfuse_collection.py,sha256=8crzrgI8kVAp6g3_O1Imr_KO-3yWjiSy72X8WwSvxBk,1910
+wxo_agentic_evaluation/langfuse_evaluation_package.py,sha256=-fam1DDvO6xsOW5h1BNcUE-Layu8QiTdrNlEYzz-q2I,6523
+wxo_agentic_evaluation/llm_matching.py,sha256=Oa3NezPcif6At3OHAlzwsdC3JOebXPHiWaufgyrpA4g,5189
+wxo_agentic_evaluation/llm_rag_eval.py,sha256=cMUutCpmR1Zg5MM7KbHjHkF42FRBBACFNc-MzwxAw9M,1655
+wxo_agentic_evaluation/llm_safety_eval.py,sha256=pNuq4xLxkImyksGmsQire_nIQWOEoGqCc-Z3ZCSrcTQ,2268
+wxo_agentic_evaluation/llm_user.py,sha256=f69Nau5FnpRoEk6W2javhHwahBu9LmM2PNPtj9g2aow,1615
+wxo_agentic_evaluation/llm_user_v2.py,sha256=39HgjqpKvvI3miLaI2pLOC8HKnsUx-6MDuxOwErkADk,4067
+wxo_agentic_evaluation/main.py,sha256=LytAGw_scOgGB42DiU2MfmSOItOKPwA45tPoDpJQKl4,5465
+wxo_agentic_evaluation/quick_eval.py,sha256=fAm3JVERaS3t4sgWlLK2GkCBVM7NQTSjMWPSF8JBAkM,13589
+wxo_agentic_evaluation/record_chat.py,sha256=o1pHZzOeM2YbKgfSi1ex1hL9tAAHqGo46usOcJwzuTc,8959
+wxo_agentic_evaluation/resource_map.py,sha256=hFk3OqOwbFolhwFPbdW-7hoB1WnU-_orX7UuXR_IIks,1726
+wxo_agentic_evaluation/runner.py,sha256=yWmczz5m8yAKfjivbHjtDB1HFL8Qrbh0rigGHwmG2To,10092
+wxo_agentic_evaluation/scheduler.py,sha256=iH1ByTBVQKsvYNYmDB8tjuEThALor-QpRisRmGSNjxI,7809
+wxo_agentic_evaluation/service_instance.py,sha256=LrXIX5e0PZkOGwUzMpbUz2VHTO3TtQaUEsMbQUECUi4,8878
+wxo_agentic_evaluation/simluation_runner.py,sha256=i5ozPDInik6wALGu9gVUTfQFjjYLWU3LepjqYT6yubQ,4773
+wxo_agentic_evaluation/test_prompt.py,sha256=Mf0FgpwB_s17dIr39s74ANKdH3WITxHRlkKgm_RDzAY,3924
+wxo_agentic_evaluation/tool_planner.py,sha256=RohospVdfYURyFVETgjm1EukmgpNBvBJopUs6obdhn0,14111
+wxo_agentic_evaluation/type.py,sha256=eN8qxl0sNGkM3GyY8VNGrPknlRKbXSiEvc3B8yMWL0o,8551
+wxo_agentic_evaluation/wxo_client.py,sha256=V4zdmGLtZb4pP5rq82ZQnyu3Slkm2EXhD1O2mGd57BI,2491
+wxo_agentic_evaluation/analytics/tools/analyzer.py,sha256=7NFPx2AGFZ0PR7hNejbIJw-YOLOwcJ3cdt8ifbyOLFw,18374
+wxo_agentic_evaluation/analytics/tools/main.py,sha256=tkDirlsRvLWQCSXcX6BlQFg24HY31K400M1a-O5xKfU,6250
+wxo_agentic_evaluation/analytics/tools/types.py,sha256=FxEHvL2i_4qMIhZBGbhMNd6Ics3nLbl5_vyLYJF5Qp0,4568
+wxo_agentic_evaluation/analytics/tools/ux.py,sha256=VwDc_d74HoI2sYMURciRzJzrUBiPzmCFx57C4JhGqGM,18974
+wxo_agentic_evaluation/compare_runs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/compare_runs/compare_2_runs.py,sha256=xGojp7aPnmrVSqaZrvY3vpQIrJPkhGIjYdcmwmlLORc,2409
+wxo_agentic_evaluation/compare_runs/diff.py,sha256=vhHPAfspqBeCoXrUdoMGti_b3KDJx0lp0rnFJG0uYag,20726
+wxo_agentic_evaluation/compare_runs/model.py,sha256=Gt65p2ZDaZeSjIXriAYuEoC7v3Xm0prOvSE9P6ps1Ko,7096
+wxo_agentic_evaluation/evaluation_controller/evaluation_controller.py,sha256=5t6DV3CBT5UxLA9fW4mDiWhJNkjZhlQ9TxEgc6Q6vOM,10696
+wxo_agentic_evaluation/external_agent/__init__.py,sha256=JAOAAcBxEzQN7oa23iXeKxXCs6nSCgl7ZwnGk2rHn9s,1554
+wxo_agentic_evaluation/external_agent/external_validate.py,sha256=xH387nMXiM3IatP5eFAjbvWQGpZJB6-vuqd9szsNFe4,4208
+wxo_agentic_evaluation/external_agent/performance_test.py,sha256=mLiUsgZlpj6sKZl2lOjb04ts6UOTf7PoOmvLMZrTN1M,2494
+wxo_agentic_evaluation/external_agent/types.py,sha256=2349ROo1nqEAlyxSCzruB2lF94Rw-Q_cRK24uuyZK78,1464
+wxo_agentic_evaluation/extractors/__init__.py,sha256=FpmHi8qZIoWwbWSfZ7uMtB2IWRkTmn75i5Q5LqFLRqs,95
+wxo_agentic_evaluation/extractors/extractor_base.py,sha256=MtdssGiaB9so0oMj-UYHE5SfX4gYJPKEyNF16HLz078,469
+wxo_agentic_evaluation/extractors/labeled_messages.py,sha256=OMebHY2MojHHQ6ubUhsM6lj1Pzj_5PfJQV_Jwsz3hSo,1493
+wxo_agentic_evaluation/metrics/__init__.py,sha256=d17QXtfXe7Tl7cQRhgPKS2zQsBSGYNHCDSH2IJS4LC8,380
+wxo_agentic_evaluation/metrics/dummy_metric.py,sha256=2p4tCXYBobEtnCeKV2i5lyjHB9XrSz4jXj113WD5Bzk,577
+wxo_agentic_evaluation/metrics/evaluations.py,sha256=l4bVEO5-tj6zN_G-aJuy7TEVfYjKL9Jj7Q3TotsWLXM,3512
+wxo_agentic_evaluation/metrics/journey_success.py,sha256=WnzK0t8MxRQMdNKOGFBj9EoYd6g5PRToPCQSZwDaFJI,4860
+wxo_agentic_evaluation/metrics/llm_as_judge.py,sha256=PBUDc_a27maEZm8PWPp5dJrFbyNccl7JxBDOs5TGSUY,1783
+wxo_agentic_evaluation/metrics/metrics.py,sha256=Yuw99kBOJ8ZzdI0bq0vZS0A4QfTCwqWeGGUWBWcNTtc,14807
+wxo_agentic_evaluation/metrics/tool_calling.py,sha256=7flWkbovl5YsN4mxSmedzw02fTzUv13ZU9qVDMsAN8w,3102
+wxo_agentic_evaluation/otel_parser/__init__.py,sha256=jKR6KdSwNC9tlncbhUZT2UGbhwwYYboa7F1sTHY2MnY,69
+wxo_agentic_evaluation/otel_parser/langflow_parser.py,sha256=I3xdQyz2OLouhvXqt6cWf34o6CnJL73oRFLpTEnO5S4,4310
+wxo_agentic_evaluation/otel_parser/langgraph_parser.py,sha256=qqwQNpj4EKfXfe065EWdXD8YBP-QkU6za7A0lY946u0,2931
+wxo_agentic_evaluation/otel_parser/parser.py,sha256=1bhms3f9gkK00yDFhBUnHqyjLOpR4rXVkdzxtN5L69A,5608
+wxo_agentic_evaluation/otel_parser/parser_types.py,sha256=ZCRoP9Unqrg4B2c8XjnbqQrBWePMhofiSmeufUX3yqQ,899
+wxo_agentic_evaluation/otel_parser/pydantic_parser.py,sha256=NifamupHEl5r4-D0v5AbiORmsD9Dec6CqUsSe59w9Js,2466
+wxo_agentic_evaluation/otel_parser/utils.py,sha256=7dqzZ2dduBook7a1--zBsC6ErpSreVepLOckOnMUZUE,334
+wxo_agentic_evaluation/otel_parser/wxo_parser.py,sha256=i5DuJC3LOL38oQxdQMG8jdje4RbV_bMSH8yWxpMIRcc,2101
+wxo_agentic_evaluation/otel_support/evaluate_tau.py,sha256=tzEmKOWsaUl3FoAH9ijek_Yy7vu0udcVYe7rzkO3fBk,2430
+wxo_agentic_evaluation/otel_support/otel_message_conversion.py,sha256=AYjA1huty4I5TvFW-ZJiZg-B2ttURvDFH0kGCKOXlpg,749
+wxo_agentic_evaluation/otel_support/tasks_test.py,sha256=fZzufxZAMlUnafq35PYsr2MEvpZWjTJ-_ZaxIAhRXxg,73280
+wxo_agentic_evaluation/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/prompt/answer_relevancy_prompt.jinja2,sha256=vLrMWce-5HlvniCQdtifnl-YdbJfT8-oixzfwulZs98,3839
+wxo_agentic_evaluation/prompt/args_extractor_prompt.jinja2,sha256=0qBicXFcc6AA3mQNLPVRmFsnuYaCABJXgZkIH9fO0Js,952
+wxo_agentic_evaluation/prompt/bad_tool_descriptions_prompt.jinja2,sha256=_Ty6QDcQcbde2ZP2HVvFtOCm_2mFu_1cUM6qj11MvcU,8085
+wxo_agentic_evaluation/prompt/batch_testcase_prompt.jinja2,sha256=QXuk2ecnEPPRCPoWZJyrtb1gAVuIPljB91YoqPBp2Dk,1896
+wxo_agentic_evaluation/prompt/derailment_prompt.jinja2,sha256=Q77FVf0-TixFz0_i2-YEh6UwrP0DRNz-cP9NDcDlqpY,1802
+wxo_agentic_evaluation/prompt/faithfulness_prompt.jinja2,sha256=DW9OdjeZJbOWrngRqTAVD4w0va_HtA2FR4G1POIIamM,2524
+wxo_agentic_evaluation/prompt/keyword_matching_prompt.jinja2,sha256=7mTkSrppjgPluUAIMTWaT30K7M4J4hyR_LjSjW1Ofq0,1290
+wxo_agentic_evaluation/prompt/keywords_generation_prompt.jinja2,sha256=PiCjr1ag44Jk5xD3F24fLD_bOGYh2sF0i5miY4OrVlc,1890
+wxo_agentic_evaluation/prompt/llama_user_prompt.jinja2,sha256=o1OfN9ltWUzSyisZBJNdYC3PCI5pImPLgjQD1iOf8UQ,4651
+wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2,sha256=0IwU1mICkqNVXni18GRnA1gEcPN9nVDp_zac3zI8WZ8,290
+wxo_agentic_evaluation/prompt/off_policy_attack_generation_prompt.jinja2,sha256=4ZzRfyXbyGbos_XpN_YhvbMFSzlyWxlPtG3qYfqBYbM,1289
+wxo_agentic_evaluation/prompt/on_policy_attack_generation_prompt.jinja2,sha256=90KF7fXW5PPwY8IkPqA6ZflDMkr_KFDpO9H_mVGdGf8,2212
+wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2,sha256=fhLEoSiIa6meHcNfmr8UgmtKGU8zTdjth9nkE41bUDs,3642
+wxo_agentic_evaluation/prompt/starting_sentence_generation_prompt.jinja2,sha256=m_l6f7acfnWJmGQ0mXAy85oLGLgzhVhoz7UL1FVYq8A,4908
+wxo_agentic_evaluation/prompt/story_generation_prompt.jinja2,sha256=_DxjkFoHpNTmdVSUzUrUdwn4Cng7nAGqkMnm0ScOH1w,4191
+wxo_agentic_evaluation/prompt/template_render.py,sha256=eMvu6kH5M5du71HyKzHORZzaBTdOPdzQfbi2TA2PsmM,8027
+wxo_agentic_evaluation/prompt/tool_chain_agent.jinja2,sha256=9RcIjLYoOvtFsf-RgyMfMcj2Fe8fq1wGkE4nG1zamYY,297
+wxo_agentic_evaluation/prompt/tool_planner.jinja2,sha256=Ln43kwfSX50B1VBsT-MY1TCE0o8pGFh8aQJAzZfGkpI,3239
+wxo_agentic_evaluation/prompt/unsafe_topic_prompt.jinja2,sha256=swxhd_9mxoRSNtvumup40bKdKDb8O_YMv6unytGJxdc,2447
+wxo_agentic_evaluation/prompt/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/prompt/examples/data_simple.json,sha256=XXF-Pn-mosklC9Ch7coyaJxosFNnl3OkHSW3YPuiKMM,2333
+wxo_agentic_evaluation/red_teaming/attack_evaluator.py,sha256=8Y7q9qaCu3KYX1iRmvSj78EmbCay8F7GF2IQtc_NTrY,10653
+wxo_agentic_evaluation/red_teaming/attack_generator.py,sha256=i1gmszadOKC7dP4F8c_0PRIBdmdmpmdRHdt6xQ14j9I,13111
+wxo_agentic_evaluation/red_teaming/attack_list.py,sha256=_Yhl51u1lQEHZc7JvtQqQlrdjaSdR_sP7D82khFafvE,14749
+wxo_agentic_evaluation/red_teaming/attack_runner.py,sha256=GsAFzR95kw2I7kKZ8_rU6lL2tjhvfSqr10CNO6SuqCA,6470
+wxo_agentic_evaluation/referenceless_eval/__init__.py,sha256=lijXMgQ8nQe-9eIfade2jLfHMlXfYafMZIwXtC9KDZo,106
+wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py,sha256=59clSfZKIkt213ndPtYNUvI66L3D73GsNFpXt21rrP8,6432
+wxo_agentic_evaluation/referenceless_eval/function_calling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/referenceless_eval/function_calling/consts.py,sha256=UidTaT9g5IxbcakfQqP_9c5civ1wDqY-PpPUf0uOXJo,915
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py,sha256=IEyo5H_TTrzMLPD9y2eFDCSTB80G5QetZRiUhRlCx-A,852
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py,sha256=3JDWWjYuYfGwa2uYLXaxGETMuppGld5c901h_-YkFO4,7645
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general.py,sha256=P1ytcARCy696ZCLq9tcaQWgaolmu0ON_kySmmTeyBtc,1549
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json,sha256=Pw9pynj47K1sxNlFN9SPKiNb8QTDVoqwL8R81ZJ_-Q4,54759
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics_runtime.json,sha256=bVSyudTk2Nim1DcgQZf8ilOTszY2Kgm4YU6beHWvEhQ,40475
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection.py,sha256=UwPMCn7T2BEut7Mbj6U5UJvb2AAZw03BiB9wEjSCheg,1017
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json,sha256=kEZj2qDAGJfpB7NCuEYXdxbVBSpibitIlBseJXI-fn0,44534
+wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics_runtime.json,sha256=o4oRur1MiXO2RYzmzj07QOBzX75DyU7T7yd-gFsgFdo,30563
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py,sha256=QP43RjUfozozXBtYEzPHv7EC3pdwIWLdNRsJ8xzvcjU,3701
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py,sha256=f4GmTXNTBeH171GGRWaDCIRuFPRyuVMy62evWV8TEl8,9713
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py,sha256=Fm0unqhpFBxeofTQjQaLl_SZFSFke7K7S56t46812-E,17589
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py,sha256=0m4iHqb68psvLMNQasFaaxgQP5XmmGjBkuID8aw5Kv8,6069
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/transformation_prompts.py,sha256=tW1wc87WIm8BZh2lhdj1RDP6VdRLqZBWSMmemSttbGs,22034
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py,sha256=HzypLLJJFg-zchMNUXWnBG_8CeOmK7t47-Oa2SotcpE,17096
+wxo_agentic_evaluation/referenceless_eval/metrics/__init__.py,sha256=uJc7ZwK6pJpsMIuBSBXUtxdvd-aiRnOXGX3aeyvw2ik,151
+wxo_agentic_evaluation/referenceless_eval/metrics/field.py,sha256=ki6ZqLfg9f6il7Pk7FxqwZLeZDuZFKwON_hKPNH5jkg,8446
+wxo_agentic_evaluation/referenceless_eval/metrics/metric.py,sha256=bDRYG-HObwFvi4-CS7am4F_9WPXqh6T4UzNIrxqynsY,12331
+wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py,sha256=pt-XIVTzJn5c3_lM1H6r82ag5c_uxdA5PPCyCwBV1O8,6012
+wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py,sha256=Y2baaQ4IaS-oPqvweJd8cPYj2pgyJwS-2_HwvE2PP-s,15112
+wxo_agentic_evaluation/referenceless_eval/metrics/utils.py,sha256=O3axxDTD2e7lFk5m7amz5713rom9hHKDvwWlrspSK3k,1466
+wxo_agentic_evaluation/referenceless_eval/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/referenceless_eval/prompt/runner.py,sha256=CLJgDoUp80ug0lDpfYJFEiLnmXej_6R-YloJjdX6I1Y,5111
+wxo_agentic_evaluation/runtime_adapter/a2a_runtime_adapter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+wxo_agentic_evaluation/runtime_adapter/runtime_adapter.py,sha256=djOapIOI7uZtKsSuPh6hY16yBT9kcUcIfPiFYZp7IYk,298
+wxo_agentic_evaluation/runtime_adapter/wxo_runtime_adapter.py,sha256=wSCDN6d9e-TqdY-iG-EMyFtze4uDE2H6gnaLx2EXaHg,23254
+wxo_agentic_evaluation/service_provider/__init__.py,sha256=dw-fIw3Xyic-MjaOeW_cY3PMBdx22_oOktkDuN7en2A,6115
+wxo_agentic_evaluation/service_provider/gateway_provider.py,sha256=n0Rc4mWqIppL8KWk1CKFvUIsETHvYhUbtSLUrwPj-Ao,24545
+wxo_agentic_evaluation/service_provider/model_proxy_provider.py,sha256=KlSzuK4nO_EG0LaMGw6Hvj2oQeQ7ZgezPCkaRZNcl9Y,23389
+wxo_agentic_evaluation/service_provider/ollama_provider.py,sha256=irIjNryfGAKTW3cfJP1sY7P6EnIHIy8mHQuTdCAHp0s,14053
+wxo_agentic_evaluation/service_provider/portkey_provider.py,sha256=zxIeshvSSFXArce69Z1Z2C51iEUCQvajRzqYulIymfM,7931
+wxo_agentic_evaluation/service_provider/provider.py,sha256=4FGg4tXAKxuyYM3-LNIxhzJtI1b15r82C8jMLWdItII,4209
+wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py,sha256=PHbYBpmzV4Pgh1kfjVADmiUhHnjEvR1849QqjTJIbCs,6905
+wxo_agentic_evaluation/service_provider/watsonx_provider.py,sha256=CVEatGqvtIQoy_fOwxTXvMYyFPc8WE_VjaSTrPzKHgw,21193
+wxo_agentic_evaluation/user_simulator/demo_usage_llm_user.py,sha256=2Vq8nBPM89Ya2voJCeZyZjgM3vQacAXATF5oFUO_x6g,3507
+wxo_agentic_evaluation/utils/__init__.py,sha256=LjN7tf9VrHsUeVXV5GA2ASyakgo0CRdyJhu6eG71bj4,1225
+wxo_agentic_evaluation/utils/evaluation_discovery.py,sha256=palyGppHqMeFmV3fxDErWNtzNq2Bp7xIz_QHcuBg3uA,1660
+wxo_agentic_evaluation/utils/gateway_provider_utils.py,sha256=Yzs6K-h_f9NL1AwGzPKkvs0sMqFGDYJW-83fnuCQYpM,1099
+wxo_agentic_evaluation/utils/messages_parser.py,sha256=aNnoss7S5JzPh6WCXUxio66jUUze_SZ6Ta8hJn9m8e8,928
+wxo_agentic_evaluation/utils/open_ai_tool_extractor.py,sha256=kJUuprXfY5IfCRIvLT4oSvMf-Ly7RYNo4if-Lb6yGiA,6080
+wxo_agentic_evaluation/utils/parsers.py,sha256=FPKPVb0LhEKc8ozxanBhPgWRFp1S_bpyDFCJvBk3tCo,2143
+wxo_agentic_evaluation/utils/rich_utils.py,sha256=pJ43hwvSZRJwckPOeUhqGdw4_RwxLsYO02dDt6PLqxA,6351
+wxo_agentic_evaluation/utils/rouge_score.py,sha256=WvcGh6mwF4rWH599J9_lAt3BfaHbAZKtKEJBsC61iKo,692
+wxo_agentic_evaluation/utils/utils.py,sha256=BSITEsAxqO4j3vlrTXFPiVzg4XV8PU45dhnQ94xICEY,20823
+ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/METADATA,sha256=BgUk212arYQDXJhzT1Ln4wZOYaSkBHDqMpgmSbM7Jq4,2228
+ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
+ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/RECORD,,

wxo_agentic_evaluation/analytics/tools/analyzer.py CHANGED Viewed

@@ -18,7 +18,7 @@ from wxo_agentic_evaluation.analytics.tools.types import (
     ToolFailure,
 )
 from wxo_agentic_evaluation.data_annotator import ERROR_KEYWORDS
-from wxo_agentic_evaluation.type import ContentType, EvaluationData, Message
+from wxo_agentic_evaluation.type import ContentType, Message, OrchestrateDataset
 class ToolErrorAnalyzer:
@@ -54,7 +54,9 @@ class ToolErrorAnalyzer:
         return error_terms
     def __init__(
-        self, messages: List[Message], ground_truth: Optional[EvaluationData]
+        self,
+        messages: List[Message],
+        ground_truth: Optional[OrchestrateDataset],
     ):
         self.messages = messages
         self.ground_truth = ground_truth

ibm-watsonx-orchestrate-evaluation-framework 1.1.3__py3-none-any.whl → 1.1.8b0__py3-none-any.whl

ibm-watsonx-orchestrate-evaluation-framework 1.1.3py3-none-any.whl → 1.1.8b0py3-none-any.whl