ibm-watsonx-orchestrate-evaluation-framework 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.

Files changed (49) hide show
  1. {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/METADATA +4 -1
  2. {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/RECORD +49 -39
  3. wxo_agentic_evaluation/analyze_run.py +822 -344
  4. wxo_agentic_evaluation/arg_configs.py +39 -2
  5. wxo_agentic_evaluation/data_annotator.py +22 -4
  6. wxo_agentic_evaluation/description_quality_checker.py +29 -4
  7. wxo_agentic_evaluation/evaluation_package.py +197 -18
  8. wxo_agentic_evaluation/external_agent/external_validate.py +3 -1
  9. wxo_agentic_evaluation/external_agent/types.py +1 -1
  10. wxo_agentic_evaluation/inference_backend.py +105 -108
  11. wxo_agentic_evaluation/llm_matching.py +104 -2
  12. wxo_agentic_evaluation/llm_user.py +2 -2
  13. wxo_agentic_evaluation/main.py +147 -38
  14. wxo_agentic_evaluation/metrics/__init__.py +5 -0
  15. wxo_agentic_evaluation/metrics/evaluations.py +124 -0
  16. wxo_agentic_evaluation/metrics/llm_as_judge.py +4 -3
  17. wxo_agentic_evaluation/metrics/metrics.py +64 -1
  18. wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2 +15 -0
  19. wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2 +41 -9
  20. wxo_agentic_evaluation/prompt/template_render.py +20 -2
  21. wxo_agentic_evaluation/quick_eval.py +23 -11
  22. wxo_agentic_evaluation/record_chat.py +18 -10
  23. wxo_agentic_evaluation/red_teaming/attack_evaluator.py +169 -100
  24. wxo_agentic_evaluation/red_teaming/attack_generator.py +63 -40
  25. wxo_agentic_evaluation/red_teaming/attack_list.py +78 -8
  26. wxo_agentic_evaluation/red_teaming/attack_runner.py +71 -14
  27. wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json +783 -0
  28. wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json +600 -0
  29. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +10 -10
  30. wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +103 -39
  31. wxo_agentic_evaluation/resource_map.py +3 -1
  32. wxo_agentic_evaluation/service_instance.py +12 -3
  33. wxo_agentic_evaluation/service_provider/__init__.py +129 -9
  34. wxo_agentic_evaluation/service_provider/gateway_provider.py +707 -0
  35. wxo_agentic_evaluation/service_provider/model_proxy_provider.py +415 -17
  36. wxo_agentic_evaluation/service_provider/ollama_provider.py +393 -22
  37. wxo_agentic_evaluation/service_provider/provider.py +130 -10
  38. wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +52 -0
  39. wxo_agentic_evaluation/service_provider/watsonx_provider.py +480 -52
  40. wxo_agentic_evaluation/type.py +15 -5
  41. wxo_agentic_evaluation/utils/__init__.py +44 -3
  42. wxo_agentic_evaluation/utils/evaluation_discovery.py +47 -0
  43. wxo_agentic_evaluation/utils/gateway_provider_utils.py +39 -0
  44. wxo_agentic_evaluation/utils/messages_parser.py +30 -0
  45. wxo_agentic_evaluation/utils/parsers.py +71 -0
  46. wxo_agentic_evaluation/utils/utils.py +140 -20
  47. wxo_agentic_evaluation/wxo_client.py +81 -0
  48. {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/WHEEL +0 -0
  49. {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ibm-watsonx-orchestrate-evaluation-framework
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: The WxO evaluation framework
5
5
  Author-email: Haode Qi <Haode.Qi@ibm.com>
6
6
  License: MIT
@@ -14,6 +14,8 @@ Requires-Dist: dataclasses-json~=0.6.7
14
14
  Requires-Dist: jsonargparse~=4.37.0
15
15
  Requires-Dist: jsonschema~=4.23.0
16
16
  Requires-Dist: requests~=2.32.5
17
+ Requires-Dist: fuzzywuzzy~=0.18.0
18
+ Requires-Dist: python-dateutil~=2.9.0
17
19
  Provides-Extra: dev
18
20
  Requires-Dist: setuptools~=70.3.0; extra == "dev"
19
21
  Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
@@ -24,6 +26,7 @@ Requires-Dist: coverage[toml]>=6.5; extra == "dev"
24
26
  Requires-Dist: black~=24.8.0; extra == "dev"
25
27
  Requires-Dist: pylint~=3.3.8; extra == "dev"
26
28
  Requires-Dist: isort~=5.13.2; extra == "dev"
29
+ Requires-Dist: coverage; extra == "dev"
27
30
  Provides-Extra: rag-eval
28
31
  Requires-Dist: tqdm~=4.67.1; extra == "rag-eval"
29
32
  Requires-Dist: sentence-transformers~=3.3.1; extra == "rag-eval"
@@ -1,37 +1,39 @@
1
1
  wxo_agentic_evaluation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- wxo_agentic_evaluation/analyze_run.py,sha256=T1fgy1TY-KnIMB2k6cMNcr0_1eGpX3IYAc8BG_KwkeA,26615
2
+ wxo_agentic_evaluation/analyze_run.py,sha256=t5qCXxopI-LfvZSzniTTRpi6dIFw8cW0_Brqg1O_Wpc,45565
3
3
  wxo_agentic_evaluation/annotate.py,sha256=PwgRBAIVBW_yEoOLYNHg9-XVo78zzTXb68kzR2JbtCM,1230
4
- wxo_agentic_evaluation/arg_configs.py,sha256=417vaTlkPV1VQ57yLY-EnPSdxXizjl6ETZlF76sy190,3048
4
+ wxo_agentic_evaluation/arg_configs.py,sha256=y42KiYWz09JyvDUSPfSxVJ9-cy38kBo_8cfcloJ94s8,4107
5
5
  wxo_agentic_evaluation/batch_annotate.py,sha256=cZWhDt4k1Tap-e7Mw48tOXKwlHk-JxUvDT6N6n_A7PA,6694
6
- wxo_agentic_evaluation/data_annotator.py,sha256=ovrymEn2Jlivffg9_mtW7sT73_iOBLU5mX9n5hQQWPo,8398
7
- wxo_agentic_evaluation/description_quality_checker.py,sha256=Skmt_X-z5rJ9-rBXu5acp0sxq_LyjL0sOOYQVcn25K4,6163
6
+ wxo_agentic_evaluation/data_annotator.py,sha256=KYVyepXGfR4QzlEhgFBA--MieVGSb_lDE2BBn0dcvh8,8885
7
+ wxo_agentic_evaluation/description_quality_checker.py,sha256=Kfr16Ol_4Ck54uyn9Mn-kBWwzs6LuDUKmx1rzr9rVns,6809
8
8
  wxo_agentic_evaluation/evaluation.py,sha256=ZeMmxSbJyA86CVjX8EUdMsVrn25MMqMYO91DZqbe7f0,1090
9
- wxo_agentic_evaluation/evaluation_package.py,sha256=xEtx26FY7w1IqJkoWOS0VngjYyArKESlLhzwLH-vci8,28575
10
- wxo_agentic_evaluation/inference_backend.py,sha256=P8IzkmFAQoiTPfbuHPUPdE-npK8yNBzsAUJDtc9i9uE,33270
11
- wxo_agentic_evaluation/llm_matching.py,sha256=HY_4T_4-JXr08Z8o0XWcZfyrzxM0hBpCYGbwh7uSOkw,1479
9
+ wxo_agentic_evaluation/evaluation_package.py,sha256=oVfGemtGL-LRElSDkmPVHmkOHNgkuBTh0JgaLCm73w8,35989
10
+ wxo_agentic_evaluation/inference_backend.py,sha256=0DQ3JUR4JwE3xTjVBTxHguju1bZgHzX-k8aD9QpASPc,33333
11
+ wxo_agentic_evaluation/llm_matching.py,sha256=Oa3NezPcif6At3OHAlzwsdC3JOebXPHiWaufgyrpA4g,5189
12
12
  wxo_agentic_evaluation/llm_rag_eval.py,sha256=cMUutCpmR1Zg5MM7KbHjHkF42FRBBACFNc-MzwxAw9M,1655
13
13
  wxo_agentic_evaluation/llm_safety_eval.py,sha256=pNuq4xLxkImyksGmsQire_nIQWOEoGqCc-Z3ZCSrcTQ,2268
14
- wxo_agentic_evaluation/llm_user.py,sha256=-DezpQKYoWuN5kQBAx5zwE3Qd_OaxfizZQU7MeqScoM,1519
15
- wxo_agentic_evaluation/main.py,sha256=5WDJN-cpK0Dt0niVKg7b_f9CNTDC54g1psN20MYyGEw,18100
14
+ wxo_agentic_evaluation/llm_user.py,sha256=-jtUT99jJnIJl9oLKmoMJBWal0QkBZhzwGRa2pDwo9A,1519
15
+ wxo_agentic_evaluation/main.py,sha256=5VeD1qs8c3tKvptQrlgM9y4v9CIV7UvMNtzsj2Ro360,22101
16
16
  wxo_agentic_evaluation/main_v2.py,sha256=96pujdcfZJyDo1naGlLAk5mFrrSY0hIxrlH4qTdSCSs,14896
17
- wxo_agentic_evaluation/quick_eval.py,sha256=7JWBB4Q5psZi2O26wZkQgPudu3uNZZBFrZSYou2ivgw,12876
18
- wxo_agentic_evaluation/record_chat.py,sha256=YQD35ZCPMz5N_u5C1wBEmvp1vptI_voUqz3RYZp8hEY,8488
19
- wxo_agentic_evaluation/resource_map.py,sha256=fmkLcQEx4_tpc46rkSoEOsvmd5WMkxaJpIfgqaR31Ms,1646
20
- wxo_agentic_evaluation/service_instance.py,sha256=Mgr4UjnwYts91J_iLyygubsZw3aLenPnIfKcqz8OrRU,8515
17
+ wxo_agentic_evaluation/quick_eval.py,sha256=BoReOyhV-7HSde73_QczFAWBTZ_zepPnRmzvB8dVY3g,13455
18
+ wxo_agentic_evaluation/record_chat.py,sha256=DrNf28kh550EtXSxCzTi9hQoS4Ab_vPFnvQPY29M1Xk,8936
19
+ wxo_agentic_evaluation/resource_map.py,sha256=hFk3OqOwbFolhwFPbdW-7hoB1WnU-_orX7UuXR_IIks,1726
20
+ wxo_agentic_evaluation/service_instance.py,sha256=LrXIX5e0PZkOGwUzMpbUz2VHTO3TtQaUEsMbQUECUi4,8878
21
21
  wxo_agentic_evaluation/test_prompt.py,sha256=ksteXCs9iDQPMETc4Hb7JAXHhxz2r678U6-sgZJAO28,3924
22
22
  wxo_agentic_evaluation/tool_planner.py,sha256=RohospVdfYURyFVETgjm1EukmgpNBvBJopUs6obdhn0,14111
23
- wxo_agentic_evaluation/type.py,sha256=wAqE7sHEOuAD6s-GxLzdPdMyyjNqh-jOuV-KJR5zH5U,4047
23
+ wxo_agentic_evaluation/type.py,sha256=H75yT8eV45Ri6VXn37gonqe3CGnILX84V-pwk4Obu2E,4345
24
+ wxo_agentic_evaluation/wxo_client.py,sha256=V4zdmGLtZb4pP5rq82ZQnyu3Slkm2EXhD1O2mGd57BI,2491
24
25
  wxo_agentic_evaluation/analytics/tools/analyzer.py,sha256=mI2fyYzbLpSjSr2iwSwpjrOAenxvfA-6h9z2oky0uMs,18349
25
26
  wxo_agentic_evaluation/analytics/tools/main.py,sha256=tkDirlsRvLWQCSXcX6BlQFg24HY31K400M1a-O5xKfU,6250
26
27
  wxo_agentic_evaluation/analytics/tools/types.py,sha256=FxEHvL2i_4qMIhZBGbhMNd6Ics3nLbl5_vyLYJF5Qp0,4568
27
28
  wxo_agentic_evaluation/analytics/tools/ux.py,sha256=VwDc_d74HoI2sYMURciRzJzrUBiPzmCFx57C4JhGqGM,18974
28
29
  wxo_agentic_evaluation/external_agent/__init__.py,sha256=P1T0JYPIZeVyEYRqpEMKqGORQ1h_fVRvm9_lra9U0Q4,1570
29
- wxo_agentic_evaluation/external_agent/external_validate.py,sha256=eBN13OACh2Xk5-ph__bhaRK4rYUubyl3Mr_t4iYdICY,4184
30
+ wxo_agentic_evaluation/external_agent/external_validate.py,sha256=xH387nMXiM3IatP5eFAjbvWQGpZJB6-vuqd9szsNFe4,4208
30
31
  wxo_agentic_evaluation/external_agent/performance_test.py,sha256=mLiUsgZlpj6sKZl2lOjb04ts6UOTf7PoOmvLMZrTN1M,2494
31
- wxo_agentic_evaluation/external_agent/types.py,sha256=56DRfrd_hCKnk3lk3lSJI4_Ga6ZNSezOK3EutowpCe4,1464
32
- wxo_agentic_evaluation/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- wxo_agentic_evaluation/metrics/llm_as_judge.py,sha256=-JtRcCSYIafMRAL1W7mz0oLRySD1Thje8ankbFmCoMQ,1755
34
- wxo_agentic_evaluation/metrics/metrics.py,sha256=X2Bapjc7aGwU8--evEYOr2x-CNGsMMBTmMP1dXnURUo,6336
32
+ wxo_agentic_evaluation/external_agent/types.py,sha256=2349ROo1nqEAlyxSCzruB2lF94Rw-Q_cRK24uuyZK78,1464
33
+ wxo_agentic_evaluation/metrics/__init__.py,sha256=Vn3fiy8_UkOYvfXqSWUOQnTF7wMv6xy2OMrh0DiX764,127
34
+ wxo_agentic_evaluation/metrics/evaluations.py,sha256=o-Y5kvDZikR-OU3f3fU7lja9gYxSJ9SJk-wGtqX_hF0,3861
35
+ wxo_agentic_evaluation/metrics/llm_as_judge.py,sha256=PBUDc_a27maEZm8PWPp5dJrFbyNccl7JxBDOs5TGSUY,1783
36
+ wxo_agentic_evaluation/metrics/metrics.py,sha256=cx82hUjE8uE83tFxs3tJ5SMFI_9HCaMbRdy2JbwcZyk,8090
35
37
  wxo_agentic_evaluation/otel_support/evaluate_tau.py,sha256=RGfaM0jx5WmF4y1EnVsE2FWpfDQEoL3_sIMNcMUbZuQ,2023
36
38
  wxo_agentic_evaluation/otel_support/otel_message_conversion.py,sha256=6fU2nXtMZUiQuhp2w2ByYigeo7wlOmUSo1CEHcb1iqE,649
37
39
  wxo_agentic_evaluation/otel_support/tasks_test.py,sha256=Z7NglyL-uI4vzb1Lum9aEdPZ7x_J2g1A-MKEABRX3nU,67543
@@ -45,23 +47,24 @@ wxo_agentic_evaluation/prompt/faithfulness_prompt.jinja2,sha256=DW9OdjeZJbOWrngR
45
47
  wxo_agentic_evaluation/prompt/keyword_matching_prompt.jinja2,sha256=7mTkSrppjgPluUAIMTWaT30K7M4J4hyR_LjSjW1Ofq0,1290
46
48
  wxo_agentic_evaluation/prompt/keywords_generation_prompt.jinja2,sha256=PiCjr1ag44Jk5xD3F24fLD_bOGYh2sF0i5miY4OrVlc,1890
47
49
  wxo_agentic_evaluation/prompt/llama_user_prompt.jinja2,sha256=o1OfN9ltWUzSyisZBJNdYC3PCI5pImPLgjQD1iOf8UQ,4651
50
+ wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2,sha256=0IwU1mICkqNVXni18GRnA1gEcPN9nVDp_zac3zI8WZ8,290
48
51
  wxo_agentic_evaluation/prompt/off_policy_attack_generation_prompt.jinja2,sha256=4ZzRfyXbyGbos_XpN_YhvbMFSzlyWxlPtG3qYfqBYbM,1289
49
52
  wxo_agentic_evaluation/prompt/on_policy_attack_generation_prompt.jinja2,sha256=90KF7fXW5PPwY8IkPqA6ZflDMkr_KFDpO9H_mVGdGf8,2212
50
- wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2,sha256=MltPfEXYyOwEC2xNLl7UsFTxNbr8CwHaEcPqtvKE2r8,2749
53
+ wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2,sha256=fhLEoSiIa6meHcNfmr8UgmtKGU8zTdjth9nkE41bUDs,3642
51
54
  wxo_agentic_evaluation/prompt/starting_sentence_generation_prompt.jinja2,sha256=m_l6f7acfnWJmGQ0mXAy85oLGLgzhVhoz7UL1FVYq8A,4908
52
55
  wxo_agentic_evaluation/prompt/story_generation_prompt.jinja2,sha256=_DxjkFoHpNTmdVSUzUrUdwn4Cng7nAGqkMnm0ScOH1w,4191
53
- wxo_agentic_evaluation/prompt/template_render.py,sha256=gQy3j3RkoPJjlCip2-sq2vz0b7_adCpqnZ-uKGDU-gQ,5282
56
+ wxo_agentic_evaluation/prompt/template_render.py,sha256=U7J-u1Mrb847IXyJAbHe7CFh9WezsBzH750AFxuovQg,5742
54
57
  wxo_agentic_evaluation/prompt/tool_chain_agent.jinja2,sha256=9RcIjLYoOvtFsf-RgyMfMcj2Fe8fq1wGkE4nG1zamYY,297
55
58
  wxo_agentic_evaluation/prompt/tool_planner.jinja2,sha256=Ln43kwfSX50B1VBsT-MY1TCE0o8pGFh8aQJAzZfGkpI,3239
56
59
  wxo_agentic_evaluation/prompt/unsafe_topic_prompt.jinja2,sha256=swxhd_9mxoRSNtvumup40bKdKDb8O_YMv6unytGJxdc,2447
57
60
  wxo_agentic_evaluation/prompt/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
61
  wxo_agentic_evaluation/prompt/examples/data_simple.json,sha256=XXF-Pn-mosklC9Ch7coyaJxosFNnl3OkHSW3YPuiKMM,2333
59
- wxo_agentic_evaluation/red_teaming/attack_evaluator.py,sha256=bG3VBqmWoGySHfnkfv-2PPnwaktFQ9jeC3kCJzSMQ4k,8315
60
- wxo_agentic_evaluation/red_teaming/attack_generator.py,sha256=Sz9zB5O1ct7EoZCog8GNdwj8yWFZo7HJLPbA9HvelZc,11886
61
- wxo_agentic_evaluation/red_teaming/attack_list.py,sha256=yRnxwebrr7cl_3Rr3e3Xgdt0luCEYICvg00kutj47fo,9478
62
- wxo_agentic_evaluation/red_teaming/attack_runner.py,sha256=XXNP43mEneuDBo_zGPdCVNRdUNy-KGd7kbIKYwKhKJQ,4477
62
+ wxo_agentic_evaluation/red_teaming/attack_evaluator.py,sha256=8Y7q9qaCu3KYX1iRmvSj78EmbCay8F7GF2IQtc_NTrY,10653
63
+ wxo_agentic_evaluation/red_teaming/attack_generator.py,sha256=i1gmszadOKC7dP4F8c_0PRIBdmdmpmdRHdt6xQ14j9I,13111
64
+ wxo_agentic_evaluation/red_teaming/attack_list.py,sha256=_Yhl51u1lQEHZc7JvtQqQlrdjaSdR_sP7D82khFafvE,14749
65
+ wxo_agentic_evaluation/red_teaming/attack_runner.py,sha256=W9v-uMcfBJ7vTyQsuKI0gL7Q0s5-dLosUIvTxw9Zk9A,6361
63
66
  wxo_agentic_evaluation/referenceless_eval/__init__.py,sha256=lijXMgQ8nQe-9eIfade2jLfHMlXfYafMZIwXtC9KDZo,106
64
- wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py,sha256=G2b7rwN0VTLBVGwU1VXKUl4eqT8Ya8zCcOorwkZwrZA,4354
67
+ wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py,sha256=NMy0p0quecPpP0Y28FyXCiaXaiMW30EdYm-GyrnMXn0,6409
65
68
  wxo_agentic_evaluation/referenceless_eval/function_calling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
69
  wxo_agentic_evaluation/referenceless_eval/function_calling/consts.py,sha256=UidTaT9g5IxbcakfQqP_9c5civ1wDqY-PpPUf0uOXJo,915
67
70
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -69,9 +72,11 @@ wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py,sha25
69
72
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py,sha256=3JDWWjYuYfGwa2uYLXaxGETMuppGld5c901h_-YkFO4,7645
70
73
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
74
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general.py,sha256=P1ytcARCy696ZCLq9tcaQWgaolmu0ON_kySmmTeyBtc,1549
75
+ wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json,sha256=Pw9pynj47K1sxNlFN9SPKiNb8QTDVoqwL8R81ZJ_-Q4,54759
72
76
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics_runtime.json,sha256=bVSyudTk2Nim1DcgQZf8ilOTszY2Kgm4YU6beHWvEhQ,40475
73
77
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
78
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection.py,sha256=UwPMCn7T2BEut7Mbj6U5UJvb2AAZw03BiB9wEjSCheg,1017
79
+ wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json,sha256=kEZj2qDAGJfpB7NCuEYXdxbVBSpibitIlBseJXI-fn0,44534
75
80
  wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics_runtime.json,sha256=o4oRur1MiXO2RYzmzj07QOBzX75DyU7T7yd-gFsgFdo,30563
76
81
  wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
82
  wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py,sha256=QP43RjUfozozXBtYEzPHv7EC3pdwIWLdNRsJ8xzvcjU,3701
@@ -79,7 +84,7 @@ wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py,
79
84
  wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py,sha256=Fm0unqhpFBxeofTQjQaLl_SZFSFke7K7S56t46812-E,17589
80
85
  wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py,sha256=0m4iHqb68psvLMNQasFaaxgQP5XmmGjBkuID8aw5Kv8,6069
81
86
  wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/transformation_prompts.py,sha256=tW1wc87WIm8BZh2lhdj1RDP6VdRLqZBWSMmemSttbGs,22034
82
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py,sha256=mm7eOx6a_2ExDgck29IkgAzjeQkICpMDXecuxa6ZULo,17182
87
+ wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py,sha256=HzypLLJJFg-zchMNUXWnBG_8CeOmK7t47-Oa2SotcpE,17096
83
88
  wxo_agentic_evaluation/referenceless_eval/metrics/__init__.py,sha256=uJc7ZwK6pJpsMIuBSBXUtxdvd-aiRnOXGX3aeyvw2ik,151
84
89
  wxo_agentic_evaluation/referenceless_eval/metrics/field.py,sha256=ki6ZqLfg9f6il7Pk7FxqwZLeZDuZFKwON_hKPNH5jkg,8446
85
90
  wxo_agentic_evaluation/referenceless_eval/metrics/metric.py,sha256=bDRYG-HObwFvi4-CS7am4F_9WPXqh6T4UzNIrxqynsY,12331
@@ -88,18 +93,23 @@ wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py,sha256=Y2baaQ4IaS-oP
88
93
  wxo_agentic_evaluation/referenceless_eval/metrics/utils.py,sha256=O3axxDTD2e7lFk5m7amz5713rom9hHKDvwWlrspSK3k,1466
89
94
  wxo_agentic_evaluation/referenceless_eval/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
95
  wxo_agentic_evaluation/referenceless_eval/prompt/runner.py,sha256=CLJgDoUp80ug0lDpfYJFEiLnmXej_6R-YloJjdX6I1Y,5111
91
- wxo_agentic_evaluation/service_provider/__init__.py,sha256=Xu-Wdo7vZI6iNKFp4cNGo7rXv-OQ4BkgLaKeCfALCrk,2162
92
- wxo_agentic_evaluation/service_provider/model_proxy_provider.py,sha256=VN1DFF1woJcjijwj3lMA0JS-9pxJ6fXSYu91Ah7nTNE,9866
93
- wxo_agentic_evaluation/service_provider/ollama_provider.py,sha256=OCpnqd8E9WUqPGc7Q01L5HWVIZsZ5V5-XvjhcwvqRA4,1097
94
- wxo_agentic_evaluation/service_provider/provider.py,sha256=OkMjZ_xHPXy-YqkBbKXC4K67VWJrCQb1nSZxMRt-a4g,416
95
- wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py,sha256=hM085FbKEBM_LC2O-rURtGx-RMBtulbm1FAZa73k1gg,5321
96
- wxo_agentic_evaluation/service_provider/watsonx_provider.py,sha256=LYSpxOI2oMQSysasb8WT_nn5SdDy-dsLFyJDJHXFtn0,6876
97
- wxo_agentic_evaluation/utils/__init__.py,sha256=QMxk6hx1CDvCBLFh40WpPZmqFNJtDqwXP7S7cXD6NQE,145
96
+ wxo_agentic_evaluation/service_provider/__init__.py,sha256=a8obWjeL_E3z_rjfkOgYpJV1Ygz_ASBM0VM715yUros,5746
97
+ wxo_agentic_evaluation/service_provider/gateway_provider.py,sha256=n0Rc4mWqIppL8KWk1CKFvUIsETHvYhUbtSLUrwPj-Ao,24545
98
+ wxo_agentic_evaluation/service_provider/model_proxy_provider.py,sha256=5hE6nCf4g7MQF-xS9M1RxBppLJpWa9Jvs6rLGe-dcRI,23332
99
+ wxo_agentic_evaluation/service_provider/ollama_provider.py,sha256=irIjNryfGAKTW3cfJP1sY7P6EnIHIy8mHQuTdCAHp0s,14053
100
+ wxo_agentic_evaluation/service_provider/provider.py,sha256=4FGg4tXAKxuyYM3-LNIxhzJtI1b15r82C8jMLWdItII,4209
101
+ wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py,sha256=PHbYBpmzV4Pgh1kfjVADmiUhHnjEvR1849QqjTJIbCs,6905
102
+ wxo_agentic_evaluation/service_provider/watsonx_provider.py,sha256=2bVuo2ypJVpEwUQ0ioJn7vd0kfGN_6Rsk4Or92JU2HI,21167
103
+ wxo_agentic_evaluation/utils/__init__.py,sha256=LjN7tf9VrHsUeVXV5GA2ASyakgo0CRdyJhu6eG71bj4,1225
104
+ wxo_agentic_evaluation/utils/evaluation_discovery.py,sha256=palyGppHqMeFmV3fxDErWNtzNq2Bp7xIz_QHcuBg3uA,1660
105
+ wxo_agentic_evaluation/utils/gateway_provider_utils.py,sha256=Yzs6K-h_f9NL1AwGzPKkvs0sMqFGDYJW-83fnuCQYpM,1099
106
+ wxo_agentic_evaluation/utils/messages_parser.py,sha256=aNnoss7S5JzPh6WCXUxio66jUUze_SZ6Ta8hJn9m8e8,928
98
107
  wxo_agentic_evaluation/utils/open_ai_tool_extractor.py,sha256=kJUuprXfY5IfCRIvLT4oSvMf-Ly7RYNo4if-Lb6yGiA,6080
108
+ wxo_agentic_evaluation/utils/parsers.py,sha256=-JYHd2ervARXbIIcRA9-gUfZeVuxo3otaW_d2SsVMLU,2135
99
109
  wxo_agentic_evaluation/utils/rich_utils.py,sha256=pJ43hwvSZRJwckPOeUhqGdw4_RwxLsYO02dDt6PLqxA,6351
100
110
  wxo_agentic_evaluation/utils/rouge_score.py,sha256=WvcGh6mwF4rWH599J9_lAt3BfaHbAZKtKEJBsC61iKo,692
101
- wxo_agentic_evaluation/utils/utils.py,sha256=yDPF0hsd_ypMUanf4AZOQbbBh5KhjTc_VOgIcQ-6htI,12682
102
- ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info/METADATA,sha256=E47VTRmWgRVX63K0gBHQuX4EXaRv4MsEKyNtddRGMB4,1728
103
- ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
104
- ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
105
- ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info/RECORD,,
111
+ wxo_agentic_evaluation/utils/utils.py,sha256=dCtqpMl6RkKwfuy4KIWt8gdwYyNcMOsHC5QzGd3urBs,16693
112
+ ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info/METADATA,sha256=ZHQOYRKUvPZ9hjhyvlTreZC6Tj2YHk8n8YlJRvbU4Cc,1840
113
+ ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
114
+ ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
115
+ ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info/RECORD,,