crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (184) hide show
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  10. helm/benchmark/annotation/call_center_annotator.py +247 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +32 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +31 -44
  18. helm/benchmark/annotation/model_as_judge.py +45 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  20. helm/benchmark/annotation/xstest_annotator.py +110 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +57 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  30. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  31. helm/benchmark/model_metadata_registry.py +3 -3
  32. helm/benchmark/presentation/test_run_entry.py +1 -0
  33. helm/benchmark/run.py +15 -0
  34. helm/benchmark/run_expander.py +56 -30
  35. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  36. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  37. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  38. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  39. helm/benchmark/run_specs/finance_run_specs.py +78 -1
  40. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +92 -21
  42. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  43. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  44. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  45. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  46. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  47. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  48. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  49. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  50. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  51. helm/benchmark/scenarios/scenario.py +1 -1
  52. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  53. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  54. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  55. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  56. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  57. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  58. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  59. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  60. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  61. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  62. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  63. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  64. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  65. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  66. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  67. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  68. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  69. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  70. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  71. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  72. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  73. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  74. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  75. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  76. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  78. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  79. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  80. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  81. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  82. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  83. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  84. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  85. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  86. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  87. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  88. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  89. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  91. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  92. helm/benchmark/server.py +1 -6
  93. helm/benchmark/static/schema_air_bench.yaml +750 -750
  94. helm/benchmark/static/schema_bhasa.yaml +709 -0
  95. helm/benchmark/static/schema_call_center.yaml +232 -0
  96. helm/benchmark/static/schema_cleva.yaml +768 -0
  97. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  98. helm/benchmark/static/schema_ewok.yaml +367 -0
  99. helm/benchmark/static/schema_finance.yaml +55 -9
  100. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  101. helm/benchmark/static/schema_safety.yaml +247 -0
  102. helm/benchmark/static/schema_tables.yaml +124 -7
  103. helm/benchmark/static/schema_thai.yaml +21 -0
  104. helm/benchmark/static/schema_vhelm.yaml +96 -91
  105. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  106. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  107. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  108. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  109. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  110. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  111. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  112. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  113. helm/benchmark/static_build/index.html +2 -2
  114. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  115. helm/clients/ai21_client.py +71 -1
  116. helm/clients/anthropic_client.py +7 -19
  117. helm/clients/huggingface_client.py +38 -37
  118. helm/clients/nvidia_nim_client.py +35 -0
  119. helm/clients/openai_client.py +2 -3
  120. helm/clients/palmyra_client.py +25 -0
  121. helm/clients/perspective_api_client.py +11 -6
  122. helm/clients/test_client.py +4 -6
  123. helm/clients/vision_language/open_flamingo_client.py +1 -2
  124. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  125. helm/common/images_utils.py +6 -0
  126. helm/common/mongo_key_value_store.py +2 -1
  127. helm/common/request.py +16 -0
  128. helm/config/model_deployments.yaml +315 -332
  129. helm/config/model_metadata.yaml +384 -110
  130. helm/config/tokenizer_configs.yaml +116 -11
  131. helm/proxy/example_queries.py +14 -21
  132. helm/proxy/services/server_service.py +1 -2
  133. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  134. helm/tokenizers/ai21_tokenizer.py +51 -59
  135. helm/tokenizers/cohere_tokenizer.py +0 -75
  136. helm/tokenizers/huggingface_tokenizer.py +0 -1
  137. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  138. helm/benchmark/static/benchmarking.css +0 -156
  139. helm/benchmark/static/benchmarking.js +0 -1705
  140. helm/benchmark/static/config.js +0 -3
  141. helm/benchmark/static/general.js +0 -122
  142. helm/benchmark/static/images/crfm-logo.png +0 -0
  143. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  144. helm/benchmark/static/images/helm-logo.png +0 -0
  145. helm/benchmark/static/images/language-model-helm.png +0 -0
  146. helm/benchmark/static/images/organizations/ai21.png +0 -0
  147. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  148. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  149. helm/benchmark/static/images/organizations/cohere.png +0 -0
  150. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  151. helm/benchmark/static/images/organizations/google.png +0 -0
  152. helm/benchmark/static/images/organizations/meta.png +0 -0
  153. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  154. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  155. helm/benchmark/static/images/organizations/openai.png +0 -0
  156. helm/benchmark/static/images/organizations/together.png +0 -0
  157. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  158. helm/benchmark/static/images/organizations/yandex.png +0 -0
  159. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  160. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  161. helm/benchmark/static/index.html +0 -68
  162. helm/benchmark/static/info-icon.png +0 -0
  163. helm/benchmark/static/json-urls.js +0 -69
  164. helm/benchmark/static/plot-captions.js +0 -27
  165. helm/benchmark/static/utils.js +0 -285
  166. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  167. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  168. helm/benchmark/window_services/ai21_window_service.py +0 -247
  169. helm/benchmark/window_services/cohere_window_service.py +0 -101
  170. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  171. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  172. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  173. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  174. helm/tokenizers/ice_tokenizer.py +0 -30
  175. helm/tokenizers/test_ice_tokenizer.py +0 -57
  176. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  177. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  178. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  179. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  180. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  181. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  182. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  183. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  184. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -7,15 +7,15 @@ helm/benchmark/data_preprocessor.py,sha256=aNdM-o2t4qkLIQHiQeWUFg03DjjJ8HTBIphYC
7
7
  helm/benchmark/executor.py,sha256=simd7SdJ7TciUpoq3D0uz_XUSCZj5KIWCIP57FYm4js,4906
8
8
  helm/benchmark/huggingface_registration.py,sha256=unEBO21V8K3-Ya0xLqjO9H1oq7RmU-f1MYV0tCIbXzY,4578
9
9
  helm/benchmark/model_deployment_registry.py,sha256=BjL0ghHgO7_Z5jZZ7kuSOj9saegI3BivaL-b699C0rc,9527
10
- helm/benchmark/model_metadata_registry.py,sha256=fXRJOLUIrLOHUG5duncEqhnpmfb9hyloUlGbOM2L9ds,8194
10
+ helm/benchmark/model_metadata_registry.py,sha256=m39FqNaGdxP4r7W7Vmq6r-gOLjYtn_5WmRNsGzci6d8,8283
11
11
  helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
12
- helm/benchmark/run.py,sha256=WNj10uNCqxwS2pCmt_s5Bn_JIC-NItEjK1PyQl9SXmo,13193
13
- helm/benchmark/run_expander.py,sha256=sWfcL0caHTsp1NqqsGrG-fZaIbScY8LECJqQMVIPZtE,51191
12
+ helm/benchmark/run.py,sha256=cPJh1Rwit8E_Kjf8Te2D75cd19ag4WgS2YrHHu2Fc8Q,13997
13
+ helm/benchmark/run_expander.py,sha256=YOTYbewbHLi0N7_fM_86Nke4U0wPwdeXLv47_CCVjQw,52659
14
14
  helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
15
15
  helm/benchmark/run_spec_factory.py,sha256=hp29n_Stb7RMwRm2jrP_qpyzxi8X8ojdqXTFN3KRSiY,6978
16
16
  helm/benchmark/runner.py,sha256=zlHDJ2Ys5-HxtXcwpkXcrdfXy_i886fBcq1iNeLyC3Q,14669
17
17
  helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
18
- helm/benchmark/server.py,sha256=ysd5MT1TDu65NH-OzIGf9wmZlr8FHNRwoy2ybjSc5Yk,6140
18
+ helm/benchmark/server.py,sha256=kaGpUzBwzprmTDiMcy8-sfT8KfVEOb0wWytWODsAQ94,5925
19
19
  helm/benchmark/slurm_jobs.py,sha256=eNCAoaWDfT0Wk32ZJRIGo-x8kgjhDPnPB4Xrvw_eLB0,3225
20
20
  helm/benchmark/slurm_runner.py,sha256=Tozimrjr2R6mlKHcmrGgxTy9ga-ArIW6AoAWtxqzw-M,16567
21
21
  helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5drG-4Y4UhIM,2219
@@ -23,7 +23,7 @@ helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCv
23
23
  helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
24
24
  helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  helm/benchmark/adaptation/adapter_spec.py,sha256=K5BwqTe2iimjswdw_SONlJo0xt-T-o5KH7VqxrPaov0,5072
26
- helm/benchmark/adaptation/common_adapter_specs.py,sha256=-ILsVxWjpEE6an1ncrRRrLkdP5ky_-2GN1TxSxJo38M,10449
26
+ helm/benchmark/adaptation/common_adapter_specs.py,sha256=Er8aMbDi8RTBtGWjcI08E2mRDl5AoBzUaBT1EY38Nlw,10515
27
27
  helm/benchmark/adaptation/prompt.py,sha256=n0Ka3RGSWMr3CBnJrPNPy626x9TJE3k677wKbG8hO9A,2133
28
28
  helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
29
29
  helm/benchmark/adaptation/scenario_state.py,sha256=mWEhgzk18SVoMEuj2pSnc_r9JrGAHLdOlteHJKUMA5k,1961
@@ -35,12 +35,12 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=F7Aou6r9CZ1xEuAX
35
35
  helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=BbcBEJjY8Cp58me9sUktd2p3dEVFL8ZJ7RFfus3hSYE,14997
36
36
  helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=LhZHmciP8lAfu7T0p634GOPTHrJR7qRCRRIxPgVlW9E,14873
37
37
  helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=VJ66MfIGQWJg0VXCV0MJEMwF9Jx1DeJ7RxsgYlOTx_4,1889
38
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=k8wSxv9pK8wtbQNBzWYPkGEUKJb8tcVi41Y1M3fPQT4,3985
38
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=S38Y_MjLRE86LS9RfB4qHmNy5x5n2KyYa4DtA63lees,4402
39
39
  helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=aMe-y4iiyEnM7_bqIoddeZBsVLoDxMmjKY2eZKB6Y2Q,2156
40
40
  helm/benchmark/adaptation/adapters/test_adapter.py,sha256=0-JrYnogZu4kENQG1eQMXHWnuSurCLRbkLpDuSnfRqs,745
41
- helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=TM6WJpWShsu6KuDzlofYHd9DNPj86Hjudubp_sqrhFI,12734
42
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=f_bggObKUxiV5XyYHHNXsM42HzM0CDzvR4uiIoXTE5o,7997
43
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=v6LLmVTopXNfzo9Qzq16EmmPPivFGGs9LuaPDJAX4vY,9506
41
+ helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=NyhVTvLznCVMB-DJeX2DRjWx91XmW3FBcrkm0RN-fJU,12766
42
+ helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=BCEhKRVEDKPHsLKhpnIv0krV37a8Eu78r8EtJxH_MXA,7980
43
+ helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=BoozcN0zPWwk6HKEPN0b61ieqwk5y8bwKvr9m8DR_2k,11874
44
44
  helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=o7CGClyVWYOuJ4G56-whq5fTvCr7QIn51Mo6DTdvwg0,1881
46
46
  helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=bvY8xT2ak_3WG4m2Z5bCM6FLImPIWG1qAn9H2ZNwNv0,6359
@@ -52,15 +52,22 @@ helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
52
52
  helm/benchmark/annotation/air_bench_annotator.py,sha256=9W3zLO2f4OzxGdavkDI2dDUStxpExa7sgrI-ATGG7NY,3048
53
53
  helm/benchmark/annotation/annotator.py,sha256=2UIXY71S5dRaZBLb1v4lcv8-O6pyJ9zTeSJl78AEWGI,1538
54
54
  helm/benchmark/annotation/annotator_factory.py,sha256=3Soh0V3lbsIR_HGHLg-XTc3eKVRj7SL9lLT_AoqUVTs,2997
55
- helm/benchmark/annotation/live_qa_annotator.py,sha256=IlUV4K-ddbL1XsvIgBAfsLH0_bdKx8kyDev1G3Kwyek,4364
56
- helm/benchmark/annotation/medication_qa_annotator.py,sha256=7LRmx2a1JODP5puAM0IH0HFTextfeLOzK7ef4sw9XIU,4129
55
+ helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=kpnIrydou3THgEFealGZyGneVKxgK5wwQ4kiMbDzJH4,2974
56
+ helm/benchmark/annotation/call_center_annotator.py,sha256=3vHsgJD24PaR4rRTfLD3wvwvbslkQdDHLokggFxijhI,11233
57
+ helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
58
+ helm/benchmark/annotation/harm_bench_annotator.py,sha256=z8EX1F7chOf-sZ93aognaTMmOqQDgWEa4KO0LLSABjM,2853
59
+ helm/benchmark/annotation/live_qa_annotator.py,sha256=I8wfDt8-iLC_C77r7fBjn9jdoXatVc_pJ_2YEWv392M,3474
60
+ helm/benchmark/annotation/medication_qa_annotator.py,sha256=TWjB3BIbBR_jVvrp2kF0PJW2p1U4MoosrSJ-b4QTgXE,3223
61
+ helm/benchmark/annotation/model_as_judge.py,sha256=CffsM05JPZbtLY9xFi1qOuy1JY4Yp-qF_OWrd_YC0yE,1737
62
+ helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=ztqagaM2M0OPKSMCo112_regyr2rDE44zpb0_HESRZs,2699
57
63
  helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
58
64
  helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
59
- helm/benchmark/annotation/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
61
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py,sha256=yRifoqhGq_mQkkRcgKCFpGrZaI9gochOXYiCU8oY1KE,2477
62
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py,sha256=we6K1BynV907ZMnGI2zb_tru1uw2iGEI06Wtbnus23w,4010
63
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py,sha256=rvzdQCaVFM6ovF28TSUnNmB47f2hidlaZm6vO4DJpso,6404
65
+ helm/benchmark/annotation/xstest_annotator.py,sha256=pW3Dgu77ZoS5hVoapn-FsK3KQOHGHiRLyaKpSqnMRLg,4149
66
+ helm/benchmark/annotation/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
+ helm/benchmark/annotation/image2struct/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
68
+ helm/benchmark/annotation/image2struct/latex_compiler_annotator.py,sha256=drbxogMMGwGxgVFbhT7hxPGDh7uyhptlmEmeP1Gq2xM,2471
69
+ helm/benchmark/annotation/image2struct/lilypond_compiler_annotator.py,sha256=odIGciLX2oVq_O8_H15lWUZoSfVvY-jRb0ILjs7GCIg,4061
70
+ helm/benchmark/annotation/image2struct/webpage_compiler_annotator.py,sha256=w6RKv7Fz__j_abKXnsTn98kHPv9tWKipdLW3NVT55m8,6389
64
71
  helm/benchmark/augmentations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
72
  helm/benchmark/augmentations/cleva_perturbation.py,sha256=arUkY_luc274YEMZocOos9rpAZVbEFZphbMlobAxTy0,29208
66
73
  helm/benchmark/augmentations/contraction_expansion_perturbation.py,sha256=yni1UR2fviN0Wig8MpOp0zzLn4H-gYocTjKTpxBwywg,4850
@@ -92,8 +99,11 @@ helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc
92
99
  helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
93
100
  helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
101
  helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
102
+ helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
95
103
  helm/benchmark/metrics/basic_metrics.py,sha256=7hk5PZL7d09uG1y7wHBhY_ox8hlXw-n7Yt_FDv_AIKw,20375
96
104
  helm/benchmark/metrics/bbq_metrics.py,sha256=Dqccr7GdfKNs1S_1QSB75d8AY7moovEPAqvacGfrCAE,6157
105
+ helm/benchmark/metrics/bhasa_metrics.py,sha256=Nw5fdZrYedYUEVJXFFnGSdOBxJ4-99GELd699TBmcSg,6958
106
+ helm/benchmark/metrics/bhasa_metrics_specs.py,sha256=fwXd1fRoeizd4kVQfLZ9ny-PzHTe1ieFKsGesiPDef0,440
97
107
  helm/benchmark/metrics/bias_metrics.py,sha256=GQ4CwOk1Sa9g-LcJCxcoQLD1vWY2Hvujck9l-9qsmf4,11418
98
108
  helm/benchmark/metrics/bias_word_lists.py,sha256=mx5JjW3mHffXIqo4GcQN-zENUEttBqQnEjPTz3J3J_4,13909
99
109
  helm/benchmark/metrics/classification_metrics.py,sha256=uB23jRFzkmtJgs1sTO5pPjdV_mOg35gWubjGS8pynLM,5654
@@ -101,7 +111,7 @@ helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIef
101
111
  helm/benchmark/metrics/cleva_harms_metrics.py,sha256=c_x9MYg8WjM1yym1S374GKxH_lwP6wZOiXrknf0mJis,11077
102
112
  helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
103
113
  helm/benchmark/metrics/code_metrics.py,sha256=e0aqLcxBAdCc0qAqebzK40Ilv2Py6xZbosud5v169x8,5121
104
- helm/benchmark/metrics/code_metrics_helper.py,sha256=h_y3BsYCbeh8cDe2LDndA8K6nkelV0J76qxlq2cBmsc,22334
114
+ helm/benchmark/metrics/code_metrics_helper.py,sha256=UNai154RuhYRZM_YK-rveLct4Ui5iEBNPYmYdKq34Xs,22712
105
115
  helm/benchmark/metrics/common_metric_specs.py,sha256=k_IW0A6BevAskS0_C6ZaP9XvIfrdLI974_NhC89rMoo,5846
106
116
  helm/benchmark/metrics/copyright_metrics.py,sha256=X9j3YsfzWEoGpgPpIvCzm18-JggLAW5QFooifE1KqaM,7729
107
117
  helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=TcyklpfcTMXrpJeaHQfxS9QQxe-gwmT-HD0g_DmIFLQ,3253
@@ -130,6 +140,7 @@ helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bz
130
140
  helm/benchmark/metrics/ranking_metrics.py,sha256=5hDRapsxx_cmo-ag_80kOQnrgZn3lfVsLZVtWxuxH-s,17391
131
141
  helm/benchmark/metrics/reference_metric.py,sha256=RlIM_PFTEkBo0_EEMq8d4_BSagNSBR_XyovMtjDeqqU,6026
132
142
  helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
143
+ helm/benchmark/metrics/safety_metrics.py,sha256=SsVRJXduF4S6C3sOozkOS-0gwy-Ff0Pz9C69jnh3Y-A,2355
133
144
  helm/benchmark/metrics/statistic.py,sha256=FuxNxMtAfiCkOxBS9KHlhEyxe61e0YXt2emvsufgPZQ,3424
134
145
  helm/benchmark/metrics/summarization_critique_metrics.py,sha256=Lf7PDuce62HDzyofsyxaOvH0QvzcaS-vJvDWtIs8xKk,4694
135
146
  helm/benchmark/metrics/summarization_metrics.py,sha256=laLMGRDy1wjcFvgSWXvzOZwBXshkmPr0S2Ofu79Z01Q,16461
@@ -178,7 +189,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
178
189
  helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=__f7NVsVQatDFn_2Bfx7ObiQ68kAMvyyClApaTxqx80,649
179
190
  helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
180
191
  helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
- helm/benchmark/metrics/summac/model_summac.py,sha256=zEuTI75eBBZPzJp0j2kFd2HejouhugC83nSWoVpghBQ,17412
192
+ helm/benchmark/metrics/summac/model_summac.py,sha256=PJ2lPa-JQPnM86N0T2rPcAviTNHmSV721PTnbL1eGnk,17460
182
193
  helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
183
194
  helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
195
  helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=WeNP4yiM4TVrD9Kid-uVRmWIVDqETnBsMycZmIBiTZ0,665
@@ -187,13 +198,13 @@ helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py,sha256=5igmDhWu7H8-
187
198
  helm/benchmark/metrics/tokens/free_token_cost_estimator.py,sha256=G_6UK6Js_NZ_eqY0ZQnrC9QJVMERGhV1f6v7xq2lM-Y,461
188
199
  helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py,sha256=9zjtuxMbvfPBYuxOYMFEmNP8ZKFDVywrZ08n6nrjbA4,1520
189
200
  helm/benchmark/metrics/tokens/openai_token_cost_estimator.py,sha256=7jgjcgmbcVfLA_nTOrWXKIF8TEXng_KnE6cSgsSXWmE,1398
190
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=l9UQZ0aAIhCYuFbIjU3j3A3XXoMvwUvz1kvRtlDbtOo,1079
191
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=_wJ3E3LbJB9XPLixTH82BYQbp32o3oij6Sz3lsZL30E,2648
201
+ helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=eVnCYhRq2LT7F4BXsiIDb1bkmhvoHLgDAdMR73Xz5p8,1071
202
+ helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=h5ggZCGpgCQUjfqS0JS4Bxmx7NBaT4w43pXAgbCEnw4,2628
192
203
  helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
193
204
  helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
205
  helm/benchmark/metrics/vision_language/emd_utils.py,sha256=KdZdcqu3eo016FdAjAm_83v92-wWuR90EPsTogfTcok,15196
195
- helm/benchmark/metrics/vision_language/image_metrics.py,sha256=HyXeZiDszSV1Q99ScqeS_xYvyrp1dlWBYahfxt42N3E,23554
196
- helm/benchmark/metrics/vision_language/image_utils.py,sha256=XeYF3E6MnYyPJ5hYp4TtiTP27-y4S8LTBH5bZVcvJFg,3758
206
+ helm/benchmark/metrics/vision_language/image_metrics.py,sha256=3fh7vR4J2arFXIT6hLBNdR18PKxQBLPBbVrHWv0hBeA,23551
207
+ helm/benchmark/metrics/vision_language/image_utils.py,sha256=4E0NYh09O6-5sGhAPo6KZqYaZfBpCtuYbD3vLt-wQzk,3755
197
208
  helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
198
209
  helm/benchmark/presentation/contamination.py,sha256=PiIdcaD3-xfExjOmyL5q4Ao2ASa-OlScJAB9u1Zxe7o,2811
199
210
  helm/benchmark/presentation/create_plots.py,sha256=2-ZOuEdRwqqF1biRmzWggMZjmODoxOQOBoz9GT7tVww,28737
@@ -204,31 +215,38 @@ helm/benchmark/presentation/summarize.py,sha256=2fJ9BYOJRxe9eBylLUK3qcZZwAwRtJF_
204
215
  helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
205
216
  helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
206
217
  helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
207
- helm/benchmark/presentation/test_run_entry.py,sha256=OM-027j2A0Lx-ai2zBprOxSqzZhS_dh0OKw3ThocZW0,751
218
+ helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
208
219
  helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
209
220
  helm/benchmark/presentation/test_summarize.py,sha256=UfSp33Q9xvuGnPYfFmLJdH5y7KWp9qbZprRMyx8LGP0,1618
210
221
  helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
211
222
  helm/benchmark/run_specs/air_bench_run_specs.py,sha256=VdXis1HN8_KLrMHDCVi0J7WdqjRjAGbZMhrsnpzC-Kg,1604
223
+ helm/benchmark/run_specs/bhasa_run_specs.py,sha256=2m5dXJKP0ojdACgvSREiV25SB9T6IL9JeYHYjhL7xX4,23480
224
+ helm/benchmark/run_specs/call_center_run_specs.py,sha256=GX5P2tTj4YS037EEZ8so_mX9LlPWyfJ-pF8ICoErpio,5324
212
225
  helm/benchmark/run_specs/classic_run_specs.py,sha256=Cn0z-6QY-ehbLaHJMvCwjw11DFBQgUyqVCaXwTVFyJ8,58331
213
226
  helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
214
- helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=fDyIxmOdgLLWVtwBfxcnd3nFnBZNFpJHbcM4Kyq5gZA,14315
215
- helm/benchmark/run_specs/experimental_run_specs.py,sha256=7aF-Ox8iBC2obfJkyKwobJaCjk1SqxtSDuRv_RxA3Eo,1310
216
- helm/benchmark/run_specs/finance_run_specs.py,sha256=7DCmeBQpETQjK0fvUKS1nDIbM_wxTXb2GhXcjzIDyIE,1181
227
+ helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
228
+ helm/benchmark/run_specs/experimental_run_specs.py,sha256=wduA6K3mpIRHmr8g3h0c5k7rUsKiPFOqJktdbbGxtoE,2950
229
+ helm/benchmark/run_specs/finance_run_specs.py,sha256=hCaB3uBSlTZbFztdsDqdxuAdYQM20S9m9rXYQITgL5M,4161
217
230
  helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
218
231
  helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
219
232
  helm/benchmark/run_specs/lite_run_specs.py,sha256=ViCPJ86Aah8301GTEk6z4_MtP0g8iik33t4GudobhWQ,11113
233
+ helm/benchmark/run_specs/safety_run_specs.py,sha256=ZTvLbRBxHWMIKPapugNfXPStJRBHfiaiXUHgpWMBONY,5469
220
234
  helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
221
235
  helm/benchmark/run_specs/unitxt_run_specs.py,sha256=ejp_knrcIjf0J4WiKj9LTgDTcUr29-XFZYHYz0w_dkM,1518
222
- helm/benchmark/run_specs/vlm_run_specs.py,sha256=uwnk9DHZKQj8nnC14kGiSN8xKiZfpigoz5S86TiHc4k,31118
236
+ helm/benchmark/run_specs/vlm_run_specs.py,sha256=A-e3npwbqvUEHvC9iGta9N1zFCHfoP8C1_vWBVLf8ns,34134
223
237
  helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
224
238
  helm/benchmark/scenarios/air_bench_scenario.py,sha256=WUZvsUTqlsjNzQsd2baZZIgO30B4Zf3g0QjsyEaGmLc,1772
225
239
  helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=Wyt7J5BAvAqC5JTqCW4fh7ex9-itX11P_9rLTocqvtk,4973
240
+ helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=Ic0ak_5vGHeNT5PFgOptl-Ns8nQuM5nKpiQlhB1H3X0,3158
226
241
  helm/benchmark/scenarios/babi_qa_scenario.py,sha256=S1tPQY2x1I3hQL1JQ6wvUwvKyiSe7SqpRSW6N3_T0mo,5043
242
+ helm/benchmark/scenarios/banking77_scenario.py,sha256=pVA2LXB9uJ12GnjiEvjhRV-P8YNEjpFhyZr-J8MV2SA,1747
227
243
  helm/benchmark/scenarios/bbq_scenario.py,sha256=lT1XKSM-PXYtENI-ryScC4yb1TtII7YoH8kt_S1dZQo,9579
244
+ helm/benchmark/scenarios/bhasa_scenario.py,sha256=N7SYVwUOLAD_WZtkIYoCnPuRb_nFbIege-5_j4yX6nQ,70915
228
245
  helm/benchmark/scenarios/big_bench_scenario.py,sha256=bSk8Ia4u_6OqMjiyadpYQAWN-8GFWqvd3Ft3JiVGpi8,8081
229
246
  helm/benchmark/scenarios/blimp_scenario.py,sha256=o1MDcHT14KFDET4K9otx8pDiIgXrhsD19pvO0mR2ADU,6260
230
247
  helm/benchmark/scenarios/bold_scenario.py,sha256=NEfECMVzlVP_yo6sOuIzj6vZ5jd72_nvtEQ1lWrq85Q,4106
231
248
  helm/benchmark/scenarios/boolq_scenario.py,sha256=rvSp5SwXMCVzBo5BFxfhj1Xv06_ksqKrtTQR7nPiS-o,8013
249
+ helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
232
250
  helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
233
251
  helm/benchmark/scenarios/civil_comments_scenario.py,sha256=VO5G-cQ9qctmBN0O76uSewnO_mFslMo5mbR2ZTrjuds,4851
234
252
  helm/benchmark/scenarios/cleva_scenario.py,sha256=xhwZ616iz0CN3fYIfrXHcV1XlcRQjyPSzML8fq8D3l4,57939
@@ -245,7 +263,7 @@ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENO
245
263
  helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
246
264
  helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
247
265
  helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=goGmHtN7MYnAQIXhffZZhuuuMWN0gHNOXyI9_injiZM,20119
248
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=mbUABlGhpDur6x7z_q5iDqJRMBZ2d4ZI3KdVWNnJagM,2859
266
+ helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=Qkwhg1s5f2_5rnCoX4BxjQGKKGVRp2StIwONvBjJVqo,2909
249
267
  helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=AI8HX16_Lw9MKqrck62q8IFLUU-P5hxaOEHcmTS4rdA,2928
250
268
  helm/benchmark/scenarios/dialogue_scenarios.py,sha256=-I7FY6q1b11zpFd1_oAgar5qlfaFcXsNCKGVln9etPI,5629
251
269
  helm/benchmark/scenarios/disinformation_scenario.py,sha256=kQi0MVVoSDhx2vOTnUaCIttPXMf8zz7Eld2FD_77tnA,8504
@@ -253,10 +271,13 @@ helm/benchmark/scenarios/dyck_language_scenario.py,sha256=vMxND9wPJenrGlCLhSw5Ux
253
271
  helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4cv7u2lmUFcigkAX_eMwIn49Pa3p-aHClkT-r-0roLU,6616
254
272
  helm/benchmark/scenarios/entity_matching_scenario.py,sha256=YjBX61TlL3CDQ3X6D-JyR-qlOYGLdoRXJxl9AEeqxYs,7022
255
273
  helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
256
- helm/benchmark/scenarios/fin_qa_scenario.py,sha256=pXUeJ34KiRSlEjYERgXqVSbr7zxvdXnOuMSpXvnUw5I,5782
274
+ helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
275
+ helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
276
+ helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
257
277
  helm/benchmark/scenarios/grammar.py,sha256=Pb9vEP_0Ki87UdQCj1ym7QWJ24M4DRP6TXB5d3GnhLs,5597
258
278
  helm/benchmark/scenarios/grammar_scenario.py,sha256=bl-Cm9caDs077zSu38mzaS9maZ2gM-QazgjOEMFvxYg,1454
259
279
  helm/benchmark/scenarios/gsm_scenario.py,sha256=9fV2SEw3ocKNAD-TrDZZTpq4l7mbttQQWbO0YNz4e6k,2613
280
+ helm/benchmark/scenarios/harm_bench_scenario.py,sha256=wzzia3HlfwALgRLFLABv3blxBh1ras-YtHk4iQ_EX30,2454
260
281
  helm/benchmark/scenarios/ice_scenario.py,sha256=vvk11cFPGUhg_CcGh3wEfVsGzrvMFgkByN-xcF-OOjI,16473
261
282
  helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
262
283
  helm/benchmark/scenarios/imdb_scenario.py,sha256=X1k76AweFECCpYCXy8HuvjRbXbfmDfwK3SES_t_wkUs,6174
@@ -290,19 +311,28 @@ helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=zVL1gb3eVz-LbK2hfdnRR9ItaM
290
311
  helm/benchmark/scenarios/quac_scenario.py,sha256=SRAhMp6TAsmTRq6VRONLl3SEayFIe23He_mBhzkZ7qM,6628
291
312
  helm/benchmark/scenarios/raft_scenario.py,sha256=_5QhHS3opxxML7Rek6F-q5NVOf0M2UgbC6OTnQZ4C1U,4452
292
313
  helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=GkgJo_13MWQQQTZbhlknvTR6ZrYr7NEn1WdMZrPs4y4,2400
293
- helm/benchmark/scenarios/scenario.py,sha256=lkJgqDAbnFQoJgAyAllqvlyIIH1nSQ3dnoVYh68tQrU,8232
314
+ helm/benchmark/scenarios/scenario.py,sha256=1HC8EjiZ-5k5AJhxtwRreLe3hBbTyZJWrs-Aa3Uq43Q,8229
294
315
  helm/benchmark/scenarios/self_instruct_scenario.py,sha256=jZ2MksT4N_4g_sp5egw7ycrsM-Ya786_RFmiYYdMvG8,2285
316
+ helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=grYOqccYBtB4m-_UUV20EOXsY6tkukwC6kwPOBAmdnY,1223
295
317
  helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
296
318
  helm/benchmark/scenarios/summarization_scenario.py,sha256=MlNMgsY369DC04nhMUdG2o9Ydi6yze1fGOjC0bK-UwQ,6847
297
319
  helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=pzifpsJJbucmTjujNqQnwQa4Y7wpQjkS6QjNXOrgTAQ,3096
298
320
  helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=1b3e3WpFMNBV3li17-0Ug6QCSKO4qRFaWDF23bYNsvQ,16326
299
321
  helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=k8IGK6VABOr6wuha4HynP47peoAkmIViAVhScOtCANo,8345
300
322
  helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
323
+ helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
324
+ helm/benchmark/scenarios/test_ewok_scenario.py,sha256=9piplj3i53_-xNSMkIN47JYEU3JB65WgEPT7qdyK4Ng,953
325
+ helm/benchmark/scenarios/test_financebench_scenario.py,sha256=EFZLJXXBoyjlTiMQFaQ6MiYkve1lfQDjQWjn4BjqgAQ,1184
301
326
  helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
302
- helm/benchmark/scenarios/test_math_scenario.py,sha256=s3-CllgCB8DL9-L4DmJ6Zcf9xi803nWYN84KlhN7PhM,1016
327
+ helm/benchmark/scenarios/test_gsm_scenario.py,sha256=I-Sl8Sg8kmFd7u0zZbwbNmeFV1mQLuOHoQ1cQDDwovs,1123
328
+ helm/benchmark/scenarios/test_legalbench_scenario.py,sha256=FqbgwBAhHWyTIUYSzI5FOnTDx0A3u1o2ANKa_6bfA4g,1212
329
+ helm/benchmark/scenarios/test_math_scenario.py,sha256=ieI8-c6yx-3U3iaEz2yiCGSwnQTBJE_06-dMKX7a8Vk,723
330
+ helm/benchmark/scenarios/test_med_qa_scenario.py,sha256=Ekp6r5eYPkCxV3FCzVvLemKxlhENhelqdO0Mdhg5yFo,1515
331
+ helm/benchmark/scenarios/test_mmlu_scenario.py,sha256=mxEsTydKUOt8OD1Ei82nPgUFV1Tlvu5Z6drEMToEURM,1593
332
+ helm/benchmark/scenarios/test_narrativeqa_scenario.py,sha256=Rac_OrUpd2ruT95YvSrmoVz2Jpycgq3Roiyogm_0aAc,6420
303
333
  helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
304
334
  helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsHzMZSzOe8j0FrUQmMyZM4,1736
305
- helm/benchmark/scenarios/thai_exam_scenario.py,sha256=FinZuwEz5dDcNBxG4OseVOnBWlgg9lT1LzMdIWSjG94,5838
335
+ helm/benchmark/scenarios/thai_exam_scenario.py,sha256=5Q-KL6fVrk2FKApVyY2ulreFduwBaUG0iJOsJ8M6El8,6008
306
336
  helm/benchmark/scenarios/the_pile_scenario.py,sha256=RqU8yXQJ4FkmEc6rO9J3QMXenyUZrsEZlLAQUx4-Wnk,4995
307
337
  helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=iqL-tuqUQZjF9-DKAnI6wV-oLGC_I3aFuofdtJYHt8E,6035
308
338
  helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=CRlPxVfkg3HPZV-lUMyCUSFOiAqg5IIPt-dq3qR9LU4,2096
@@ -312,6 +342,7 @@ helm/benchmark/scenarios/vicuna_scenario.py,sha256=zLwLuEr6n9VQjVxQwgFIM-os23kJe
312
342
  helm/benchmark/scenarios/wikifact_scenario.py,sha256=cOVKgDisBdjPcmVMCLhTekdgX3hpDJUT-aKbvRSaMoM,5791
313
343
  helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=PAPkmZdC4aIBQ1k29dDvTFBEFaPV1ZR1Ifif4FHoZqs,3087
314
344
  helm/benchmark/scenarios/wmt_14_scenario.py,sha256=NArkTZntYdYlegHo_-fkzeyCUOjosOONQKlquPbZRxY,4498
345
+ helm/benchmark/scenarios/xstest_scenario.py,sha256=wpagohfuFE1juuXjq0dleSIHr5Uk6hnClIv-wABbzEI,1285
315
346
  helm/benchmark/scenarios/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
316
347
  helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py,sha256=c8zcoGCOFqBGE4TAEx1uLsUmGXw_jIS8alI99ubGeDA,5477
317
348
  helm/benchmark/scenarios/image_generation/cub200_scenario.py,sha256=7p3G4mJRc8QHR4Mw2GLsfAFuJcEe6OeZbezVhbyc55E,4103
@@ -332,99 +363,84 @@ helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,s
332
363
  helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
333
364
  helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
334
365
  helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
335
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=zXR0LmXsD2tv_ovJsbY_HP53kdiFOvty7Y_Ai3ZCrT4,3037
336
- helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=jwGEouY30Yy5U9lRUbv0XAO98gUJ669g0dhdDCGQ-8w,4097
337
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=82qplX4gJ4GsSVhBjwrsVU46TAHh-jym3F_M5A-odRE,4608
338
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=3pBAQgOsnSyMCzt60s1m8Kf_fEJ4C7XgCDbtXatTlX0,2599
339
- helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=sBQfqAxmP-Z0ifCgwTbP11aPsKA4vogcWBqSDiKlbE4,3512
340
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=7KjsXiAaiVHDRSyW08jZXNUTWogP3Sr2Og5ViT6Xz8I,3832
366
+ helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
367
+ helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
368
+ helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=lfRHjhhXCo0YeDQe4_gfSHCzVKtqQVZ6DALLABcCmtI,4637
369
+ helm/benchmark/scenarios/vision_language/exams_v_scenario.py,sha256=pLD--gtL5q7jLSWQ8iwAdsiOrTJ_rBsLbwWMWKRhPbs,3853
370
+ helm/benchmark/scenarios/vision_language/fair_face_scenario.py,sha256=V6_1Kl2nWDRyHvwnKcSxkP0DChzKDBW0i_-t9oAxps0,4721
371
+ helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=CDutFh1PHLyeMdJ9HojzYKE1zJidL9ktcsfn9uHNLZY,2612
372
+ helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=k4E6JAN8a_KT1jjV2Ch3K5YhWKJ0f-9iCXLO-_2Xl8M,3535
373
+ helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=qiLLdiSzhnSyjmqCAvMxjhcZ7yBiX37L1cdsZvHL4ds,3845
341
374
  helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py,sha256=7GK_jAOfCgRIGiN_GInDePwuT2wZqmWHp1rqdx18xQg,4994
342
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=kzZHeyWQHUphUfAixkms2t3-KKfHRjwIKi6qm-lMyXA,4728
343
- helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=Yw4zxeYgUw8HKRR5ob9QEIT0bSPrdRUULMKCo_xzFpw,4337
344
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=ly77pj_TorwM1kN2sW7Y2AIGHOBlDkdzV0STvZTBOtc,4332
345
- helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=zxtdub2akvxPYEG12pkW2c57TIFqN38C7ucAXAHAdx0,5455
346
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=XQv7uv2m6EdbI7h0-9eDag4_bL7qE_78PuHB7c4SsHA,7654
375
+ helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=HnzA0L1Mm9rw9uyK-hnCGrxo33z_U_86TLnlELjDV6E,4738
376
+ helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=7ZHpRD7TdQQ-Mp5XQV5yyiLUE0k1KpgbLSYKLBJMxs0,4343
377
+ helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=cM7eTE4bpcIzLyEDye86Ud3rD4Id-0ju73EXjg0DYoI,4340
378
+ helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=7Aa3y0TWGZH3QrPDiqIMkj83LU2Klrzgcb46jv5uytY,5498
379
+ helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=deDMdg2-ORZPV623ngncDPlRn6z6cq_QbQtMu-z0Ydo,7665
347
380
  helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HUO09uM2rBXOfCsxzwovmwtihq53xjuzDOtQO_S3J4I,4161
348
381
  helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
349
382
  helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
350
383
  helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
351
- helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=wVcTNUql4TBClgm7oyLq5cmybsnlurc0MblqRRxXRyc,9929
352
- helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=uFkzMMsjhmuSYo3v_QdfJFX6RFse83JjzMfMa3ynvV4,3975
353
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=5MwGb9BOyB2Xy70BGYZcjencf0ZskxBuzcPa7ABRuww,5106
354
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=bH5FfAgwyzpVMPOJKNCmOgpX-lvJF-B42uVi4m1mY-I,4231
355
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=2foCM7ik9RvYahauKIoNAxkGiluOYuT0w0r7FZi-MQo,3621
356
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=hJ3sOSpPnOCwLtpVnfasI_X89oofI-2PBRjMnx8eiVA,4139
357
- helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=2hY-qngKC69ZL9SHNei3IK3C2PvJDWvwLFVQ8yNSOVs,5196
358
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
359
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py,sha256=ImhfiC_y_hihAGvlj9zRsaoW614QFCBopBD2KxnbSs0,1805
360
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
361
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py,sha256=RSLYpw3BsIIxkhS-6RfVM_UhjmwJDMoA3JQl3FBjv7I,1147
362
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py,sha256=_pgW_aNaM3E7MTl_tNExupvENdtAH3DvZuSwZIiopCg,837
363
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py,sha256=ovg8-FfJ8_I1xbajFGSLvERZIA1fQjaUn0zd04ZbI84,15316
364
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py,sha256=dOt-gif-4Z0JekI2KAel4KS1zyvzqyqoFLP3xoe5DKY,9710
365
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
366
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py,sha256=i-i0mlG5oRRDNYNqP7o7Ul56iL02p_anJoThXaSvFiM,2826
367
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
368
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py,sha256=JpTiCSNcqX4wRpH6Cl07jM5wMkpZ5OeR_hjfK_V-Qok,943
369
- helm/benchmark/static/benchmarking.css,sha256=7PsUCff8YcoVxyWuALItfB4TZL55T7MbVQpoc9nZznc,2104
370
- helm/benchmark/static/benchmarking.js,sha256=lqEmoAikBwycVBf1h-et3ZmHKW_DcwxzlwmDez2A1EU,54531
371
- helm/benchmark/static/config.js,sha256=kIfkgr6gaMdFOAdqB35EvuBohq0DWYSQZbe_pTK09VM,103
384
+ helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=D3nNu3uU87eMDiMZZafuRTntXjwbqPaSDygUgQm45F8,9943
385
+ helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=gWrBG5U8uoU92JPGNm5kuzo1GekoJo1rKQaNhv6MYGA,3996
386
+ helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py,sha256=OJtiGhSN_KYgEz0VGXjCjQik_Xihtgiali70Z00XOzk,2083
387
+ helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=YNwuIMJBo7wwftx-T5tCYmGo2oy_794fZ330lkDyqb0,5171
388
+ helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=DxGZ7EL22SzxpAkuiA5twuGVTm96wG_RBg3dU3Vh_c4,4241
389
+ helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=wRa_OuOdyf-qcy9hml-Kj6YtVP5MDzeTbGcqva6LqdA,3707
390
+ helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=zCnkiSya-PHc3ywAhmw03bFdsvLCxAUwGfE6OviEXDQ,4153
391
+ helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=cC8_Vyqw2f4K4hJY-eo9ptj6ANfWgiFAK7b6OOTIPLI,5239
392
+ helm/benchmark/scenarios/vision_language/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
393
+ helm/benchmark/scenarios/vision_language/image2struct/chart2csv_scenario.py,sha256=qcs3o9dPsXoeaP0bu9UVZ6P0GPEcRLoaqABxysLN6VY,1802
394
+ helm/benchmark/scenarios/vision_language/image2struct/image2struct_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
395
+ helm/benchmark/scenarios/vision_language/image2struct/latex_scenario.py,sha256=SnZuHATg5i764MAdgaGwjIGdjCZNrOqP83Y5jE_fkHs,1153
396
+ helm/benchmark/scenarios/vision_language/image2struct/musicsheet_scenario.py,sha256=c08cquz2IALY7PlpOoEfAjupKZmn5GDVZ1H8Gbj4r8s,831
397
+ helm/benchmark/scenarios/vision_language/image2struct/utils_latex.py,sha256=jW3_c63a6u39PJGJw6lM9pIa3dnF8CQgZlPNZdH0sfs,15001
398
+ helm/benchmark/scenarios/vision_language/image2struct/webpage_scenario.py,sha256=DJQIa8NaKV-nhkXEBuY97MJ8a1O3x-Yr6hACVa-67Ns,11117
399
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
400
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/driver.py,sha256=WBFbb3N_eHIa7OFvHQS3Pmwbmkl6r9VyobxlIEKhty8,2823
401
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
402
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
372
403
  helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
373
- helm/benchmark/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
374
- helm/benchmark/static/index.html,sha256=xIJGjMg0qn9eemfdBiNbTI0jzPfBD5x0v8HJF-dMqBc,3561
375
- helm/benchmark/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
376
- helm/benchmark/static/json-urls.js,sha256=AaULgfHw8OLfrQLJpBHfcC013uavQnlNNFS9vzb0qOg,1981
377
- helm/benchmark/static/plot-captions.js,sha256=bTR8gYx-QqF_RJyKX-L-eQP7hSEtawfJSoADCvgjKag,3011
378
- helm/benchmark/static/schema_air_bench.yaml,sha256=ePZAGL4X-yH4cAQvzS5uU44duCKwdDrMwDSvCC9y7-k,139384
404
+ helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
405
+ helm/benchmark/static/schema_bhasa.yaml,sha256=R3f48oqk9Va8rtSe9B93K_rCy_IfAhHZdTh4vNDdsOY,27444
406
+ helm/benchmark/static/schema_call_center.yaml,sha256=Mt7_rLG6IT701YrjiJdNb7HpoMVkFjabrawnBieUUhM,8049
379
407
  helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
380
- helm/benchmark/static/schema_finance.yaml,sha256=vZG0EssYr_BVZmyV4sZmRaeLFSX2ycjni8O_L_kGzzc,5283
381
- helm/benchmark/static/schema_image2structure.yaml,sha256=IV57vHTaZakH6EupIlT6PRjK8aI14OSNFYUAHD9QBxo,15593
408
+ helm/benchmark/static/schema_cleva.yaml,sha256=TDh-zcCzzTTs7bu0IWlY5dXYaTFhxly8sJIBGQdBvug,25401
409
+ helm/benchmark/static/schema_decodingtrust.yaml,sha256=2VPxzcyKYea7mx-qmswyVRjPfVatjVH4Rs3OU82mgII,15670
410
+ helm/benchmark/static/schema_ewok.yaml,sha256=MluPnZSy22wZLFB2pR7ycBRgUSvIUsqvq4qM0Vk2ur4,12113
411
+ helm/benchmark/static/schema_finance.yaml,sha256=OgsYMSFK__8ZZS96ktsgVRfM40-BhbOY15j9OlV-rNE,7010
412
+ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0yPsYgu_MB7uu5pwHE,19894
382
413
  helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
383
414
  helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
384
415
  helm/benchmark/static/schema_medical.yaml,sha256=hDk4834FKn-5cMr6pHcu1P60sh6cXJ2J0Z1ADIj2MSc,8455
385
416
  helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
386
- helm/benchmark/static/schema_tables.yaml,sha256=i4ylaq5yZoIEUvxPS8dniPQWKHZF5bz3hMgjNbzC_MM,7064
387
- helm/benchmark/static/schema_thai.yaml,sha256=25-PjBhZMHM89M01XxLQWNg0mdQnfo4H0XInF9ZzDow,7900
417
+ helm/benchmark/static/schema_safety.yaml,sha256=LEGt9EuwjHZX-oLVrBQushbL4YUQmIYpHCjlauK_tGQ,8099
418
+ helm/benchmark/static/schema_tables.yaml,sha256=PSk00UHgbMZA8xnAVE6ka2a-py_4rX7VDdodjYBqe-4,10400
419
+ helm/benchmark/static/schema_thai.yaml,sha256=yJUrevvgTJ46TpyXfNecW_B9urh7LPwSbBi_mT4ZngA,8348
388
420
  helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
389
- helm/benchmark/static/schema_vhelm.yaml,sha256=IZ1oAmEjnoWQ6YtMpnwZ2IQkXx86bJS1j3686mvtAGc,29476
421
+ helm/benchmark/static/schema_vhelm.yaml,sha256=ryxslQJZun-HqM9ib4rp3_dBVufa01jgdo1bsHccYSk,29943
390
422
  helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
391
- helm/benchmark/static/utils.js,sha256=bgN0PT53Dregc-nLmEmAEmg2psufWpS8jTf74WoypHw,7681
392
- helm/benchmark/static/images/crfm-logo.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
393
- helm/benchmark/static/images/helm-logo-simple.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
394
- helm/benchmark/static/images/helm-logo.png,sha256=GTqbrxJr0oQXbBRq-8v6afY5zB5x0M6PhEbKRIX9qIE,280667
395
- helm/benchmark/static/images/language-model-helm.png,sha256=mG0-bkdziXeiF0wOGd67y2jnYmVKJYqhD2N5Q8VIF8Q,26563
396
- helm/benchmark/static/images/scenarios-by-metrics.png,sha256=F7g9mvIYopm-n7sDGg-7I0XCyZvloKsi2wIq1i6da_Q,51331
397
- helm/benchmark/static/images/taxonomy-scenarios.png,sha256=2MiuCLaxnuHvwsWWJHnZFc-rvoQIi_tNIjDatY7I-Dg,100766
398
- helm/benchmark/static/images/organizations/ai21.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
399
- helm/benchmark/static/images/organizations/anthropic.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
400
- helm/benchmark/static/images/organizations/bigscience.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
401
- helm/benchmark/static/images/organizations/cohere.png,sha256=7cr4LI8WK9yPryQboyWK_T5baSND-d-tVrlPNflLQMg,8757
402
- helm/benchmark/static/images/organizations/eleutherai.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
403
- helm/benchmark/static/images/organizations/google.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
404
- helm/benchmark/static/images/organizations/meta.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
405
- helm/benchmark/static/images/organizations/microsoft.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
406
- helm/benchmark/static/images/organizations/nvidia.png,sha256=hvp1wZMwYxkfrVMvJs73PX71JwY5L8ZvxIH_fL4n6Po,27945
407
- helm/benchmark/static/images/organizations/openai.png,sha256=P4ZT5ISIlt6Dl0mOp7juSM4Y7dfyRNPqdc0PJuwNoqg,16877
408
- helm/benchmark/static/images/organizations/together.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
409
- helm/benchmark/static/images/organizations/tsinghua-keg.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
410
- helm/benchmark/static/images/organizations/yandex.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
411
423
  helm/benchmark/static_build/config.js,sha256=ER8utDIqVZi9uge7Qrk1gmlT88TOOkFF9xYp3j10m8U,165
412
- helm/benchmark/static_build/index.html,sha256=J0TrGE5-kOkopr-iSRHvvCzDL00w8Si-8OaIt9vSX0M,1149
424
+ helm/benchmark/static_build/index.html,sha256=YHWao7kJaMx9osFxRgfuCDxu-FwaBOWDhUcaAEVe7-0,1149
413
425
  helm/benchmark/static_build/assets/01-694cb9b7.png,sha256=aUy5t0DYCg4r52HDOmeNi1S2CHsnv3mE7ySokJg3Ouo,8903
426
+ helm/benchmark/static_build/assets/accenture-6f97eeda.png,sha256=b5fu2p7L_mnwg-p5jjPk1sFRwJEBRtGwXsVyQU_Runk,9537
414
427
  helm/benchmark/static_build/assets/ai21-0eb91ec3.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
415
428
  helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
429
+ helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png,sha256=bfyazxJvVs5GTSSlnm6nOb2r_jzo3TJybqF04S5Dxhw,69372
416
430
  helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png,sha256=fOEANHS8RymKaCzUWn9gQWebts2ghSmtW9Fdda_TjR8,7224
417
431
  helm/benchmark/static_build/assets/anthropic-70d8bc39.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
418
432
  helm/benchmark/static_build/assets/bigscience-7f0400c0.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
419
433
  helm/benchmark/static_build/assets/cohere-3550c6cb.png,sha256=NVDGy09xliCqZy2TKUAka-B90jVDB_VRCS9A2_sN7VU,4414
434
+ helm/benchmark/static_build/assets/cresta-9e22b983.png,sha256=niK5g8HYADkbhKM9gSVtYEdPegBS40zZXF4nNe9Fu4o,8131
420
435
  helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
436
+ helm/benchmark/static_build/assets/cuhk-8c5631e9.png,sha256=jFYx6Xx-SGYANpsSnqrlaQytYuOBOsTHhpqPJZk3EwE,30385
421
437
  helm/benchmark/static_build/assets/eleutherai-b9451114.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
422
438
  helm/benchmark/static_build/assets/google-06d997ad.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
423
439
  helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
424
440
  helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
425
441
  helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
426
- helm/benchmark/static_build/assets/index-30dbceba.js,sha256=WXT0A-yH9f-3wCwQ3rwKWTCIOOpjETQwOQyZt2OMAwc,77064
427
- helm/benchmark/static_build/assets/index-66b02d40.css,sha256=ZrAtQOMv7vRJwOA9urNRk_rs8hJljom_xhn-wI89g08,486795
442
+ helm/benchmark/static_build/assets/index-05c76bb1.css,sha256=BcdrsQgUFadqYf5z-wdFNosV_c2MlxV8xktld2BFKBk,489017
443
+ helm/benchmark/static_build/assets/index-58f97dcd.js,sha256=XJY99lqQJAVIYis7oEhi6Hl4drYXcG2WDGUCAGX1YVg,91191
428
444
  helm/benchmark/static_build/assets/meta-5580e9f1.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
429
445
  helm/benchmark/static_build/assets/microsoft-f5ee5016.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
430
446
  helm/benchmark/static_build/assets/mistral-18e1be23.png,sha256=GOG-Ix7XlctGOUmvJfO2oVSBM7E5O562G88OnoxsjBw,14402
@@ -434,33 +450,29 @@ helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUI
434
450
  helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
435
451
  helm/benchmark/static_build/assets/react-d4a0b69b.js,sha256=rNTpl8Is3LkYXqJowRMc8vc4SXQwP94Ozy4DZZWwldU,275141
436
452
  helm/benchmark/static_build/assets/recharts-6d337683.js,sha256=rDrVmtTCCSLY2hpcxSDxhlQ6CQmTTSQOESNeO3oVQgg,432466
453
+ helm/benchmark/static_build/assets/scb10x-204bd786.png,sha256=IEvXhlxgBA9NCH4RrGWJkMx0Yc7V9EK6o7vrAI5KZCE,4990
437
454
  helm/benchmark/static_build/assets/tii-24de195c.png,sha256=JN4ZXAa0rbR2IlxPfd_mKtntFZcYpDcXocSiqrC2rNg,63389
438
455
  helm/benchmark/static_build/assets/together-a665a35b.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
439
456
  helm/benchmark/static_build/assets/tremor-54a99cc4.js,sha256=x_K5Bp7szI2zsvESrKqffUOHbm8ohjjvuoIeY_yD_CA,293015
440
457
  helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
441
458
  helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png,sha256=zedhimhku2Q3QIvaRSYlUAQ0b5ia9pU4cFzKnABfr4c,118544
442
459
  helm/benchmark/static_build/assets/vhelm-model-6d812526.png,sha256=bYElJoVkSaMJ_lFZj5qoSrIbygbNyBk35q89jtFRet8,168494
460
+ helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png,sha256=qGpsSjEu7HFlPAk_zXuUEdDqj0wkCfFHA1bCtu8Ugdw,8531
443
461
  helm/benchmark/static_build/assets/yandex-38e09d70.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
444
462
  helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
445
- helm/benchmark/window_services/ai21_window_service.py,sha256=1ZDLJv73bxoLj_MzEBu4TgH5xHw-hx0nI6KX6RU73kE,12593
446
- helm/benchmark/window_services/cohere_window_service.py,sha256=5jm8o5ZYrbDUluA5LbMWLOOrOlIuHR7MhAJkOuzBagM,4750
447
463
  helm/benchmark/window_services/default_window_service.py,sha256=F099qF-YeM7YPVtph0dRFPry5vP8_BiudHTy2CpuICQ,151
448
464
  helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=EU3QevFOiQYBN2te54FsVRnGYZdgDxK6KqOWSQOa0q4,2125
449
465
  helm/benchmark/window_services/ice_window_service.py,sha256=9NeBN_tmOvwrK1miUnX3wJA70BP5ifIIeHpNR2gVwls,1070
450
466
  helm/benchmark/window_services/local_window_service.py,sha256=e9JHG72kFWlV6UKg_IhRCipOWQUrOD6ZjsT-_Mwewps,5232
451
467
  helm/benchmark/window_services/no_decoding_window_service.py,sha256=s_i_cqIuU9p0GDRIBApaOHzjH7gHrBPTJ2X5NEcN33Y,1375
452
- helm/benchmark/window_services/test_ai21_window_service.py,sha256=HkpNSaJAClZfaa-bQZ2BrRm1UB_u4sLAGSBlGQqRUD4,8221
453
468
  helm/benchmark/window_services/test_anthropic_window_service.py,sha256=lnxLiW5BPaWN6m03L93qCFugsxnVBbLmYPCarlrO-So,4196
454
469
  helm/benchmark/window_services/test_bloom_window_service.py,sha256=x7WBh0S223ABC9KvL2-y9G-cUxFUPm6oIkqvYO_4mt8,4288
455
- helm/benchmark/window_services/test_cohere_window_service.py,sha256=rKXnw2E7MLAtkLgtrUvnZuQp99_agDO4qcpb3daik-E,3348
456
- helm/benchmark/window_services/test_cohere_window_service_utils.py,sha256=sf25f9MeXzoqsbDzZ7d7le13hm8RkDe54nhLtKF2pqo,158150
457
470
  helm/benchmark/window_services/test_flan_t5_window_service.py,sha256=IhQMWBq2d39O3uNKGwbaMWJkz8585Zc-J_yqvPJfwu4,695
458
471
  helm/benchmark/window_services/test_gpt2_window_service.py,sha256=2UHKt4Wmh6XmSCdepjuMbZHFpb1oUcrKRSxcdOzBE1s,2671
459
472
  helm/benchmark/window_services/test_gpt4_window_service.py,sha256=tV5WdpxYxewchEp1rnsIlEfdJFrHVFKYQ-_8NhGK2yo,1052
460
473
  helm/benchmark/window_services/test_gptj_window_service.py,sha256=0lu4Os_3x3N-AbejG3LZ3-_ikxEHg1Lbmfq-Pzg_D9Y,2374
461
474
  helm/benchmark/window_services/test_gptneox_window_service.py,sha256=8CaOW_ln9bxKA4--dVLfLdsASo6RrR7ouP6EcSruzdA,4210
462
- helm/benchmark/window_services/test_ice_window_service.py,sha256=1DudvCYh4te_UDLg14XeXwKUoin9QnCgZ_PSCwxxaM0,23579
463
- helm/benchmark/window_services/test_openai_window_service.py,sha256=W_QJKaMgzYU7qGFuSS6JeM_f50UX0SuHpkH-u2bEvI4,2312
475
+ helm/benchmark/window_services/test_openai_window_service.py,sha256=Mt-dDtjQmz25n7hwNVyy1T_rl0TMvcvJfuhWNe_AvSw,2314
464
476
  helm/benchmark/window_services/test_opt_window_service.py,sha256=Gh1GzWnlgYIGwDNBw4EnHds3fXwMaSjzkfFXeLn47os,4215
465
477
  helm/benchmark/window_services/test_palmyra_window_service.py,sha256=yy7D2C0ZzExCbptYNsEI9zuX2AEGsEUTj0a_vbqub4o,4212
466
478
  helm/benchmark/window_services/test_t0pp_window_service.py,sha256=pvp55FyqjunkDpHVAhPup3h-iNkepQpxyr4nC87-5iY,3998
@@ -479,10 +491,10 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
479
491
  helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
480
492
  helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
481
493
  helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
482
- helm/clients/ai21_client.py,sha256=LIdkmzcUDR9uIF2tIk5YgDNGNmfQ9JDYmgscvFoCHDs,5509
494
+ helm/clients/ai21_client.py,sha256=PYyqpbnMK1l18Rv_qhE5KdHHqZHgHePaJtJOowTyG7I,8128
483
495
  helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
484
496
  helm/clients/aleph_alpha_client.py,sha256=koPqXF6uRD905atoiCaPg5yxr6B25J0g2OTWk8geebQ,4969
485
- helm/clients/anthropic_client.py,sha256=wptP4u4NhQknoy7VQsWqVzn9tv3IrCuJ3vUMq6fiq0E,34909
497
+ helm/clients/anthropic_client.py,sha256=s3eCwHh8mbhxLi8up1WtQWKkUsHJa-LO44prNd7XYFc,34059
486
498
  helm/clients/auto_client.py,sha256=uK9EWQFWBt4DoV1oytm0dIeA3YpcfGi_H0rCRZSVE8c,11438
487
499
  helm/clients/bedrock_client.py,sha256=BsH9UopsP6ZHf-K0Yzg1PYSMLDwY0yIUmPHDhJVMUi0,5293
488
500
  helm/clients/bedrock_utils.py,sha256=okZ6Z8pviGOUNlrdF2QquAqFs8-QYgcqci95eij8giM,2574
@@ -494,20 +506,21 @@ helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,32
494
506
  helm/clients/google_client.py,sha256=EOpPzK5_9yzWkMjK-4ILiixDF3aeOa8AbR2SPnEO-nw,2900
495
507
  helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
496
508
  helm/clients/http_model_client.py,sha256=DBgkVDZPmg99DCcO_1Xdf6nFQo2kyxLkgoQpwC-wkHI,2806
497
- helm/clients/huggingface_client.py,sha256=xmdqOWoioqoYQjtBqJFN-K9Fm3oHEQrOEjyzDz4ZWBY,15847
509
+ helm/clients/huggingface_client.py,sha256=k-8J4nnDbve8UtGsa0RytWhS9IpAy8hoJAUw4nRZTMI,15734
498
510
  helm/clients/lit_gpt_client.py,sha256=Sjec16bNODosEhDoBkRc4t-LNS-nCUY_jVivWj5zvfU,6205
499
511
  helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
500
512
  helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
501
513
  helm/clients/mistral_client.py,sha256=thOLMcEfrzWR00JUabIZ_PnW2o9YZsdSmNf9z3jbYKo,5982
502
514
  helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
515
+ helm/clients/nvidia_nim_client.py,sha256=f3ZWoTnJmBIFeWsHeUDaTCbDZLK_kdlUWNO1hWumUOo,987
503
516
  helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
504
- helm/clients/openai_client.py,sha256=tXxi9nZsxz2I4YQLrQrV-GhlgZ1Z9ifrUhC_3Aw5SPE,14238
505
- helm/clients/palmyra_client.py,sha256=LBYFHNc5LdpPbiSp1AAHuMm8cUUCQ2EB03BB6XnDTYQ,6551
506
- helm/clients/perspective_api_client.py,sha256=WQDArqlKVWwcK2SicnSIAgV6JGVHsxibTzkdezT3z_U,5920
517
+ helm/clients/openai_client.py,sha256=faWpoZjKxQu3EoeYwMz0deesFlH9VTVIjJ2W74c3gxY,14117
518
+ helm/clients/palmyra_client.py,sha256=XBfrTE-mxiYhLF2EXqd87DckfuZ4mwVLoI_Qif_p5KA,7223
519
+ helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
507
520
  helm/clients/reka_client.py,sha256=K8b9p7U6LLAy4PRjgYrUS06gF4G2xjhjRoMEO4XDe0o,8329
508
521
  helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
509
522
  helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
510
- helm/clients/test_client.py,sha256=V7Y56Ahqa8C2Kc2_W2QE0VfGbBEJzFmnic3LGHZkOqQ,3940
523
+ helm/clients/test_client.py,sha256=6cLpQc2IMR5o7iBxZYPvoRtHJa5i0E7JHh1VKaCtfBw,3842
511
524
  helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
512
525
  helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
513
526
  helm/clients/test_together_client.py,sha256=yYNrhU3kQjmHwhILuoP5QwUgbmkm2gg2NHiNycHjoeE,6145
@@ -576,9 +589,9 @@ helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
576
589
  helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=hTywh5nM95BmPoDyKOSDWg9G3-QwLO3KZEJZVkmFroo,6478
577
590
  helm/clients/vision_language/huggingface_vlm_client.py,sha256=H7AE8mm506PkEcUO8VaLVtptHTwVX58nZx1A_BWdKzA,4968
578
591
  helm/clients/vision_language/idefics_client.py,sha256=hi1VCDBegHfBssmW0C62H3OX3U2ISVRhaSkd24gb1K4,7692
579
- helm/clients/vision_language/open_flamingo_client.py,sha256=CkN0JCeR742ZG9Nc4A85hp4BSE0WLU-3Rs-ZwdmDkzs,6632
592
+ helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
580
593
  helm/clients/vision_language/paligemma_client.py,sha256=IU_T8r1RgpGkEAqabLKBbmoUOWV6c1a9_FXgiTy8exE,6835
581
- helm/clients/vision_language/palmyra_vision_client.py,sha256=mY6vj918f-tbqhOmh7PCSEgnSpHzWY8UTqAdvYgXJ8Q,3757
594
+ helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
582
595
  helm/clients/vision_language/qwen_vlm_client.py,sha256=6rCH4gJMDyQHyjAE_GDIrLsInH_bvd6to-4RMWbRLeM,7407
583
596
  helm/clients/vision_language/open_flamingo/__init__.py,sha256=i1tGJj6ckeE6eS1EWV5tbQKYLmPCrdSI45mPchfv_Ic,88
584
597
  helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -601,17 +614,17 @@ helm/common/general.py,sha256=nMfHNPXyAAorAMmgDClD8r8XXeJcvfF0QXTP-FgH5PQ,11690
601
614
  helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
602
615
  helm/common/hierarchical_logger.py,sha256=EnKLnfbQftca08EJfjGEQb4tcnCKbx-JtwLnoCnhMQs,2908
603
616
  helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
604
- helm/common/images_utils.py,sha256=bsxgW9knrfa9NTa6V-O13_nDnflqrqHpnKlTRxul-aY,3187
617
+ helm/common/images_utils.py,sha256=icE0tH9P3FT_qggfbi8vVwkmIjOAN5l3HcGDF9gmNnY,3345
605
618
  helm/common/key_value_store.py,sha256=iHi1WQuWttLNJnuM48QNOAXHoneNbmbBmtXYPq-dyys,3147
606
619
  helm/common/media_object.py,sha256=3VZqfb0py5dDKwWtnLp2kdl8svaike-Cn7Mjk-b0cvM,5130
607
620
  helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
608
- helm/common/mongo_key_value_store.py,sha256=yK1qyh1RgKB_hYMD1BA6hQw6oGJdrALPMpqqlkn7h0M,3811
621
+ helm/common/mongo_key_value_store.py,sha256=Qky55n8jkbJb8oIw6UCLnCbJoUR3H3yBZV7J8wVu1Ns,3878
609
622
  helm/common/multimodal_request_utils.py,sha256=GNZQQCcwsARyFCO-uoeeglyK2PEfC4MjClAKDeKqokk,1404
610
623
  helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
611
624
  helm/common/object_spec.py,sha256=_usgTDQULBF6_jy7C6m-9ZNVvNxbGoTE_CdGcSvBASU,4327
612
625
  helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
613
626
  helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
614
- helm/common/request.py,sha256=B94Dey42OJZ5lgcf71KsGW2nKo8eB4My_pj6tDkIQOg,8012
627
+ helm/common/request.py,sha256=Z_YUd77WQ15yeSN8YYdT48dI4ehUc869KuaDisAiyIA,8806
615
628
  helm/common/test_cache.py,sha256=XqboYHQAkFWIHPsuIjuageRSLeN7QoATKF7wwxggPqE,7054
616
629
  helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
617
630
  helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
@@ -622,13 +635,13 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
622
635
  helm/common/file_caches/local_file_cache.py,sha256=wBOAbbkGLiClaX4YdunokRfSQCKNkTYmMVx2KTLy4Lc,1921
623
636
  helm/common/file_caches/test_local_file_cache.py,sha256=bOCWR9MglwQXV98xk8auyjgFxaOr85zRdxWwxMBQW9s,663
624
637
  helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
625
- helm/config/model_deployments.yaml,sha256=x4j3LMGHTV3jObKK0dT5SOtKJvReWOHyyjs6jV2D2L0,89739
626
- helm/config/model_metadata.yaml,sha256=M7EsOSnf4tcrSlNYBT50SiC6mReXfZ1q5rt7_OpdzpU,138011
627
- helm/config/tokenizer_configs.yaml,sha256=lBGPsRPRPeqlN_j194hEVP8HAMC6J5NLrIZpN95Y8ug,15078
638
+ helm/config/model_deployments.yaml,sha256=_Yeji7Zz8XfyYGJzrTEFzIDL1hpVPcv_mPDvANKSGQ8,89215
639
+ helm/config/model_metadata.yaml,sha256=E2Rg5_4kR3RGtjz9XaSKg_B7nfz9KgtqGXWgXw7bLWI,158654
640
+ helm/config/tokenizer_configs.yaml,sha256=RD7lrDgoEW-foqJI0QxLo4XPHS7G8HyuaB3r4rwIK6Q,18761
628
641
  helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
629
642
  helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
630
643
  helm/proxy/cli.py,sha256=l8F7UYqrIOoBD9ZCIxJFA4fhxlzhae0-2Nn8A7FMkzk,8244
631
- helm/proxy/example_queries.py,sha256=uYc05CIhTzFCjLPdkWqtOweyHqsRUX4s6ByP8wJbjVk,4650
644
+ helm/proxy/example_queries.py,sha256=rVGmQ2ej4OS7m5Y3uI5dp9Mfdw6bv53c0o2QknsmYes,4379
632
645
  helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
633
646
  helm/proxy/retry.py,sha256=iLZmKATEJQa9jsSpOIx6YDRhmrA8G1Qm21cUxCuo2Ug,3490
634
647
  helm/proxy/server.py,sha256=V05YdMy0lZqYfYkxLDqksGYe-8CIFa6Jg8aSb8YHM7I,10753
@@ -645,30 +658,29 @@ helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5V
645
658
  helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
646
659
  helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
647
660
  helm/proxy/services/remote_service.py,sha256=emYN0qWOJLQ7q1n06V4TwlvXaqylQcUxmqDcGZXqPJ8,9097
648
- helm/proxy/services/server_service.py,sha256=U-1g0VMjCY9bBK8BecbUxVzSx7hyC_rpwSNm67bqmCg,11534
661
+ helm/proxy/services/server_service.py,sha256=SPaiP4D4zYwaNKaULugNtDCYxz1HqgoUPcI7BU-eS64,11469
649
662
  helm/proxy/services/service.py,sha256=Be-Z5F6AN4vMzsJr3BS6tJ9NHHy_dc_yn2Ex9cm0ChU,6193
650
663
  helm/proxy/services/test_remote_service.py,sha256=NFnLjg3QNHoDKdK0DlcrtylwlKXx1vdzheNZRrLEv7c,6605
651
664
  helm/proxy/services/test_service.py,sha256=FUZoI8pGiUg5adgB1wTJ869QOgFYjPtM6yf6FGMdE64,8968
652
665
  helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
653
666
  helm/proxy/token_counters/auto_token_counter.py,sha256=34PWvF96DurTrUtUxW4Td5VNV1_BhAebCkXQLl3xp4M,2046
654
- helm/proxy/token_counters/test_auto_token_counter.py,sha256=lDe1lXa5keRi1iLsKz2aBtoQyQ1fycYymZcGvshWvUk,8609
667
+ helm/proxy/token_counters/test_auto_token_counter.py,sha256=LO3H_NbVeoeaMmEuFNCmhoEWKjWVvxeW5U4yTKfE-84,8590
655
668
  helm/proxy/token_counters/token_counter.py,sha256=TCij1Cp08RoFTLLLdjNPoaeDGHpA1A2hQsrRV775Kf4,425
656
669
  helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
657
- helm/tokenizers/ai21_tokenizer.py,sha256=oXImuAY9kMohHH6Zm7BWysfT88b00NBoSELeGQ920y4,2255
670
+ helm/tokenizers/ai21_tokenizer.py,sha256=CE-u39ZY5Y4XQHONpiPHKK7uvEmySYLBQi2n70OV004,2059
658
671
  helm/tokenizers/aleph_alpha_tokenizer.py,sha256=UlWC_SjObBvexpZ3OfKZT2yjhbSsHlKjQe_oWuRrXno,3818
659
672
  helm/tokenizers/anthropic_tokenizer.py,sha256=d-HO9OEFkhYzFZu0VkOsHjxbqqSUseCNX0KQqgb3s2Q,2114
660
673
  helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
661
674
  helm/tokenizers/caching_tokenizer.py,sha256=kSegrCFotRevSDgJsn0g52dWiSUCNa7_EZpRNrELeUE,8163
662
- helm/tokenizers/cohere_tokenizer.py,sha256=6rahykq1SxqS8vCWOzYo_oeUoVwhg_zOfWFIkQxP6GY,5632
675
+ helm/tokenizers/cohere_tokenizer.py,sha256=6WwHIt7SsICmYR2QQpwDJ7pfNF8VWrFHFxF5Kynq6aY,2116
663
676
  helm/tokenizers/http_model_tokenizer.py,sha256=wBTtDA2UdEYspffa1wqgkT3y3YHoyLXXoucnJ5PGjhs,3109
664
- helm/tokenizers/huggingface_tokenizer.py,sha256=_XXx8uApENK7-o81qxEn0SOeJL_L2UpiiuteSYiODpE,8734
665
- helm/tokenizers/ice_tokenizer.py,sha256=4ZTIRpmt2cqwcxnmrDpCRhiJ0BI3ELE-GHoBuHWgrDA,1200
677
+ helm/tokenizers/huggingface_tokenizer.py,sha256=vmzcbgzMMlwx1x2n0syyp6KuN47nskgoP9yi1BNEGMQ,8696
666
678
  helm/tokenizers/lit_gpt_tokenizer.py,sha256=LMrpaje64UmnDKoYjPG_RQeXVA4xQUwW5t48IJIeLaQ,1660
667
679
  helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
680
+ helm/tokenizers/test_ai21_tokenizer.py,sha256=V8orjdKxmEV44VYoZ9Sq5E7CIq2caNnr6vjdk0T_w1A,1646
668
681
  helm/tokenizers/test_anthropic_tokenizer.py,sha256=_wzXp9FVR2Ml0s2A79TTXbSPHyTRp28i9tiEyQ9S6Ko,3792
669
682
  helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
670
683
  helm/tokenizers/test_huggingface_tokenizer.py,sha256=8tFyZQb4DLg6MdKg13a66bLbp0yf4Ar1fGWM_sYeSjg,6309
671
- helm/tokenizers/test_ice_tokenizer.py,sha256=-xi_f8TBSkAYr5CcA56HDq7rZ9HAGd99J7twNfkLzFU,2619
672
684
  helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
673
685
  helm/tokenizers/test_yalm_tokenizer.py,sha256=qWpKnUuAlePd6t-UJB_mAiBwtAacnC8caKXLJ_GdTkk,2477
674
686
  helm/tokenizers/tiktoken_tokenizer.py,sha256=FU2g_FF0pVoyspYhHcz3SyCBGNbsTby-nWVrj0Cq4_c,1265
@@ -679,9 +691,9 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
679
691
  helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=W9p5QNn1GSm-y85yVEQe_82zn5CVK_vR6jvhk7JTs_k,869
680
692
  helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
681
693
  helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
682
- crfm_helm-0.5.2.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
683
- crfm_helm-0.5.2.dist-info/METADATA,sha256=g-tT_a7wm7L7iaNCQVwNIrpUnVHK8PKfbXjel0KyhmQ,19591
684
- crfm_helm-0.5.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
685
- crfm_helm-0.5.2.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
686
- crfm_helm-0.5.2.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
687
- crfm_helm-0.5.2.dist-info/RECORD,,
694
+ crfm_helm-0.5.3.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
695
+ crfm_helm-0.5.3.dist-info/METADATA,sha256=JNa1JuzCQTPbczD-UfRLsa_f8OW7JT1zHQML-ilNh_c,19060
696
+ crfm_helm-0.5.3.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
697
+ crfm_helm-0.5.3.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
698
+ crfm_helm-0.5.3.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
699
+ crfm_helm-0.5.3.dist-info/RECORD,,