crfm-helm 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (209) hide show
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +81 -112
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +165 -155
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +57 -0
  10. helm/benchmark/annotation/call_center_annotator.py +258 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +55 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +37 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +36 -44
  18. helm/benchmark/annotation/model_as_judge.py +96 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +50 -0
  20. helm/benchmark/annotation/xstest_annotator.py +100 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +79 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/unitxt_metrics.py +17 -3
  30. helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
  31. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  32. helm/benchmark/model_metadata_registry.py +3 -3
  33. helm/benchmark/presentation/create_plots.py +1 -1
  34. helm/benchmark/presentation/schema.py +3 -0
  35. helm/benchmark/presentation/summarize.py +106 -256
  36. helm/benchmark/presentation/test_run_entry.py +1 -0
  37. helm/benchmark/presentation/test_summarize.py +145 -3
  38. helm/benchmark/run.py +15 -0
  39. helm/benchmark/run_expander.py +83 -30
  40. helm/benchmark/run_specs/bhasa_run_specs.py +652 -0
  41. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  42. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  43. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  44. helm/benchmark/run_specs/finance_run_specs.py +82 -1
  45. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  46. helm/benchmark/run_specs/vlm_run_specs.py +100 -24
  47. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  48. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  49. helm/benchmark/scenarios/bhasa_scenario.py +1942 -0
  50. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  51. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  52. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  53. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  54. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  55. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  56. helm/benchmark/scenarios/raft_scenario.py +1 -1
  57. helm/benchmark/scenarios/scenario.py +1 -1
  58. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  59. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  60. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  61. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  62. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  63. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  64. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  65. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  66. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  67. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  68. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  69. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  70. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  71. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  72. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  73. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  74. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  75. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  76. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  78. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  79. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  80. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  81. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  82. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  83. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  84. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  85. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  86. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  87. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  88. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  89. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  91. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  92. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  93. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  94. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  95. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  96. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  97. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  98. helm/benchmark/server.py +1 -6
  99. helm/benchmark/static/schema_air_bench.yaml +750 -750
  100. helm/benchmark/static/schema_bhasa.yaml +709 -0
  101. helm/benchmark/static/schema_call_center.yaml +232 -0
  102. helm/benchmark/static/schema_cleva.yaml +768 -0
  103. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  104. helm/benchmark/static/schema_ewok.yaml +367 -0
  105. helm/benchmark/static/schema_finance.yaml +55 -9
  106. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  107. helm/benchmark/static/schema_legal.yaml +566 -0
  108. helm/benchmark/static/schema_safety.yaml +266 -0
  109. helm/benchmark/static/schema_tables.yaml +149 -8
  110. helm/benchmark/static/schema_thai.yaml +21 -0
  111. helm/benchmark/static/schema_vhelm.yaml +137 -101
  112. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  113. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  114. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  115. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  116. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  117. helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
  118. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  119. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  120. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  121. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  122. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  123. helm/benchmark/static_build/index.html +2 -2
  124. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  125. helm/benchmark/window_services/tokenizer_service.py +0 -5
  126. helm/clients/ai21_client.py +71 -1
  127. helm/clients/anthropic_client.py +7 -19
  128. helm/clients/huggingface_client.py +38 -37
  129. helm/clients/nvidia_nim_client.py +35 -0
  130. helm/clients/openai_client.py +18 -4
  131. helm/clients/palmyra_client.py +24 -0
  132. helm/clients/perspective_api_client.py +11 -6
  133. helm/clients/test_client.py +4 -6
  134. helm/clients/together_client.py +22 -0
  135. helm/clients/vision_language/open_flamingo_client.py +1 -2
  136. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  137. helm/common/cache.py +8 -30
  138. helm/common/images_utils.py +6 -0
  139. helm/common/key_value_store.py +9 -9
  140. helm/common/mongo_key_value_store.py +5 -4
  141. helm/common/request.py +16 -0
  142. helm/common/test_cache.py +1 -48
  143. helm/common/tokenization_request.py +0 -9
  144. helm/config/model_deployments.yaml +444 -329
  145. helm/config/model_metadata.yaml +513 -111
  146. helm/config/tokenizer_configs.yaml +140 -11
  147. helm/proxy/example_queries.py +14 -21
  148. helm/proxy/server.py +0 -9
  149. helm/proxy/services/remote_service.py +0 -6
  150. helm/proxy/services/server_service.py +6 -20
  151. helm/proxy/services/service.py +0 -6
  152. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  153. helm/tokenizers/ai21_tokenizer.py +51 -59
  154. helm/tokenizers/cohere_tokenizer.py +0 -75
  155. helm/tokenizers/huggingface_tokenizer.py +0 -1
  156. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  157. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  158. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  159. helm/benchmark/data_overlap/light_scenario.py +0 -60
  160. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  161. helm/benchmark/static/benchmarking.css +0 -156
  162. helm/benchmark/static/benchmarking.js +0 -1705
  163. helm/benchmark/static/config.js +0 -3
  164. helm/benchmark/static/general.js +0 -122
  165. helm/benchmark/static/images/crfm-logo.png +0 -0
  166. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  167. helm/benchmark/static/images/helm-logo.png +0 -0
  168. helm/benchmark/static/images/language-model-helm.png +0 -0
  169. helm/benchmark/static/images/organizations/ai21.png +0 -0
  170. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  171. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  172. helm/benchmark/static/images/organizations/cohere.png +0 -0
  173. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  174. helm/benchmark/static/images/organizations/google.png +0 -0
  175. helm/benchmark/static/images/organizations/meta.png +0 -0
  176. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  177. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  178. helm/benchmark/static/images/organizations/openai.png +0 -0
  179. helm/benchmark/static/images/organizations/together.png +0 -0
  180. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  181. helm/benchmark/static/images/organizations/yandex.png +0 -0
  182. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  183. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  184. helm/benchmark/static/index.html +0 -68
  185. helm/benchmark/static/info-icon.png +0 -0
  186. helm/benchmark/static/json-urls.js +0 -69
  187. helm/benchmark/static/plot-captions.js +0 -27
  188. helm/benchmark/static/utils.js +0 -285
  189. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  190. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  191. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  192. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  193. helm/benchmark/window_services/ai21_window_service.py +0 -247
  194. helm/benchmark/window_services/cohere_window_service.py +0 -101
  195. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  196. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  197. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  198. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  199. helm/tokenizers/ice_tokenizer.py +0 -30
  200. helm/tokenizers/test_ice_tokenizer.py +0 -57
  201. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
  202. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
  203. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
  204. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  205. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  206. /helm/benchmark/{data_overlap → scenarios/vision_language/image2struct}/__init__.py +0 -0
  207. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  208. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct/webpage}/__init__.py +0 -0
  209. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -7,15 +7,15 @@ helm/benchmark/data_preprocessor.py,sha256=aNdM-o2t4qkLIQHiQeWUFg03DjjJ8HTBIphYC
7
7
  helm/benchmark/executor.py,sha256=simd7SdJ7TciUpoq3D0uz_XUSCZj5KIWCIP57FYm4js,4906
8
8
  helm/benchmark/huggingface_registration.py,sha256=unEBO21V8K3-Ya0xLqjO9H1oq7RmU-f1MYV0tCIbXzY,4578
9
9
  helm/benchmark/model_deployment_registry.py,sha256=BjL0ghHgO7_Z5jZZ7kuSOj9saegI3BivaL-b699C0rc,9527
10
- helm/benchmark/model_metadata_registry.py,sha256=fXRJOLUIrLOHUG5duncEqhnpmfb9hyloUlGbOM2L9ds,8194
10
+ helm/benchmark/model_metadata_registry.py,sha256=m39FqNaGdxP4r7W7Vmq6r-gOLjYtn_5WmRNsGzci6d8,8283
11
11
  helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
12
- helm/benchmark/run.py,sha256=WNj10uNCqxwS2pCmt_s5Bn_JIC-NItEjK1PyQl9SXmo,13193
13
- helm/benchmark/run_expander.py,sha256=sWfcL0caHTsp1NqqsGrG-fZaIbScY8LECJqQMVIPZtE,51191
12
+ helm/benchmark/run.py,sha256=cPJh1Rwit8E_Kjf8Te2D75cd19ag4WgS2YrHHu2Fc8Q,13997
13
+ helm/benchmark/run_expander.py,sha256=L9jvRjy3DGuNytA2eYQGBV-8VL_G8sry18FZ3OQoIlU,53323
14
14
  helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
15
15
  helm/benchmark/run_spec_factory.py,sha256=hp29n_Stb7RMwRm2jrP_qpyzxi8X8ojdqXTFN3KRSiY,6978
16
16
  helm/benchmark/runner.py,sha256=zlHDJ2Ys5-HxtXcwpkXcrdfXy_i886fBcq1iNeLyC3Q,14669
17
17
  helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
18
- helm/benchmark/server.py,sha256=ysd5MT1TDu65NH-OzIGf9wmZlr8FHNRwoy2ybjSc5Yk,6140
18
+ helm/benchmark/server.py,sha256=kaGpUzBwzprmTDiMcy8-sfT8KfVEOb0wWytWODsAQ94,5925
19
19
  helm/benchmark/slurm_jobs.py,sha256=eNCAoaWDfT0Wk32ZJRIGo-x8kgjhDPnPB4Xrvw_eLB0,3225
20
20
  helm/benchmark/slurm_runner.py,sha256=Tozimrjr2R6mlKHcmrGgxTy9ga-ArIW6AoAWtxqzw-M,16567
21
21
  helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5drG-4Y4UhIM,2219
@@ -23,7 +23,7 @@ helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCv
23
23
  helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
24
24
  helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  helm/benchmark/adaptation/adapter_spec.py,sha256=K5BwqTe2iimjswdw_SONlJo0xt-T-o5KH7VqxrPaov0,5072
26
- helm/benchmark/adaptation/common_adapter_specs.py,sha256=-ILsVxWjpEE6an1ncrRRrLkdP5ky_-2GN1TxSxJo38M,10449
26
+ helm/benchmark/adaptation/common_adapter_specs.py,sha256=Er8aMbDi8RTBtGWjcI08E2mRDl5AoBzUaBT1EY38Nlw,10515
27
27
  helm/benchmark/adaptation/prompt.py,sha256=n0Ka3RGSWMr3CBnJrPNPy626x9TJE3k677wKbG8hO9A,2133
28
28
  helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
29
29
  helm/benchmark/adaptation/scenario_state.py,sha256=mWEhgzk18SVoMEuj2pSnc_r9JrGAHLdOlteHJKUMA5k,1961
@@ -35,12 +35,12 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=F7Aou6r9CZ1xEuAX
35
35
  helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=BbcBEJjY8Cp58me9sUktd2p3dEVFL8ZJ7RFfus3hSYE,14997
36
36
  helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=LhZHmciP8lAfu7T0p634GOPTHrJR7qRCRRIxPgVlW9E,14873
37
37
  helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=VJ66MfIGQWJg0VXCV0MJEMwF9Jx1DeJ7RxsgYlOTx_4,1889
38
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=k8wSxv9pK8wtbQNBzWYPkGEUKJb8tcVi41Y1M3fPQT4,3985
38
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=S38Y_MjLRE86LS9RfB4qHmNy5x5n2KyYa4DtA63lees,4402
39
39
  helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=aMe-y4iiyEnM7_bqIoddeZBsVLoDxMmjKY2eZKB6Y2Q,2156
40
40
  helm/benchmark/adaptation/adapters/test_adapter.py,sha256=0-JrYnogZu4kENQG1eQMXHWnuSurCLRbkLpDuSnfRqs,745
41
- helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=TM6WJpWShsu6KuDzlofYHd9DNPj86Hjudubp_sqrhFI,12734
42
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=f_bggObKUxiV5XyYHHNXsM42HzM0CDzvR4uiIoXTE5o,7997
43
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=v6LLmVTopXNfzo9Qzq16EmmPPivFGGs9LuaPDJAX4vY,9506
41
+ helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=NyhVTvLznCVMB-DJeX2DRjWx91XmW3FBcrkm0RN-fJU,12766
42
+ helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=BCEhKRVEDKPHsLKhpnIv0krV37a8Eu78r8EtJxH_MXA,7980
43
+ helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=BoozcN0zPWwk6HKEPN0b61ieqwk5y8bwKvr9m8DR_2k,11874
44
44
  helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=o7CGClyVWYOuJ4G56-whq5fTvCr7QIn51Mo6DTdvwg0,1881
46
46
  helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=bvY8xT2ak_3WG4m2Z5bCM6FLImPIWG1qAn9H2ZNwNv0,6359
@@ -52,15 +52,22 @@ helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
52
52
  helm/benchmark/annotation/air_bench_annotator.py,sha256=9W3zLO2f4OzxGdavkDI2dDUStxpExa7sgrI-ATGG7NY,3048
53
53
  helm/benchmark/annotation/annotator.py,sha256=2UIXY71S5dRaZBLb1v4lcv8-O6pyJ9zTeSJl78AEWGI,1538
54
54
  helm/benchmark/annotation/annotator_factory.py,sha256=3Soh0V3lbsIR_HGHLg-XTc3eKVRj7SL9lLT_AoqUVTs,2997
55
- helm/benchmark/annotation/live_qa_annotator.py,sha256=IlUV4K-ddbL1XsvIgBAfsLH0_bdKx8kyDev1G3Kwyek,4364
56
- helm/benchmark/annotation/medication_qa_annotator.py,sha256=7LRmx2a1JODP5puAM0IH0HFTextfeLOzK7ef4sw9XIU,4129
55
+ helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
56
+ helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
57
+ helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
58
+ helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
59
+ helm/benchmark/annotation/live_qa_annotator.py,sha256=9d2YKBlK4m0Bu5eWtc-CcwECCurU5yFGpQFIPIBC138,3548
60
+ helm/benchmark/annotation/medication_qa_annotator.py,sha256=5ayy-ZBEOjKBFxJRgSXgUxhNJ71sL7EtbCD69p5K8Xg,3297
61
+ helm/benchmark/annotation/model_as_judge.py,sha256=f3iQaBBwr-OYPLVkDp8Boutme_k83ZlLnfprHfv1alw,3689
62
+ helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
57
63
  helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
58
64
  helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
59
- helm/benchmark/annotation/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
61
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py,sha256=yRifoqhGq_mQkkRcgKCFpGrZaI9gochOXYiCU8oY1KE,2477
62
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py,sha256=we6K1BynV907ZMnGI2zb_tru1uw2iGEI06Wtbnus23w,4010
63
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py,sha256=rvzdQCaVFM6ovF28TSUnNmB47f2hidlaZm6vO4DJpso,6404
65
+ helm/benchmark/annotation/xstest_annotator.py,sha256=arL5DyA_nYkiSCAtl6G7MliZz5ZYRsyc7xQJNu0RBcA,3604
66
+ helm/benchmark/annotation/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
+ helm/benchmark/annotation/image2struct/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
68
+ helm/benchmark/annotation/image2struct/latex_compiler_annotator.py,sha256=drbxogMMGwGxgVFbhT7hxPGDh7uyhptlmEmeP1Gq2xM,2471
69
+ helm/benchmark/annotation/image2struct/lilypond_compiler_annotator.py,sha256=odIGciLX2oVq_O8_H15lWUZoSfVvY-jRb0ILjs7GCIg,4061
70
+ helm/benchmark/annotation/image2struct/webpage_compiler_annotator.py,sha256=w6RKv7Fz__j_abKXnsTn98kHPv9tWKipdLW3NVT55m8,6389
64
71
  helm/benchmark/augmentations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
72
  helm/benchmark/augmentations/cleva_perturbation.py,sha256=arUkY_luc274YEMZocOos9rpAZVbEFZphbMlobAxTy0,29208
66
73
  helm/benchmark/augmentations/contraction_expansion_perturbation.py,sha256=yni1UR2fviN0Wig8MpOp0zzLn4H-gYocTjKTpxBwywg,4850
@@ -83,17 +90,16 @@ helm/benchmark/augmentations/synonym_perturbation.py,sha256=komOV5M342_8unopnwN6
83
90
  helm/benchmark/augmentations/test_perturbation.py,sha256=4EooKVcyub70I81trzpNx3Ij-m1vpFa5cFIo6O52icE,13185
84
91
  helm/benchmark/augmentations/translate_perturbation.py,sha256=vMXCYXGVSo8E78IAzH9HI4p2pvyLzcvO77BnvR2QB0k,1097
85
92
  helm/benchmark/augmentations/typos_perturbation.py,sha256=_F9zwvrLie8hX7mzUtQmYq6oq6yqaFiKGsvc9LAuBr4,2798
86
- helm/benchmark/data_overlap/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- helm/benchmark/data_overlap/data_overlap_spec.py,sha256=aj_l1l0qxUbUMrSWr70-Sb1j_JN-7WYop5BXPG_xj44,1998
88
- helm/benchmark/data_overlap/export_scenario_text.py,sha256=OiFsU_qME2_I87yDSNPfuAVI5Y9MbPEcEliYK6TaAEE,4527
89
- helm/benchmark/data_overlap/light_scenario.py,sha256=UFUr8plZD32e2TJTjFZLxTwD-ZRb9eYnHk2U3ZD8P40,1074
90
93
  helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8wtXvkVAx0iI2zwCxqHvk3XKTx31qHPalsI,4203
91
94
  helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
92
95
  helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
93
96
  helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
97
  helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
98
+ helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
95
99
  helm/benchmark/metrics/basic_metrics.py,sha256=7hk5PZL7d09uG1y7wHBhY_ox8hlXw-n7Yt_FDv_AIKw,20375
96
100
  helm/benchmark/metrics/bbq_metrics.py,sha256=Dqccr7GdfKNs1S_1QSB75d8AY7moovEPAqvacGfrCAE,6157
101
+ helm/benchmark/metrics/bhasa_metrics.py,sha256=Nw5fdZrYedYUEVJXFFnGSdOBxJ4-99GELd699TBmcSg,6958
102
+ helm/benchmark/metrics/bhasa_metrics_specs.py,sha256=fwXd1fRoeizd4kVQfLZ9ny-PzHTe1ieFKsGesiPDef0,440
97
103
  helm/benchmark/metrics/bias_metrics.py,sha256=GQ4CwOk1Sa9g-LcJCxcoQLD1vWY2Hvujck9l-9qsmf4,11418
98
104
  helm/benchmark/metrics/bias_word_lists.py,sha256=mx5JjW3mHffXIqo4GcQN-zENUEttBqQnEjPTz3J3J_4,13909
99
105
  helm/benchmark/metrics/classification_metrics.py,sha256=uB23jRFzkmtJgs1sTO5pPjdV_mOg35gWubjGS8pynLM,5654
@@ -101,7 +107,7 @@ helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIef
101
107
  helm/benchmark/metrics/cleva_harms_metrics.py,sha256=c_x9MYg8WjM1yym1S374GKxH_lwP6wZOiXrknf0mJis,11077
102
108
  helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
103
109
  helm/benchmark/metrics/code_metrics.py,sha256=e0aqLcxBAdCc0qAqebzK40Ilv2Py6xZbosud5v169x8,5121
104
- helm/benchmark/metrics/code_metrics_helper.py,sha256=h_y3BsYCbeh8cDe2LDndA8K6nkelV0J76qxlq2cBmsc,22334
110
+ helm/benchmark/metrics/code_metrics_helper.py,sha256=UNai154RuhYRZM_YK-rveLct4Ui5iEBNPYmYdKq34Xs,22712
105
111
  helm/benchmark/metrics/common_metric_specs.py,sha256=k_IW0A6BevAskS0_C6ZaP9XvIfrdLI974_NhC89rMoo,5846
106
112
  helm/benchmark/metrics/copyright_metrics.py,sha256=X9j3YsfzWEoGpgPpIvCzm18-JggLAW5QFooifE1KqaM,7729
107
113
  helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=TcyklpfcTMXrpJeaHQfxS9QQxe-gwmT-HD0g_DmIFLQ,3253
@@ -130,6 +136,7 @@ helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bz
130
136
  helm/benchmark/metrics/ranking_metrics.py,sha256=5hDRapsxx_cmo-ag_80kOQnrgZn3lfVsLZVtWxuxH-s,17391
131
137
  helm/benchmark/metrics/reference_metric.py,sha256=RlIM_PFTEkBo0_EEMq8d4_BSagNSBR_XyovMtjDeqqU,6026
132
138
  helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
139
+ helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
133
140
  helm/benchmark/metrics/statistic.py,sha256=FuxNxMtAfiCkOxBS9KHlhEyxe61e0YXt2emvsufgPZQ,3424
134
141
  helm/benchmark/metrics/summarization_critique_metrics.py,sha256=Lf7PDuce62HDzyofsyxaOvH0QvzcaS-vJvDWtIs8xKk,4694
135
142
  helm/benchmark/metrics/summarization_metrics.py,sha256=laLMGRDy1wjcFvgSWXvzOZwBXshkmPr0S2Ofu79Z01Q,16461
@@ -142,7 +149,7 @@ helm/benchmark/metrics/test_numeracy_metrics.py,sha256=ls1ZIHDePKpHMoqAbf4HmJ1SI
142
149
  helm/benchmark/metrics/test_statistic.py,sha256=AejuYLSeUwEOqpEMRKZFjnxu4HKUraeExU8TPmZEqW4,1229
143
150
  helm/benchmark/metrics/toxicity_metrics.py,sha256=6MCpHuCXbXZqWwvO57ifKYHnHWBzszN9cZjwgPQQF2Y,4027
144
151
  helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
145
- helm/benchmark/metrics/unitxt_metrics.py,sha256=2F9T4iQV0_BbDMCWrZrd9sc30XHYv8MR4xSBd_dD3eI,4053
152
+ helm/benchmark/metrics/unitxt_metrics.py,sha256=YXuq2wWwP8ccfd5CG0ZDyDd_PqBHguSqb57H_m9b55g,4749
146
153
  helm/benchmark/metrics/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
154
  helm/benchmark/metrics/image_generation/aesthetics_metrics.py,sha256=AXQjWBd9zBZOoCF8vQV9FjUy33teC0IF7pdbq-XiHjM,2101
148
155
  helm/benchmark/metrics/image_generation/aesthetics_scorer.py,sha256=ISdThDKMrx-SHQe69dCcr8qUrMCa_GsxX3BeZnd0WPA,2538
@@ -178,7 +185,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
178
185
  helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=__f7NVsVQatDFn_2Bfx7ObiQ68kAMvyyClApaTxqx80,649
179
186
  helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
180
187
  helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
- helm/benchmark/metrics/summac/model_summac.py,sha256=zEuTI75eBBZPzJp0j2kFd2HejouhugC83nSWoVpghBQ,17412
188
+ helm/benchmark/metrics/summac/model_summac.py,sha256=PJ2lPa-JQPnM86N0T2rPcAviTNHmSV721PTnbL1eGnk,17460
182
189
  helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
183
190
  helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
191
  helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=WeNP4yiM4TVrD9Kid-uVRmWIVDqETnBsMycZmIBiTZ0,665
@@ -187,48 +194,55 @@ helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py,sha256=5igmDhWu7H8-
187
194
  helm/benchmark/metrics/tokens/free_token_cost_estimator.py,sha256=G_6UK6Js_NZ_eqY0ZQnrC9QJVMERGhV1f6v7xq2lM-Y,461
188
195
  helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py,sha256=9zjtuxMbvfPBYuxOYMFEmNP8ZKFDVywrZ08n6nrjbA4,1520
189
196
  helm/benchmark/metrics/tokens/openai_token_cost_estimator.py,sha256=7jgjcgmbcVfLA_nTOrWXKIF8TEXng_KnE6cSgsSXWmE,1398
190
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=l9UQZ0aAIhCYuFbIjU3j3A3XXoMvwUvz1kvRtlDbtOo,1079
191
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=_wJ3E3LbJB9XPLixTH82BYQbp32o3oij6Sz3lsZL30E,2648
197
+ helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=eVnCYhRq2LT7F4BXsiIDb1bkmhvoHLgDAdMR73Xz5p8,1071
198
+ helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=h5ggZCGpgCQUjfqS0JS4Bxmx7NBaT4w43pXAgbCEnw4,2628
192
199
  helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
193
200
  helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
201
  helm/benchmark/metrics/vision_language/emd_utils.py,sha256=KdZdcqu3eo016FdAjAm_83v92-wWuR90EPsTogfTcok,15196
195
- helm/benchmark/metrics/vision_language/image_metrics.py,sha256=HyXeZiDszSV1Q99ScqeS_xYvyrp1dlWBYahfxt42N3E,23554
196
- helm/benchmark/metrics/vision_language/image_utils.py,sha256=XeYF3E6MnYyPJ5hYp4TtiTP27-y4S8LTBH5bZVcvJFg,3758
202
+ helm/benchmark/metrics/vision_language/image_metrics.py,sha256=y3md3sCuAa63wRpcwIYo464cE4mq14YWqAwUfVFDGhA,23835
203
+ helm/benchmark/metrics/vision_language/image_utils.py,sha256=4E0NYh09O6-5sGhAPo6KZqYaZfBpCtuYbD3vLt-wQzk,3755
197
204
  helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
198
205
  helm/benchmark/presentation/contamination.py,sha256=PiIdcaD3-xfExjOmyL5q4Ao2ASa-OlScJAB9u1Zxe7o,2811
199
- helm/benchmark/presentation/create_plots.py,sha256=2-ZOuEdRwqqF1biRmzWggMZjmODoxOQOBoz9GT7tVww,28737
206
+ helm/benchmark/presentation/create_plots.py,sha256=T6ewj8rXZfRlqg01bgbhdU1rsABK4xyrLpruhRG-7Fc,28691
200
207
  helm/benchmark/presentation/run_display.py,sha256=tC1DciLvDTQJog4BDo8StWDdX7DbBkhrG2sX_SwXSPQ,11838
201
208
  helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
202
- helm/benchmark/presentation/schema.py,sha256=fPw-794HbacZR5z1SmYGUqYgqXbZ8-BrcexWV4h6vgc,10809
203
- helm/benchmark/presentation/summarize.py,sha256=2fJ9BYOJRxe9eBylLUK3qcZZwAwRtJF_C8plEQlAPEU,67266
209
+ helm/benchmark/presentation/schema.py,sha256=cblGmgkhuqQRWPh-IT75u3Il_-SVXipeq-mh64lvgWY,10947
210
+ helm/benchmark/presentation/summarize.py,sha256=iweYi83j_nogmMyCibjJtKwpbY4HzMfoIuLSeqyanHw,59084
204
211
  helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
205
212
  helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
206
213
  helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
207
- helm/benchmark/presentation/test_run_entry.py,sha256=OM-027j2A0Lx-ai2zBprOxSqzZhS_dh0OKw3ThocZW0,751
214
+ helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
208
215
  helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
209
- helm/benchmark/presentation/test_summarize.py,sha256=UfSp33Q9xvuGnPYfFmLJdH5y7KWp9qbZprRMyx8LGP0,1618
216
+ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9iCNrUIzrdjOfUolu0,6317
210
217
  helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
211
218
  helm/benchmark/run_specs/air_bench_run_specs.py,sha256=VdXis1HN8_KLrMHDCVi0J7WdqjRjAGbZMhrsnpzC-Kg,1604
219
+ helm/benchmark/run_specs/bhasa_run_specs.py,sha256=GEIC1Ye4zn17hPWet3QFQr1rvwmX6aEVg5fNuQ7Jwes,23815
220
+ helm/benchmark/run_specs/call_center_run_specs.py,sha256=GX5P2tTj4YS037EEZ8so_mX9LlPWyfJ-pF8ICoErpio,5324
212
221
  helm/benchmark/run_specs/classic_run_specs.py,sha256=Cn0z-6QY-ehbLaHJMvCwjw11DFBQgUyqVCaXwTVFyJ8,58331
213
222
  helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
214
- helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=fDyIxmOdgLLWVtwBfxcnd3nFnBZNFpJHbcM4Kyq5gZA,14315
215
- helm/benchmark/run_specs/experimental_run_specs.py,sha256=7aF-Ox8iBC2obfJkyKwobJaCjk1SqxtSDuRv_RxA3Eo,1310
216
- helm/benchmark/run_specs/finance_run_specs.py,sha256=7DCmeBQpETQjK0fvUKS1nDIbM_wxTXb2GhXcjzIDyIE,1181
223
+ helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
224
+ helm/benchmark/run_specs/experimental_run_specs.py,sha256=wduA6K3mpIRHmr8g3h0c5k7rUsKiPFOqJktdbbGxtoE,2950
225
+ helm/benchmark/run_specs/finance_run_specs.py,sha256=5mwb7GbAcSLVZiumqCiAr9dr8qBYApkEt5Oben5CFXs,4371
217
226
  helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
218
227
  helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
219
228
  helm/benchmark/run_specs/lite_run_specs.py,sha256=ViCPJ86Aah8301GTEk6z4_MtP0g8iik33t4GudobhWQ,11113
229
+ helm/benchmark/run_specs/safety_run_specs.py,sha256=ZTvLbRBxHWMIKPapugNfXPStJRBHfiaiXUHgpWMBONY,5469
220
230
  helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
221
231
  helm/benchmark/run_specs/unitxt_run_specs.py,sha256=ejp_knrcIjf0J4WiKj9LTgDTcUr29-XFZYHYz0w_dkM,1518
222
- helm/benchmark/run_specs/vlm_run_specs.py,sha256=uwnk9DHZKQj8nnC14kGiSN8xKiZfpigoz5S86TiHc4k,31118
232
+ helm/benchmark/run_specs/vlm_run_specs.py,sha256=wHq-FCP2dgbWtBHoe0NUSgiJfifMtZAvSiJIdn114zk,34249
223
233
  helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
224
234
  helm/benchmark/scenarios/air_bench_scenario.py,sha256=WUZvsUTqlsjNzQsd2baZZIgO30B4Zf3g0QjsyEaGmLc,1772
225
235
  helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=Wyt7J5BAvAqC5JTqCW4fh7ex9-itX11P_9rLTocqvtk,4973
236
+ helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=Ic0ak_5vGHeNT5PFgOptl-Ns8nQuM5nKpiQlhB1H3X0,3158
226
237
  helm/benchmark/scenarios/babi_qa_scenario.py,sha256=S1tPQY2x1I3hQL1JQ6wvUwvKyiSe7SqpRSW6N3_T0mo,5043
238
+ helm/benchmark/scenarios/banking77_scenario.py,sha256=pVA2LXB9uJ12GnjiEvjhRV-P8YNEjpFhyZr-J8MV2SA,1747
227
239
  helm/benchmark/scenarios/bbq_scenario.py,sha256=lT1XKSM-PXYtENI-ryScC4yb1TtII7YoH8kt_S1dZQo,9579
240
+ helm/benchmark/scenarios/bhasa_scenario.py,sha256=f8Z_xEbg9CoVyMJE4tTs7WU6B-QeIxYUI4g2IJWdj8k,78011
228
241
  helm/benchmark/scenarios/big_bench_scenario.py,sha256=bSk8Ia4u_6OqMjiyadpYQAWN-8GFWqvd3Ft3JiVGpi8,8081
229
242
  helm/benchmark/scenarios/blimp_scenario.py,sha256=o1MDcHT14KFDET4K9otx8pDiIgXrhsD19pvO0mR2ADU,6260
230
243
  helm/benchmark/scenarios/bold_scenario.py,sha256=NEfECMVzlVP_yo6sOuIzj6vZ5jd72_nvtEQ1lWrq85Q,4106
231
244
  helm/benchmark/scenarios/boolq_scenario.py,sha256=rvSp5SwXMCVzBo5BFxfhj1Xv06_ksqKrtTQR7nPiS-o,8013
245
+ helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
232
246
  helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
233
247
  helm/benchmark/scenarios/civil_comments_scenario.py,sha256=VO5G-cQ9qctmBN0O76uSewnO_mFslMo5mbR2ZTrjuds,4851
234
248
  helm/benchmark/scenarios/cleva_scenario.py,sha256=xhwZ616iz0CN3fYIfrXHcV1XlcRQjyPSzML8fq8D3l4,57939
@@ -245,7 +259,7 @@ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENO
245
259
  helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
246
260
  helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
247
261
  helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=goGmHtN7MYnAQIXhffZZhuuuMWN0gHNOXyI9_injiZM,20119
248
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=mbUABlGhpDur6x7z_q5iDqJRMBZ2d4ZI3KdVWNnJagM,2859
262
+ helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=Qkwhg1s5f2_5rnCoX4BxjQGKKGVRp2StIwONvBjJVqo,2909
249
263
  helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=AI8HX16_Lw9MKqrck62q8IFLUU-P5hxaOEHcmTS4rdA,2928
250
264
  helm/benchmark/scenarios/dialogue_scenarios.py,sha256=-I7FY6q1b11zpFd1_oAgar5qlfaFcXsNCKGVln9etPI,5629
251
265
  helm/benchmark/scenarios/disinformation_scenario.py,sha256=kQi0MVVoSDhx2vOTnUaCIttPXMf8zz7Eld2FD_77tnA,8504
@@ -253,10 +267,13 @@ helm/benchmark/scenarios/dyck_language_scenario.py,sha256=vMxND9wPJenrGlCLhSw5Ux
253
267
  helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4cv7u2lmUFcigkAX_eMwIn49Pa3p-aHClkT-r-0roLU,6616
254
268
  helm/benchmark/scenarios/entity_matching_scenario.py,sha256=YjBX61TlL3CDQ3X6D-JyR-qlOYGLdoRXJxl9AEeqxYs,7022
255
269
  helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
256
- helm/benchmark/scenarios/fin_qa_scenario.py,sha256=pXUeJ34KiRSlEjYERgXqVSbr7zxvdXnOuMSpXvnUw5I,5782
270
+ helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
271
+ helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
272
+ helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
257
273
  helm/benchmark/scenarios/grammar.py,sha256=Pb9vEP_0Ki87UdQCj1ym7QWJ24M4DRP6TXB5d3GnhLs,5597
258
274
  helm/benchmark/scenarios/grammar_scenario.py,sha256=bl-Cm9caDs077zSu38mzaS9maZ2gM-QazgjOEMFvxYg,1454
259
275
  helm/benchmark/scenarios/gsm_scenario.py,sha256=9fV2SEw3ocKNAD-TrDZZTpq4l7mbttQQWbO0YNz4e6k,2613
276
+ helm/benchmark/scenarios/harm_bench_scenario.py,sha256=wzzia3HlfwALgRLFLABv3blxBh1ras-YtHk4iQ_EX30,2454
260
277
  helm/benchmark/scenarios/ice_scenario.py,sha256=vvk11cFPGUhg_CcGh3wEfVsGzrvMFgkByN-xcF-OOjI,16473
261
278
  helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
262
279
  helm/benchmark/scenarios/imdb_scenario.py,sha256=X1k76AweFECCpYCXy8HuvjRbXbfmDfwK3SES_t_wkUs,6174
@@ -288,21 +305,30 @@ helm/benchmark/scenarios/open_assistant_scenario.py,sha256=PH8F8zqYXXakr1xttBtFm
288
305
  helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=s0dGhsgcgud_bSqXw6p-w-nC_cme1Vjt9T9AwU4-K84,7371
289
306
  helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=zVL1gb3eVz-LbK2hfdnRR9ItaMSPlAGJorByWLt-4wk,7506
290
307
  helm/benchmark/scenarios/quac_scenario.py,sha256=SRAhMp6TAsmTRq6VRONLl3SEayFIe23He_mBhzkZ7qM,6628
291
- helm/benchmark/scenarios/raft_scenario.py,sha256=_5QhHS3opxxML7Rek6F-q5NVOf0M2UgbC6OTnQZ4C1U,4452
308
+ helm/benchmark/scenarios/raft_scenario.py,sha256=RKRUJQIVkz7reugeRK7hXD47Z2H52Qm6w0BLJnLbC_s,4459
292
309
  helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=GkgJo_13MWQQQTZbhlknvTR6ZrYr7NEn1WdMZrPs4y4,2400
293
- helm/benchmark/scenarios/scenario.py,sha256=lkJgqDAbnFQoJgAyAllqvlyIIH1nSQ3dnoVYh68tQrU,8232
310
+ helm/benchmark/scenarios/scenario.py,sha256=1HC8EjiZ-5k5AJhxtwRreLe3hBbTyZJWrs-Aa3Uq43Q,8229
294
311
  helm/benchmark/scenarios/self_instruct_scenario.py,sha256=jZ2MksT4N_4g_sp5egw7ycrsM-Ya786_RFmiYYdMvG8,2285
312
+ helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=grYOqccYBtB4m-_UUV20EOXsY6tkukwC6kwPOBAmdnY,1223
295
313
  helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
296
314
  helm/benchmark/scenarios/summarization_scenario.py,sha256=MlNMgsY369DC04nhMUdG2o9Ydi6yze1fGOjC0bK-UwQ,6847
297
315
  helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=pzifpsJJbucmTjujNqQnwQa4Y7wpQjkS6QjNXOrgTAQ,3096
298
316
  helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=1b3e3WpFMNBV3li17-0Ug6QCSKO4qRFaWDF23bYNsvQ,16326
299
317
  helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=k8IGK6VABOr6wuha4HynP47peoAkmIViAVhScOtCANo,8345
300
318
  helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
319
+ helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
320
+ helm/benchmark/scenarios/test_ewok_scenario.py,sha256=9piplj3i53_-xNSMkIN47JYEU3JB65WgEPT7qdyK4Ng,953
321
+ helm/benchmark/scenarios/test_financebench_scenario.py,sha256=EFZLJXXBoyjlTiMQFaQ6MiYkve1lfQDjQWjn4BjqgAQ,1184
301
322
  helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
302
- helm/benchmark/scenarios/test_math_scenario.py,sha256=s3-CllgCB8DL9-L4DmJ6Zcf9xi803nWYN84KlhN7PhM,1016
323
+ helm/benchmark/scenarios/test_gsm_scenario.py,sha256=I-Sl8Sg8kmFd7u0zZbwbNmeFV1mQLuOHoQ1cQDDwovs,1123
324
+ helm/benchmark/scenarios/test_legalbench_scenario.py,sha256=FqbgwBAhHWyTIUYSzI5FOnTDx0A3u1o2ANKa_6bfA4g,1212
325
+ helm/benchmark/scenarios/test_math_scenario.py,sha256=ieI8-c6yx-3U3iaEz2yiCGSwnQTBJE_06-dMKX7a8Vk,723
326
+ helm/benchmark/scenarios/test_med_qa_scenario.py,sha256=Ekp6r5eYPkCxV3FCzVvLemKxlhENhelqdO0Mdhg5yFo,1515
327
+ helm/benchmark/scenarios/test_mmlu_scenario.py,sha256=mxEsTydKUOt8OD1Ei82nPgUFV1Tlvu5Z6drEMToEURM,1593
328
+ helm/benchmark/scenarios/test_narrativeqa_scenario.py,sha256=Rac_OrUpd2ruT95YvSrmoVz2Jpycgq3Roiyogm_0aAc,6420
303
329
  helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
304
330
  helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsHzMZSzOe8j0FrUQmMyZM4,1736
305
- helm/benchmark/scenarios/thai_exam_scenario.py,sha256=FinZuwEz5dDcNBxG4OseVOnBWlgg9lT1LzMdIWSjG94,5838
331
+ helm/benchmark/scenarios/thai_exam_scenario.py,sha256=5Q-KL6fVrk2FKApVyY2ulreFduwBaUG0iJOsJ8M6El8,6008
306
332
  helm/benchmark/scenarios/the_pile_scenario.py,sha256=RqU8yXQJ4FkmEc6rO9J3QMXenyUZrsEZlLAQUx4-Wnk,4995
307
333
  helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=iqL-tuqUQZjF9-DKAnI6wV-oLGC_I3aFuofdtJYHt8E,6035
308
334
  helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=CRlPxVfkg3HPZV-lUMyCUSFOiAqg5IIPt-dq3qR9LU4,2096
@@ -312,6 +338,7 @@ helm/benchmark/scenarios/vicuna_scenario.py,sha256=zLwLuEr6n9VQjVxQwgFIM-os23kJe
312
338
  helm/benchmark/scenarios/wikifact_scenario.py,sha256=cOVKgDisBdjPcmVMCLhTekdgX3hpDJUT-aKbvRSaMoM,5791
313
339
  helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=PAPkmZdC4aIBQ1k29dDvTFBEFaPV1ZR1Ifif4FHoZqs,3087
314
340
  helm/benchmark/scenarios/wmt_14_scenario.py,sha256=NArkTZntYdYlegHo_-fkzeyCUOjosOONQKlquPbZRxY,4498
341
+ helm/benchmark/scenarios/xstest_scenario.py,sha256=wpagohfuFE1juuXjq0dleSIHr5Uk6hnClIv-wABbzEI,1285
315
342
  helm/benchmark/scenarios/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
316
343
  helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py,sha256=c8zcoGCOFqBGE4TAEx1uLsUmGXw_jIS8alI99ubGeDA,5477
317
344
  helm/benchmark/scenarios/image_generation/cub200_scenario.py,sha256=7p3G4mJRc8QHR4Mw2GLsfAFuJcEe6OeZbezVhbyc55E,4103
@@ -332,99 +359,85 @@ helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,s
332
359
  helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
333
360
  helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
334
361
  helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
335
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=zXR0LmXsD2tv_ovJsbY_HP53kdiFOvty7Y_Ai3ZCrT4,3037
336
- helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=jwGEouY30Yy5U9lRUbv0XAO98gUJ669g0dhdDCGQ-8w,4097
337
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=82qplX4gJ4GsSVhBjwrsVU46TAHh-jym3F_M5A-odRE,4608
338
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=3pBAQgOsnSyMCzt60s1m8Kf_fEJ4C7XgCDbtXatTlX0,2599
339
- helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=sBQfqAxmP-Z0ifCgwTbP11aPsKA4vogcWBqSDiKlbE4,3512
340
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=7KjsXiAaiVHDRSyW08jZXNUTWogP3Sr2Og5ViT6Xz8I,3832
362
+ helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
363
+ helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
364
+ helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=lfRHjhhXCo0YeDQe4_gfSHCzVKtqQVZ6DALLABcCmtI,4637
365
+ helm/benchmark/scenarios/vision_language/exams_v_scenario.py,sha256=pLD--gtL5q7jLSWQ8iwAdsiOrTJ_rBsLbwWMWKRhPbs,3853
366
+ helm/benchmark/scenarios/vision_language/fair_face_scenario.py,sha256=V6_1Kl2nWDRyHvwnKcSxkP0DChzKDBW0i_-t9oAxps0,4721
367
+ helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=CDutFh1PHLyeMdJ9HojzYKE1zJidL9ktcsfn9uHNLZY,2612
368
+ helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=k4E6JAN8a_KT1jjV2Ch3K5YhWKJ0f-9iCXLO-_2Xl8M,3535
369
+ helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=qiLLdiSzhnSyjmqCAvMxjhcZ7yBiX37L1cdsZvHL4ds,3845
341
370
  helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py,sha256=7GK_jAOfCgRIGiN_GInDePwuT2wZqmWHp1rqdx18xQg,4994
342
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=kzZHeyWQHUphUfAixkms2t3-KKfHRjwIKi6qm-lMyXA,4728
343
- helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=Yw4zxeYgUw8HKRR5ob9QEIT0bSPrdRUULMKCo_xzFpw,4337
344
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=ly77pj_TorwM1kN2sW7Y2AIGHOBlDkdzV0STvZTBOtc,4332
345
- helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=zxtdub2akvxPYEG12pkW2c57TIFqN38C7ucAXAHAdx0,5455
346
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=XQv7uv2m6EdbI7h0-9eDag4_bL7qE_78PuHB7c4SsHA,7654
371
+ helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=HnzA0L1Mm9rw9uyK-hnCGrxo33z_U_86TLnlELjDV6E,4738
372
+ helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=7ZHpRD7TdQQ-Mp5XQV5yyiLUE0k1KpgbLSYKLBJMxs0,4343
373
+ helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=cM7eTE4bpcIzLyEDye86Ud3rD4Id-0ju73EXjg0DYoI,4340
374
+ helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=7Aa3y0TWGZH3QrPDiqIMkj83LU2Klrzgcb46jv5uytY,5498
375
+ helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=deDMdg2-ORZPV623ngncDPlRn6z6cq_QbQtMu-z0Ydo,7665
347
376
  helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HUO09uM2rBXOfCsxzwovmwtihq53xjuzDOtQO_S3J4I,4161
348
377
  helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
349
378
  helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
350
379
  helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
351
- helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=wVcTNUql4TBClgm7oyLq5cmybsnlurc0MblqRRxXRyc,9929
352
- helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=uFkzMMsjhmuSYo3v_QdfJFX6RFse83JjzMfMa3ynvV4,3975
353
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=5MwGb9BOyB2Xy70BGYZcjencf0ZskxBuzcPa7ABRuww,5106
354
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=bH5FfAgwyzpVMPOJKNCmOgpX-lvJF-B42uVi4m1mY-I,4231
355
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=2foCM7ik9RvYahauKIoNAxkGiluOYuT0w0r7FZi-MQo,3621
356
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=hJ3sOSpPnOCwLtpVnfasI_X89oofI-2PBRjMnx8eiVA,4139
357
- helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=2hY-qngKC69ZL9SHNei3IK3C2PvJDWvwLFVQ8yNSOVs,5196
358
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
359
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py,sha256=ImhfiC_y_hihAGvlj9zRsaoW614QFCBopBD2KxnbSs0,1805
360
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
361
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py,sha256=RSLYpw3BsIIxkhS-6RfVM_UhjmwJDMoA3JQl3FBjv7I,1147
362
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py,sha256=_pgW_aNaM3E7MTl_tNExupvENdtAH3DvZuSwZIiopCg,837
363
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py,sha256=ovg8-FfJ8_I1xbajFGSLvERZIA1fQjaUn0zd04ZbI84,15316
364
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py,sha256=dOt-gif-4Z0JekI2KAel4KS1zyvzqyqoFLP3xoe5DKY,9710
365
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
366
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py,sha256=i-i0mlG5oRRDNYNqP7o7Ul56iL02p_anJoThXaSvFiM,2826
367
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
368
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py,sha256=JpTiCSNcqX4wRpH6Cl07jM5wMkpZ5OeR_hjfK_V-Qok,943
369
- helm/benchmark/static/benchmarking.css,sha256=7PsUCff8YcoVxyWuALItfB4TZL55T7MbVQpoc9nZznc,2104
370
- helm/benchmark/static/benchmarking.js,sha256=lqEmoAikBwycVBf1h-et3ZmHKW_DcwxzlwmDez2A1EU,54531
371
- helm/benchmark/static/config.js,sha256=kIfkgr6gaMdFOAdqB35EvuBohq0DWYSQZbe_pTK09VM,103
380
+ helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=D3nNu3uU87eMDiMZZafuRTntXjwbqPaSDygUgQm45F8,9943
381
+ helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=gWrBG5U8uoU92JPGNm5kuzo1GekoJo1rKQaNhv6MYGA,3996
382
+ helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py,sha256=OJtiGhSN_KYgEz0VGXjCjQik_Xihtgiali70Z00XOzk,2083
383
+ helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=YNwuIMJBo7wwftx-T5tCYmGo2oy_794fZ330lkDyqb0,5171
384
+ helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=DxGZ7EL22SzxpAkuiA5twuGVTm96wG_RBg3dU3Vh_c4,4241
385
+ helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=wRa_OuOdyf-qcy9hml-Kj6YtVP5MDzeTbGcqva6LqdA,3707
386
+ helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=zCnkiSya-PHc3ywAhmw03bFdsvLCxAUwGfE6OviEXDQ,4153
387
+ helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=cC8_Vyqw2f4K4hJY-eo9ptj6ANfWgiFAK7b6OOTIPLI,5239
388
+ helm/benchmark/scenarios/vision_language/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
389
+ helm/benchmark/scenarios/vision_language/image2struct/chart2csv_scenario.py,sha256=qcs3o9dPsXoeaP0bu9UVZ6P0GPEcRLoaqABxysLN6VY,1802
390
+ helm/benchmark/scenarios/vision_language/image2struct/image2struct_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
391
+ helm/benchmark/scenarios/vision_language/image2struct/latex_scenario.py,sha256=SnZuHATg5i764MAdgaGwjIGdjCZNrOqP83Y5jE_fkHs,1153
392
+ helm/benchmark/scenarios/vision_language/image2struct/musicsheet_scenario.py,sha256=c08cquz2IALY7PlpOoEfAjupKZmn5GDVZ1H8Gbj4r8s,831
393
+ helm/benchmark/scenarios/vision_language/image2struct/utils_latex.py,sha256=jW3_c63a6u39PJGJw6lM9pIa3dnF8CQgZlPNZdH0sfs,15001
394
+ helm/benchmark/scenarios/vision_language/image2struct/webpage_scenario.py,sha256=DJQIa8NaKV-nhkXEBuY97MJ8a1O3x-Yr6hACVa-67Ns,11117
395
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
396
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/driver.py,sha256=WBFbb3N_eHIa7OFvHQS3Pmwbmkl6r9VyobxlIEKhty8,2823
397
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
398
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
372
399
  helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
373
- helm/benchmark/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
374
- helm/benchmark/static/index.html,sha256=xIJGjMg0qn9eemfdBiNbTI0jzPfBD5x0v8HJF-dMqBc,3561
375
- helm/benchmark/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
376
- helm/benchmark/static/json-urls.js,sha256=AaULgfHw8OLfrQLJpBHfcC013uavQnlNNFS9vzb0qOg,1981
377
- helm/benchmark/static/plot-captions.js,sha256=bTR8gYx-QqF_RJyKX-L-eQP7hSEtawfJSoADCvgjKag,3011
378
- helm/benchmark/static/schema_air_bench.yaml,sha256=ePZAGL4X-yH4cAQvzS5uU44duCKwdDrMwDSvCC9y7-k,139384
400
+ helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
401
+ helm/benchmark/static/schema_bhasa.yaml,sha256=5q-jjK-YvE8C_wVal2H2C-fbW0g4env9-Skbu8o-L1k,27774
402
+ helm/benchmark/static/schema_call_center.yaml,sha256=Mt7_rLG6IT701YrjiJdNb7HpoMVkFjabrawnBieUUhM,8049
379
403
  helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
380
- helm/benchmark/static/schema_finance.yaml,sha256=vZG0EssYr_BVZmyV4sZmRaeLFSX2ycjni8O_L_kGzzc,5283
381
- helm/benchmark/static/schema_image2structure.yaml,sha256=IV57vHTaZakH6EupIlT6PRjK8aI14OSNFYUAHD9QBxo,15593
404
+ helm/benchmark/static/schema_cleva.yaml,sha256=TDh-zcCzzTTs7bu0IWlY5dXYaTFhxly8sJIBGQdBvug,25401
405
+ helm/benchmark/static/schema_decodingtrust.yaml,sha256=2VPxzcyKYea7mx-qmswyVRjPfVatjVH4Rs3OU82mgII,15670
406
+ helm/benchmark/static/schema_ewok.yaml,sha256=MluPnZSy22wZLFB2pR7ycBRgUSvIUsqvq4qM0Vk2ur4,12113
407
+ helm/benchmark/static/schema_finance.yaml,sha256=OgsYMSFK__8ZZS96ktsgVRfM40-BhbOY15j9OlV-rNE,7010
408
+ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0yPsYgu_MB7uu5pwHE,19894
382
409
  helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
410
+ helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
383
411
  helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
384
412
  helm/benchmark/static/schema_medical.yaml,sha256=hDk4834FKn-5cMr6pHcu1P60sh6cXJ2J0Z1ADIj2MSc,8455
385
413
  helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
386
- helm/benchmark/static/schema_tables.yaml,sha256=i4ylaq5yZoIEUvxPS8dniPQWKHZF5bz3hMgjNbzC_MM,7064
387
- helm/benchmark/static/schema_thai.yaml,sha256=25-PjBhZMHM89M01XxLQWNg0mdQnfo4H0XInF9ZzDow,7900
414
+ helm/benchmark/static/schema_safety.yaml,sha256=k4LBKZbnxRgofejJE-hHadTcHpRTlx4NAt19j3fe4NA,8872
415
+ helm/benchmark/static/schema_tables.yaml,sha256=c2HZlGa_vTOlbc2ByuTW2FpsuLVGyRyOSXeocwkeSgY,11047
416
+ helm/benchmark/static/schema_thai.yaml,sha256=yJUrevvgTJ46TpyXfNecW_B9urh7LPwSbBi_mT4ZngA,8348
388
417
  helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
389
- helm/benchmark/static/schema_vhelm.yaml,sha256=IZ1oAmEjnoWQ6YtMpnwZ2IQkXx86bJS1j3686mvtAGc,29476
418
+ helm/benchmark/static/schema_vhelm.yaml,sha256=4DkACpY3RPNOdk6-vBKoQTlsV8Q5AL7gNc8gZDSYiWs,31185
390
419
  helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
391
- helm/benchmark/static/utils.js,sha256=bgN0PT53Dregc-nLmEmAEmg2psufWpS8jTf74WoypHw,7681
392
- helm/benchmark/static/images/crfm-logo.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
393
- helm/benchmark/static/images/helm-logo-simple.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
394
- helm/benchmark/static/images/helm-logo.png,sha256=GTqbrxJr0oQXbBRq-8v6afY5zB5x0M6PhEbKRIX9qIE,280667
395
- helm/benchmark/static/images/language-model-helm.png,sha256=mG0-bkdziXeiF0wOGd67y2jnYmVKJYqhD2N5Q8VIF8Q,26563
396
- helm/benchmark/static/images/scenarios-by-metrics.png,sha256=F7g9mvIYopm-n7sDGg-7I0XCyZvloKsi2wIq1i6da_Q,51331
397
- helm/benchmark/static/images/taxonomy-scenarios.png,sha256=2MiuCLaxnuHvwsWWJHnZFc-rvoQIi_tNIjDatY7I-Dg,100766
398
- helm/benchmark/static/images/organizations/ai21.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
399
- helm/benchmark/static/images/organizations/anthropic.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
400
- helm/benchmark/static/images/organizations/bigscience.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
401
- helm/benchmark/static/images/organizations/cohere.png,sha256=7cr4LI8WK9yPryQboyWK_T5baSND-d-tVrlPNflLQMg,8757
402
- helm/benchmark/static/images/organizations/eleutherai.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
403
- helm/benchmark/static/images/organizations/google.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
404
- helm/benchmark/static/images/organizations/meta.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
405
- helm/benchmark/static/images/organizations/microsoft.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
406
- helm/benchmark/static/images/organizations/nvidia.png,sha256=hvp1wZMwYxkfrVMvJs73PX71JwY5L8ZvxIH_fL4n6Po,27945
407
- helm/benchmark/static/images/organizations/openai.png,sha256=P4ZT5ISIlt6Dl0mOp7juSM4Y7dfyRNPqdc0PJuwNoqg,16877
408
- helm/benchmark/static/images/organizations/together.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
409
- helm/benchmark/static/images/organizations/tsinghua-keg.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
410
- helm/benchmark/static/images/organizations/yandex.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
411
420
  helm/benchmark/static_build/config.js,sha256=ER8utDIqVZi9uge7Qrk1gmlT88TOOkFF9xYp3j10m8U,165
412
- helm/benchmark/static_build/index.html,sha256=J0TrGE5-kOkopr-iSRHvvCzDL00w8Si-8OaIt9vSX0M,1149
421
+ helm/benchmark/static_build/index.html,sha256=YoxWJa-SHRtdMnB5V44wD-2wMj3cUEVXqA60QfK4f_I,1149
413
422
  helm/benchmark/static_build/assets/01-694cb9b7.png,sha256=aUy5t0DYCg4r52HDOmeNi1S2CHsnv3mE7ySokJg3Ouo,8903
423
+ helm/benchmark/static_build/assets/accenture-6f97eeda.png,sha256=b5fu2p7L_mnwg-p5jjPk1sFRwJEBRtGwXsVyQU_Runk,9537
414
424
  helm/benchmark/static_build/assets/ai21-0eb91ec3.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
415
425
  helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
426
+ helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png,sha256=bfyazxJvVs5GTSSlnm6nOb2r_jzo3TJybqF04S5Dxhw,69372
416
427
  helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png,sha256=fOEANHS8RymKaCzUWn9gQWebts2ghSmtW9Fdda_TjR8,7224
417
428
  helm/benchmark/static_build/assets/anthropic-70d8bc39.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
418
429
  helm/benchmark/static_build/assets/bigscience-7f0400c0.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
419
430
  helm/benchmark/static_build/assets/cohere-3550c6cb.png,sha256=NVDGy09xliCqZy2TKUAka-B90jVDB_VRCS9A2_sN7VU,4414
431
+ helm/benchmark/static_build/assets/cresta-9e22b983.png,sha256=niK5g8HYADkbhKM9gSVtYEdPegBS40zZXF4nNe9Fu4o,8131
420
432
  helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
433
+ helm/benchmark/static_build/assets/cuhk-8c5631e9.png,sha256=jFYx6Xx-SGYANpsSnqrlaQytYuOBOsTHhpqPJZk3EwE,30385
421
434
  helm/benchmark/static_build/assets/eleutherai-b9451114.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
422
435
  helm/benchmark/static_build/assets/google-06d997ad.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
423
436
  helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
424
437
  helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
425
438
  helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
426
- helm/benchmark/static_build/assets/index-30dbceba.js,sha256=WXT0A-yH9f-3wCwQ3rwKWTCIOOpjETQwOQyZt2OMAwc,77064
427
- helm/benchmark/static_build/assets/index-66b02d40.css,sha256=ZrAtQOMv7vRJwOA9urNRk_rs8hJljom_xhn-wI89g08,486795
439
+ helm/benchmark/static_build/assets/index-05c76bb1.css,sha256=BcdrsQgUFadqYf5z-wdFNosV_c2MlxV8xktld2BFKBk,489017
440
+ helm/benchmark/static_build/assets/index-3ee38b3d.js,sha256=Mtgoy__VC4YN1GxN234HDVfEqq4ONNaj3vTfKDEtqPs,93905
428
441
  helm/benchmark/static_build/assets/meta-5580e9f1.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
429
442
  helm/benchmark/static_build/assets/microsoft-f5ee5016.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
430
443
  helm/benchmark/static_build/assets/mistral-18e1be23.png,sha256=GOG-Ix7XlctGOUmvJfO2oVSBM7E5O562G88OnoxsjBw,14402
@@ -434,33 +447,30 @@ helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUI
434
447
  helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
435
448
  helm/benchmark/static_build/assets/react-d4a0b69b.js,sha256=rNTpl8Is3LkYXqJowRMc8vc4SXQwP94Ozy4DZZWwldU,275141
436
449
  helm/benchmark/static_build/assets/recharts-6d337683.js,sha256=rDrVmtTCCSLY2hpcxSDxhlQ6CQmTTSQOESNeO3oVQgg,432466
450
+ helm/benchmark/static_build/assets/scb10x-204bd786.png,sha256=IEvXhlxgBA9NCH4RrGWJkMx0Yc7V9EK6o7vrAI5KZCE,4990
437
451
  helm/benchmark/static_build/assets/tii-24de195c.png,sha256=JN4ZXAa0rbR2IlxPfd_mKtntFZcYpDcXocSiqrC2rNg,63389
438
452
  helm/benchmark/static_build/assets/together-a665a35b.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
439
453
  helm/benchmark/static_build/assets/tremor-54a99cc4.js,sha256=x_K5Bp7szI2zsvESrKqffUOHbm8ohjjvuoIeY_yD_CA,293015
440
454
  helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
441
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png,sha256=zedhimhku2Q3QIvaRSYlUAQ0b5ia9pU4cFzKnABfr4c,118544
442
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png,sha256=bYElJoVkSaMJ_lFZj5qoSrIbygbNyBk35q89jtFRet8,168494
455
+ helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
456
+ helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
457
+ helm/benchmark/static_build/assets/vhelm-model-8afb7616.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
458
+ helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png,sha256=qGpsSjEu7HFlPAk_zXuUEdDqj0wkCfFHA1bCtu8Ugdw,8531
443
459
  helm/benchmark/static_build/assets/yandex-38e09d70.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
444
460
  helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
445
- helm/benchmark/window_services/ai21_window_service.py,sha256=1ZDLJv73bxoLj_MzEBu4TgH5xHw-hx0nI6KX6RU73kE,12593
446
- helm/benchmark/window_services/cohere_window_service.py,sha256=5jm8o5ZYrbDUluA5LbMWLOOrOlIuHR7MhAJkOuzBagM,4750
447
461
  helm/benchmark/window_services/default_window_service.py,sha256=F099qF-YeM7YPVtph0dRFPry5vP8_BiudHTy2CpuICQ,151
448
462
  helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=EU3QevFOiQYBN2te54FsVRnGYZdgDxK6KqOWSQOa0q4,2125
449
463
  helm/benchmark/window_services/ice_window_service.py,sha256=9NeBN_tmOvwrK1miUnX3wJA70BP5ifIIeHpNR2gVwls,1070
450
464
  helm/benchmark/window_services/local_window_service.py,sha256=e9JHG72kFWlV6UKg_IhRCipOWQUrOD6ZjsT-_Mwewps,5232
451
465
  helm/benchmark/window_services/no_decoding_window_service.py,sha256=s_i_cqIuU9p0GDRIBApaOHzjH7gHrBPTJ2X5NEcN33Y,1375
452
- helm/benchmark/window_services/test_ai21_window_service.py,sha256=HkpNSaJAClZfaa-bQZ2BrRm1UB_u4sLAGSBlGQqRUD4,8221
453
466
  helm/benchmark/window_services/test_anthropic_window_service.py,sha256=lnxLiW5BPaWN6m03L93qCFugsxnVBbLmYPCarlrO-So,4196
454
467
  helm/benchmark/window_services/test_bloom_window_service.py,sha256=x7WBh0S223ABC9KvL2-y9G-cUxFUPm6oIkqvYO_4mt8,4288
455
- helm/benchmark/window_services/test_cohere_window_service.py,sha256=rKXnw2E7MLAtkLgtrUvnZuQp99_agDO4qcpb3daik-E,3348
456
- helm/benchmark/window_services/test_cohere_window_service_utils.py,sha256=sf25f9MeXzoqsbDzZ7d7le13hm8RkDe54nhLtKF2pqo,158150
457
468
  helm/benchmark/window_services/test_flan_t5_window_service.py,sha256=IhQMWBq2d39O3uNKGwbaMWJkz8585Zc-J_yqvPJfwu4,695
458
469
  helm/benchmark/window_services/test_gpt2_window_service.py,sha256=2UHKt4Wmh6XmSCdepjuMbZHFpb1oUcrKRSxcdOzBE1s,2671
459
470
  helm/benchmark/window_services/test_gpt4_window_service.py,sha256=tV5WdpxYxewchEp1rnsIlEfdJFrHVFKYQ-_8NhGK2yo,1052
460
471
  helm/benchmark/window_services/test_gptj_window_service.py,sha256=0lu4Os_3x3N-AbejG3LZ3-_ikxEHg1Lbmfq-Pzg_D9Y,2374
461
472
  helm/benchmark/window_services/test_gptneox_window_service.py,sha256=8CaOW_ln9bxKA4--dVLfLdsASo6RrR7ouP6EcSruzdA,4210
462
- helm/benchmark/window_services/test_ice_window_service.py,sha256=1DudvCYh4te_UDLg14XeXwKUoin9QnCgZ_PSCwxxaM0,23579
463
- helm/benchmark/window_services/test_openai_window_service.py,sha256=W_QJKaMgzYU7qGFuSS6JeM_f50UX0SuHpkH-u2bEvI4,2312
473
+ helm/benchmark/window_services/test_openai_window_service.py,sha256=Mt-dDtjQmz25n7hwNVyy1T_rl0TMvcvJfuhWNe_AvSw,2314
464
474
  helm/benchmark/window_services/test_opt_window_service.py,sha256=Gh1GzWnlgYIGwDNBw4EnHds3fXwMaSjzkfFXeLn47os,4215
465
475
  helm/benchmark/window_services/test_palmyra_window_service.py,sha256=yy7D2C0ZzExCbptYNsEI9zuX2AEGsEUTj0a_vbqub4o,4212
466
476
  helm/benchmark/window_services/test_t0pp_window_service.py,sha256=pvp55FyqjunkDpHVAhPup3h-iNkepQpxyr4nC87-5iY,3998
@@ -468,7 +478,7 @@ helm/benchmark/window_services/test_t511b_window_service.py,sha256=zwgUxmkpV0IJ-
468
478
  helm/benchmark/window_services/test_ul2_window_service.py,sha256=JUehWFC5P1sosoFzPacLJwZQ3D7_GUn6yLi8oBPkido,4061
469
479
  helm/benchmark/window_services/test_utils.py,sha256=TQ5Ba3rq-0k9fCqGJ-gfEr2NavP_hzvxwZ42SRPfIlI,3376
470
480
  helm/benchmark/window_services/test_yalm_window_service.py,sha256=tO1ZsUCXD1E0QZWarABJjn5Q-g-d1PyM8PFOmGMcynI,4301
471
- helm/benchmark/window_services/tokenizer_service.py,sha256=dPsEhA1I4SUvsR0UBdzJ2wsFahw8GuF6qhE8CxiPZRY,1051
481
+ helm/benchmark/window_services/tokenizer_service.py,sha256=RNznJBAxcCUMCurb7mbraZULx_ZtB0G7IxbrnUe0Urk,865
472
482
  helm/benchmark/window_services/window_service.py,sha256=y6BthPY1V-ugmYfaJElm5Wfy3PSgoJLj10vHcXZZGNA,4727
473
483
  helm/benchmark/window_services/window_service_factory.py,sha256=T55F0Y2jiOYxUHHZxT4YX4fFXY5gfFhn56zIwUBhc7s,3423
474
484
  helm/benchmark/window_services/yalm_window_service.py,sha256=_Yz4NwbMx9Px8raJlMMA5Aw80iA8G_bQnd8pxRK-By8,1059
@@ -479,10 +489,10 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
479
489
  helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
480
490
  helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
481
491
  helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
482
- helm/clients/ai21_client.py,sha256=LIdkmzcUDR9uIF2tIk5YgDNGNmfQ9JDYmgscvFoCHDs,5509
492
+ helm/clients/ai21_client.py,sha256=PYyqpbnMK1l18Rv_qhE5KdHHqZHgHePaJtJOowTyG7I,8128
483
493
  helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
484
494
  helm/clients/aleph_alpha_client.py,sha256=koPqXF6uRD905atoiCaPg5yxr6B25J0g2OTWk8geebQ,4969
485
- helm/clients/anthropic_client.py,sha256=wptP4u4NhQknoy7VQsWqVzn9tv3IrCuJ3vUMq6fiq0E,34909
495
+ helm/clients/anthropic_client.py,sha256=s3eCwHh8mbhxLi8up1WtQWKkUsHJa-LO44prNd7XYFc,34059
486
496
  helm/clients/auto_client.py,sha256=uK9EWQFWBt4DoV1oytm0dIeA3YpcfGi_H0rCRZSVE8c,11438
487
497
  helm/clients/bedrock_client.py,sha256=BsH9UopsP6ZHf-K0Yzg1PYSMLDwY0yIUmPHDhJVMUi0,5293
488
498
  helm/clients/bedrock_utils.py,sha256=okZ6Z8pviGOUNlrdF2QquAqFs8-QYgcqci95eij8giM,2574
@@ -494,24 +504,25 @@ helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,32
494
504
  helm/clients/google_client.py,sha256=EOpPzK5_9yzWkMjK-4ILiixDF3aeOa8AbR2SPnEO-nw,2900
495
505
  helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
496
506
  helm/clients/http_model_client.py,sha256=DBgkVDZPmg99DCcO_1Xdf6nFQo2kyxLkgoQpwC-wkHI,2806
497
- helm/clients/huggingface_client.py,sha256=xmdqOWoioqoYQjtBqJFN-K9Fm3oHEQrOEjyzDz4ZWBY,15847
507
+ helm/clients/huggingface_client.py,sha256=k-8J4nnDbve8UtGsa0RytWhS9IpAy8hoJAUw4nRZTMI,15734
498
508
  helm/clients/lit_gpt_client.py,sha256=Sjec16bNODosEhDoBkRc4t-LNS-nCUY_jVivWj5zvfU,6205
499
509
  helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
500
510
  helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
501
511
  helm/clients/mistral_client.py,sha256=thOLMcEfrzWR00JUabIZ_PnW2o9YZsdSmNf9z3jbYKo,5982
502
512
  helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
513
+ helm/clients/nvidia_nim_client.py,sha256=f3ZWoTnJmBIFeWsHeUDaTCbDZLK_kdlUWNO1hWumUOo,987
503
514
  helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
504
- helm/clients/openai_client.py,sha256=tXxi9nZsxz2I4YQLrQrV-GhlgZ1Z9ifrUhC_3Aw5SPE,14238
505
- helm/clients/palmyra_client.py,sha256=LBYFHNc5LdpPbiSp1AAHuMm8cUUCQ2EB03BB6XnDTYQ,6551
506
- helm/clients/perspective_api_client.py,sha256=WQDArqlKVWwcK2SicnSIAgV6JGVHsxibTzkdezT3z_U,5920
515
+ helm/clients/openai_client.py,sha256=Am7xfDkWV4l3MuPEwuF7ImZ6qOe6rmsjI7sRVTfMhMA,14997
516
+ helm/clients/palmyra_client.py,sha256=vnlGL3F4ZUK3-UXlIq4OgbP9sA3_C2ItJPiM7RDelo8,7224
517
+ helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
507
518
  helm/clients/reka_client.py,sha256=K8b9p7U6LLAy4PRjgYrUS06gF4G2xjhjRoMEO4XDe0o,8329
508
519
  helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
509
520
  helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
510
- helm/clients/test_client.py,sha256=V7Y56Ahqa8C2Kc2_W2QE0VfGbBEJzFmnic3LGHZkOqQ,3940
521
+ helm/clients/test_client.py,sha256=6cLpQc2IMR5o7iBxZYPvoRtHJa5i0E7JHh1VKaCtfBw,3842
511
522
  helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
512
523
  helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
513
524
  helm/clients/test_together_client.py,sha256=yYNrhU3kQjmHwhILuoP5QwUgbmkm2gg2NHiNycHjoeE,6145
514
- helm/clients/together_client.py,sha256=rtYdx53ZE19ziJpBc7MYTeSHJjN3Ke51I3Uldg0IAbs,20595
525
+ helm/clients/together_client.py,sha256=J9rQQCqPSLftTNR6BEei28bTL-eXwGAvvyiyw2SVbe0,21836
515
526
  helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
516
527
  helm/clients/vertexai_client.py,sha256=K_vCanJU97o2P_WJOeLhUFJA8SdfJDlVNl7Mi1HuIrQ,21860
517
528
  helm/clients/vllm_client.py,sha256=p9atBtq3PBOoPkOPSifkMrYZjNLnNM_sWM6tL_3N-WY,1675
@@ -576,9 +587,9 @@ helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
576
587
  helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=hTywh5nM95BmPoDyKOSDWg9G3-QwLO3KZEJZVkmFroo,6478
577
588
  helm/clients/vision_language/huggingface_vlm_client.py,sha256=H7AE8mm506PkEcUO8VaLVtptHTwVX58nZx1A_BWdKzA,4968
578
589
  helm/clients/vision_language/idefics_client.py,sha256=hi1VCDBegHfBssmW0C62H3OX3U2ISVRhaSkd24gb1K4,7692
579
- helm/clients/vision_language/open_flamingo_client.py,sha256=CkN0JCeR742ZG9Nc4A85hp4BSE0WLU-3Rs-ZwdmDkzs,6632
590
+ helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
580
591
  helm/clients/vision_language/paligemma_client.py,sha256=IU_T8r1RgpGkEAqabLKBbmoUOWV6c1a9_FXgiTy8exE,6835
581
- helm/clients/vision_language/palmyra_vision_client.py,sha256=mY6vj918f-tbqhOmh7PCSEgnSpHzWY8UTqAdvYgXJ8Q,3757
592
+ helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
582
593
  helm/clients/vision_language/qwen_vlm_client.py,sha256=6rCH4gJMDyQHyjAE_GDIrLsInH_bvd6to-4RMWbRLeM,7407
583
594
  helm/clients/vision_language/open_flamingo/__init__.py,sha256=i1tGJj6ckeE6eS1EWV5tbQKYLmPCrdSI45mPchfv_Ic,88
584
595
  helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -589,7 +600,7 @@ helm/clients/vision_language/open_flamingo/src/helpers.py,sha256=pq_BgkUflYBDw8g
589
600
  helm/clients/vision_language/open_flamingo/src/utils.py,sha256=6FYU0NgshZadF3QYWQkPW8jyEFiOd6jyb8p5rv_vOj0,1444
590
601
  helm/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
591
602
  helm/common/authentication.py,sha256=RlMx29_TSrfU7ujE7dJkxmFub5EqLj2NswV5lAVFFDk,179
592
- helm/common/cache.py,sha256=hPLBtWjCKlzccmfWZv56F6LEcLskkfLeq2DVHPeV2GM,7631
603
+ helm/common/cache.py,sha256=0gXq97M9JgSO5aO2puEV1WRpEy8jdc_wRsKL4rVVoY0,6725
593
604
  helm/common/cache_backend_config.py,sha256=4u5A6BHNBmGnnrDNhCVgrdwhXQtyAbWcUeoo7hdgZSo,1530
594
605
  helm/common/clip_score_request.py,sha256=WnNg89owDCmG7tyy8nnQL0RdKQLsUdMWiYH9XqqbGw8,840
595
606
  helm/common/codec.py,sha256=gTh6AwIQ0Bbul_QSnIO7eItwMZmYtnkIrG1jkc4GOL4,7100
@@ -601,37 +612,37 @@ helm/common/general.py,sha256=nMfHNPXyAAorAMmgDClD8r8XXeJcvfF0QXTP-FgH5PQ,11690
601
612
  helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
602
613
  helm/common/hierarchical_logger.py,sha256=EnKLnfbQftca08EJfjGEQb4tcnCKbx-JtwLnoCnhMQs,2908
603
614
  helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
604
- helm/common/images_utils.py,sha256=bsxgW9knrfa9NTa6V-O13_nDnflqrqHpnKlTRxul-aY,3187
605
- helm/common/key_value_store.py,sha256=iHi1WQuWttLNJnuM48QNOAXHoneNbmbBmtXYPq-dyys,3147
615
+ helm/common/images_utils.py,sha256=icE0tH9P3FT_qggfbi8vVwkmIjOAN5l3HcGDF9gmNnY,3345
616
+ helm/common/key_value_store.py,sha256=D9ZBORzZncf3zHQOP4AuNbQnV8cZpO_kqHY1mDRugqQ,3174
606
617
  helm/common/media_object.py,sha256=3VZqfb0py5dDKwWtnLp2kdl8svaike-Cn7Mjk-b0cvM,5130
607
618
  helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
608
- helm/common/mongo_key_value_store.py,sha256=yK1qyh1RgKB_hYMD1BA6hQw6oGJdrALPMpqqlkn7h0M,3811
619
+ helm/common/mongo_key_value_store.py,sha256=G0TIWQcvwMjyXh4TnN6xJ462HKHUAZtQJJYQOrHK-K8,3887
609
620
  helm/common/multimodal_request_utils.py,sha256=GNZQQCcwsARyFCO-uoeeglyK2PEfC4MjClAKDeKqokk,1404
610
621
  helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
611
622
  helm/common/object_spec.py,sha256=_usgTDQULBF6_jy7C6m-9ZNVvNxbGoTE_CdGcSvBASU,4327
612
623
  helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
613
624
  helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
614
- helm/common/request.py,sha256=B94Dey42OJZ5lgcf71KsGW2nKo8eB4My_pj6tDkIQOg,8012
615
- helm/common/test_cache.py,sha256=XqboYHQAkFWIHPsuIjuageRSLeN7QoATKF7wwxggPqE,7054
625
+ helm/common/request.py,sha256=Z_YUd77WQ15yeSN8YYdT48dI4ehUc869KuaDisAiyIA,8806
626
+ helm/common/test_cache.py,sha256=j19p-qzv_98X_TMW4b39ZHwSJ-MX3p91PrkYumarS6Y,4870
616
627
  helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
617
628
  helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
618
629
  helm/common/test_media_object.py,sha256=AAm9DD7MC-ZvTwiqXA-e52U6L4S1noxItW8f7ARi6DY,1650
619
- helm/common/tokenization_request.py,sha256=1e-uCXUqF3ai83wgX9yV8yXPT5GuCTjJcLk-PszDlTM,3525
630
+ helm/common/tokenization_request.py,sha256=NND9ESiiDE0H8QRNpfHVjXS7MQfKKIwtVRKDIjPnnJM,3344
620
631
  helm/common/file_caches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
621
632
  helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFqx4ftRuPA,359
622
633
  helm/common/file_caches/local_file_cache.py,sha256=wBOAbbkGLiClaX4YdunokRfSQCKNkTYmMVx2KTLy4Lc,1921
623
634
  helm/common/file_caches/test_local_file_cache.py,sha256=bOCWR9MglwQXV98xk8auyjgFxaOr85zRdxWwxMBQW9s,663
624
635
  helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
625
- helm/config/model_deployments.yaml,sha256=x4j3LMGHTV3jObKK0dT5SOtKJvReWOHyyjs6jV2D2L0,89739
626
- helm/config/model_metadata.yaml,sha256=M7EsOSnf4tcrSlNYBT50SiC6mReXfZ1q5rt7_OpdzpU,138011
627
- helm/config/tokenizer_configs.yaml,sha256=lBGPsRPRPeqlN_j194hEVP8HAMC6J5NLrIZpN95Y8ug,15078
636
+ helm/config/model_deployments.yaml,sha256=CXYtq1I6jRZJODiyfN0ha_i-2XHbWHv1-pBM5cfsHhA,94192
637
+ helm/config/model_metadata.yaml,sha256=nrQO6SbsSwKUXwtAUlfb8_Xai6TLSBKN0p1NrbtL2sU,168593
638
+ helm/config/tokenizer_configs.yaml,sha256=ZiOhsxOcEpLpK7Rv-zBw0s1ZWCQOMT9dya7DTV_lPQE,19665
628
639
  helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
629
640
  helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
630
641
  helm/proxy/cli.py,sha256=l8F7UYqrIOoBD9ZCIxJFA4fhxlzhae0-2Nn8A7FMkzk,8244
631
- helm/proxy/example_queries.py,sha256=uYc05CIhTzFCjLPdkWqtOweyHqsRUX4s6ByP8wJbjVk,4650
642
+ helm/proxy/example_queries.py,sha256=rVGmQ2ej4OS7m5Y3uI5dp9Mfdw6bv53c0o2QknsmYes,4379
632
643
  helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
633
644
  helm/proxy/retry.py,sha256=iLZmKATEJQa9jsSpOIx6YDRhmrA8G1Qm21cUxCuo2Ug,3490
634
- helm/proxy/server.py,sha256=V05YdMy0lZqYfYkxLDqksGYe-8CIFa6Jg8aSb8YHM7I,10753
645
+ helm/proxy/server.py,sha256=caho64BgGogbYMby8vecRFTtexmdg_fNxi3H0jzCVgE,10512
635
646
  helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
636
647
  helm/proxy/test_retry.py,sha256=8h398auzjW9VnlTJWllxR-bdpub-XFp8EN8LWDEnEHM,1049
637
648
  helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -644,31 +655,30 @@ helm/proxy/critique/model_critique_client.py,sha256=QMFiMpALXnneumKbJpXOZDEb3lPP
644
655
  helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5Vy4jMrHXcsXc_5m4,15889
645
656
  helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
646
657
  helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
647
- helm/proxy/services/remote_service.py,sha256=emYN0qWOJLQ7q1n06V4TwlvXaqylQcUxmqDcGZXqPJ8,9097
648
- helm/proxy/services/server_service.py,sha256=U-1g0VMjCY9bBK8BecbUxVzSx7hyC_rpwSNm67bqmCg,11534
649
- helm/proxy/services/service.py,sha256=Be-Z5F6AN4vMzsJr3BS6tJ9NHHy_dc_yn2Ex9cm0ChU,6193
658
+ helm/proxy/services/remote_service.py,sha256=nqqNisHoYXGidqPOdWauTgSca04LimWDBcr-KieuLdI,8787
659
+ helm/proxy/services/server_service.py,sha256=tb1JUIG8pVhY5t634advtMGYh9ZnhwTeKIwhweJYegU,10672
660
+ helm/proxy/services/service.py,sha256=YFG5ZlBYBz3IdSVRKDIKVlAmA-oLjFCeBHE3iIe_SU8,6020
650
661
  helm/proxy/services/test_remote_service.py,sha256=NFnLjg3QNHoDKdK0DlcrtylwlKXx1vdzheNZRrLEv7c,6605
651
662
  helm/proxy/services/test_service.py,sha256=FUZoI8pGiUg5adgB1wTJ869QOgFYjPtM6yf6FGMdE64,8968
652
663
  helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
653
664
  helm/proxy/token_counters/auto_token_counter.py,sha256=34PWvF96DurTrUtUxW4Td5VNV1_BhAebCkXQLl3xp4M,2046
654
- helm/proxy/token_counters/test_auto_token_counter.py,sha256=lDe1lXa5keRi1iLsKz2aBtoQyQ1fycYymZcGvshWvUk,8609
665
+ helm/proxy/token_counters/test_auto_token_counter.py,sha256=LO3H_NbVeoeaMmEuFNCmhoEWKjWVvxeW5U4yTKfE-84,8590
655
666
  helm/proxy/token_counters/token_counter.py,sha256=TCij1Cp08RoFTLLLdjNPoaeDGHpA1A2hQsrRV775Kf4,425
656
667
  helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
657
- helm/tokenizers/ai21_tokenizer.py,sha256=oXImuAY9kMohHH6Zm7BWysfT88b00NBoSELeGQ920y4,2255
668
+ helm/tokenizers/ai21_tokenizer.py,sha256=CE-u39ZY5Y4XQHONpiPHKK7uvEmySYLBQi2n70OV004,2059
658
669
  helm/tokenizers/aleph_alpha_tokenizer.py,sha256=UlWC_SjObBvexpZ3OfKZT2yjhbSsHlKjQe_oWuRrXno,3818
659
670
  helm/tokenizers/anthropic_tokenizer.py,sha256=d-HO9OEFkhYzFZu0VkOsHjxbqqSUseCNX0KQqgb3s2Q,2114
660
671
  helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
661
672
  helm/tokenizers/caching_tokenizer.py,sha256=kSegrCFotRevSDgJsn0g52dWiSUCNa7_EZpRNrELeUE,8163
662
- helm/tokenizers/cohere_tokenizer.py,sha256=6rahykq1SxqS8vCWOzYo_oeUoVwhg_zOfWFIkQxP6GY,5632
673
+ helm/tokenizers/cohere_tokenizer.py,sha256=6WwHIt7SsICmYR2QQpwDJ7pfNF8VWrFHFxF5Kynq6aY,2116
663
674
  helm/tokenizers/http_model_tokenizer.py,sha256=wBTtDA2UdEYspffa1wqgkT3y3YHoyLXXoucnJ5PGjhs,3109
664
- helm/tokenizers/huggingface_tokenizer.py,sha256=_XXx8uApENK7-o81qxEn0SOeJL_L2UpiiuteSYiODpE,8734
665
- helm/tokenizers/ice_tokenizer.py,sha256=4ZTIRpmt2cqwcxnmrDpCRhiJ0BI3ELE-GHoBuHWgrDA,1200
675
+ helm/tokenizers/huggingface_tokenizer.py,sha256=vmzcbgzMMlwx1x2n0syyp6KuN47nskgoP9yi1BNEGMQ,8696
666
676
  helm/tokenizers/lit_gpt_tokenizer.py,sha256=LMrpaje64UmnDKoYjPG_RQeXVA4xQUwW5t48IJIeLaQ,1660
667
677
  helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
678
+ helm/tokenizers/test_ai21_tokenizer.py,sha256=V8orjdKxmEV44VYoZ9Sq5E7CIq2caNnr6vjdk0T_w1A,1646
668
679
  helm/tokenizers/test_anthropic_tokenizer.py,sha256=_wzXp9FVR2Ml0s2A79TTXbSPHyTRp28i9tiEyQ9S6Ko,3792
669
680
  helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
670
681
  helm/tokenizers/test_huggingface_tokenizer.py,sha256=8tFyZQb4DLg6MdKg13a66bLbp0yf4Ar1fGWM_sYeSjg,6309
671
- helm/tokenizers/test_ice_tokenizer.py,sha256=-xi_f8TBSkAYr5CcA56HDq7rZ9HAGd99J7twNfkLzFU,2619
672
682
  helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
673
683
  helm/tokenizers/test_yalm_tokenizer.py,sha256=qWpKnUuAlePd6t-UJB_mAiBwtAacnC8caKXLJ_GdTkk,2477
674
684
  helm/tokenizers/tiktoken_tokenizer.py,sha256=FU2g_FF0pVoyspYhHcz3SyCBGNbsTby-nWVrj0Cq4_c,1265
@@ -679,9 +689,9 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
679
689
  helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=W9p5QNn1GSm-y85yVEQe_82zn5CVK_vR6jvhk7JTs_k,869
680
690
  helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
681
691
  helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
682
- crfm_helm-0.5.2.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
683
- crfm_helm-0.5.2.dist-info/METADATA,sha256=g-tT_a7wm7L7iaNCQVwNIrpUnVHK8PKfbXjel0KyhmQ,19591
684
- crfm_helm-0.5.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
685
- crfm_helm-0.5.2.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
686
- crfm_helm-0.5.2.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
687
- crfm_helm-0.5.2.dist-info/RECORD,,
692
+ crfm_helm-0.5.4.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
693
+ crfm_helm-0.5.4.dist-info/METADATA,sha256=7kuGQSYOgZrB1nra46gop4xc4e40TeQ8qThnbWgtiqU,19187
694
+ crfm_helm-0.5.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
695
+ crfm_helm-0.5.4.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
696
+ crfm_helm-0.5.4.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
697
+ crfm_helm-0.5.4.dist-info/RECORD,,