crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (236)
  1. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
  2. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
  3. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +32 -31
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  6. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  7. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  8. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  9. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  10. helm/benchmark/annotation/air_bench_annotator.py +64 -0
  11. helm/benchmark/annotation/annotator_factory.py +6 -0
  12. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  13. helm/benchmark/annotation/call_center_annotator.py +247 -0
  14. helm/benchmark/annotation/financebench_annotator.py +79 -0
  15. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  16. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  17. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  18. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  19. helm/benchmark/annotation/live_qa_annotator.py +71 -0
  20. helm/benchmark/annotation/medication_qa_annotator.py +68 -0
  21. helm/benchmark/annotation/model_as_judge.py +45 -0
  22. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  23. helm/benchmark/annotation/xstest_annotator.py +110 -0
  24. helm/benchmark/augmentations/translate_perturbation.py +1 -0
  25. helm/benchmark/huggingface_registration.py +16 -6
  26. helm/benchmark/metrics/air_bench_metrics.py +56 -0
  27. helm/benchmark/metrics/annotation_metrics.py +108 -0
  28. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  29. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  30. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  31. helm/benchmark/metrics/fin_qa_metrics.py +60 -0
  32. helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
  33. helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
  34. helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
  35. helm/benchmark/metrics/live_qa_metrics.py +23 -0
  36. helm/benchmark/metrics/medication_qa_metrics.py +23 -0
  37. helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
  38. helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
  39. helm/benchmark/metrics/safety_metrics.py +57 -0
  40. helm/benchmark/metrics/summac/model_summac.py +3 -3
  41. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  42. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  43. helm/benchmark/metrics/unitxt_metrics.py +20 -10
  44. helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
  45. helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
  46. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  47. helm/benchmark/model_metadata_registry.py +3 -3
  48. helm/benchmark/presentation/schema.py +54 -4
  49. helm/benchmark/presentation/test_run_entry.py +1 -0
  50. helm/benchmark/presentation/test_schema.py +11 -0
  51. helm/benchmark/run.py +31 -2
  52. helm/benchmark/run_expander.py +113 -10
  53. helm/benchmark/run_spec_factory.py +4 -0
  54. helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
  55. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  56. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  57. helm/benchmark/run_specs/classic_run_specs.py +15 -11
  58. helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
  59. helm/benchmark/run_specs/experimental_run_specs.py +85 -0
  60. helm/benchmark/run_specs/finance_run_specs.py +110 -0
  61. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  62. helm/benchmark/run_specs/vlm_run_specs.py +251 -57
  63. helm/benchmark/scenarios/air_bench_scenario.py +50 -0
  64. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  65. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  66. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  67. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  68. helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
  69. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  70. helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
  71. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  72. helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
  73. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  74. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  75. helm/benchmark/scenarios/scenario.py +1 -1
  76. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  77. helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
  78. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  79. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  80. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  81. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  82. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  83. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  84. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  85. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  86. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  87. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  88. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  89. helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
  90. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  91. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  92. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  93. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  94. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  95. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  96. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  97. helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
  98. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
  99. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
  100. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  101. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  102. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  103. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
  104. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  105. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  106. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  107. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  108. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  109. helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
  110. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  111. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  112. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  113. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
  114. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
  115. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  116. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  117. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  118. helm/benchmark/server.py +1 -6
  119. helm/benchmark/static/schema_air_bench.yaml +3149 -0
  120. helm/benchmark/static/schema_bhasa.yaml +709 -0
  121. helm/benchmark/static/schema_call_center.yaml +232 -0
  122. helm/benchmark/static/schema_classic.yaml +3 -59
  123. helm/benchmark/static/schema_cleva.yaml +768 -0
  124. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  125. helm/benchmark/static/schema_ewok.yaml +367 -0
  126. helm/benchmark/static/schema_finance.yaml +189 -0
  127. helm/benchmark/static/schema_image2struct.yaml +588 -0
  128. helm/benchmark/static/schema_instruction_following.yaml +3 -52
  129. helm/benchmark/static/schema_lite.yaml +3 -61
  130. helm/benchmark/static/schema_medical.yaml +255 -0
  131. helm/benchmark/static/schema_mmlu.yaml +3 -61
  132. helm/benchmark/static/schema_safety.yaml +247 -0
  133. helm/benchmark/static/schema_tables.yaml +317 -0
  134. helm/benchmark/static/schema_thai.yaml +244 -0
  135. helm/benchmark/static/schema_unitxt.yaml +3 -61
  136. helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
  137. helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
  138. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  139. helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
  140. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  141. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  142. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  143. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  144. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  145. helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
  146. helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
  147. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  148. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  149. helm/benchmark/static_build/index.html +2 -2
  150. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  151. helm/clients/ai21_client.py +71 -1
  152. helm/clients/anthropic_client.py +50 -28
  153. helm/clients/auto_client.py +11 -0
  154. helm/clients/client.py +24 -7
  155. helm/clients/cohere_client.py +98 -3
  156. helm/clients/huggingface_client.py +79 -19
  157. helm/clients/nvidia_nim_client.py +35 -0
  158. helm/clients/openai_client.py +11 -5
  159. helm/clients/palmyra_client.py +25 -0
  160. helm/clients/perspective_api_client.py +11 -6
  161. helm/clients/reka_client.py +189 -0
  162. helm/clients/test_client.py +7 -9
  163. helm/clients/test_huggingface_client.py +19 -3
  164. helm/clients/test_together_client.py +72 -2
  165. helm/clients/together_client.py +129 -23
  166. helm/clients/vertexai_client.py +62 -18
  167. helm/clients/vision_language/huggingface_vlm_client.py +1 -0
  168. helm/clients/vision_language/open_flamingo_client.py +1 -2
  169. helm/clients/vision_language/paligemma_client.py +146 -0
  170. helm/clients/vision_language/palmyra_vision_client.py +99 -0
  171. helm/clients/yi_client.py +31 -0
  172. helm/common/critique_request.py +10 -1
  173. helm/common/images_utils.py +25 -0
  174. helm/common/mongo_key_value_store.py +2 -1
  175. helm/common/request.py +16 -0
  176. helm/config/model_deployments.yaml +740 -363
  177. helm/config/model_metadata.yaml +824 -128
  178. helm/config/tokenizer_configs.yaml +207 -10
  179. helm/proxy/critique/model_critique_client.py +32 -4
  180. helm/proxy/example_queries.py +14 -21
  181. helm/proxy/services/server_service.py +2 -3
  182. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  183. helm/tokenizers/ai21_tokenizer.py +51 -59
  184. helm/tokenizers/auto_tokenizer.py +1 -1
  185. helm/tokenizers/cohere_tokenizer.py +29 -62
  186. helm/tokenizers/huggingface_tokenizer.py +35 -13
  187. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  188. helm/tokenizers/test_cohere_tokenizer.py +39 -0
  189. helm/tokenizers/test_huggingface_tokenizer.py +5 -1
  190. helm/benchmark/static/benchmarking.css +0 -156
  191. helm/benchmark/static/benchmarking.js +0 -1705
  192. helm/benchmark/static/config.js +0 -3
  193. helm/benchmark/static/general.js +0 -122
  194. helm/benchmark/static/images/crfm-logo.png +0 -0
  195. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  196. helm/benchmark/static/images/helm-logo.png +0 -0
  197. helm/benchmark/static/images/language-model-helm.png +0 -0
  198. helm/benchmark/static/images/organizations/ai21.png +0 -0
  199. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  200. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  201. helm/benchmark/static/images/organizations/cohere.png +0 -0
  202. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  203. helm/benchmark/static/images/organizations/google.png +0 -0
  204. helm/benchmark/static/images/organizations/meta.png +0 -0
  205. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  206. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  207. helm/benchmark/static/images/organizations/openai.png +0 -0
  208. helm/benchmark/static/images/organizations/together.png +0 -0
  209. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  210. helm/benchmark/static/images/organizations/yandex.png +0 -0
  211. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  212. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  213. helm/benchmark/static/index.html +0 -68
  214. helm/benchmark/static/info-icon.png +0 -0
  215. helm/benchmark/static/json-urls.js +0 -69
  216. helm/benchmark/static/plot-captions.js +0 -27
  217. helm/benchmark/static/schema_image2structure.yaml +0 -304
  218. helm/benchmark/static/utils.js +0 -285
  219. helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
  220. helm/benchmark/static_build/assets/index-878a1094.css +0 -1
  221. helm/benchmark/window_services/ai21_window_service.py +0 -247
  222. helm/benchmark/window_services/cohere_window_service.py +0 -101
  223. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  224. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  225. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  226. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  227. helm/tokenizers/ice_tokenizer.py +0 -30
  228. helm/tokenizers/test_ice_tokenizer.py +0 -57
  229. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  230. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  231. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  232. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  233. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  234. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  235. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  236. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -5,25 +5,25 @@ helm/benchmark/annotation_executor.py,sha256=ZJCc5xT8E0E6gux8dq3HPS4YzQs2YPCNl4g
5
5
  helm/benchmark/config_registry.py,sha256=Cd25a8FHriUzAgvGGU5sBAPyhisdSIjdUJR4YbYs6T4,1603
6
6
  helm/benchmark/data_preprocessor.py,sha256=aNdM-o2t4qkLIQHiQeWUFg03DjjJ8HTBIphYCK8pXVo,2173
7
7
  helm/benchmark/executor.py,sha256=simd7SdJ7TciUpoq3D0uz_XUSCZj5KIWCIP57FYm4js,4906
8
- helm/benchmark/huggingface_registration.py,sha256=RzfOaLAnzAcoTphan1JNo836lNyxMSH67oQlolhNLS0,4154
8
+ helm/benchmark/huggingface_registration.py,sha256=unEBO21V8K3-Ya0xLqjO9H1oq7RmU-f1MYV0tCIbXzY,4578
9
9
  helm/benchmark/model_deployment_registry.py,sha256=BjL0ghHgO7_Z5jZZ7kuSOj9saegI3BivaL-b699C0rc,9527
10
- helm/benchmark/model_metadata_registry.py,sha256=fXRJOLUIrLOHUG5duncEqhnpmfb9hyloUlGbOM2L9ds,8194
10
+ helm/benchmark/model_metadata_registry.py,sha256=m39FqNaGdxP4r7W7Vmq6r-gOLjYtn_5WmRNsGzci6d8,8283
11
11
  helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
12
- helm/benchmark/run.py,sha256=tF_aWy5GtfwBOT1ZRKWrcI74VpFWGzlR00EKiGG7zyI,12572
13
- helm/benchmark/run_expander.py,sha256=jolEPDrB4lL_VJNRpT1SQta6DZ_xyq2HaIfWHdeyNtA,47785
12
+ helm/benchmark/run.py,sha256=cPJh1Rwit8E_Kjf8Te2D75cd19ag4WgS2YrHHu2Fc8Q,13997
13
+ helm/benchmark/run_expander.py,sha256=YOTYbewbHLi0N7_fM_86Nke4U0wPwdeXLv47_CCVjQw,52659
14
14
  helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
15
- helm/benchmark/run_spec_factory.py,sha256=nRP9737niPReD5G7t9fgyQ8_EUQ1hvg2VBQe5rSZ08Y,6816
15
+ helm/benchmark/run_spec_factory.py,sha256=hp29n_Stb7RMwRm2jrP_qpyzxi8X8ojdqXTFN3KRSiY,6978
16
16
  helm/benchmark/runner.py,sha256=zlHDJ2Ys5-HxtXcwpkXcrdfXy_i886fBcq1iNeLyC3Q,14669
17
17
  helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
18
- helm/benchmark/server.py,sha256=ysd5MT1TDu65NH-OzIGf9wmZlr8FHNRwoy2ybjSc5Yk,6140
18
+ helm/benchmark/server.py,sha256=kaGpUzBwzprmTDiMcy8-sfT8KfVEOb0wWytWODsAQ94,5925
19
19
  helm/benchmark/slurm_jobs.py,sha256=eNCAoaWDfT0Wk32ZJRIGo-x8kgjhDPnPB4Xrvw_eLB0,3225
20
20
  helm/benchmark/slurm_runner.py,sha256=Tozimrjr2R6mlKHcmrGgxTy9ga-ArIW6AoAWtxqzw-M,16567
21
21
  helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5drG-4Y4UhIM,2219
22
22
  helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
23
23
  helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
24
24
  helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- helm/benchmark/adaptation/adapter_spec.py,sha256=tZ40ovgNkRsxDOHan4lcD8ukutA1QPsoZUF5XOHq-VA,4382
26
- helm/benchmark/adaptation/common_adapter_specs.py,sha256=-ILsVxWjpEE6an1ncrRRrLkdP5ky_-2GN1TxSxJo38M,10449
25
+ helm/benchmark/adaptation/adapter_spec.py,sha256=K5BwqTe2iimjswdw_SONlJo0xt-T-o5KH7VqxrPaov0,5072
26
+ helm/benchmark/adaptation/common_adapter_specs.py,sha256=Er8aMbDi8RTBtGWjcI08E2mRDl5AoBzUaBT1EY38Nlw,10515
27
27
  helm/benchmark/adaptation/prompt.py,sha256=n0Ka3RGSWMr3CBnJrPNPy626x9TJE3k677wKbG8hO9A,2133
28
28
  helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
29
29
  helm/benchmark/adaptation/scenario_state.py,sha256=mWEhgzk18SVoMEuj2pSnc_r9JrGAHLdOlteHJKUMA5k,1961
@@ -35,12 +35,12 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=F7Aou6r9CZ1xEuAX
35
35
  helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=BbcBEJjY8Cp58me9sUktd2p3dEVFL8ZJ7RFfus3hSYE,14997
36
36
  helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=LhZHmciP8lAfu7T0p634GOPTHrJR7qRCRRIxPgVlW9E,14873
37
37
  helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=VJ66MfIGQWJg0VXCV0MJEMwF9Jx1DeJ7RxsgYlOTx_4,1889
38
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=k8wSxv9pK8wtbQNBzWYPkGEUKJb8tcVi41Y1M3fPQT4,3985
38
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=S38Y_MjLRE86LS9RfB4qHmNy5x5n2KyYa4DtA63lees,4402
39
39
  helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=aMe-y4iiyEnM7_bqIoddeZBsVLoDxMmjKY2eZKB6Y2Q,2156
40
40
  helm/benchmark/adaptation/adapters/test_adapter.py,sha256=0-JrYnogZu4kENQG1eQMXHWnuSurCLRbkLpDuSnfRqs,745
41
- helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=TM6WJpWShsu6KuDzlofYHd9DNPj86Hjudubp_sqrhFI,12734
42
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=f_bggObKUxiV5XyYHHNXsM42HzM0CDzvR4uiIoXTE5o,7997
43
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=v6LLmVTopXNfzo9Qzq16EmmPPivFGGs9LuaPDJAX4vY,9506
41
+ helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=NyhVTvLznCVMB-DJeX2DRjWx91XmW3FBcrkm0RN-fJU,12766
42
+ helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=BCEhKRVEDKPHsLKhpnIv0krV37a8Eu78r8EtJxH_MXA,7980
43
+ helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=BoozcN0zPWwk6HKEPN0b61ieqwk5y8bwKvr9m8DR_2k,11874
44
44
  helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=o7CGClyVWYOuJ4G56-whq5fTvCr7QIn51Mo6DTdvwg0,1881
46
46
  helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=bvY8xT2ak_3WG4m2Z5bCM6FLImPIWG1qAn9H2ZNwNv0,6359
@@ -49,15 +49,25 @@ helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_a
49
49
  helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=VjSqWiZEcW6K2jrokGUmky7syEOqJ6cbHImR7YZgwzU,10151
50
50
  helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=KKOOlna6SHLJHSPgfgguPQysc2Nf4kKrqumqwlG27bs,3542
51
51
  helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
+ helm/benchmark/annotation/air_bench_annotator.py,sha256=9W3zLO2f4OzxGdavkDI2dDUStxpExa7sgrI-ATGG7NY,3048
52
53
  helm/benchmark/annotation/annotator.py,sha256=2UIXY71S5dRaZBLb1v4lcv8-O6pyJ9zTeSJl78AEWGI,1538
53
- helm/benchmark/annotation/annotator_factory.py,sha256=z5AGBylIuy-_IfgikX66VyGvRz4SxtnOcJsyESH8990,2699
54
+ helm/benchmark/annotation/annotator_factory.py,sha256=3Soh0V3lbsIR_HGHLg-XTc3eKVRj7SL9lLT_AoqUVTs,2997
55
+ helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=kpnIrydou3THgEFealGZyGneVKxgK5wwQ4kiMbDzJH4,2974
56
+ helm/benchmark/annotation/call_center_annotator.py,sha256=3vHsgJD24PaR4rRTfLD3wvwvbslkQdDHLokggFxijhI,11233
57
+ helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
58
+ helm/benchmark/annotation/harm_bench_annotator.py,sha256=z8EX1F7chOf-sZ93aognaTMmOqQDgWEa4KO0LLSABjM,2853
59
+ helm/benchmark/annotation/live_qa_annotator.py,sha256=I8wfDt8-iLC_C77r7fBjn9jdoXatVc_pJ_2YEWv392M,3474
60
+ helm/benchmark/annotation/medication_qa_annotator.py,sha256=TWjB3BIbBR_jVvrp2kF0PJW2p1U4MoosrSJ-b4QTgXE,3223
61
+ helm/benchmark/annotation/model_as_judge.py,sha256=CffsM05JPZbtLY9xFi1qOuy1JY4Yp-qF_OWrd_YC0yE,1737
62
+ helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=ztqagaM2M0OPKSMCo112_regyr2rDE44zpb0_HESRZs,2699
54
63
  helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
55
64
  helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
56
- helm/benchmark/annotation/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
58
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py,sha256=yRifoqhGq_mQkkRcgKCFpGrZaI9gochOXYiCU8oY1KE,2477
59
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py,sha256=we6K1BynV907ZMnGI2zb_tru1uw2iGEI06Wtbnus23w,4010
60
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py,sha256=rvzdQCaVFM6ovF28TSUnNmB47f2hidlaZm6vO4DJpso,6404
65
+ helm/benchmark/annotation/xstest_annotator.py,sha256=pW3Dgu77ZoS5hVoapn-FsK3KQOHGHiRLyaKpSqnMRLg,4149
66
+ helm/benchmark/annotation/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
+ helm/benchmark/annotation/image2struct/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
68
+ helm/benchmark/annotation/image2struct/latex_compiler_annotator.py,sha256=drbxogMMGwGxgVFbhT7hxPGDh7uyhptlmEmeP1Gq2xM,2471
69
+ helm/benchmark/annotation/image2struct/lilypond_compiler_annotator.py,sha256=odIGciLX2oVq_O8_H15lWUZoSfVvY-jRb0ILjs7GCIg,4061
70
+ helm/benchmark/annotation/image2struct/webpage_compiler_annotator.py,sha256=w6RKv7Fz__j_abKXnsTn98kHPv9tWKipdLW3NVT55m8,6389
61
71
  helm/benchmark/augmentations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
72
  helm/benchmark/augmentations/cleva_perturbation.py,sha256=arUkY_luc274YEMZocOos9rpAZVbEFZphbMlobAxTy0,29208
63
73
  helm/benchmark/augmentations/contraction_expansion_perturbation.py,sha256=yni1UR2fviN0Wig8MpOp0zzLn4H-gYocTjKTpxBwywg,4850
@@ -78,7 +88,7 @@ helm/benchmark/augmentations/space_perturbation.py,sha256=g4rbyoureBaOVf_lrRXIWY
78
88
  helm/benchmark/augmentations/suffix_perturbation.py,sha256=P3AfJj_ajTVdjO7AJRQ9dKS-cT1PyRSt8Un57iZQDVc,785
79
89
  helm/benchmark/augmentations/synonym_perturbation.py,sha256=komOV5M342_8unopnwN6gkPWpJIZXidywiu6PO9_riU,4151
80
90
  helm/benchmark/augmentations/test_perturbation.py,sha256=4EooKVcyub70I81trzpNx3Ij-m1vpFa5cFIo6O52icE,13185
81
- helm/benchmark/augmentations/translate_perturbation.py,sha256=dn8wO5UOgYbGtP9e77SmwaK2ginrQsTw-79nrzRzfeo,1054
91
+ helm/benchmark/augmentations/translate_perturbation.py,sha256=vMXCYXGVSo8E78IAzH9HI4p2pvyLzcvO77BnvR2QB0k,1097
82
92
  helm/benchmark/augmentations/typos_perturbation.py,sha256=_F9zwvrLie8hX7mzUtQmYq6oq6yqaFiKGsvc9LAuBr4,2798
83
93
  helm/benchmark/data_overlap/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
94
  helm/benchmark/data_overlap/data_overlap_spec.py,sha256=aj_l1l0qxUbUMrSWr70-Sb1j_JN-7WYop5BXPG_xj44,1998
@@ -88,8 +98,12 @@ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8
88
98
  helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
89
99
  helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
90
100
  helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
+ helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
102
+ helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
91
103
  helm/benchmark/metrics/basic_metrics.py,sha256=7hk5PZL7d09uG1y7wHBhY_ox8hlXw-n7Yt_FDv_AIKw,20375
92
104
  helm/benchmark/metrics/bbq_metrics.py,sha256=Dqccr7GdfKNs1S_1QSB75d8AY7moovEPAqvacGfrCAE,6157
105
+ helm/benchmark/metrics/bhasa_metrics.py,sha256=Nw5fdZrYedYUEVJXFFnGSdOBxJ4-99GELd699TBmcSg,6958
106
+ helm/benchmark/metrics/bhasa_metrics_specs.py,sha256=fwXd1fRoeizd4kVQfLZ9ny-PzHTe1ieFKsGesiPDef0,440
93
107
  helm/benchmark/metrics/bias_metrics.py,sha256=GQ4CwOk1Sa9g-LcJCxcoQLD1vWY2Hvujck9l-9qsmf4,11418
94
108
  helm/benchmark/metrics/bias_word_lists.py,sha256=mx5JjW3mHffXIqo4GcQN-zENUEttBqQnEjPTz3J3J_4,13909
95
109
  helm/benchmark/metrics/classification_metrics.py,sha256=uB23jRFzkmtJgs1sTO5pPjdV_mOg35gWubjGS8pynLM,5654
@@ -97,7 +111,7 @@ helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIef
97
111
  helm/benchmark/metrics/cleva_harms_metrics.py,sha256=c_x9MYg8WjM1yym1S374GKxH_lwP6wZOiXrknf0mJis,11077
98
112
  helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
99
113
  helm/benchmark/metrics/code_metrics.py,sha256=e0aqLcxBAdCc0qAqebzK40Ilv2Py6xZbosud5v169x8,5121
100
- helm/benchmark/metrics/code_metrics_helper.py,sha256=h_y3BsYCbeh8cDe2LDndA8K6nkelV0J76qxlq2cBmsc,22334
114
+ helm/benchmark/metrics/code_metrics_helper.py,sha256=UNai154RuhYRZM_YK-rveLct4Ui5iEBNPYmYdKq34Xs,22712
101
115
  helm/benchmark/metrics/common_metric_specs.py,sha256=k_IW0A6BevAskS0_C6ZaP9XvIfrdLI974_NhC89rMoo,5846
102
116
  helm/benchmark/metrics/copyright_metrics.py,sha256=X9j3YsfzWEoGpgPpIvCzm18-JggLAW5QFooifE1KqaM,7729
103
117
  helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=TcyklpfcTMXrpJeaHQfxS9QQxe-gwmT-HD0g_DmIFLQ,3253
@@ -109,16 +123,24 @@ helm/benchmark/metrics/dry_run_metrics.py,sha256=d8RgltW4nGTH1tZeGOIlQRwRaJLIxL6
109
123
  helm/benchmark/metrics/efficiency_metrics.py,sha256=v8Eg56HHIWEMQruODKBvwdUfR6ZLGgrNifo-senCaUo,11786
110
124
  helm/benchmark/metrics/evaluate_instances_metric.py,sha256=EBUf0ONnNoi7pcxYab7RD0B_JqGksqDX8TOaosSmJk8,2847
111
125
  helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=vUJavaLVfbWtrwyrIA81npK_1iirhko7_zMF1kL7Gfw,15559
112
- helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=Pj1itUJi_KDy0D-FOPcOyHqm4ypHMfhbAVeDJzGlyeo,9773
126
+ helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
127
+ helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
128
+ helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
129
+ helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=QJxGzyERQv_vMn3PM9fy3IxfBgSg0BjcOf_mv574lGA,9786
113
130
  helm/benchmark/metrics/language_modeling_metrics.py,sha256=ofqwj1PMJQu16QhLDULXBmZ5iFz91ducwLRpNsRYELE,4510
131
+ helm/benchmark/metrics/live_qa_metrics.py,sha256=f2XFmQaohjQNqYqNg8NcDVavCzyP4cd8Cl8rLArn9EM,816
114
132
  helm/benchmark/metrics/machine_translation_metrics.py,sha256=bp_EDXyxntIty5gORDa7va-C73quOzoTc5o8MpxFmL4,3816
133
+ helm/benchmark/metrics/medication_qa_metrics.py,sha256=Z939iAc0A5xn_GdnCtfiefhUZK9qk6jZjtde2-F7IH8,840
115
134
  helm/benchmark/metrics/metric.py,sha256=dPq7ZMB0w-LgJKMzWYDJtfn-oYD4oG4jJX0yiUEziJM,14245
116
135
  helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
117
136
  helm/benchmark/metrics/metric_service.py,sha256=mlX_MEFSYNzME6GFS3El_VVOvzPYnOMosKI0XIxygP4,1802
118
137
  helm/benchmark/metrics/numeracy_metrics.py,sha256=panMWD3a1NPerg3Ix7l6NhR7jGOIQOQV9i_KysBeDA8,2818
119
138
  helm/benchmark/metrics/paraphrase_generation_metrics.py,sha256=-VkAknRhAEBmC_lpz_1aeXU8OppL8KfEPtIYCJkHTmw,1981
139
+ helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bzWoPWNFuVs-3fm7XJw2EC4xgiSb3gSa4,8508
120
140
  helm/benchmark/metrics/ranking_metrics.py,sha256=5hDRapsxx_cmo-ag_80kOQnrgZn3lfVsLZVtWxuxH-s,17391
121
141
  helm/benchmark/metrics/reference_metric.py,sha256=RlIM_PFTEkBo0_EEMq8d4_BSagNSBR_XyovMtjDeqqU,6026
142
+ helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
143
+ helm/benchmark/metrics/safety_metrics.py,sha256=SsVRJXduF4S6C3sOozkOS-0gwy-Ff0Pz9C69jnh3Y-A,2355
122
144
  helm/benchmark/metrics/statistic.py,sha256=FuxNxMtAfiCkOxBS9KHlhEyxe61e0YXt2emvsufgPZQ,3424
123
145
  helm/benchmark/metrics/summarization_critique_metrics.py,sha256=Lf7PDuce62HDzyofsyxaOvH0QvzcaS-vJvDWtIs8xKk,4694
124
146
  helm/benchmark/metrics/summarization_metrics.py,sha256=laLMGRDy1wjcFvgSWXvzOZwBXshkmPr0S2Ofu79Z01Q,16461
@@ -131,7 +153,7 @@ helm/benchmark/metrics/test_numeracy_metrics.py,sha256=ls1ZIHDePKpHMoqAbf4HmJ1SI
131
153
  helm/benchmark/metrics/test_statistic.py,sha256=AejuYLSeUwEOqpEMRKZFjnxu4HKUraeExU8TPmZEqW4,1229
132
154
  helm/benchmark/metrics/toxicity_metrics.py,sha256=6MCpHuCXbXZqWwvO57ifKYHnHWBzszN9cZjwgPQQF2Y,4027
133
155
  helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
134
- helm/benchmark/metrics/unitxt_metrics.py,sha256=5rw_fBQGWpFLr1nR4HcRlAwYvDZfJ6_MzGozzNo5NOA,3605
156
+ helm/benchmark/metrics/unitxt_metrics.py,sha256=2F9T4iQV0_BbDMCWrZrd9sc30XHYv8MR4xSBd_dD3eI,4053
135
157
  helm/benchmark/metrics/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
136
158
  helm/benchmark/metrics/image_generation/aesthetics_metrics.py,sha256=AXQjWBd9zBZOoCF8vQV9FjUy33teC0IF7pdbq-XiHjM,2101
137
159
  helm/benchmark/metrics/image_generation/aesthetics_scorer.py,sha256=ISdThDKMrx-SHQe69dCcr8qUrMCa_GsxX3BeZnd0WPA,2538
@@ -167,7 +189,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
167
189
  helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=__f7NVsVQatDFn_2Bfx7ObiQ68kAMvyyClApaTxqx80,649
168
190
  helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
169
191
  helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
170
- helm/benchmark/metrics/summac/model_summac.py,sha256=zEuTI75eBBZPzJp0j2kFd2HejouhugC83nSWoVpghBQ,17412
192
+ helm/benchmark/metrics/summac/model_summac.py,sha256=PJ2lPa-JQPnM86N0T2rPcAviTNHmSV721PTnbL1eGnk,17460
171
193
  helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
172
194
  helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
173
195
  helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=WeNP4yiM4TVrD9Kid-uVRmWIVDqETnBsMycZmIBiTZ0,665
@@ -176,43 +198,56 @@ helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py,sha256=5igmDhWu7H8-
176
198
  helm/benchmark/metrics/tokens/free_token_cost_estimator.py,sha256=G_6UK6Js_NZ_eqY0ZQnrC9QJVMERGhV1f6v7xq2lM-Y,461
177
199
  helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py,sha256=9zjtuxMbvfPBYuxOYMFEmNP8ZKFDVywrZ08n6nrjbA4,1520
178
200
  helm/benchmark/metrics/tokens/openai_token_cost_estimator.py,sha256=7jgjcgmbcVfLA_nTOrWXKIF8TEXng_KnE6cSgsSXWmE,1398
179
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=l9UQZ0aAIhCYuFbIjU3j3A3XXoMvwUvz1kvRtlDbtOo,1079
180
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=_wJ3E3LbJB9XPLixTH82BYQbp32o3oij6Sz3lsZL30E,2648
201
+ helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=eVnCYhRq2LT7F4BXsiIDb1bkmhvoHLgDAdMR73Xz5p8,1071
202
+ helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=h5ggZCGpgCQUjfqS0JS4Bxmx7NBaT4w43pXAgbCEnw4,2628
181
203
  helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
182
204
  helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
- helm/benchmark/metrics/vision_language/emd_utils.py,sha256=3yN-DY5rxMabmtLV003lj59SRnp_T83sLAi96rycKEo,15043
184
- helm/benchmark/metrics/vision_language/image_metrics.py,sha256=aJ3zrVOLJJzdVKqXPcFsCXp9LSHET8VGEgtvwK-nkJc,25190
185
- helm/benchmark/metrics/vision_language/image_utils.py,sha256=XeYF3E6MnYyPJ5hYp4TtiTP27-y4S8LTBH5bZVcvJFg,3758
205
+ helm/benchmark/metrics/vision_language/emd_utils.py,sha256=KdZdcqu3eo016FdAjAm_83v92-wWuR90EPsTogfTcok,15196
206
+ helm/benchmark/metrics/vision_language/image_metrics.py,sha256=3fh7vR4J2arFXIT6hLBNdR18PKxQBLPBbVrHWv0hBeA,23551
207
+ helm/benchmark/metrics/vision_language/image_utils.py,sha256=4E0NYh09O6-5sGhAPo6KZqYaZfBpCtuYbD3vLt-wQzk,3755
186
208
  helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
187
209
  helm/benchmark/presentation/contamination.py,sha256=PiIdcaD3-xfExjOmyL5q4Ao2ASa-OlScJAB9u1Zxe7o,2811
188
210
  helm/benchmark/presentation/create_plots.py,sha256=2-ZOuEdRwqqF1biRmzWggMZjmODoxOQOBoz9GT7tVww,28737
189
211
  helm/benchmark/presentation/run_display.py,sha256=tC1DciLvDTQJog4BDo8StWDdX7DbBkhrG2sX_SwXSPQ,11838
190
212
  helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
191
- helm/benchmark/presentation/schema.py,sha256=pOwHCLvAC1Nh6vh48HV83gb7T7WREkifvo4qdovFdv4,8511
213
+ helm/benchmark/presentation/schema.py,sha256=fPw-794HbacZR5z1SmYGUqYgqXbZ8-BrcexWV4h6vgc,10809
192
214
  helm/benchmark/presentation/summarize.py,sha256=2fJ9BYOJRxe9eBylLUK3qcZZwAwRtJF_C8plEQlAPEU,67266
193
215
  helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
194
216
  helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
195
217
  helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
196
- helm/benchmark/presentation/test_run_entry.py,sha256=OM-027j2A0Lx-ai2zBprOxSqzZhS_dh0OKw3ThocZW0,751
218
+ helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
219
+ helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
197
220
  helm/benchmark/presentation/test_summarize.py,sha256=UfSp33Q9xvuGnPYfFmLJdH5y7KWp9qbZprRMyx8LGP0,1618
198
221
  helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
- helm/benchmark/run_specs/classic_run_specs.py,sha256=YKrjuuPXoVjUT6XGEtyouPHkkE0XfR6u2xHQDRqpNyA,57972
222
+ helm/benchmark/run_specs/air_bench_run_specs.py,sha256=VdXis1HN8_KLrMHDCVi0J7WdqjRjAGbZMhrsnpzC-Kg,1604
223
+ helm/benchmark/run_specs/bhasa_run_specs.py,sha256=2m5dXJKP0ojdACgvSREiV25SB9T6IL9JeYHYjhL7xX4,23480
224
+ helm/benchmark/run_specs/call_center_run_specs.py,sha256=GX5P2tTj4YS037EEZ8so_mX9LlPWyfJ-pF8ICoErpio,5324
225
+ helm/benchmark/run_specs/classic_run_specs.py,sha256=Cn0z-6QY-ehbLaHJMvCwjw11DFBQgUyqVCaXwTVFyJ8,58331
200
226
  helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
201
- helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=D5g_--eFOI6-hy6fv9JNj_X4DHU03prKA5GZjlqaoRk,14254
227
+ helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
228
+ helm/benchmark/run_specs/experimental_run_specs.py,sha256=wduA6K3mpIRHmr8g3h0c5k7rUsKiPFOqJktdbbGxtoE,2950
229
+ helm/benchmark/run_specs/finance_run_specs.py,sha256=hCaB3uBSlTZbFztdsDqdxuAdYQM20S9m9rXYQITgL5M,4161
202
230
  helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
203
231
  helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
204
232
  helm/benchmark/run_specs/lite_run_specs.py,sha256=ViCPJ86Aah8301GTEk6z4_MtP0g8iik33t4GudobhWQ,11113
233
+ helm/benchmark/run_specs/safety_run_specs.py,sha256=ZTvLbRBxHWMIKPapugNfXPStJRBHfiaiXUHgpWMBONY,5469
205
234
  helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
206
235
  helm/benchmark/run_specs/unitxt_run_specs.py,sha256=ejp_knrcIjf0J4WiKj9LTgDTcUr29-XFZYHYz0w_dkM,1518
207
- helm/benchmark/run_specs/vlm_run_specs.py,sha256=CmdyEF-pdFIlMhBV7UraQ0FuQgQl2rqVSdTz22uYuPQ,26808
236
+ helm/benchmark/run_specs/vlm_run_specs.py,sha256=A-e3npwbqvUEHvC9iGta9N1zFCHfoP8C1_vWBVLf8ns,34134
208
237
  helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
238
+ helm/benchmark/scenarios/air_bench_scenario.py,sha256=WUZvsUTqlsjNzQsd2baZZIgO30B4Zf3g0QjsyEaGmLc,1772
209
239
  helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=Wyt7J5BAvAqC5JTqCW4fh7ex9-itX11P_9rLTocqvtk,4973
240
+ helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=Ic0ak_5vGHeNT5PFgOptl-Ns8nQuM5nKpiQlhB1H3X0,3158
210
241
  helm/benchmark/scenarios/babi_qa_scenario.py,sha256=S1tPQY2x1I3hQL1JQ6wvUwvKyiSe7SqpRSW6N3_T0mo,5043
242
+ helm/benchmark/scenarios/banking77_scenario.py,sha256=pVA2LXB9uJ12GnjiEvjhRV-P8YNEjpFhyZr-J8MV2SA,1747
211
243
  helm/benchmark/scenarios/bbq_scenario.py,sha256=lT1XKSM-PXYtENI-ryScC4yb1TtII7YoH8kt_S1dZQo,9579
244
+ helm/benchmark/scenarios/bhasa_scenario.py,sha256=N7SYVwUOLAD_WZtkIYoCnPuRb_nFbIege-5_j4yX6nQ,70915
212
245
  helm/benchmark/scenarios/big_bench_scenario.py,sha256=bSk8Ia4u_6OqMjiyadpYQAWN-8GFWqvd3Ft3JiVGpi8,8081
213
246
  helm/benchmark/scenarios/blimp_scenario.py,sha256=o1MDcHT14KFDET4K9otx8pDiIgXrhsD19pvO0mR2ADU,6260
214
247
  helm/benchmark/scenarios/bold_scenario.py,sha256=NEfECMVzlVP_yo6sOuIzj6vZ5jd72_nvtEQ1lWrq85Q,4106
215
248
  helm/benchmark/scenarios/boolq_scenario.py,sha256=rvSp5SwXMCVzBo5BFxfhj1Xv06_ksqKrtTQR7nPiS-o,8013
249
+ helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
250
+ helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
216
251
  helm/benchmark/scenarios/civil_comments_scenario.py,sha256=VO5G-cQ9qctmBN0O76uSewnO_mFslMo5mbR2ZTrjuds,4851
217
252
  helm/benchmark/scenarios/cleva_scenario.py,sha256=xhwZ616iz0CN3fYIfrXHcV1XlcRQjyPSzML8fq8D3l4,57939
218
253
  helm/benchmark/scenarios/code_scenario.py,sha256=s4AGW8eBY0gFnu6EXvVWL0xbFYO28N9sgP1V8eBO7EI,12171
@@ -228,17 +263,21 @@ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENO
228
263
  helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
229
264
  helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
230
265
  helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=goGmHtN7MYnAQIXhffZZhuuuMWN0gHNOXyI9_injiZM,20119
231
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=mbUABlGhpDur6x7z_q5iDqJRMBZ2d4ZI3KdVWNnJagM,2859
266
+ helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=Qkwhg1s5f2_5rnCoX4BxjQGKKGVRp2StIwONvBjJVqo,2909
232
267
  helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=AI8HX16_Lw9MKqrck62q8IFLUU-P5hxaOEHcmTS4rdA,2928
233
268
  helm/benchmark/scenarios/dialogue_scenarios.py,sha256=-I7FY6q1b11zpFd1_oAgar5qlfaFcXsNCKGVln9etPI,5629
234
269
  helm/benchmark/scenarios/disinformation_scenario.py,sha256=kQi0MVVoSDhx2vOTnUaCIttPXMf8zz7Eld2FD_77tnA,8504
235
270
  helm/benchmark/scenarios/dyck_language_scenario.py,sha256=vMxND9wPJenrGlCLhSw5UxOw3TV2Jq8cTmIXGpzEWaA,9318
236
- helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=n2mnkmSeTznEy7S-GVumqpD9bt27yctbuEmtgQrG-Y0,6399
271
+ helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4cv7u2lmUFcigkAX_eMwIn49Pa3p-aHClkT-r-0roLU,6616
237
272
  helm/benchmark/scenarios/entity_matching_scenario.py,sha256=YjBX61TlL3CDQ3X6D-JyR-qlOYGLdoRXJxl9AEeqxYs,7022
238
273
  helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
274
+ helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
275
+ helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
276
+ helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
239
277
  helm/benchmark/scenarios/grammar.py,sha256=Pb9vEP_0Ki87UdQCj1ym7QWJ24M4DRP6TXB5d3GnhLs,5597
240
278
  helm/benchmark/scenarios/grammar_scenario.py,sha256=bl-Cm9caDs077zSu38mzaS9maZ2gM-QazgjOEMFvxYg,1454
241
279
  helm/benchmark/scenarios/gsm_scenario.py,sha256=9fV2SEw3ocKNAD-TrDZZTpq4l7mbttQQWbO0YNz4e6k,2613
280
+ helm/benchmark/scenarios/harm_bench_scenario.py,sha256=wzzia3HlfwALgRLFLABv3blxBh1ras-YtHk4iQ_EX30,2454
242
281
  helm/benchmark/scenarios/ice_scenario.py,sha256=vvk11cFPGUhg_CcGh3wEfVsGzrvMFgkByN-xcF-OOjI,16473
243
282
  helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
244
283
  helm/benchmark/scenarios/imdb_scenario.py,sha256=X1k76AweFECCpYCXy8HuvjRbXbfmDfwK3SES_t_wkUs,6174
@@ -272,18 +311,28 @@ helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=zVL1gb3eVz-LbK2hfdnRR9ItaM
272
311
  helm/benchmark/scenarios/quac_scenario.py,sha256=SRAhMp6TAsmTRq6VRONLl3SEayFIe23He_mBhzkZ7qM,6628
273
312
  helm/benchmark/scenarios/raft_scenario.py,sha256=_5QhHS3opxxML7Rek6F-q5NVOf0M2UgbC6OTnQZ4C1U,4452
274
313
  helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=GkgJo_13MWQQQTZbhlknvTR6ZrYr7NEn1WdMZrPs4y4,2400
275
- helm/benchmark/scenarios/scenario.py,sha256=lkJgqDAbnFQoJgAyAllqvlyIIH1nSQ3dnoVYh68tQrU,8232
314
+ helm/benchmark/scenarios/scenario.py,sha256=1HC8EjiZ-5k5AJhxtwRreLe3hBbTyZJWrs-Aa3Uq43Q,8229
276
315
  helm/benchmark/scenarios/self_instruct_scenario.py,sha256=jZ2MksT4N_4g_sp5egw7ycrsM-Ya786_RFmiYYdMvG8,2285
316
+ helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=grYOqccYBtB4m-_UUV20EOXsY6tkukwC6kwPOBAmdnY,1223
277
317
  helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
278
318
  helm/benchmark/scenarios/summarization_scenario.py,sha256=MlNMgsY369DC04nhMUdG2o9Ydi6yze1fGOjC0bK-UwQ,6847
279
319
  helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=pzifpsJJbucmTjujNqQnwQa4Y7wpQjkS6QjNXOrgTAQ,3096
280
320
  helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=1b3e3WpFMNBV3li17-0Ug6QCSKO4qRFaWDF23bYNsvQ,16326
281
321
  helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=k8IGK6VABOr6wuha4HynP47peoAkmIViAVhScOtCANo,8345
322
+ helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
323
+ helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
324
+ helm/benchmark/scenarios/test_ewok_scenario.py,sha256=9piplj3i53_-xNSMkIN47JYEU3JB65WgEPT7qdyK4Ng,953
325
+ helm/benchmark/scenarios/test_financebench_scenario.py,sha256=EFZLJXXBoyjlTiMQFaQ6MiYkve1lfQDjQWjn4BjqgAQ,1184
282
326
  helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
283
- helm/benchmark/scenarios/test_math_scenario.py,sha256=s3-CllgCB8DL9-L4DmJ6Zcf9xi803nWYN84KlhN7PhM,1016
327
+ helm/benchmark/scenarios/test_gsm_scenario.py,sha256=I-Sl8Sg8kmFd7u0zZbwbNmeFV1mQLuOHoQ1cQDDwovs,1123
328
+ helm/benchmark/scenarios/test_legalbench_scenario.py,sha256=FqbgwBAhHWyTIUYSzI5FOnTDx0A3u1o2ANKa_6bfA4g,1212
329
+ helm/benchmark/scenarios/test_math_scenario.py,sha256=ieI8-c6yx-3U3iaEz2yiCGSwnQTBJE_06-dMKX7a8Vk,723
330
+ helm/benchmark/scenarios/test_med_qa_scenario.py,sha256=Ekp6r5eYPkCxV3FCzVvLemKxlhENhelqdO0Mdhg5yFo,1515
331
+ helm/benchmark/scenarios/test_mmlu_scenario.py,sha256=mxEsTydKUOt8OD1Ei82nPgUFV1Tlvu5Z6drEMToEURM,1593
332
+ helm/benchmark/scenarios/test_narrativeqa_scenario.py,sha256=Rac_OrUpd2ruT95YvSrmoVz2Jpycgq3Roiyogm_0aAc,6420
284
333
  helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
285
334
  helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsHzMZSzOe8j0FrUQmMyZM4,1736
286
- helm/benchmark/scenarios/thai_exam_scenario.py,sha256=FinZuwEz5dDcNBxG4OseVOnBWlgg9lT1LzMdIWSjG94,5838
335
+ helm/benchmark/scenarios/thai_exam_scenario.py,sha256=5Q-KL6fVrk2FKApVyY2ulreFduwBaUG0iJOsJ8M6El8,6008
287
336
  helm/benchmark/scenarios/the_pile_scenario.py,sha256=RqU8yXQJ4FkmEc6rO9J3QMXenyUZrsEZlLAQUx4-Wnk,4995
288
337
  helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=iqL-tuqUQZjF9-DKAnI6wV-oLGC_I3aFuofdtJYHt8E,6035
289
338
  helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=CRlPxVfkg3HPZV-lUMyCUSFOiAqg5IIPt-dq3qR9LU4,2096
@@ -293,6 +342,7 @@ helm/benchmark/scenarios/vicuna_scenario.py,sha256=zLwLuEr6n9VQjVxQwgFIM-os23kJe
293
342
  helm/benchmark/scenarios/wikifact_scenario.py,sha256=cOVKgDisBdjPcmVMCLhTekdgX3hpDJUT-aKbvRSaMoM,5791
294
343
  helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=PAPkmZdC4aIBQ1k29dDvTFBEFaPV1ZR1Ifif4FHoZqs,3087
295
344
  helm/benchmark/scenarios/wmt_14_scenario.py,sha256=NArkTZntYdYlegHo_-fkzeyCUOjosOONQKlquPbZRxY,4498
345
+ helm/benchmark/scenarios/xstest_scenario.py,sha256=wpagohfuFE1juuXjq0dleSIHr5Uk6hnClIv-wABbzEI,1285
296
346
  helm/benchmark/scenarios/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
297
347
  helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py,sha256=c8zcoGCOFqBGE4TAEx1uLsUmGXw_jIS8alI99ubGeDA,5477
298
348
  helm/benchmark/scenarios/image_generation/cub200_scenario.py,sha256=7p3G4mJRc8QHR4Mw2GLsfAFuJcEe6OeZbezVhbyc55E,4103
@@ -313,126 +363,116 @@ helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,s
313
363
  helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
314
364
  helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
315
365
  helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
316
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=zXR0LmXsD2tv_ovJsbY_HP53kdiFOvty7Y_Ai3ZCrT4,3037
317
- helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=LiH14xUoEKXn5ZStDbGE4bz9iMEn3-5I39eJ6kvN2UY,4045
318
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=82qplX4gJ4GsSVhBjwrsVU46TAHh-jym3F_M5A-odRE,4608
319
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=3pBAQgOsnSyMCzt60s1m8Kf_fEJ4C7XgCDbtXatTlX0,2599
320
- helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=sBQfqAxmP-Z0ifCgwTbP11aPsKA4vogcWBqSDiKlbE4,3512
321
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=7KjsXiAaiVHDRSyW08jZXNUTWogP3Sr2Og5ViT6Xz8I,3832
366
+ helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
367
+ helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
368
+ helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=lfRHjhhXCo0YeDQe4_gfSHCzVKtqQVZ6DALLABcCmtI,4637
369
+ helm/benchmark/scenarios/vision_language/exams_v_scenario.py,sha256=pLD--gtL5q7jLSWQ8iwAdsiOrTJ_rBsLbwWMWKRhPbs,3853
370
+ helm/benchmark/scenarios/vision_language/fair_face_scenario.py,sha256=V6_1Kl2nWDRyHvwnKcSxkP0DChzKDBW0i_-t9oAxps0,4721
371
+ helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=CDutFh1PHLyeMdJ9HojzYKE1zJidL9ktcsfn9uHNLZY,2612
372
+ helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=k4E6JAN8a_KT1jjV2Ch3K5YhWKJ0f-9iCXLO-_2Xl8M,3535
373
+ helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=qiLLdiSzhnSyjmqCAvMxjhcZ7yBiX37L1cdsZvHL4ds,3845
322
374
  helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py,sha256=7GK_jAOfCgRIGiN_GInDePwuT2wZqmWHp1rqdx18xQg,4994
323
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=kzZHeyWQHUphUfAixkms2t3-KKfHRjwIKi6qm-lMyXA,4728
324
- helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=Yw4zxeYgUw8HKRR5ob9QEIT0bSPrdRUULMKCo_xzFpw,4337
325
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=ly77pj_TorwM1kN2sW7Y2AIGHOBlDkdzV0STvZTBOtc,4332
326
- helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=zxtdub2akvxPYEG12pkW2c57TIFqN38C7ucAXAHAdx0,5455
327
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=XQv7uv2m6EdbI7h0-9eDag4_bL7qE_78PuHB7c4SsHA,7654
375
+ helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=HnzA0L1Mm9rw9uyK-hnCGrxo33z_U_86TLnlELjDV6E,4738
376
+ helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=7ZHpRD7TdQQ-Mp5XQV5yyiLUE0k1KpgbLSYKLBJMxs0,4343
377
+ helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=cM7eTE4bpcIzLyEDye86Ud3rD4Id-0ju73EXjg0DYoI,4340
378
+ helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=7Aa3y0TWGZH3QrPDiqIMkj83LU2Klrzgcb46jv5uytY,5498
379
+ helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=deDMdg2-ORZPV623ngncDPlRn6z6cq_QbQtMu-z0Ydo,7665
328
380
  helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HUO09uM2rBXOfCsxzwovmwtihq53xjuzDOtQO_S3J4I,4161
329
381
  helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
330
382
  helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
331
383
  helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
332
- helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=rkPR_e_RWOeSyHIlSJGJ5lVu5DD-AR3x686XYJse-1E,9885
333
- helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=uFkzMMsjhmuSYo3v_QdfJFX6RFse83JjzMfMa3ynvV4,3975
334
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=5MwGb9BOyB2Xy70BGYZcjencf0ZskxBuzcPa7ABRuww,5106
335
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=e3lCq2nevy9tIFDDKEbJvmLibfk4UMQtAIyzrgnnaZs,4179
336
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=hJ3sOSpPnOCwLtpVnfasI_X89oofI-2PBRjMnx8eiVA,4139
337
- helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=2hY-qngKC69ZL9SHNei3IK3C2PvJDWvwLFVQ8yNSOVs,5196
338
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
339
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py,sha256=ImhfiC_y_hihAGvlj9zRsaoW614QFCBopBD2KxnbSs0,1805
340
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py,sha256=-eWRwo2x7kR46Z_I4vFbVlbqA_1f2UEb75Dx84XTlNE,9028
341
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py,sha256=FKKybU4IeglwXCj6GZC8cAUs_GOU7ymEa6P1dkDT7uw,1350
342
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py,sha256=SyAYkhsipjJG42XfM9sljz1vly5YF-dbSEWTj_dEHIU,1048
343
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py,sha256=ovg8-FfJ8_I1xbajFGSLvERZIA1fQjaUn0zd04ZbI84,15316
344
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py,sha256=j2bDYeWdytYtkKskvuTMwLEIIqELDJJ6D2jdYzmdlJY,9628
345
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
346
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py,sha256=i-i0mlG5oRRDNYNqP7o7Ul56iL02p_anJoThXaSvFiM,2826
347
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
348
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py,sha256=JpTiCSNcqX4wRpH6Cl07jM5wMkpZ5OeR_hjfK_V-Qok,943
349
- helm/benchmark/static/benchmarking.css,sha256=7PsUCff8YcoVxyWuALItfB4TZL55T7MbVQpoc9nZznc,2104
350
- helm/benchmark/static/benchmarking.js,sha256=lqEmoAikBwycVBf1h-et3ZmHKW_DcwxzlwmDez2A1EU,54531
351
- helm/benchmark/static/config.js,sha256=kIfkgr6gaMdFOAdqB35EvuBohq0DWYSQZbe_pTK09VM,103
384
+ helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=D3nNu3uU87eMDiMZZafuRTntXjwbqPaSDygUgQm45F8,9943
385
+ helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=gWrBG5U8uoU92JPGNm5kuzo1GekoJo1rKQaNhv6MYGA,3996
386
+ helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py,sha256=OJtiGhSN_KYgEz0VGXjCjQik_Xihtgiali70Z00XOzk,2083
387
+ helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=YNwuIMJBo7wwftx-T5tCYmGo2oy_794fZ330lkDyqb0,5171
388
+ helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=DxGZ7EL22SzxpAkuiA5twuGVTm96wG_RBg3dU3Vh_c4,4241
389
+ helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=wRa_OuOdyf-qcy9hml-Kj6YtVP5MDzeTbGcqva6LqdA,3707
390
+ helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=zCnkiSya-PHc3ywAhmw03bFdsvLCxAUwGfE6OviEXDQ,4153
391
+ helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=cC8_Vyqw2f4K4hJY-eo9ptj6ANfWgiFAK7b6OOTIPLI,5239
392
+ helm/benchmark/scenarios/vision_language/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
393
+ helm/benchmark/scenarios/vision_language/image2struct/chart2csv_scenario.py,sha256=qcs3o9dPsXoeaP0bu9UVZ6P0GPEcRLoaqABxysLN6VY,1802
394
+ helm/benchmark/scenarios/vision_language/image2struct/image2struct_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
395
+ helm/benchmark/scenarios/vision_language/image2struct/latex_scenario.py,sha256=SnZuHATg5i764MAdgaGwjIGdjCZNrOqP83Y5jE_fkHs,1153
396
+ helm/benchmark/scenarios/vision_language/image2struct/musicsheet_scenario.py,sha256=c08cquz2IALY7PlpOoEfAjupKZmn5GDVZ1H8Gbj4r8s,831
397
+ helm/benchmark/scenarios/vision_language/image2struct/utils_latex.py,sha256=jW3_c63a6u39PJGJw6lM9pIa3dnF8CQgZlPNZdH0sfs,15001
398
+ helm/benchmark/scenarios/vision_language/image2struct/webpage_scenario.py,sha256=DJQIa8NaKV-nhkXEBuY97MJ8a1O3x-Yr6hACVa-67Ns,11117
399
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
400
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/driver.py,sha256=WBFbb3N_eHIa7OFvHQS3Pmwbmkl6r9VyobxlIEKhty8,2823
401
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
402
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
352
403
  helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
353
- helm/benchmark/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
354
- helm/benchmark/static/index.html,sha256=xIJGjMg0qn9eemfdBiNbTI0jzPfBD5x0v8HJF-dMqBc,3561
355
- helm/benchmark/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
356
- helm/benchmark/static/json-urls.js,sha256=AaULgfHw8OLfrQLJpBHfcC013uavQnlNNFS9vzb0qOg,1981
357
- helm/benchmark/static/plot-captions.js,sha256=bTR8gYx-QqF_RJyKX-L-eQP7hSEtawfJSoADCvgjKag,3011
358
- helm/benchmark/static/schema_classic.yaml,sha256=p-yc2WMfyGehRtD7L5ZZHbFMMQovu2HNfvct3tBlV2I,108168
359
- helm/benchmark/static/schema_image2structure.yaml,sha256=gig7HVyJWSwcHa96mf-09e68_fU5L02YRWzNbkPmpGg,13520
360
- helm/benchmark/static/schema_instruction_following.yaml,sha256=mg2g5P8TAYSCEhZbLfshPt_Hq2GKjwbvyOsQrwDqh7w,8923
361
- helm/benchmark/static/schema_lite.yaml,sha256=62ByEWhAJT0tIUFi-euxJ7XFhE6e9E6PT9dF6V3qoSU,40255
362
- helm/benchmark/static/schema_mmlu.yaml,sha256=8kiZDEGGaBXs9ucDk_Gbo2agV-OgOmWuhcYFyodRjcw,53307
363
- helm/benchmark/static/schema_unitxt.yaml,sha256=89GnKrooG7kKU2xh0MeoYZUB54FDUAmOPrbzuBhG1Ik,15496
364
- helm/benchmark/static/schema_vhelm_lite.yaml,sha256=s8tQIetR2WKu3sd8k2uZO68_5E-YtlMdsBJsTehFZKE,7331
365
- helm/benchmark/static/schema_vlm.yaml,sha256=o9AzLTKwSbPES5pISI0tmpUPKWWT9GR-dleDKZqoI0w,33243
366
- helm/benchmark/static/utils.js,sha256=bgN0PT53Dregc-nLmEmAEmg2psufWpS8jTf74WoypHw,7681
367
- helm/benchmark/static/images/crfm-logo.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
368
- helm/benchmark/static/images/helm-logo-simple.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
369
- helm/benchmark/static/images/helm-logo.png,sha256=GTqbrxJr0oQXbBRq-8v6afY5zB5x0M6PhEbKRIX9qIE,280667
370
- helm/benchmark/static/images/language-model-helm.png,sha256=mG0-bkdziXeiF0wOGd67y2jnYmVKJYqhD2N5Q8VIF8Q,26563
371
- helm/benchmark/static/images/scenarios-by-metrics.png,sha256=F7g9mvIYopm-n7sDGg-7I0XCyZvloKsi2wIq1i6da_Q,51331
372
- helm/benchmark/static/images/taxonomy-scenarios.png,sha256=2MiuCLaxnuHvwsWWJHnZFc-rvoQIi_tNIjDatY7I-Dg,100766
373
- helm/benchmark/static/images/organizations/ai21.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
374
- helm/benchmark/static/images/organizations/anthropic.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
375
- helm/benchmark/static/images/organizations/bigscience.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
376
- helm/benchmark/static/images/organizations/cohere.png,sha256=7cr4LI8WK9yPryQboyWK_T5baSND-d-tVrlPNflLQMg,8757
377
- helm/benchmark/static/images/organizations/eleutherai.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
378
- helm/benchmark/static/images/organizations/google.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
379
- helm/benchmark/static/images/organizations/meta.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
380
- helm/benchmark/static/images/organizations/microsoft.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
381
- helm/benchmark/static/images/organizations/nvidia.png,sha256=hvp1wZMwYxkfrVMvJs73PX71JwY5L8ZvxIH_fL4n6Po,27945
382
- helm/benchmark/static/images/organizations/openai.png,sha256=P4ZT5ISIlt6Dl0mOp7juSM4Y7dfyRNPqdc0PJuwNoqg,16877
383
- helm/benchmark/static/images/organizations/together.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
384
- helm/benchmark/static/images/organizations/tsinghua-keg.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
385
- helm/benchmark/static/images/organizations/yandex.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
404
+ helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
405
+ helm/benchmark/static/schema_bhasa.yaml,sha256=R3f48oqk9Va8rtSe9B93K_rCy_IfAhHZdTh4vNDdsOY,27444
406
+ helm/benchmark/static/schema_call_center.yaml,sha256=Mt7_rLG6IT701YrjiJdNb7HpoMVkFjabrawnBieUUhM,8049
407
+ helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
408
+ helm/benchmark/static/schema_cleva.yaml,sha256=TDh-zcCzzTTs7bu0IWlY5dXYaTFhxly8sJIBGQdBvug,25401
409
+ helm/benchmark/static/schema_decodingtrust.yaml,sha256=2VPxzcyKYea7mx-qmswyVRjPfVatjVH4Rs3OU82mgII,15670
410
+ helm/benchmark/static/schema_ewok.yaml,sha256=MluPnZSy22wZLFB2pR7ycBRgUSvIUsqvq4qM0Vk2ur4,12113
411
+ helm/benchmark/static/schema_finance.yaml,sha256=OgsYMSFK__8ZZS96ktsgVRfM40-BhbOY15j9OlV-rNE,7010
412
+ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0yPsYgu_MB7uu5pwHE,19894
413
+ helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
414
+ helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
415
+ helm/benchmark/static/schema_medical.yaml,sha256=hDk4834FKn-5cMr6pHcu1P60sh6cXJ2J0Z1ADIj2MSc,8455
416
+ helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
417
+ helm/benchmark/static/schema_safety.yaml,sha256=LEGt9EuwjHZX-oLVrBQushbL4YUQmIYpHCjlauK_tGQ,8099
418
+ helm/benchmark/static/schema_tables.yaml,sha256=PSk00UHgbMZA8xnAVE6ka2a-py_4rX7VDdodjYBqe-4,10400
419
+ helm/benchmark/static/schema_thai.yaml,sha256=yJUrevvgTJ46TpyXfNecW_B9urh7LPwSbBi_mT4ZngA,8348
420
+ helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
421
+ helm/benchmark/static/schema_vhelm.yaml,sha256=ryxslQJZun-HqM9ib4rp3_dBVufa01jgdo1bsHccYSk,29943
422
+ helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
386
423
  helm/benchmark/static_build/config.js,sha256=ER8utDIqVZi9uge7Qrk1gmlT88TOOkFF9xYp3j10m8U,165
387
- helm/benchmark/static_build/index.html,sha256=g3pMdAovQ4VMr7dPGgyzWv2K1tN-E8LLkAs45ppLPGw,1149
424
+ helm/benchmark/static_build/index.html,sha256=YHWao7kJaMx9osFxRgfuCDxu-FwaBOWDhUcaAEVe7-0,1149
388
425
  helm/benchmark/static_build/assets/01-694cb9b7.png,sha256=aUy5t0DYCg4r52HDOmeNi1S2CHsnv3mE7ySokJg3Ouo,8903
426
+ helm/benchmark/static_build/assets/accenture-6f97eeda.png,sha256=b5fu2p7L_mnwg-p5jjPk1sFRwJEBRtGwXsVyQU_Runk,9537
389
427
  helm/benchmark/static_build/assets/ai21-0eb91ec3.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
428
+ helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
429
+ helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png,sha256=bfyazxJvVs5GTSSlnm6nOb2r_jzo3TJybqF04S5Dxhw,69372
390
430
  helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png,sha256=fOEANHS8RymKaCzUWn9gQWebts2ghSmtW9Fdda_TjR8,7224
391
431
  helm/benchmark/static_build/assets/anthropic-70d8bc39.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
392
432
  helm/benchmark/static_build/assets/bigscience-7f0400c0.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
393
433
  helm/benchmark/static_build/assets/cohere-3550c6cb.png,sha256=NVDGy09xliCqZy2TKUAka-B90jVDB_VRCS9A2_sN7VU,4414
434
+ helm/benchmark/static_build/assets/cresta-9e22b983.png,sha256=niK5g8HYADkbhKM9gSVtYEdPegBS40zZXF4nNe9Fu4o,8131
394
435
  helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
436
+ helm/benchmark/static_build/assets/cuhk-8c5631e9.png,sha256=jFYx6Xx-SGYANpsSnqrlaQytYuOBOsTHhpqPJZk3EwE,30385
395
437
  helm/benchmark/static_build/assets/eleutherai-b9451114.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
396
438
  helm/benchmark/static_build/assets/google-06d997ad.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
397
439
  helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
398
440
  helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
399
441
  helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
400
- helm/benchmark/static_build/assets/index-737eef9e.js,sha256=PvNcOghX7gGSYAGk2bR3pvIBnwDbeWHu0JyfPNaan3o,70614
401
- helm/benchmark/static_build/assets/index-878a1094.css,sha256=h4oQlJUZdqMk6nS_TEkyXMZ6rtGmepw4ljoSAHZX1vY,486381
442
+ helm/benchmark/static_build/assets/index-05c76bb1.css,sha256=BcdrsQgUFadqYf5z-wdFNosV_c2MlxV8xktld2BFKBk,489017
443
+ helm/benchmark/static_build/assets/index-58f97dcd.js,sha256=XJY99lqQJAVIYis7oEhi6Hl4drYXcG2WDGUCAGX1YVg,91191
402
444
  helm/benchmark/static_build/assets/meta-5580e9f1.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
403
445
  helm/benchmark/static_build/assets/microsoft-f5ee5016.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
404
446
  helm/benchmark/static_build/assets/mistral-18e1be23.png,sha256=GOG-Ix7XlctGOUmvJfO2oVSBM7E5O562G88OnoxsjBw,14402
405
447
  helm/benchmark/static_build/assets/nvidia-86fa75c1.png,sha256=hvp1wZMwYxkfrVMvJs73PX71JwY5L8ZvxIH_fL4n6Po,27945
406
448
  helm/benchmark/static_build/assets/openai-3f8653e4.png,sha256=P4ZT5ISIlt6Dl0mOp7juSM4Y7dfyRNPqdc0PJuwNoqg,16877
449
+ helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
450
+ helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
407
451
  helm/benchmark/static_build/assets/react-d4a0b69b.js,sha256=rNTpl8Is3LkYXqJowRMc8vc4SXQwP94Ozy4DZZWwldU,275141
408
452
  helm/benchmark/static_build/assets/recharts-6d337683.js,sha256=rDrVmtTCCSLY2hpcxSDxhlQ6CQmTTSQOESNeO3oVQgg,432466
453
+ helm/benchmark/static_build/assets/scb10x-204bd786.png,sha256=IEvXhlxgBA9NCH4RrGWJkMx0Yc7V9EK6o7vrAI5KZCE,4990
409
454
  helm/benchmark/static_build/assets/tii-24de195c.png,sha256=JN4ZXAa0rbR2IlxPfd_mKtntFZcYpDcXocSiqrC2rNg,63389
410
455
  helm/benchmark/static_build/assets/together-a665a35b.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
411
456
  helm/benchmark/static_build/assets/tremor-54a99cc4.js,sha256=x_K5Bp7szI2zsvESrKqffUOHbm8ohjjvuoIeY_yD_CA,293015
412
457
  helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
413
458
  helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png,sha256=zedhimhku2Q3QIvaRSYlUAQ0b5ia9pU4cFzKnABfr4c,118544
414
459
  helm/benchmark/static_build/assets/vhelm-model-6d812526.png,sha256=bYElJoVkSaMJ_lFZj5qoSrIbygbNyBk35q89jtFRet8,168494
460
+ helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png,sha256=qGpsSjEu7HFlPAk_zXuUEdDqj0wkCfFHA1bCtu8Ugdw,8531
415
461
  helm/benchmark/static_build/assets/yandex-38e09d70.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
416
462
  helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
417
- helm/benchmark/window_services/ai21_window_service.py,sha256=1ZDLJv73bxoLj_MzEBu4TgH5xHw-hx0nI6KX6RU73kE,12593
418
- helm/benchmark/window_services/cohere_window_service.py,sha256=5jm8o5ZYrbDUluA5LbMWLOOrOlIuHR7MhAJkOuzBagM,4750
419
463
  helm/benchmark/window_services/default_window_service.py,sha256=F099qF-YeM7YPVtph0dRFPry5vP8_BiudHTy2CpuICQ,151
420
464
  helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=EU3QevFOiQYBN2te54FsVRnGYZdgDxK6KqOWSQOa0q4,2125
421
465
  helm/benchmark/window_services/ice_window_service.py,sha256=9NeBN_tmOvwrK1miUnX3wJA70BP5ifIIeHpNR2gVwls,1070
422
466
  helm/benchmark/window_services/local_window_service.py,sha256=e9JHG72kFWlV6UKg_IhRCipOWQUrOD6ZjsT-_Mwewps,5232
423
467
  helm/benchmark/window_services/no_decoding_window_service.py,sha256=s_i_cqIuU9p0GDRIBApaOHzjH7gHrBPTJ2X5NEcN33Y,1375
424
- helm/benchmark/window_services/test_ai21_window_service.py,sha256=HkpNSaJAClZfaa-bQZ2BrRm1UB_u4sLAGSBlGQqRUD4,8221
425
468
  helm/benchmark/window_services/test_anthropic_window_service.py,sha256=lnxLiW5BPaWN6m03L93qCFugsxnVBbLmYPCarlrO-So,4196
426
469
  helm/benchmark/window_services/test_bloom_window_service.py,sha256=x7WBh0S223ABC9KvL2-y9G-cUxFUPm6oIkqvYO_4mt8,4288
427
- helm/benchmark/window_services/test_cohere_window_service.py,sha256=rKXnw2E7MLAtkLgtrUvnZuQp99_agDO4qcpb3daik-E,3348
428
- helm/benchmark/window_services/test_cohere_window_service_utils.py,sha256=sf25f9MeXzoqsbDzZ7d7le13hm8RkDe54nhLtKF2pqo,158150
429
470
  helm/benchmark/window_services/test_flan_t5_window_service.py,sha256=IhQMWBq2d39O3uNKGwbaMWJkz8585Zc-J_yqvPJfwu4,695
430
471
  helm/benchmark/window_services/test_gpt2_window_service.py,sha256=2UHKt4Wmh6XmSCdepjuMbZHFpb1oUcrKRSxcdOzBE1s,2671
431
472
  helm/benchmark/window_services/test_gpt4_window_service.py,sha256=tV5WdpxYxewchEp1rnsIlEfdJFrHVFKYQ-_8NhGK2yo,1052
432
473
  helm/benchmark/window_services/test_gptj_window_service.py,sha256=0lu4Os_3x3N-AbejG3LZ3-_ikxEHg1Lbmfq-Pzg_D9Y,2374
433
474
  helm/benchmark/window_services/test_gptneox_window_service.py,sha256=8CaOW_ln9bxKA4--dVLfLdsASo6RrR7ouP6EcSruzdA,4210
434
- helm/benchmark/window_services/test_ice_window_service.py,sha256=1DudvCYh4te_UDLg14XeXwKUoin9QnCgZ_PSCwxxaM0,23579
435
- helm/benchmark/window_services/test_openai_window_service.py,sha256=W_QJKaMgzYU7qGFuSS6JeM_f50UX0SuHpkH-u2bEvI4,2312
475
+ helm/benchmark/window_services/test_openai_window_service.py,sha256=Mt-dDtjQmz25n7hwNVyy1T_rl0TMvcvJfuhWNe_AvSw,2314
436
476
  helm/benchmark/window_services/test_opt_window_service.py,sha256=Gh1GzWnlgYIGwDNBw4EnHds3fXwMaSjzkfFXeLn47os,4215
437
477
  helm/benchmark/window_services/test_palmyra_window_service.py,sha256=yy7D2C0ZzExCbptYNsEI9zuX2AEGsEUTj0a_vbqub4o,4212
438
478
  helm/benchmark/window_services/test_t0pp_window_service.py,sha256=pvp55FyqjunkDpHVAhPup3h-iNkepQpxyr4nC87-5iY,3998
@@ -451,41 +491,44 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
451
491
  helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
452
492
  helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
453
493
  helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
454
- helm/clients/ai21_client.py,sha256=LIdkmzcUDR9uIF2tIk5YgDNGNmfQ9JDYmgscvFoCHDs,5509
494
+ helm/clients/ai21_client.py,sha256=PYyqpbnMK1l18Rv_qhE5KdHHqZHgHePaJtJOowTyG7I,8128
455
495
  helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
456
496
  helm/clients/aleph_alpha_client.py,sha256=koPqXF6uRD905atoiCaPg5yxr6B25J0g2OTWk8geebQ,4969
457
- helm/clients/anthropic_client.py,sha256=0hAmv3f6FQURScmDpcGbwGjnvskNRP2vhRH02OSe70I,33224
458
- helm/clients/auto_client.py,sha256=Qs0XFq9pyH4M9HTOLoI3_5m8kW305x3pzVukgETdrZM,10732
497
+ helm/clients/anthropic_client.py,sha256=s3eCwHh8mbhxLi8up1WtQWKkUsHJa-LO44prNd7XYFc,34059
498
+ helm/clients/auto_client.py,sha256=uK9EWQFWBt4DoV1oytm0dIeA3YpcfGi_H0rCRZSVE8c,11438
459
499
  helm/clients/bedrock_client.py,sha256=BsH9UopsP6ZHf-K0Yzg1PYSMLDwY0yIUmPHDhJVMUi0,5293
460
500
  helm/clients/bedrock_utils.py,sha256=okZ6Z8pviGOUNlrdF2QquAqFs8-QYgcqci95eij8giM,2574
461
- helm/clients/client.py,sha256=xoxPwV-aar7suM-3eAMsB9FmrempyqZ5FFXcLIQJz9w,8628
501
+ helm/clients/client.py,sha256=InjCQi62TWhWHmfyi-mC3fSAVztd-YDyfB3BkpacHXk,9002
462
502
  helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
463
- helm/clients/cohere_client.py,sha256=0UUsFnHwZjEkKjXKPzM6EpZ_iuAduZTg3sCrPM1zGt0,7359
503
+ helm/clients/cohere_client.py,sha256=PtVrDdm_-dXBiWzu_dfwiJPt5GLGw3wdN-Qw3u8ugtU,10976
464
504
  helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
465
505
  helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
466
506
  helm/clients/google_client.py,sha256=EOpPzK5_9yzWkMjK-4ILiixDF3aeOa8AbR2SPnEO-nw,2900
467
507
  helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
468
508
  helm/clients/http_model_client.py,sha256=DBgkVDZPmg99DCcO_1Xdf6nFQo2kyxLkgoQpwC-wkHI,2806
469
- helm/clients/huggingface_client.py,sha256=vzUmNJKsgIXLD8ho4kUGyFCRFGXC61C74X7No0yY7N4,13235
509
+ helm/clients/huggingface_client.py,sha256=k-8J4nnDbve8UtGsa0RytWhS9IpAy8hoJAUw4nRZTMI,15734
470
510
  helm/clients/lit_gpt_client.py,sha256=Sjec16bNODosEhDoBkRc4t-LNS-nCUY_jVivWj5zvfU,6205
471
511
  helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
472
512
  helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
473
513
  helm/clients/mistral_client.py,sha256=thOLMcEfrzWR00JUabIZ_PnW2o9YZsdSmNf9z3jbYKo,5982
474
514
  helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
515
+ helm/clients/nvidia_nim_client.py,sha256=f3ZWoTnJmBIFeWsHeUDaTCbDZLK_kdlUWNO1hWumUOo,987
475
516
  helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
476
- helm/clients/openai_client.py,sha256=gWqr4dvYfbUnBtfySSUGGVZYV-pLtqcrnYaf7nPk5-s,13936
477
- helm/clients/palmyra_client.py,sha256=LBYFHNc5LdpPbiSp1AAHuMm8cUUCQ2EB03BB6XnDTYQ,6551
478
- helm/clients/perspective_api_client.py,sha256=WQDArqlKVWwcK2SicnSIAgV6JGVHsxibTzkdezT3z_U,5920
517
+ helm/clients/openai_client.py,sha256=faWpoZjKxQu3EoeYwMz0deesFlH9VTVIjJ2W74c3gxY,14117
518
+ helm/clients/palmyra_client.py,sha256=XBfrTE-mxiYhLF2EXqd87DckfuZ4mwVLoI_Qif_p5KA,7223
519
+ helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
520
+ helm/clients/reka_client.py,sha256=K8b9p7U6LLAy4PRjgYrUS06gF4G2xjhjRoMEO4XDe0o,8329
479
521
  helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
480
522
  helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
481
- helm/clients/test_client.py,sha256=g29C1WLUONnNuE2oGFZhaqMahb-doS4l_Ph4OHrQvrc,3895
482
- helm/clients/test_huggingface_client.py,sha256=WUPrA7VT3nnMNht7w20I6411hlpIS_77XbQC2vC0WU0,2723
523
+ helm/clients/test_client.py,sha256=6cLpQc2IMR5o7iBxZYPvoRtHJa5i0E7JHh1VKaCtfBw,3842
524
+ helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
483
525
  helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
484
- helm/clients/test_together_client.py,sha256=lAtGKn3WdsYe5MEfTYVYRnu_rS4DPnfFr5jRn42rvoQ,3865
485
- helm/clients/together_client.py,sha256=fCPJ39fX3xm_Gp6cGsc1HIf1jVMLNiE2kIkee45-Ufk,16208
526
+ helm/clients/test_together_client.py,sha256=yYNrhU3kQjmHwhILuoP5QwUgbmkm2gg2NHiNycHjoeE,6145
527
+ helm/clients/together_client.py,sha256=rtYdx53ZE19ziJpBc7MYTeSHJjN3Ke51I3Uldg0IAbs,20595
486
528
  helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
487
- helm/clients/vertexai_client.py,sha256=Mt1rb9lWeQqJLGcBSR5mflYBvJvJfsv5OeIuQz4_ng0,19726
529
+ helm/clients/vertexai_client.py,sha256=K_vCanJU97o2P_WJOeLhUFJA8SdfJDlVNl7Mi1HuIrQ,21860
488
530
  helm/clients/vllm_client.py,sha256=p9atBtq3PBOoPkOPSifkMrYZjNLnNM_sWM6tL_3N-WY,1675
531
+ helm/clients/yi_client.py,sha256=0t4WJ8MTLOpB1LCZ-P6UdYa-KbGB7hkDrBluSkioot0,835
489
532
  helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
490
533
  helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
491
534
  helm/clients/clip_scorers/clip_scorer.py,sha256=waLI_rI6dQPjmtywvGeQKK7bGCWXyoIgIuBc8P3zSB8,1907
@@ -544,9 +587,11 @@ helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh
544
587
  helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
545
588
  helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
546
589
  helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=hTywh5nM95BmPoDyKOSDWg9G3-QwLO3KZEJZVkmFroo,6478
547
- helm/clients/vision_language/huggingface_vlm_client.py,sha256=X5SX2iMZkFe9Pmq4Gx0O4bnP4gBPnKvamLThRshAEik,4875
590
+ helm/clients/vision_language/huggingface_vlm_client.py,sha256=H7AE8mm506PkEcUO8VaLVtptHTwVX58nZx1A_BWdKzA,4968
548
591
  helm/clients/vision_language/idefics_client.py,sha256=hi1VCDBegHfBssmW0C62H3OX3U2ISVRhaSkd24gb1K4,7692
549
- helm/clients/vision_language/open_flamingo_client.py,sha256=CkN0JCeR742ZG9Nc4A85hp4BSE0WLU-3Rs-ZwdmDkzs,6632
592
+ helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
593
+ helm/clients/vision_language/paligemma_client.py,sha256=IU_T8r1RgpGkEAqabLKBbmoUOWV6c1a9_FXgiTy8exE,6835
594
+ helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
550
595
  helm/clients/vision_language/qwen_vlm_client.py,sha256=6rCH4gJMDyQHyjAE_GDIrLsInH_bvd6to-4RMWbRLeM,7407
551
596
  helm/clients/vision_language/open_flamingo/__init__.py,sha256=i1tGJj6ckeE6eS1EWV5tbQKYLmPCrdSI45mPchfv_Ic,88
552
597
  helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -563,23 +608,23 @@ helm/common/clip_score_request.py,sha256=WnNg89owDCmG7tyy8nnQL0RdKQLsUdMWiYH9Xqq
563
608
  helm/common/codec.py,sha256=gTh6AwIQ0Bbul_QSnIO7eItwMZmYtnkIrG1jkc4GOL4,7100
564
609
  helm/common/concurrency.py,sha256=8THtHlCtXo5c8iCuz_UcBBdzZX6aiEALLc4u0M4SYL0,856
565
610
  helm/common/credentials_utils.py,sha256=O-57nUgkWLbZF0k3lsSaVGPPHj2_OYeVuCMe0to3bRE,1118
566
- helm/common/critique_request.py,sha256=Exu8Ans05zCU5d5-AglEbG40mBwKYED2Z3WqY_XjXBY,2772
611
+ helm/common/critique_request.py,sha256=yo4aRe-DEjudUmydthtpTj6LdhRXfZ3JZptxTkWzZ3U,3068
567
612
  helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
568
613
  helm/common/general.py,sha256=nMfHNPXyAAorAMmgDClD8r8XXeJcvfF0QXTP-FgH5PQ,11690
569
614
  helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
570
615
  helm/common/hierarchical_logger.py,sha256=EnKLnfbQftca08EJfjGEQb4tcnCKbx-JtwLnoCnhMQs,2908
571
616
  helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
572
- helm/common/images_utils.py,sha256=zbzS8C_oCDb9dY2xpWY6nljI8of72rqwijryMeiBKKo,2527
617
+ helm/common/images_utils.py,sha256=icE0tH9P3FT_qggfbi8vVwkmIjOAN5l3HcGDF9gmNnY,3345
573
618
  helm/common/key_value_store.py,sha256=iHi1WQuWttLNJnuM48QNOAXHoneNbmbBmtXYPq-dyys,3147
574
619
  helm/common/media_object.py,sha256=3VZqfb0py5dDKwWtnLp2kdl8svaike-Cn7Mjk-b0cvM,5130
575
620
  helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
576
- helm/common/mongo_key_value_store.py,sha256=yK1qyh1RgKB_hYMD1BA6hQw6oGJdrALPMpqqlkn7h0M,3811
621
+ helm/common/mongo_key_value_store.py,sha256=Qky55n8jkbJb8oIw6UCLnCbJoUR3H3yBZV7J8wVu1Ns,3878
577
622
  helm/common/multimodal_request_utils.py,sha256=GNZQQCcwsARyFCO-uoeeglyK2PEfC4MjClAKDeKqokk,1404
578
623
  helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
579
624
  helm/common/object_spec.py,sha256=_usgTDQULBF6_jy7C6m-9ZNVvNxbGoTE_CdGcSvBASU,4327
580
625
  helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
581
626
  helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
582
- helm/common/request.py,sha256=B94Dey42OJZ5lgcf71KsGW2nKo8eB4My_pj6tDkIQOg,8012
627
+ helm/common/request.py,sha256=Z_YUd77WQ15yeSN8YYdT48dI4ehUc869KuaDisAiyIA,8806
583
628
  helm/common/test_cache.py,sha256=XqboYHQAkFWIHPsuIjuageRSLeN7QoATKF7wwxggPqE,7054
584
629
  helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
585
630
  helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
@@ -590,13 +635,13 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
590
635
  helm/common/file_caches/local_file_cache.py,sha256=wBOAbbkGLiClaX4YdunokRfSQCKNkTYmMVx2KTLy4Lc,1921
591
636
  helm/common/file_caches/test_local_file_cache.py,sha256=bOCWR9MglwQXV98xk8auyjgFxaOr85zRdxWwxMBQW9s,663
592
637
  helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
593
- helm/config/model_deployments.yaml,sha256=KAD0FZ45ERfEjr3y7HbPxZmEnnJBQiiOHRHN7VxqiF4,74817
594
- helm/config/model_metadata.yaml,sha256=XpJnlu0kiI5sGEqswF_S6_ra0Iys3VOfsDs2Jiz_Vqk,112991
595
- helm/config/tokenizer_configs.yaml,sha256=3IhRANDTlN39TWqDWuPy507wQlZWOBlyaS8fA6WLDD0,12070
638
+ helm/config/model_deployments.yaml,sha256=_Yeji7Zz8XfyYGJzrTEFzIDL1hpVPcv_mPDvANKSGQ8,89215
639
+ helm/config/model_metadata.yaml,sha256=E2Rg5_4kR3RGtjz9XaSKg_B7nfz9KgtqGXWgXw7bLWI,158654
640
+ helm/config/tokenizer_configs.yaml,sha256=RD7lrDgoEW-foqJI0QxLo4XPHS7G8HyuaB3r4rwIK6Q,18761
596
641
  helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
597
642
  helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
598
643
  helm/proxy/cli.py,sha256=l8F7UYqrIOoBD9ZCIxJFA4fhxlzhae0-2Nn8A7FMkzk,8244
599
- helm/proxy/example_queries.py,sha256=uYc05CIhTzFCjLPdkWqtOweyHqsRUX4s6ByP8wJbjVk,4650
644
+ helm/proxy/example_queries.py,sha256=rVGmQ2ej4OS7m5Y3uI5dp9Mfdw6bv53c0o2QknsmYes,4379
600
645
  helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
601
646
  helm/proxy/retry.py,sha256=iLZmKATEJQa9jsSpOIx6YDRhmrA8G1Qm21cUxCuo2Ug,3490
602
647
  helm/proxy/server.py,sha256=V05YdMy0lZqYfYkxLDqksGYe-8CIFa6Jg8aSb8YHM7I,10753
@@ -608,34 +653,34 @@ helm/proxy/critique/mechanical_turk_critique_client.py,sha256=OcppmFOMweBSfVTiLI
608
653
  helm/proxy/critique/mechanical_turk_critique_exporter.py,sha256=taULrc_cIP0O9c5UpGz3l9DmWQadTVzN_v-qzTgMoyo,8470
609
654
  helm/proxy/critique/mechanical_turk_critique_importer.py,sha256=NL97joO5pRkcICRdVyG4kf9JhfYRaySsxRoZ7KWDYv0,5581
610
655
  helm/proxy/critique/mechanical_turk_utils.py,sha256=mKpUv4zz3s5ptzDY7UrwuI7Cr5HmNgSjPC10BnN9AL4,1766
611
- helm/proxy/critique/model_critique_client.py,sha256=nrNjnvOFdcRk9tUk2MjoBugAfMM92X0hxKGSg4xsy9E,11187
656
+ helm/proxy/critique/model_critique_client.py,sha256=QMFiMpALXnneumKbJpXOZDEb3lPPdkIaSCasmdXHB8o,12806
612
657
  helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5Vy4jMrHXcsXc_5m4,15889
613
658
  helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
614
659
  helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
615
660
  helm/proxy/services/remote_service.py,sha256=emYN0qWOJLQ7q1n06V4TwlvXaqylQcUxmqDcGZXqPJ8,9097
616
- helm/proxy/services/server_service.py,sha256=ehKs1gITG8ZsPpxzjbzlHqWjAJVLahiKZn5odsLhcPM,11535
661
+ helm/proxy/services/server_service.py,sha256=SPaiP4D4zYwaNKaULugNtDCYxz1HqgoUPcI7BU-eS64,11469
617
662
  helm/proxy/services/service.py,sha256=Be-Z5F6AN4vMzsJr3BS6tJ9NHHy_dc_yn2Ex9cm0ChU,6193
618
663
  helm/proxy/services/test_remote_service.py,sha256=NFnLjg3QNHoDKdK0DlcrtylwlKXx1vdzheNZRrLEv7c,6605
619
664
  helm/proxy/services/test_service.py,sha256=FUZoI8pGiUg5adgB1wTJ869QOgFYjPtM6yf6FGMdE64,8968
620
665
  helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
621
666
  helm/proxy/token_counters/auto_token_counter.py,sha256=34PWvF96DurTrUtUxW4Td5VNV1_BhAebCkXQLl3xp4M,2046
622
- helm/proxy/token_counters/test_auto_token_counter.py,sha256=lDe1lXa5keRi1iLsKz2aBtoQyQ1fycYymZcGvshWvUk,8609
667
+ helm/proxy/token_counters/test_auto_token_counter.py,sha256=LO3H_NbVeoeaMmEuFNCmhoEWKjWVvxeW5U4yTKfE-84,8590
623
668
  helm/proxy/token_counters/token_counter.py,sha256=TCij1Cp08RoFTLLLdjNPoaeDGHpA1A2hQsrRV775Kf4,425
624
669
  helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
625
- helm/tokenizers/ai21_tokenizer.py,sha256=oXImuAY9kMohHH6Zm7BWysfT88b00NBoSELeGQ920y4,2255
670
+ helm/tokenizers/ai21_tokenizer.py,sha256=CE-u39ZY5Y4XQHONpiPHKK7uvEmySYLBQi2n70OV004,2059
626
671
  helm/tokenizers/aleph_alpha_tokenizer.py,sha256=UlWC_SjObBvexpZ3OfKZT2yjhbSsHlKjQe_oWuRrXno,3818
627
672
  helm/tokenizers/anthropic_tokenizer.py,sha256=d-HO9OEFkhYzFZu0VkOsHjxbqqSUseCNX0KQqgb3s2Q,2114
628
- helm/tokenizers/auto_tokenizer.py,sha256=xKL_rLnjiaCnyH5oJUlo5gfdVSen7PmBFFD60gl9R8A,4217
673
+ helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
629
674
  helm/tokenizers/caching_tokenizer.py,sha256=kSegrCFotRevSDgJsn0g52dWiSUCNa7_EZpRNrELeUE,8163
630
- helm/tokenizers/cohere_tokenizer.py,sha256=-WuvEKHzwqcpnhDPauw7x8wyZ5eVWTZalygx1LkkLnQ,3739
675
+ helm/tokenizers/cohere_tokenizer.py,sha256=6WwHIt7SsICmYR2QQpwDJ7pfNF8VWrFHFxF5Kynq6aY,2116
631
676
  helm/tokenizers/http_model_tokenizer.py,sha256=wBTtDA2UdEYspffa1wqgkT3y3YHoyLXXoucnJ5PGjhs,3109
632
- helm/tokenizers/huggingface_tokenizer.py,sha256=IY9RxJ3YwVKfXtvMXR9DLO4uTaz9j_8hr1MOyA60H7Y,7791
633
- helm/tokenizers/ice_tokenizer.py,sha256=4ZTIRpmt2cqwcxnmrDpCRhiJ0BI3ELE-GHoBuHWgrDA,1200
677
+ helm/tokenizers/huggingface_tokenizer.py,sha256=vmzcbgzMMlwx1x2n0syyp6KuN47nskgoP9yi1BNEGMQ,8696
634
678
  helm/tokenizers/lit_gpt_tokenizer.py,sha256=LMrpaje64UmnDKoYjPG_RQeXVA4xQUwW5t48IJIeLaQ,1660
635
679
  helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
680
+ helm/tokenizers/test_ai21_tokenizer.py,sha256=V8orjdKxmEV44VYoZ9Sq5E7CIq2caNnr6vjdk0T_w1A,1646
636
681
  helm/tokenizers/test_anthropic_tokenizer.py,sha256=_wzXp9FVR2Ml0s2A79TTXbSPHyTRp28i9tiEyQ9S6Ko,3792
637
- helm/tokenizers/test_huggingface_tokenizer.py,sha256=o1oqYT2MS-7xrnffj48WuvJfKAHd4p8pee9W4WxwQb8,6172
638
- helm/tokenizers/test_ice_tokenizer.py,sha256=-xi_f8TBSkAYr5CcA56HDq7rZ9HAGd99J7twNfkLzFU,2619
682
+ helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
683
+ helm/tokenizers/test_huggingface_tokenizer.py,sha256=8tFyZQb4DLg6MdKg13a66bLbp0yf4Ar1fGWM_sYeSjg,6309
639
684
  helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
640
685
  helm/tokenizers/test_yalm_tokenizer.py,sha256=qWpKnUuAlePd6t-UJB_mAiBwtAacnC8caKXLJ_GdTkk,2477
641
686
  helm/tokenizers/tiktoken_tokenizer.py,sha256=FU2g_FF0pVoyspYhHcz3SyCBGNbsTby-nWVrj0Cq4_c,1265
@@ -646,9 +691,9 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
646
691
  helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=W9p5QNn1GSm-y85yVEQe_82zn5CVK_vR6jvhk7JTs_k,869
647
692
  helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
648
693
  helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
649
- crfm_helm-0.5.1.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
650
- crfm_helm-0.5.1.dist-info/METADATA,sha256=dVxnv-vEsYZb3v-ALFNpSdpbxwi5WQG5_I1oD3cMs6Y,19157
651
- crfm_helm-0.5.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
652
- crfm_helm-0.5.1.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
653
- crfm_helm-0.5.1.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
654
- crfm_helm-0.5.1.dist-info/RECORD,,
694
+ crfm_helm-0.5.3.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
695
+ crfm_helm-0.5.3.dist-info/METADATA,sha256=JNa1JuzCQTPbczD-UfRLsa_f8OW7JT1zHQML-ilNh_c,19060
696
+ crfm_helm-0.5.3.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
697
+ crfm_helm-0.5.3.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
698
+ crfm_helm-0.5.3.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
699
+ crfm_helm-0.5.3.dist-info/RECORD,,