crfm-helm 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/METADATA +5 -77
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/RECORD +228 -197
- helm/benchmark/adaptation/adapter_spec.py +5 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
- helm/benchmark/annotation/aci_bench_annotator.py +11 -22
- helm/benchmark/annotation/alrage_annotator.py +90 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
- helm/benchmark/annotation/dischargeme_annotator.py +11 -22
- helm/benchmark/annotation/med_dialog_annotator.py +11 -22
- helm/benchmark/annotation/medalign_annotator.py +11 -22
- helm/benchmark/annotation/medi_qa_annotator.py +11 -22
- helm/benchmark/annotation/medication_qa_annotator.py +11 -22
- helm/benchmark/annotation/mental_health_annotator.py +11 -22
- helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
- helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
- helm/benchmark/annotation/model_as_judge.py +23 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
- helm/benchmark/metrics/air_bench_metrics.py +3157 -1
- helm/benchmark/metrics/alrage_metric.py +35 -0
- helm/benchmark/metrics/basic_metrics.py +267 -2
- helm/benchmark/metrics/classification_metrics.py +19 -1
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
- helm/benchmark/metrics/dry_run_metrics.py +30 -1
- helm/benchmark/metrics/efficiency_metrics.py +74 -0
- helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
- helm/benchmark/metrics/evaluate_reference_metrics.py +299 -0
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
- helm/benchmark/metrics/ifeval_metrics.py +13 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
- helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
- helm/benchmark/metrics/language_modeling_metrics.py +13 -1
- helm/benchmark/metrics/live_qa_metrics.py +13 -1
- helm/benchmark/metrics/llm_jury_metrics.py +13 -1
- helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
- helm/benchmark/metrics/medec_metrics.py +25 -2
- helm/benchmark/metrics/metric.py +25 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
- helm/benchmark/metrics/omni_math_metrics.py +13 -1
- helm/benchmark/metrics/seahelm_metrics.py +14 -1
- helm/benchmark/metrics/summac/model_summac.py +2 -2
- helm/benchmark/metrics/summarization_metrics.py +129 -1
- helm/benchmark/metrics/toxicity_metrics.py +31 -1
- helm/benchmark/metrics/wildbench_metrics.py +21 -1
- helm/benchmark/presentation/schema.py +5 -22
- helm/benchmark/presentation/summarize.py +180 -11
- helm/benchmark/presentation/taxonomy_info.py +20 -0
- helm/benchmark/run_expander.py +4 -0
- helm/benchmark/run_specs/arabic_run_specs.py +134 -16
- helm/benchmark/run_specs/bluex_run_specs.py +1 -1
- helm/benchmark/run_specs/classic_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +2 -2
- helm/benchmark/run_specs/medhelm/__init__.py +0 -0
- helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +360 -50
- helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
- helm/benchmark/scenarios/air_bench_scenario.py +21 -0
- helm/benchmark/scenarios/alrage_scenario.py +54 -0
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
- helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
- helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
- helm/benchmark/scenarios/aratrust_scenario.py +19 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
- helm/benchmark/scenarios/bbq_scenario.py +15 -0
- helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
- helm/benchmark/scenarios/bluex_scenario.py +6 -2
- helm/benchmark/scenarios/bold_scenario.py +15 -0
- helm/benchmark/scenarios/boolq_scenario.py +20 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
- helm/benchmark/scenarios/clear_scenario.py +23 -0
- helm/benchmark/scenarios/cleva_scenario.py +479 -0
- helm/benchmark/scenarios/code_scenario.py +28 -0
- helm/benchmark/scenarios/commonsense_scenario.py +26 -0
- helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
- helm/benchmark/scenarios/copyright_scenario.py +35 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
- helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
- helm/benchmark/scenarios/disinformation_scenario.py +22 -0
- helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
- helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
- helm/benchmark/scenarios/gpqa_scenario.py +18 -0
- helm/benchmark/scenarios/grammar_scenario.py +20 -1
- helm/benchmark/scenarios/gsm_scenario.py +15 -0
- helm/benchmark/scenarios/headqa_scenario.py +22 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
- helm/benchmark/scenarios/ice_scenario.py +21 -1
- helm/benchmark/scenarios/ifeval_scenario.py +18 -0
- helm/benchmark/scenarios/imdb_scenario.py +15 -0
- helm/benchmark/scenarios/koala_scenario.py +21 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
- helm/benchmark/scenarios/legal_support_scenario.py +13 -0
- helm/benchmark/scenarios/legalbench_scenario.py +20 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
- helm/benchmark/scenarios/lextreme_scenario.py +11 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
- helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
- helm/benchmark/scenarios/math_scenario.py +26 -0
- helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
- helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
- helm/benchmark/scenarios/med_qa_scenario.py +14 -0
- helm/benchmark/scenarios/medalign_scenario.py +23 -0
- helm/benchmark/scenarios/medbullets_scenario.py +22 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
- helm/benchmark/scenarios/medec_scenario.py +23 -0
- helm/benchmark/scenarios/medhallu_scenario.py +23 -0
- helm/benchmark/scenarios/medhelm/__init__.py +0 -0
- helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
- helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +23 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
- helm/benchmark/scenarios/mental_health_scenario.py +23 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
- helm/benchmark/scenarios/mmlu_scenario.py +15 -0
- helm/benchmark/scenarios/msmarco_scenario.py +30 -0
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +20 -0
- helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
- helm/benchmark/scenarios/omni_math_scenario.py +18 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
- helm/benchmark/scenarios/quac_scenario.py +14 -0
- helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
- helm/benchmark/scenarios/raft_scenario.py +15 -0
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
- helm/benchmark/scenarios/scenario.py +31 -0
- helm/benchmark/scenarios/seahelm_scenario.py +348 -0
- helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
- helm/benchmark/scenarios/situation_prompts.yaml +49 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
- helm/benchmark/scenarios/summarization_scenario.py +37 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
- helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
- helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
- helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
- helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
- helm/benchmark/scenarios/the_pile_scenario.py +13 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
- helm/benchmark/scenarios/vicuna_scenario.py +21 -1
- helm/benchmark/scenarios/wikifact_scenario.py +20 -0
- helm/benchmark/scenarios/wildbench_scenario.py +18 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +12 -0
- helm/benchmark/static/schema_arabic.yaml +55 -12
- helm/benchmark/static/schema_long_context.yaml +17 -17
- helm/benchmark/static/schema_medhelm.yaml +36 -0
- helm/benchmark/static/schema_slp.yaml +219 -0
- helm/benchmark/static_build/assets/index-671a5e06.js +10 -0
- helm/benchmark/static_build/assets/index-9352595e.css +1 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/clients/audio_language/llama_omni/arguments.py +61 -0
- helm/clients/audio_language/llama_omni/constants.py +9 -0
- helm/clients/audio_language/llama_omni/conversation.py +213 -0
- helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
- helm/clients/audio_language/llama_omni/model/builder.py +88 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
- helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
- helm/clients/audio_language/llama_omni/preprocess.py +295 -0
- helm/clients/audio_language/llama_omni/utils.py +202 -0
- helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
- helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
- helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
- helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
- helm/clients/openai_client.py +31 -19
- helm/clients/openai_responses_client.py +27 -3
- helm/clients/openrouter_client.py +31 -0
- helm/clients/test_openrouter_client.py +69 -0
- helm/clients/together_client.py +48 -11
- helm/clients/vertexai_client.py +8 -2
- helm/config/model_deployments.yaml +75 -1
- helm/config/model_metadata.yaml +70 -2
- helm/config/tokenizer_configs.yaml +19 -1
- helm/proxy/example_queries.py +8 -8
- helm/proxy/server.py +2 -1
- helm/proxy/static/index.css +4 -0
- helm/proxy/static/index.js +7 -1
- helm/benchmark/metrics/aci_bench_metrics.py +0 -14
- helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
- helm/benchmark/metrics/dischargeme_metrics.py +0 -14
- helm/benchmark/metrics/med_dialog_metrics.py +0 -14
- helm/benchmark/metrics/medalign_metrics.py +0 -14
- helm/benchmark/metrics/medi_qa_metrics.py +0 -14
- helm/benchmark/metrics/medication_qa_metrics.py +0 -14
- helm/benchmark/metrics/mental_health_metrics.py +0 -14
- helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
- helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
- helm/benchmark/static_build/assets/index-b9779128.css +0 -1
- helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
crfm_helm-0.5.
|
|
1
|
+
crfm_helm-0.5.8.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
|
|
2
2
|
helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -13,7 +13,7 @@ helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_
|
|
|
13
13
|
helm/benchmark/reeval_run.py,sha256=vImL8JNhveEOftZbRQ6JAxF0L-XCKIwh65M6fIYo4RU,7198
|
|
14
14
|
helm/benchmark/reeval_runner.py,sha256=bJPl7XVOVwK2fUA7voOVQYwVFEOfKVnrT2tbSGQzQY8,15584
|
|
15
15
|
helm/benchmark/run.py,sha256=ZyqkKnqkMqM2AH4HL6sH72H8-mrDWu0NW0piE7BY0HM,13973
|
|
16
|
-
helm/benchmark/run_expander.py,sha256=
|
|
16
|
+
helm/benchmark/run_expander.py,sha256=IMPhg16Yd3diaFRLGYcLCXGO4L_B2WXW69oZP0fx6lE,56857
|
|
17
17
|
helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
|
|
18
18
|
helm/benchmark/run_spec_factory.py,sha256=Hxeft3fXoWNz9yGo-2nIfb5pd3GDWlwYWc6YYvAkTjM,7785
|
|
19
19
|
helm/benchmark/runner.py,sha256=O-91eRRrNgE4_tlCVeLq9_0QsRfNELvaQT-KWtJw894,14618
|
|
@@ -25,7 +25,7 @@ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5
|
|
|
25
25
|
helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
|
|
26
26
|
helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
|
|
27
27
|
helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
-
helm/benchmark/adaptation/adapter_spec.py,sha256=
|
|
28
|
+
helm/benchmark/adaptation/adapter_spec.py,sha256=mfqU5lkvN2UOOUrldgTNq_u8iqRajagvzimyGWQhPQs,6054
|
|
29
29
|
helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
|
|
30
30
|
helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
|
|
31
31
|
helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
|
|
@@ -40,7 +40,7 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=LI7uWpKIHvTUjGiy
|
|
|
40
40
|
helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=8LepCkI5b0MOL70pRPGb7vEH0KFMxIlpCQIVIzQT_vE,15030
|
|
41
41
|
helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=u_GFEgg5wmpate-s5U5aMsmcHuFmreJcA8J0TO1kPCc,14907
|
|
42
42
|
helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=-fY4mvzoGCCoR0HesT_xf2U2m2arVjgDuj59lm07_tg,1923
|
|
43
|
-
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=
|
|
43
|
+
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=nOCuX9lFKb3BHpznhTwpNCO0YsZBNhcMYuFnsLT_u-s,4579
|
|
44
44
|
helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py,sha256=RV6B3i5juBbJCtPDWzSfma49YXeDq3vQAQ5xQwnH-cA,3282
|
|
45
45
|
helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=hhH9ehK092j1WdUwrKYSy5PvNJ73gsIu6-5W8aLoYVI,2190
|
|
46
46
|
helm/benchmark/adaptation/adapters/test_adapter.py,sha256=7Nr6kMK3JN0UjMjjZ6P1fsD5xhOeaqh0D1xI6LFKCos,641
|
|
@@ -51,12 +51,13 @@ helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_T
|
|
|
51
51
|
helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=MvE7YdIt8Y0nefXLskY9gPmXp7QWi2b8cqg8fxUpzbM,1980
|
|
52
52
|
helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=KXP9MzDdmUao3uVjPgZYKjZQ_LvGHgZvI-86o3E87xA,6404
|
|
53
53
|
helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py,sha256=jyL61UxBsIr68hUz-jtjBUnyB2HBp5ESNyECGp_Gf6Q,2129
|
|
54
|
-
helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=
|
|
54
|
+
helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=GP2Fg1kW0-5jCkjgzVkhuN7YBQFyFgQpPTfpSgfbAvk,5178
|
|
55
55
|
helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=mjjyn9p31V-yt6S8BX7SvqvkQ56D9cKSff6d-daM6HM,10250
|
|
56
56
|
helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=6nuz0Vn89A1mOedutsiq2SwTOG3qn8dUZTiaXhKffiw,3587
|
|
57
57
|
helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
helm/benchmark/annotation/aci_bench_annotator.py,sha256=
|
|
58
|
+
helm/benchmark/annotation/aci_bench_annotator.py,sha256=aAzXqbjj_3bv0-ATCrFu4JvrsqORE5lkYpgxtXAEGSA,2777
|
|
59
59
|
helm/benchmark/annotation/air_bench_annotator.py,sha256=CDyHVwD4eoymfLduJC5WvvhDX1DOgYBqgjvqBjoCfU8,3501
|
|
60
|
+
helm/benchmark/annotation/alrage_annotator.py,sha256=3DcHbD8WXTg5PN3feipHTsFls0v5owMyb_rqpNWokls,3531
|
|
60
61
|
helm/benchmark/annotation/annotator.py,sha256=__BkMVpAEpSs1pbwPK5sVWLdCAXnjsHcPYgmOqmNPu0,1843
|
|
61
62
|
helm/benchmark/annotation/annotator_factory.py,sha256=8uo5uz1UpIVCHUd7CRvmy6b9XB1gspdHmgxH5UZMPVI,2335
|
|
62
63
|
helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
|
|
@@ -65,28 +66,28 @@ helm/benchmark/annotation/autobencher_safety_annotator.py,sha256=w_xjZmY1zuLjVvV
|
|
|
65
66
|
helm/benchmark/annotation/bigcodebench_annotator.py,sha256=CJG2pn1DeHJCp3yHETRquNIkCHfd6ZNuOiUjG1cQ_JY,4448
|
|
66
67
|
helm/benchmark/annotation/bird_sql_annotator.py,sha256=FQDZs1-O1jfJOET0eDeU7lf5xLaiMPohC5BdmQ4XkzI,2436
|
|
67
68
|
helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
|
|
68
|
-
helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=
|
|
69
|
+
helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=R6Hexh20T6WBBRBhwLhQv_IQvW7Z55Pf9IYBCWxUTaQ,2517
|
|
69
70
|
helm/benchmark/annotation/czech_bank_qa_annotator.py,sha256=YIH5g4zHe3BQF2Y-6uRVw7g9u_SPBncqBobdvZdIzyA,3096
|
|
70
|
-
helm/benchmark/annotation/dischargeme_annotator.py,sha256=
|
|
71
|
+
helm/benchmark/annotation/dischargeme_annotator.py,sha256=blP76BgwmbHDDDRdaaGwtTHfukCvXXLN72vjGj_LI_U,3225
|
|
71
72
|
helm/benchmark/annotation/ehr_sql_annotator.py,sha256=Izpq0biZ9lkJOPk6NwTuv2wk8Bg88vj56BKZrY8XhT4,4021
|
|
72
73
|
helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
|
|
73
74
|
helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
|
|
74
75
|
helm/benchmark/annotation/helpdesk_call_summarization_annotator.py,sha256=I7TjpN502Sa-Z4uUKemJXSAdOiVA3MMO92YIAAXeDBg,6034
|
|
75
76
|
helm/benchmark/annotation/live_qa_annotator.py,sha256=PSff59mU_t3ypmptYsYRKU3m1vMLF0dMyUySIOxBrPw,3553
|
|
76
|
-
helm/benchmark/annotation/med_dialog_annotator.py,sha256=
|
|
77
|
-
helm/benchmark/annotation/medalign_annotator.py,sha256=
|
|
78
|
-
helm/benchmark/annotation/medi_qa_annotator.py,sha256=
|
|
79
|
-
helm/benchmark/annotation/medication_qa_annotator.py,sha256=
|
|
80
|
-
helm/benchmark/annotation/mental_health_annotator.py,sha256=
|
|
81
|
-
helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=
|
|
82
|
-
helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=
|
|
83
|
-
helm/benchmark/annotation/model_as_judge.py,sha256=
|
|
84
|
-
helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=
|
|
85
|
-
helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=
|
|
77
|
+
helm/benchmark/annotation/med_dialog_annotator.py,sha256=uGp8d74WGgOOiexpoKj5CMdr5jOvAnfe-ZLKGSHT6ng,2711
|
|
78
|
+
helm/benchmark/annotation/medalign_annotator.py,sha256=glAPpVdIfebm39GhrBY3BE2hdofVBIBXUxPU3_qqZOw,2789
|
|
79
|
+
helm/benchmark/annotation/medi_qa_annotator.py,sha256=bLXxXe-obPvud15sPrqp9i-wSq1QqguCPt_UJaXRz_I,2623
|
|
80
|
+
helm/benchmark/annotation/medication_qa_annotator.py,sha256=98XU2VVSoQ8XlAkuVKWnNBOS76X_lIviq_A-nyrlqcw,2639
|
|
81
|
+
helm/benchmark/annotation/mental_health_annotator.py,sha256=08b_XqgfSpIhutDUaaSgVRdiZB6metAQQ_WHF8U2-c0,2824
|
|
82
|
+
helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=a9AHMFY2shV4I2qVUfKnOvZFbmQjL5vPKsbytTBfU0A,2723
|
|
83
|
+
helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=eu9rZhRAXVbo0j7BP7vuAKwGkuwhTCvVRvJ4dPbcR4I,2753
|
|
84
|
+
helm/benchmark/annotation/model_as_judge.py,sha256=eZZlyCrW6U9a8bHhaPrbV1AJ23q3uP0ho1NbVErGBXs,12160
|
|
85
|
+
helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=ZgJVtNpab3BrMs0ZXFW6L0CNp1Hcqfgv7FHP4rpxFPg,2750
|
|
86
|
+
helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=VtHiEGFZLUsd3zkgnSoti5itZnDPgERMPZlORkEp7ok,2865
|
|
86
87
|
helm/benchmark/annotation/omni_math_annotator.py,sha256=PvZZb1oGw60qT-oHRIs93AZbh5wTbpsmD8BforudFhA,6144
|
|
87
88
|
helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
|
|
88
89
|
helm/benchmark/annotation/spider_annotator.py,sha256=B48ylGg5J7xuTSUio7VztdXk3lI6ilMqrUvAD-ve0sE,621
|
|
89
|
-
helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=
|
|
90
|
+
helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=Te9rQhcUV-T2I4oBCBzInAZW65EV3lv0LXLPgGzLd8c,2735
|
|
90
91
|
helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
|
|
91
92
|
helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
|
|
92
93
|
helm/benchmark/annotation/wildbench_annotator.py,sha256=OXR59zdKw9W7v3Q_sFnt1cEPN3nOzQDVqSbh4jDbEUs,5457
|
|
@@ -126,17 +127,16 @@ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8
|
|
|
126
127
|
helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
|
|
127
128
|
helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
|
|
128
129
|
helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
|
-
helm/benchmark/metrics/
|
|
130
|
-
helm/benchmark/metrics/
|
|
130
|
+
helm/benchmark/metrics/air_bench_metrics.py,sha256=WvfjjHLSE567Y7BC8tGlMINBwP-d1URRUZcMUF1yf1g,171277
|
|
131
|
+
helm/benchmark/metrics/alrage_metric.py,sha256=4QHtL00aEIRYQx2QkDs5uldu7ZAkbFYMALH6DL9LSJg,1233
|
|
131
132
|
helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
|
|
132
|
-
helm/benchmark/metrics/basic_metrics.py,sha256=
|
|
133
|
+
helm/benchmark/metrics/basic_metrics.py,sha256=3y1M0mFJL8FlkMkQWWs4ZV2NiriaMGydddbeY3F-vXk,30547
|
|
133
134
|
helm/benchmark/metrics/bbq_metrics.py,sha256=GeZhSSJzqGD0e5EAiRHitIC3XtPICF7rDI6GfeYQc8E,6201
|
|
134
135
|
helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
|
|
135
136
|
helm/benchmark/metrics/bias_word_lists.py,sha256=eyk6we2J4SW8ZaZxQUWLB7Yapn92uM5TCekhFB5vg-U,13908
|
|
136
137
|
helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
|
|
137
138
|
helm/benchmark/metrics/bird_sql_metrics.py,sha256=ooCuXW5nPpRs_-4seCONQmn25DzTbcUgGXznXTK9y0Y,1153
|
|
138
|
-
helm/benchmark/metrics/
|
|
139
|
-
helm/benchmark/metrics/classification_metrics.py,sha256=1Xa_bO4PqIAV2iZitE69kc4VKS4A7PloG5ElZAgvmh8,8851
|
|
139
|
+
helm/benchmark/metrics/classification_metrics.py,sha256=CfkyMiiWo74VbIB7eEhNxIcPbGA_imbzETrAExqn5WM,9498
|
|
140
140
|
helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIefAoi_edUwKpp-XxYbmeU,2740
|
|
141
141
|
helm/benchmark/metrics/cleva_harms_metrics.py,sha256=xVubv2pG3iinVs3namoVHWAmV9oUPywZwFB_0JGhP_w,11277
|
|
142
142
|
helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
|
|
@@ -149,57 +149,47 @@ helm/benchmark/metrics/codeinsights_edge_case_metrics.py,sha256=B7EEELwwH67Vxmgr
|
|
|
149
149
|
helm/benchmark/metrics/codeinsights_metric_specs.py,sha256=BkKWII9yTkChdZVsGeeeCbiWQDYvvcAKo0nxi_RTTUk,1798
|
|
150
150
|
helm/benchmark/metrics/comet_metric.py,sha256=EJWZ9x8CGeDDQlfxYrY-np_NVJBt5gun0XLJvtpjXVI,4798
|
|
151
151
|
helm/benchmark/metrics/common_metric_specs.py,sha256=JKqmO4ovBdfOYKC-00OSzOMv--g9NTCVfUHLaz-1Uns,6025
|
|
152
|
-
helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=
|
|
152
|
+
helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=F2bfg8XbjH3WOQ0O_c5S7UUxgpzu7AD5wRtNdNcJlUs,2997
|
|
153
153
|
helm/benchmark/metrics/copyright_metrics.py,sha256=RYOWKFN97UCD2Vj51gzKGbnnY9wAq6KJgiRt2cecVfs,7824
|
|
154
154
|
helm/benchmark/metrics/czech_bank_qa_metrics.py,sha256=bKoooK2T5v_fFKNbUnsuW6Mv9muAirJD5lTrzuHfpz8,1113
|
|
155
155
|
helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=x66XP0iQGk4ThT7ddmrlLCA0XF4arRbQMDT42LHf2kE,3297
|
|
156
156
|
helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py,sha256=TxTkkWdx6d6ym0MirZTiucl_TWFdn4uJLnlTfLjQvgk,2925
|
|
157
157
|
helm/benchmark/metrics/decodingtrust_privacy_metrics.py,sha256=OU7lka-hm6PubR5Gjj4uNyrqhjlfhe0mmjBCAz9vlRs,3456
|
|
158
158
|
helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py,sha256=bW4zafRyKFa__8fGrdiTPUu848ovNnvakLCfqcMrcHk,6461
|
|
159
|
-
helm/benchmark/metrics/dischargeme_metrics.py,sha256=D8LI52E17hNSPDpEvb2tw1za4QWDE3p9xgx7Nm9l7_Y,454
|
|
160
159
|
helm/benchmark/metrics/disinformation_metrics.py,sha256=5n8wgRBb6FaDjqe1nR3Cj9aS48esmMsIUq4KpBHoQoU,7870
|
|
161
|
-
helm/benchmark/metrics/dry_run_metrics.py,sha256=
|
|
162
|
-
helm/benchmark/metrics/efficiency_metrics.py,sha256=
|
|
163
|
-
helm/benchmark/metrics/ehr_sql_metrics.py,sha256=
|
|
160
|
+
helm/benchmark/metrics/dry_run_metrics.py,sha256=ouS6_8lESuCGSQgegN4xKKyoGr7Rb1K-dufHPT1fDwc,4886
|
|
161
|
+
helm/benchmark/metrics/efficiency_metrics.py,sha256=VnM5PgxxK6UKk9MzPprnN_7d-t6xVlIgFMQYrFh8dwY,15262
|
|
162
|
+
helm/benchmark/metrics/ehr_sql_metrics.py,sha256=yyz-2tsk4Fu6D5ELp3cbLaAWGjqtDGrUdvFvgHvxevg,7418
|
|
164
163
|
helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
|
|
165
|
-
helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=
|
|
164
|
+
helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=T1AUnN1wYFrTBMLyys3AbvlArIenZwCPwHa_F7J9ODg,31476
|
|
166
165
|
helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
|
|
167
166
|
helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
|
|
168
|
-
helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=
|
|
167
|
+
helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=Lkil9DRtO3NS3zr5Ef_qqGxZBL-ObCNpbKoJvMhCrb8,4762
|
|
169
168
|
helm/benchmark/metrics/gpt4_audio_critique_metrics.py,sha256=L9tGFwvl1-Ew3MdInQ7KPa8OlI5YexIB2KuCYVYsuPY,7023
|
|
170
169
|
helm/benchmark/metrics/gpt4_audio_refusal_metrics.py,sha256=vYPRJq-4uNhUWUWMrDkpHmfIBkhEyAgaMNEI6RKPP80,5896
|
|
171
170
|
helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
|
|
172
|
-
helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=
|
|
173
|
-
helm/benchmark/metrics/ifeval_metrics.py,sha256=
|
|
174
|
-
helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=
|
|
175
|
-
helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=
|
|
176
|
-
helm/benchmark/metrics/language_modeling_metrics.py,sha256=
|
|
177
|
-
helm/benchmark/metrics/live_qa_metrics.py,sha256=
|
|
178
|
-
helm/benchmark/metrics/llm_jury_metrics.py,sha256
|
|
171
|
+
helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=5Z43F9ZI9OHBxeZENBGSE4fB1YTo1NKOquPt_Sw-F5s,1835
|
|
172
|
+
helm/benchmark/metrics/ifeval_metrics.py,sha256=33IqTVdYlX9ZI6sR-FfFAKbVJ9tAGDNqZpLHS5yInio,3036
|
|
173
|
+
helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=AK_ZpayimVZ9MxX8CJG-K1uPKo2j1dNJ_H9uSz1CWiY,11612
|
|
174
|
+
helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=rnvVlvFgWwaavaIu9n8iVlODhkk2g3liOiK7kwfGbN8,5474
|
|
175
|
+
helm/benchmark/metrics/language_modeling_metrics.py,sha256=NK8vYLFyFAidDG8UXVkP242zbg_6W6EZ4xZPNbokGlw,5001
|
|
176
|
+
helm/benchmark/metrics/live_qa_metrics.py,sha256=YGodrQ-b9ucQTK3ICKXRla5r26RR0wxC4iPOTcYrV1k,1195
|
|
177
|
+
helm/benchmark/metrics/llm_jury_metrics.py,sha256=-5w8tFG4JE0cMcH3KS7xQ1z6mbdtDf7reCMz6u5vtag,2158
|
|
179
178
|
helm/benchmark/metrics/lmkt_metric_specs.py,sha256=0Fa0xLjQDXwsRCE5VqGzEfb5ZdzKsDoSCwR_zHogFcc,376
|
|
180
179
|
helm/benchmark/metrics/lmkt_metrics.py,sha256=GaZTfl-NQXa1YSzcJUGlZ5wZURH1CnJxGkPFBj8ydTQ,1856
|
|
181
180
|
helm/benchmark/metrics/machine_translation_metrics.py,sha256=22vaGBCSw12uM1wmtDG-MBBZW8OiTZwNPaerjckdtDE,3860
|
|
182
|
-
helm/benchmark/metrics/
|
|
183
|
-
helm/benchmark/metrics/
|
|
184
|
-
helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=9wZgg20-9QBNk0_XhuwR3LT940fqDPkCM4Kl0dPkbAs,5353
|
|
185
|
-
helm/benchmark/metrics/medec_metrics.py,sha256=hNBOGX52G_QOmgTCp9LnIMrmGSRxbb5vgjxKU069TMQ,4152
|
|
186
|
-
helm/benchmark/metrics/medi_qa_metrics.py,sha256=JWAEMuT0UXDZrb7qHn13W6W79ilbprk492V_9vWrB4s,432
|
|
187
|
-
helm/benchmark/metrics/medication_qa_metrics.py,sha256=wit3nKNWpGFfgauu6Xye2IDTePAS0VHAQI_7OO9HR6M,462
|
|
181
|
+
helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=2viECYEj8y65_w5MPH295Z1OgLTNrgP_iMzzYSgc2hQ,5895
|
|
182
|
+
helm/benchmark/metrics/medec_metrics.py,sha256=5z3HKZCEuQsOix-22PPzTHhWlYmjyHOAVFV-bgGUVJE,5137
|
|
188
183
|
helm/benchmark/metrics/melt_bias_metric.py,sha256=mHDCkRGLD-0pyJA_depi_KX3sn7g7Bgd3_m0XdLQahY,11520
|
|
189
184
|
helm/benchmark/metrics/melt_bias_word_lists.py,sha256=xA0araUdszAIOqfxiTi6MIJhKYwr_Gwsc1L9qinZx9U,27891
|
|
190
185
|
helm/benchmark/metrics/melt_metric_specs.py,sha256=zaeV57LQEl8qK7be36NaojiUJlzmkoKY8JyOkOVuPqs,1619
|
|
191
186
|
helm/benchmark/metrics/melt_toxicity_metric.py,sha256=ni6bb_QC51NM5jQpbFYLWtsQy3tNOLwQ_5b3PDV5vVk,4193
|
|
192
|
-
helm/benchmark/metrics/
|
|
193
|
-
helm/benchmark/metrics/metric.py,sha256=jqQyiKDq_pQv-ulGqfZI56ydRDQs3N3XhfHIPysUhrk,14311
|
|
187
|
+
helm/benchmark/metrics/metric.py,sha256=gF7KlWPoPIGUvbvqDeXagBNBZnl8rclh8JfgCPvuXvs,15065
|
|
194
188
|
helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
|
|
195
189
|
helm/benchmark/metrics/metric_service.py,sha256=bJaM7GisEgSWR3vPTcg7b67XF9X2K5viODacIgbGb24,1692
|
|
196
|
-
helm/benchmark/metrics/
|
|
197
|
-
helm/benchmark/metrics/mimic_rrs_metrics.py,sha256=x3vSj1VG1UkNF3gbgJYDeA4z-crxfGIkK7iZo0xjq8c,442
|
|
198
|
-
helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=Pu9efXoBrhsvxSeGHqwbUA5k365-pJTeXpMNhmcg0L0,3927
|
|
199
|
-
helm/benchmark/metrics/mtsamples_procedures_metrics.py,sha256=XrddVk-gnc8jF8amCI1RBa_XTS9yEXD2Y9Ld9W7Q-m8,497
|
|
200
|
-
helm/benchmark/metrics/mtsamples_replicate_metrics.py,sha256=rmH34aTX_wZWxLi4jrxf3sR1RIqNRF0QDANLRQUGhqM,492
|
|
190
|
+
helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=3kypTnrkbdG-Dpdbg_A_WQYVx35ylvZFjh2-R5wvhSE,5347
|
|
201
191
|
helm/benchmark/metrics/nltk_helper.py,sha256=QMEps-lqJZ_pCgvjlMf4BvC0pzDu3ez5jit5F4p8dAk,1313
|
|
202
|
-
helm/benchmark/metrics/omni_math_metrics.py,sha256=
|
|
192
|
+
helm/benchmark/metrics/omni_math_metrics.py,sha256=WF0cWpmJwduTdZw7c_O5QsXDNwet5GgHYV0Ww9PfKc8,1709
|
|
203
193
|
helm/benchmark/metrics/openai_mrcr_metrics.py,sha256=TAop7G50FKaR-Jyo2EGLqmMOfJRmS2vNRDFiifa6mhg,2313
|
|
204
194
|
helm/benchmark/metrics/output_processing_metric.py,sha256=ey9UBi2f3780OwFlp82ymzfjLR3MA2fpA9vW5R4W5TA,2581
|
|
205
195
|
helm/benchmark/metrics/output_processors.py,sha256=ULZlDBOf6NupAXzDKBKyTDdgPZ5PSxOAlOYTbrQEek8,472
|
|
@@ -210,23 +200,22 @@ helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9
|
|
|
210
200
|
helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
|
|
211
201
|
helm/benchmark/metrics/ruler_qa_metrics.py,sha256=OuiA0ksByl0Tw1Oal7zbedhKjTrhJgQJDLXAgoTLXuc,1473
|
|
212
202
|
helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
|
|
213
|
-
helm/benchmark/metrics/seahelm_metrics.py,sha256=
|
|
203
|
+
helm/benchmark/metrics/seahelm_metrics.py,sha256=GlNoK1O7kcuiuEOJEgTsnrfK9TcGwH7-tPj6Qe6JV90,7493
|
|
214
204
|
helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
|
|
215
205
|
helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
|
|
216
|
-
helm/benchmark/metrics/starr_patient_instructions_metrics.py,sha256=YHdTeIFdZxRbvqBnlWpAyIsWzZyWAjjDFuKOXhHYiSM,525
|
|
217
206
|
helm/benchmark/metrics/statistic.py,sha256=ATuOm0jU3L-0ELiZaF2GVMNF22W66-rMvzxRtlfqcII,3446
|
|
218
207
|
helm/benchmark/metrics/summarization_critique_metrics.py,sha256=-mki8-zvZx54dQg8X0BG2Y6wmfypQhkIuD_9ZjNBl78,4782
|
|
219
|
-
helm/benchmark/metrics/summarization_metrics.py,sha256=
|
|
208
|
+
helm/benchmark/metrics/summarization_metrics.py,sha256=S99uhtvBtH0UQS-gDEuQLLTPYNG-dNUV1n3OnaOP7p8,22647
|
|
220
209
|
helm/benchmark/metrics/test_bias_metrics.py,sha256=qEZsCULvwjVdIyfNgJSc2L7Xp9suKKW7L5OuQmGrwZ8,6393
|
|
221
210
|
helm/benchmark/metrics/test_classification_metrics.py,sha256=CRDMGmVmzEUnNaM0C02qUTOU2AS11Mt2-GdEl89y7lw,9541
|
|
222
211
|
helm/benchmark/metrics/test_disinformation_metrics.py,sha256=U3ZmS9s33oimTQbKO-7pgWeX_WiDB9chlOCtf_vslXw,2249
|
|
223
212
|
helm/benchmark/metrics/test_evaluate_reference_metrics.py,sha256=B7xtDDWPAxF7d-vcUx_R51hFMae-DD52nUwbu_eWt6Y,1601
|
|
224
213
|
helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLpwut37DmA,771
|
|
225
214
|
helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
|
|
226
|
-
helm/benchmark/metrics/toxicity_metrics.py,sha256=
|
|
215
|
+
helm/benchmark/metrics/toxicity_metrics.py,sha256=s5Ypodu4cBmIc_fCbbQ9kCqcvVJf-OQ6zAvb85r8Cv8,5509
|
|
227
216
|
helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
|
|
228
217
|
helm/benchmark/metrics/unitxt_metrics.py,sha256=8fawxnrg0xsAe0xO2wbL7S_yisj8RzJnrn6xtk8C6q8,4852
|
|
229
|
-
helm/benchmark/metrics/wildbench_metrics.py,sha256=
|
|
218
|
+
helm/benchmark/metrics/wildbench_metrics.py,sha256=THOguxE6GUun0zTr-pITXfQGEd664sScrfIzFGdNPXk,2163
|
|
230
219
|
helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
231
220
|
helm/benchmark/metrics/ifeval/instructions.py,sha256=qNoa1vMPDNz6ORWfyMv_efwKZ4U5zkI-cf4aApyfSqU,53247
|
|
232
221
|
helm/benchmark/metrics/ifeval/instructions_registry.py,sha256=NprvkRQz0QWaIpJsFp95CQCWsnuY_57ZSqFn2IISDP8,7555
|
|
@@ -267,7 +256,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
|
|
|
267
256
|
helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=Ir4u8blJWTRtEBogb6u22qCy3JXAIzvx-Th6dSBLfdw,698
|
|
268
257
|
helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
|
|
269
258
|
helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
|
-
helm/benchmark/metrics/summac/model_summac.py,sha256=
|
|
259
|
+
helm/benchmark/metrics/summac/model_summac.py,sha256=zheAPIJAz5MH6GU1gXpWSc9Q9gouhNzYx92PDd5PUXU,17447
|
|
271
260
|
helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
|
|
272
261
|
helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
273
262
|
helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=XDZGK8h84F2w_pK8Zjko8ssKZmVxKFqTOuHL0mLBzMY,694
|
|
@@ -288,9 +277,10 @@ helm/benchmark/presentation/contamination.py,sha256=07IuIP92vfuI0GwfeNC-i_NZUlF8
|
|
|
288
277
|
helm/benchmark/presentation/create_plots.py,sha256=bM6UNzH0Bx8Bv2iKcyMoYp7IwfCZSQob-w_XOOI6r1M,29090
|
|
289
278
|
helm/benchmark/presentation/run_display.py,sha256=LmY2HES4dU94kRYuUxt-c9LTMDN6MU5CspWTF6rZwDo,12419
|
|
290
279
|
helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
|
|
291
|
-
helm/benchmark/presentation/schema.py,sha256=
|
|
292
|
-
helm/benchmark/presentation/summarize.py,sha256=
|
|
280
|
+
helm/benchmark/presentation/schema.py,sha256=AMGmEwqxkHoZFkOKD-UVZ8aXwgbafG6KYASsWo6YEw8,11005
|
|
281
|
+
helm/benchmark/presentation/summarize.py,sha256=m3RSw6ogUFasdeZ8xSUh4wKV-nYzVi3iQv-KrrwtDFM,67828
|
|
293
282
|
helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
|
|
283
|
+
helm/benchmark/presentation/taxonomy_info.py,sha256=pPIFOicis9H1sWeXApfsHHcqZpus1ezukxLQO7Lj2Vg,473
|
|
294
284
|
helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
|
|
295
285
|
helm/benchmark/presentation/test_create_plots.py,sha256=1FrJZnPW-5QUQKt_pf4y47uDha4B8wHyY1o5hqhKWhc,1293
|
|
296
286
|
helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
|
|
@@ -299,12 +289,12 @@ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9i
|
|
|
299
289
|
helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
|
|
300
290
|
helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
301
291
|
helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
|
|
302
|
-
helm/benchmark/run_specs/arabic_run_specs.py,sha256=
|
|
292
|
+
helm/benchmark/run_specs/arabic_run_specs.py,sha256=fPAI9GCV_D0BHPcLGSNZN45sAO2d449Gb54iHW1nocc,7399
|
|
303
293
|
helm/benchmark/run_specs/audio_run_specs.py,sha256=baJz5LZiwWZP3KD0hluKgpidtswzdorQnshX0CoqKAc,23383
|
|
304
|
-
helm/benchmark/run_specs/bluex_run_specs.py,sha256=
|
|
294
|
+
helm/benchmark/run_specs/bluex_run_specs.py,sha256=jwrH33YeXqoAex11071XMUwTCKNkoJTQQS7iNoJDLmg,1797
|
|
305
295
|
helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
|
|
306
296
|
helm/benchmark/run_specs/capabilities_run_specs.py,sha256=sbqhIj4AoujV45erwoVK61lWdlkjg4qssmGlu0eSr1U,12067
|
|
307
|
-
helm/benchmark/run_specs/classic_run_specs.py,sha256=
|
|
297
|
+
helm/benchmark/run_specs/classic_run_specs.py,sha256=fe98HhzMkfloKpOZbi_mIMp1Hi-clv22rgWT-EdS0e4,53743
|
|
308
298
|
helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
|
|
309
299
|
helm/benchmark/run_specs/codeinsights_run_specs.py,sha256=lz3yysrPjCIiObzrIkRjJsWzkABh9qIXn-o7FSqZPl0,9207
|
|
310
300
|
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
|
|
@@ -318,8 +308,8 @@ helm/benchmark/run_specs/imdb_ptbr_run_specs.py,sha256=nkW5A_xeD5kCKeJVxsL8RFS8r
|
|
|
318
308
|
helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
|
|
319
309
|
helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
|
|
320
310
|
helm/benchmark/run_specs/lmkt_run_specs.py,sha256=tNZvlA4mXUX-NBC9enRR90qFLeh8SNGFq701rXmXc18,5376
|
|
321
|
-
helm/benchmark/run_specs/long_context_run_specs.py,sha256=
|
|
322
|
-
helm/benchmark/run_specs/medhelm_run_specs.py,sha256
|
|
311
|
+
helm/benchmark/run_specs/long_context_run_specs.py,sha256=wn7yY9rMIBJY30SN-275qg9U49aGPUl4hVZphKYFkBI,6442
|
|
312
|
+
helm/benchmark/run_specs/medhelm_run_specs.py,sha256=bi7sGIx5I7KQXAF_Uj6n_O_DFNgtc496unrVh7UuLcQ,53256
|
|
323
313
|
helm/benchmark/run_specs/melt_run_specs.py,sha256=729MkALud2wG07yulx9zqAzejdXW_eVGkfF5cQWeGGY,32031
|
|
324
314
|
helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
|
|
325
315
|
helm/benchmark/run_specs/multilingual_run_specs.py,sha256=umf8e6ZDgRXiU0G_BPoovj1UZ_dxyrXtIQ7i9WC6USg,2296
|
|
@@ -333,35 +323,40 @@ helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak
|
|
|
333
323
|
helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
|
|
334
324
|
helm/benchmark/run_specs/vlm_run_specs.py,sha256=v-eWuDYc8u5HO46isLONPfAWv5zdA1ZOQrdyOvX3vlU,37512
|
|
335
325
|
helm/benchmark/run_specs/winogrande_afr_run_specs.py,sha256=dhOm8z6Q_ZpnzYKrsS0nEbRQPWs_phkXxmL5pxCJzQA,1853
|
|
326
|
+
helm/benchmark/run_specs/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
327
|
+
helm/benchmark/run_specs/medhelm/benchmark_config.py,sha256=O1D5N4q1QwzrI1ioAQK815cch6hNoJoaIzzAlJo6GXk,7860
|
|
336
328
|
helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
337
|
-
helm/benchmark/scenarios/aci_bench_scenario.py,sha256=
|
|
338
|
-
helm/benchmark/scenarios/air_bench_scenario.py,sha256=
|
|
329
|
+
helm/benchmark/scenarios/aci_bench_scenario.py,sha256=ry22AJdd3lvQuEFdzNf6wXzMyPFn46b0kScrYdpj-nA,6783
|
|
330
|
+
helm/benchmark/scenarios/air_bench_scenario.py,sha256=Ufcpxm5KaXHI2FfK4tdQsURaCSdcWNcXVaNmYkE4bo4,2820
|
|
339
331
|
helm/benchmark/scenarios/alghafa_scenario.py,sha256=FJXO3W6qYzCgLJMSiJEhpddNcFyR3N5Brh8pATW_9GM,5217
|
|
340
|
-
helm/benchmark/scenarios/
|
|
332
|
+
helm/benchmark/scenarios/alrage_scenario.py,sha256=MN-gMQboAaJCasYNg_rLJVgcrk5KZ1WCBN9R_lyRrhE,1499
|
|
333
|
+
helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=EzS8td1lJE1yxEwFtuwTbjHtHm1hGIaur93BKAL_Hm4,6212
|
|
341
334
|
helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=_OWE33eVRaZI0gmfP7bLd572uOi_6jb39z_J6nkcvfg,3182
|
|
342
|
-
helm/benchmark/scenarios/
|
|
343
|
-
helm/benchmark/scenarios/
|
|
335
|
+
helm/benchmark/scenarios/arabic_exams_scenario.py,sha256=hv28A2pM66ejrO6oFOgmCx3JIP_nqwdUYvIsfGc0Kew,5359
|
|
336
|
+
helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=xMRWPA16Wn8ONgAeyyHOB95X2SQca7tKUpUP8L5ZNJc,3018
|
|
337
|
+
helm/benchmark/scenarios/aratrust_scenario.py,sha256=ismiWLm1M6JmBgVZ0SoVglaOyFbAlyOHsSsiAv8Np8Y,3125
|
|
344
338
|
helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=fOCHumFWZa4OJZcTZefJiJbdWsb3zjQnWLJYd10Cctw,2496
|
|
345
339
|
helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
|
|
346
|
-
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=
|
|
340
|
+
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=CAmh3GfFjB9Xsuh9K-PUu-2xIFTV0v0YNgWbSuv09Y0,5711
|
|
347
341
|
helm/benchmark/scenarios/banking77_scenario.py,sha256=dtiM-Q_pMDWqkLi-hgl0tH-aGuDdgHkXgweE1JqrPYs,1883
|
|
348
|
-
helm/benchmark/scenarios/bbq_scenario.py,sha256=
|
|
342
|
+
helm/benchmark/scenarios/bbq_scenario.py,sha256=mVfxztgLI9sFwOYntx0dxElm8RmOb7XQYS9DOfgYjkI,10360
|
|
343
|
+
helm/benchmark/scenarios/best_chatgpt_prompts.yaml,sha256=KZdXj4KUbkwFzgIEXVakMpZLTqJ7rldxNuXVDIdlk-A,31304
|
|
349
344
|
helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
|
|
350
345
|
helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
|
|
351
346
|
helm/benchmark/scenarios/bird_sql_scenario.py,sha256=n5elzanKEX9YclAl2l1y33aCjihTmaw1VF_ZsAU5IaM,3613
|
|
352
347
|
helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
|
|
353
348
|
helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
|
|
354
|
-
helm/benchmark/scenarios/bluex_scenario.py,sha256=
|
|
355
|
-
helm/benchmark/scenarios/bold_scenario.py,sha256=
|
|
356
|
-
helm/benchmark/scenarios/boolq_scenario.py,sha256=
|
|
349
|
+
helm/benchmark/scenarios/bluex_scenario.py,sha256=K4ob5_rd1hTOzlPJjuEvujcOdt_Ybgxj3jqj_BYjA9o,2599
|
|
350
|
+
helm/benchmark/scenarios/bold_scenario.py,sha256=MsXwUiJgZgFyVxh-E5gAagi4aPGicDe2C0xct5lQYwA,4882
|
|
351
|
+
helm/benchmark/scenarios/boolq_scenario.py,sha256=qQyJ0BdljChX9U_eEETdFyWLCSQvI0D4NrY6zOCXPh8,8824
|
|
357
352
|
helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
|
|
358
353
|
helm/benchmark/scenarios/casehold_scenario.py,sha256=QSe0D3KQJhlTOo6kM9OHwdKy6NlclsFGRVCAB3mTG7s,3174
|
|
359
|
-
helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=
|
|
354
|
+
helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=PE4vbj0y39674UIIdH6mgUwSKe4wW_XqRrNsksrwQRs,5104
|
|
360
355
|
helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
|
|
361
|
-
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=
|
|
362
|
-
helm/benchmark/scenarios/clear_scenario.py,sha256=
|
|
363
|
-
helm/benchmark/scenarios/cleva_scenario.py,sha256=
|
|
364
|
-
helm/benchmark/scenarios/code_scenario.py,sha256=
|
|
356
|
+
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=N1ZmQyKXkRjRXKPTyEHOpbDhBkjcY8WyHPKMWaBl2qo,5481
|
|
357
|
+
helm/benchmark/scenarios/clear_scenario.py,sha256=cLFlcWKUT1Uy6bYDnAjf1ySR06mK16NhN1AtsaEBZs0,7226
|
|
358
|
+
helm/benchmark/scenarios/cleva_scenario.py,sha256=WQDiDCVo6bhtI926_p3uvr1WhIAkBU1gLNLA5viEwMw,78127
|
|
359
|
+
helm/benchmark/scenarios/code_scenario.py,sha256=tdki0m59NzN4YOm1pMfaSkUP5uUDeTNMqUAB84p5QGI,13953
|
|
365
360
|
helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py,sha256=KC-5MQ-d8Nn46aDN4FaPxmd6yk1DtVUmVR-CIZsNCp4,1738
|
|
366
361
|
helm/benchmark/scenarios/code_scenario_helper.py,sha256=TnXAlY-wdAFwIDylFItf0z7HOu93WD6dNThwzZYe330,5904
|
|
367
362
|
helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py,sha256=PK4wtuBXs4cPPwOoGfhBA4J4cGLQYC_MvRWuvWrkrv8,9068
|
|
@@ -369,84 +364,88 @@ helm/benchmark/scenarios/codeinsights_correct_code_scenario.py,sha256=7BpcezugYH
|
|
|
369
364
|
helm/benchmark/scenarios/codeinsights_edge_case_scenario.py,sha256=csTwe-mv1f6Tyvnj9uZ0SYuj1GRVvgjzukV28gIhNpk,8703
|
|
370
365
|
helm/benchmark/scenarios/codeinsights_student_coding_scenario.py,sha256=wc5Fefn4jpCw03dQ6WswCztJ8AO5j0Vrn6omcOVUq2k,7409
|
|
371
366
|
helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py,sha256=qX3yckZdMojYhiwvokvEuQpRXOzmN2zmzKjQb96Ljg8,9651
|
|
372
|
-
helm/benchmark/scenarios/commonsense_scenario.py,sha256=
|
|
373
|
-
helm/benchmark/scenarios/
|
|
374
|
-
helm/benchmark/scenarios/
|
|
367
|
+
helm/benchmark/scenarios/commonsense_scenario.py,sha256=VN6nNZZpz9a1IC-tW5MvqztaW71f2zsV8lq-A34p3iE,10696
|
|
368
|
+
helm/benchmark/scenarios/compositional_instructions.yaml,sha256=mPsFzPU6uaAD0xghzv-QD5Wk4uhoLY2sF3Fw_lunAsI,1822
|
|
369
|
+
helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=sR3UzObloLUzgjNwTbSHLGGkeA0g9-Aq_utpBPT2u_4,4757
|
|
370
|
+
helm/benchmark/scenarios/copyright_scenario.py,sha256=GWRCJdLlnWZcz6ztB5XIASGMPNd2o8EZNR2GueP8xuc,5035
|
|
375
371
|
helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
|
|
376
|
-
helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=
|
|
372
|
+
helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=FM6ty-JSFTDqdKLzfwgfhl3zV2oh_DWjRw4qI4-IrI0,11169
|
|
377
373
|
helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
|
|
378
374
|
helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=ZBfkUYlIa-BagRVBf97RoyLfEloAjnM0RPv5wmEWueQ,4406
|
|
379
|
-
helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=
|
|
380
|
-
helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=
|
|
381
|
-
helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=
|
|
382
|
-
helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=
|
|
383
|
-
helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=
|
|
384
|
-
helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=
|
|
385
|
-
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=
|
|
386
|
-
helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=
|
|
375
|
+
helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=pZK3dbKKNfNOHvNaGMkN9pjFznu4raNyLe4fWkxNHSo,8604
|
|
376
|
+
helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=hBKRRYIHegOrhIo_i7-1RPtbxmuhXcg29DkUIep0x_o,6304
|
|
377
|
+
helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=KzBz8nkrvPUTw5WmEoivtl0lLJ-mORek-IVKYmct2Pk,3460
|
|
378
|
+
helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=OvJ3pfxbxtJRxeSfeK-uoYFZ4ZIDSqE7ZbqZBuO93DE,14743
|
|
379
|
+
helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=zWhQWEE9Aa1O9ASLE5IAw55lzNLJ1ifGsBKZFk-jiXM,8942
|
|
380
|
+
helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=XO--1Rxb6kyLDRUQw-GhzLG-aTagVyN7ktWriAbBTAE,20756
|
|
381
|
+
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=vIkAgy4LysSSIm553bnts3CEN6NVIDKr3xeGkZ2GNyk,3520
|
|
382
|
+
helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=5l0lRRNNJ8nAb1R4bMxq3lakMF-P3XFvVpnT1PrwMms,3556
|
|
387
383
|
helm/benchmark/scenarios/dialogue_scenarios.py,sha256=yXCMZegzlgL0CXTY1W5lXdkFFHicUvq4z7_284MfRpw,5778
|
|
388
|
-
helm/benchmark/scenarios/dischargeme_scenario.py,sha256=
|
|
389
|
-
helm/benchmark/scenarios/disinformation_scenario.py,sha256=
|
|
390
|
-
helm/benchmark/scenarios/dyck_language_scenario.py,sha256=
|
|
384
|
+
helm/benchmark/scenarios/dischargeme_scenario.py,sha256=WTlqFnM76DFVGOUSLWv-g--vHWR71UWZ9VFXoEec3fo,9026
|
|
385
|
+
helm/benchmark/scenarios/disinformation_scenario.py,sha256=lq9Aj-DDpPJeFVk99wXEd2Qv3kahiBe9c8-RoBieCDM,9581
|
|
386
|
+
helm/benchmark/scenarios/dyck_language_scenario.py,sha256=HZEXetj5BkXrNJbAvg9HidrkxDgi2UUGIAVphNiN-jg,10052
|
|
391
387
|
helm/benchmark/scenarios/echr_judgment_classification_scenario.py,sha256=IqODoUY1-zJD1KW4Qkg3VwJcUeeLgGUKThr62bW-wx8,4915
|
|
392
388
|
helm/benchmark/scenarios/ehr_sql_scenario.py,sha256=Gm7Kw_TSUUxHW8ns-2e4E_tTBVX7h6Ta273VOpkMCQ8,5480
|
|
393
|
-
helm/benchmark/scenarios/ehrshot_scenario.py,sha256=
|
|
394
|
-
helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=
|
|
395
|
-
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=
|
|
396
|
-
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=
|
|
389
|
+
helm/benchmark/scenarios/ehrshot_scenario.py,sha256=OzZrgi-UZrMH70ZnHSeUWPCOesUue5vxPqnNOaN45dE,68830
|
|
390
|
+
helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=gceJqjxX-RxvOqPDANEwOrbHwKxtddpMz-FcsBfby0k,2854
|
|
391
|
+
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=03Ju45Sju2r4A_Peq2EsOyg5Ik99lMUv-6X--ejB9fk,7332
|
|
392
|
+
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=83F017FPFED_106IOawJN1jdY6IfREGJPNRvCokKGNk,7761
|
|
397
393
|
helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
|
|
398
394
|
helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
|
|
399
395
|
helm/benchmark/scenarios/exams_multilingual_scenario.py,sha256=c9zMGGL8EbCeNogTm-88g_5wWUiX1Zr7z_tsyjUq2h0,5404
|
|
400
396
|
helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
|
|
401
397
|
helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
|
|
402
|
-
helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=
|
|
403
|
-
helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256
|
|
404
|
-
helm/benchmark/scenarios/gpqa_scenario.py,sha256=
|
|
398
|
+
helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=I7eoymZfxu4gky3YjyLnZgaFIJcMkprxQxiCLM7wJV8,5455
|
|
399
|
+
helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=Qw8OJzvp12716GRW5kIxxX--f92OFRcaP0oEy-gakjM,5674
|
|
400
|
+
helm/benchmark/scenarios/gpqa_scenario.py,sha256=MsMsBqgxz6jKt2-ys98XAslGWkxZgzpYOws0b9e4Uj8,3520
|
|
405
401
|
helm/benchmark/scenarios/grammar.py,sha256=58tQYKPj013V9jIpW7fXUqZBLuboqEi_WLlDjx74spM,5590
|
|
406
|
-
helm/benchmark/scenarios/grammar_scenario.py,sha256=
|
|
407
|
-
helm/benchmark/scenarios/gsm_scenario.py,sha256=
|
|
402
|
+
helm/benchmark/scenarios/grammar_scenario.py,sha256=c3ATPkHM0WkA9QZEf2VNfThhuEUXD448uOuW6CAeVFw,2309
|
|
403
|
+
helm/benchmark/scenarios/gsm_scenario.py,sha256=S_rD8uZsajgqyaJGNMpqYvshYYIW9hMV9N2udbI1Ax8,3405
|
|
408
404
|
helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=8_ShEuOoEGu7iRE2b0tgi-cfBrCPF9k1L-Pgb__n3Bg,2005
|
|
409
405
|
helm/benchmark/scenarios/harm_bench_scenario.py,sha256=CBo_AfbtHTlvJdsiquP0EDTKApVmDZc7EW0VTENNAfQ,2478
|
|
410
|
-
helm/benchmark/scenarios/headqa_scenario.py,sha256=
|
|
406
|
+
helm/benchmark/scenarios/headqa_scenario.py,sha256=0hJewHkF9IKQfW6NUJ0DPjlwQmr7N90a2eSXrBQiFNA,6635
|
|
411
407
|
helm/benchmark/scenarios/healthqa_br_scenario.py,sha256=YneXTfp8V6k8rYCF3BTX6bxN2ASxdG3qrBr7uH_IFWc,3406
|
|
412
|
-
helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=
|
|
413
|
-
helm/benchmark/scenarios/ice_scenario.py,sha256=
|
|
408
|
+
helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=5R9En7lTNirZCVsMNqNB2metw0dIEPa9usoFB9W11B4,1855
|
|
409
|
+
helm/benchmark/scenarios/ice_scenario.py,sha256=tEkXqRtvtXaoC6JfbJOcY0E8xWyYKGMOvsSYJGjM_9Q,17674
|
|
414
410
|
helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
|
|
415
|
-
helm/benchmark/scenarios/ifeval_scenario.py,sha256=
|
|
411
|
+
helm/benchmark/scenarios/ifeval_scenario.py,sha256=v2Q1uYCd5i1jO4_gcIlTrbZdPZ27tJrCXi9e0sqcm8s,2308
|
|
416
412
|
helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
|
|
417
|
-
helm/benchmark/scenarios/imdb_scenario.py,sha256=
|
|
413
|
+
helm/benchmark/scenarios/imdb_scenario.py,sha256=H9iHmKK-q4a5edSMcS166f1fjkNbOS5BEIgR3md3k7M,6887
|
|
418
414
|
helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
|
|
419
415
|
helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=JRTLaQc3PDpYeX9ewGnBteT9jXeaGbmJ1VzYGT8TsXI,3067
|
|
420
416
|
helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py,sha256=5fJHFonb7Ko7exHFtoUtvHar_7PhK2HjW9uDlU8Ljj0,2872
|
|
421
417
|
helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=6z3VlcucrwK2B30artWiSpo-mOTr9tiwYV6Fu8XD0VY,2657
|
|
422
418
|
helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
|
|
423
|
-
helm/benchmark/scenarios/koala_scenario.py,sha256=
|
|
424
|
-
helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=
|
|
425
|
-
helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=
|
|
419
|
+
helm/benchmark/scenarios/koala_scenario.py,sha256=h-dTHQrNVoi7p7sTXZDqWcpjlznfUgxNrgr4nW8Hrk0,2212
|
|
420
|
+
helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=DE8efUmcPW5R62tZ46Rdsjv-EQs4lXm403O5XxM9heQ,7303
|
|
421
|
+
helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=JTm4Zkwqed7PijdeHzSbQ2l4YZY037OYF_fbnKmlpKg,6185
|
|
426
422
|
helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
|
|
427
|
-
helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=
|
|
428
|
-
helm/benchmark/scenarios/legal_support_scenario.py,sha256=
|
|
429
|
-
helm/benchmark/scenarios/legalbench_scenario.py,sha256=
|
|
430
|
-
helm/benchmark/scenarios/lex_glue_scenario.py,sha256
|
|
431
|
-
helm/benchmark/scenarios/lextreme_scenario.py,sha256=
|
|
423
|
+
helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=0DraJdQebbl8tv7S3WmLos98wnQFGJOzY6suGPoxR40,10954
|
|
424
|
+
helm/benchmark/scenarios/legal_support_scenario.py,sha256=cM98PnIAfjQzciUYGtgHqHYnWIdbdJfh3uy6uEIo488,4567
|
|
425
|
+
helm/benchmark/scenarios/legalbench_scenario.py,sha256=K_KjDH3Rk1AM6pXLRedo-6o2rEw9OIk3porlCr3IGvQ,5623
|
|
426
|
+
helm/benchmark/scenarios/lex_glue_scenario.py,sha256=H7f3F7gK7bgf6FXvqXGTQrecTE6RtZaitIKmwQLksck,10736
|
|
427
|
+
helm/benchmark/scenarios/lextreme_scenario.py,sha256=dR5UUIymth3J3RInoNybygZg0rNZ-8wwzVHneuTTOGE,20843
|
|
432
428
|
helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
|
|
433
429
|
helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNySpna7aTY7K7RPD2x4,9109
|
|
434
430
|
helm/benchmark/scenarios/lmkt_scenarios.py,sha256=K51CdOZqMOMOozUmADjrJuNCpUtXVEZwcOeIY-EZrwM,11162
|
|
435
|
-
helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=
|
|
436
|
-
helm/benchmark/scenarios/
|
|
431
|
+
helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=ZtheFEcsBMSqGIPw5UPOO_b3v93mPFar1yqxVnsLq4E,6785
|
|
432
|
+
helm/benchmark/scenarios/madinah_qa_scenario.py,sha256=W7YEQTHyNWUJD8sKFmXU9e-ubzvleWQs7Cj_1zdq2bk,2482
|
|
433
|
+
helm/benchmark/scenarios/math_scenario.py,sha256=p9tsdNsiYFtuG89cMByZYn60QjWzEsnCO21OHPr4DJo,16034
|
|
434
|
+
helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py,sha256=Gtc9DgV2bLPIDngROmizTWQHbTftnwVodi9CYT0_P2A,2146
|
|
437
435
|
helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
|
|
438
|
-
helm/benchmark/scenarios/med_dialog_scenario.py,sha256=
|
|
439
|
-
helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=
|
|
436
|
+
helm/benchmark/scenarios/med_dialog_scenario.py,sha256=MKDlZLJEUq1nDRzlkHlpTWOxHwgghWMXcQvHJcM2LP0,8615
|
|
437
|
+
helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=tvF6d6e4WQi_mUIlZoLQvbOpVIfHR4nyMVVR8z4AkAE,5752
|
|
440
438
|
helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
|
|
441
|
-
helm/benchmark/scenarios/med_qa_scenario.py,sha256=
|
|
442
|
-
helm/benchmark/scenarios/medalign_scenario.py,sha256=
|
|
439
|
+
helm/benchmark/scenarios/med_qa_scenario.py,sha256=uW8FOEQhMw6k0WF_LKlH0oFTQVS9D_9MHXvVTNwDC7k,5140
|
|
440
|
+
helm/benchmark/scenarios/medalign_scenario.py,sha256=5ALak5Hq2XQbqwTF3fQYKg-QPtL_vjY7J1UsMm9SOFk,4481
|
|
443
441
|
helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=fKXJFVLGnLcZKRBLsbjJA6YA4WqMaQAjkEU-i6YzSTQ,11626
|
|
444
|
-
helm/benchmark/scenarios/medbullets_scenario.py,sha256=
|
|
445
|
-
helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=
|
|
446
|
-
helm/benchmark/scenarios/medec_scenario.py,sha256=
|
|
447
|
-
helm/benchmark/scenarios/medhallu_scenario.py,sha256=
|
|
448
|
-
helm/benchmark/scenarios/
|
|
449
|
-
helm/benchmark/scenarios/
|
|
442
|
+
helm/benchmark/scenarios/medbullets_scenario.py,sha256=oMqnF3Ri9dghEWpGQYzfcTnYGMK5b2cJNVpJoqdtdUo,7694
|
|
443
|
+
helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=EDeeBKmbosUaMo3dg2MNVs_Cb_ws6WfnBYk15_B3lkU,6608
|
|
444
|
+
helm/benchmark/scenarios/medec_scenario.py,sha256=sLx6tcFXcvhDIThGNVi-425znECAn5pkUgRk83CM-Q8,6343
|
|
445
|
+
helm/benchmark/scenarios/medhallu_scenario.py,sha256=0EgeIxGuYMyBzM8xIOF4WcxfCOVqCp-oOuZe4Ai-CRM,3660
|
|
446
|
+
helm/benchmark/scenarios/medhelm_configurable_scenario.py,sha256=vxvvAaIFW4cWaMez1xbEOZBh6S2wEH6Ws8KcGpnaZbs,3852
|
|
447
|
+
helm/benchmark/scenarios/medi_qa_scenario.py,sha256=KXHQIliik9Cihaw2_M6GW5QdmHBeGoPc-0tnTw-_M5w,5224
|
|
448
|
+
helm/benchmark/scenarios/medication_qa_scenario.py,sha256=uyYxtCm_dX9Jt6X-3ha2gAUyxF55wKn3_k95g7VAzHQ,3636
|
|
450
449
|
helm/benchmark/scenarios/melt_ir_scenario.py,sha256=d88DEGKVJZCeGnbrXrQZO_W4VJeqW8XNaYc8wIUiJtA,5978
|
|
451
450
|
helm/benchmark/scenarios/melt_knowledge_scenario.py,sha256=FDG4OGYEV6Ac40VC7KAeikzbFKAK2XXFhH1-QUTw8jo,7923
|
|
452
451
|
helm/benchmark/scenarios/melt_lm_scenarios.py,sha256=kSm0lRRixhnXctMprPnzi09PLOmgfs-C7TAW3QI8RmE,8969
|
|
@@ -454,60 +453,63 @@ helm/benchmark/scenarios/melt_scenarios.py,sha256=_WShDpmPaKrujGbZcazCqleDn0TKDh
|
|
|
454
453
|
helm/benchmark/scenarios/melt_srn_scenario.py,sha256=EQSOZIXbfvVWCJMJ4H2e_CiBz6wc8THJndnbK2WwTHM,14674
|
|
455
454
|
helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py,sha256=ptMQWgNn6R-XpAVAAjutSdZg_9ZUqG6fVotzAgeead4,7945
|
|
456
455
|
helm/benchmark/scenarios/melt_translation_scenario.py,sha256=j9YrY60DQHZz4m1MJZaGLzyI6FERlHRx2wy9auyAVB8,5415
|
|
457
|
-
helm/benchmark/scenarios/mental_health_scenario.py,sha256=
|
|
458
|
-
helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=
|
|
459
|
-
helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=
|
|
460
|
-
helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=
|
|
456
|
+
helm/benchmark/scenarios/mental_health_scenario.py,sha256=dwirS093vIdS1VG5yKqUw863TJoCF_keO-pr7ysTIxA,6066
|
|
457
|
+
helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=OIDB-f8wyn0ApsPqwpP11yJEpEtSpf3aYc6VVap6Jr8,5275
|
|
458
|
+
helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=pG_NK1Et0QZosQAOLAxbciyNSq_wIdOT7hkXsBb4mTg,4902
|
|
459
|
+
helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=KRl1lYX-ITWTGxWS_NNQ0o3I4E__jlzNDhAYvI1by7g,3749
|
|
461
460
|
helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
|
|
462
|
-
helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=
|
|
463
|
-
helm/benchmark/scenarios/mmlu_scenario.py,sha256=
|
|
461
|
+
helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=2FVL-6Umn0BufFpJ0e405q1ZgeeP8Np1kCvsE61GaOE,4686
|
|
462
|
+
helm/benchmark/scenarios/mmlu_scenario.py,sha256=P68i3gBlvVwjItZhLimtM6-zVGv3cYitSPH8ARwnkEk,4610
|
|
464
463
|
helm/benchmark/scenarios/mmmlu_scenario.py,sha256=CyOISLOsXF9IEYGfeqWyYYkWGvrUvGivlWSJ5ttN9qY,2762
|
|
465
|
-
helm/benchmark/scenarios/msmarco_scenario.py,sha256
|
|
466
|
-
helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=
|
|
467
|
-
helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=
|
|
468
|
-
helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256
|
|
469
|
-
helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=
|
|
470
|
-
helm/benchmark/scenarios/natural_qa_scenario.py,sha256=
|
|
464
|
+
helm/benchmark/scenarios/msmarco_scenario.py,sha256=p9YNL5oTa9isCGVvmqHHVofKmiwitjPQd28ElXmRAN4,35601
|
|
465
|
+
helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=gtVSZxrs321tOolyD0gOoLzc0--uTc--3_HdlBVIuHo,6607
|
|
466
|
+
helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=FIdI509nn0LN9opC4yJ8UsvWmh6-KECUMZF88duIEq0,6395
|
|
467
|
+
helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=o7RydazvQkYK90epvuXsdEyE02fmpsDEwS6253fNptk,14365
|
|
468
|
+
helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=XBGq3_gz1vaMhVX17RWF7mhXaSlKsv-_-JWCyHDkGWA,6428
|
|
469
|
+
helm/benchmark/scenarios/natural_qa_scenario.py,sha256=3wkXvYm7m0Isxv2EW6SIuIEwZEV2lihsSLQZaANsKZo,14017
|
|
471
470
|
helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
|
|
472
471
|
helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
|
|
473
|
-
helm/benchmark/scenarios/omni_math_scenario.py,sha256=
|
|
474
|
-
helm/benchmark/scenarios/open_assistant_scenario.py,sha256=
|
|
472
|
+
helm/benchmark/scenarios/omni_math_scenario.py,sha256=nB2miRRQ-cWwhpqUkypOZibYugD56wZ299nxE5bty9Q,2582
|
|
473
|
+
helm/benchmark/scenarios/open_assistant_scenario.py,sha256=Z9eyaaHGRtFZTogIkOe1Pr6d70lqSe80tMsNPWR_jog,6577
|
|
475
474
|
helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=XbO8Wpjjq2e8OsC2s_ZScV4TcZg3hlpVGy56hgxXY9w,3253
|
|
476
475
|
helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
|
|
477
|
-
helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=
|
|
478
|
-
helm/benchmark/scenarios/quac_scenario.py,sha256=
|
|
479
|
-
helm/benchmark/scenarios/race_based_med_scenario.py,sha256=
|
|
480
|
-
helm/benchmark/scenarios/raft_scenario.py,sha256=
|
|
481
|
-
helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=
|
|
476
|
+
helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=Z8gR19kiTIugBTvBj6g9LiBXicfAxZ1AFh_GF_axgQc,9043
|
|
477
|
+
helm/benchmark/scenarios/quac_scenario.py,sha256=y5bm1LXHIICqPIkWOg3sibnH_sC15b2zYUfT-_Y0V4E,7349
|
|
478
|
+
helm/benchmark/scenarios/race_based_med_scenario.py,sha256=pyeOUjWlQ30WgNr48BuV7kP7fhqZljLfizbTfWjyV_k,6862
|
|
479
|
+
helm/benchmark/scenarios/raft_scenario.py,sha256=BQ-faIiWBuUYmHTMCRbI8XpymtWvKK8DN6oNejjNi7M,5443
|
|
480
|
+
helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=USsjBVzoL-Bgq8B2clQvl3d-g4XlOlt8gvBje9VD7Dk,3077
|
|
482
481
|
helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=jgVf1D4eTSxwxQsW0GBou5hfSo2dnlEJvHpVJqk3BxM,6327
|
|
483
482
|
helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=Dy0INRMzxSiIs9Pm3fa0hYodN-W--WPSv4kcmeQhucM,3270
|
|
484
|
-
helm/benchmark/scenarios/scenario.py,sha256=
|
|
485
|
-
helm/benchmark/scenarios/seahelm_scenario.py,sha256=
|
|
486
|
-
helm/benchmark/scenarios/self_instruct_scenario.py,sha256=
|
|
487
|
-
helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=
|
|
488
|
-
helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=
|
|
489
|
-
helm/benchmark/scenarios/shc_conf_scenario.py,sha256=
|
|
490
|
-
helm/benchmark/scenarios/shc_ent_scenario.py,sha256=
|
|
491
|
-
helm/benchmark/scenarios/shc_gip_scenario.py,sha256=
|
|
492
|
-
helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=
|
|
493
|
-
helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=
|
|
494
|
-
helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=
|
|
483
|
+
helm/benchmark/scenarios/scenario.py,sha256=6zYT0heGPh1HXmv9l2g360Y3CwcV4xjA6jUq5snNF5I,9482
|
|
484
|
+
helm/benchmark/scenarios/seahelm_scenario.py,sha256=Pgw05ZT9NByV7GL0031vGImbhGOZPrHv8aOR5DmP7sA,94098
|
|
485
|
+
helm/benchmark/scenarios/self_instruct_scenario.py,sha256=uPVclF96zh0P2VJ163nLa7XuTKlMKGaTDFN-6IcdbXQ,3164
|
|
486
|
+
helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=kLnoSmpNaPKUcHDPhS6sDP38TC0YII5dlvEKpiUZYKY,3787
|
|
487
|
+
helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=Fg6PKKLLtmVxuu8pTOAmmoRpPIlFhxWl4VzIUNr7w6Y,3519
|
|
488
|
+
helm/benchmark/scenarios/shc_conf_scenario.py,sha256=605KB8lTHlJh44XwbkilKXXAfJQGD2XVnZJmFoaV4Vw,3948
|
|
489
|
+
helm/benchmark/scenarios/shc_ent_scenario.py,sha256=Sr4E3z0keK69b0DIZ1QFISvG0TsEQ6S567h84eSEHcc,3737
|
|
490
|
+
helm/benchmark/scenarios/shc_gip_scenario.py,sha256=MhQ4mdKMJOtcZJ0gKxoVCg2RVyM8OKfjW_EA3wna_2c,3564
|
|
491
|
+
helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=OTYdD5mifaEZeI84RF5fz3Q10M8cE74H0GR3a7QisAE,3974
|
|
492
|
+
helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=bM_qSCv5Qp_03TiDezgl1gUSSs49IZ_M1L4xZnMzToc,3915
|
|
493
|
+
helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=BttMbH39uai4qg621W0ySAFX-UtoRLuyEi-f4bfSrFo,4461
|
|
495
494
|
helm/benchmark/scenarios/shc_sei_scenario.py,sha256=pTcb7n97VkesyRuqUqe5JGed1jDsQEd19udciDras8E,4532
|
|
496
|
-
helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=
|
|
495
|
+
helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=IPOuJ74AIWOLDVIQ5lNUjMswcU9zeB_gOXg-K9HLTO4,3703
|
|
497
496
|
helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=sjIHT5NZlHv_IcXr_15-pOiBUPKKwykyH-QpMfvrHAY,1247
|
|
498
497
|
helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
|
|
498
|
+
helm/benchmark/scenarios/situation_prompts.yaml,sha256=nJA3X_I67PIpXgd7LTekWwEr5zn1ryqIHgvqCpAwoGQ,1790
|
|
499
499
|
helm/benchmark/scenarios/spider_scenario.py,sha256=mhiV3XWGwpnIQkaHFM_rvZlrwE7nqS12-F9t1eB8kdI,3306
|
|
500
|
-
helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=
|
|
501
|
-
helm/benchmark/scenarios/summarization_scenario.py,sha256=
|
|
500
|
+
helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=ZiXGXeKelEm9NrFsHQS5ft1L4oL6a_IlAJm_flRv-Z4,5228
|
|
501
|
+
helm/benchmark/scenarios/summarization_scenario.py,sha256=wry6hAO_YXk56gS79jJ6HP6VhrRjpExvEZSsl2vM910,8883
|
|
502
502
|
helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
|
|
503
|
-
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=
|
|
504
|
-
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=
|
|
503
|
+
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=HbCeVUzPm3miSZoIDivTcAkP-fwi6X4TnyaAx0jUumk,3737
|
|
504
|
+
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=Ir8CVC0aD7Cy7H_ZKGMd1c0iLK-dWbkuMuUl2D7kcR4,17048
|
|
505
505
|
helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=7STCSHiHGIQ2aaN_PwDE5jXUJ-qcu8PaS4pC-pbOceE,8410
|
|
506
506
|
helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
|
|
507
507
|
helm/benchmark/scenarios/test_alghafa_scenario.py,sha256=ARQyzjmEpX_FpN2QLnIB7P-ToAeMtE4dqsolzlq8KPQ,1696
|
|
508
|
-
helm/benchmark/scenarios/
|
|
508
|
+
helm/benchmark/scenarios/test_alrage_scenario.py,sha256=9ofFc05Sy1mdfU9VgHdL_SNp8olJ4ComnZllkMU6itU,6711
|
|
509
|
+
helm/benchmark/scenarios/test_arabic_exams_scenario.py,sha256=nD221WpNE3Ddy-VOdLQGYOHiYVBAcyJxeMc__lVNRLo,985
|
|
510
|
+
helm/benchmark/scenarios/test_aratrust_scenario.py,sha256=6Ks4DA13gU4BAP46qKwPISkqIJw-RiZt4ZhyviXdrUY,918
|
|
509
511
|
helm/benchmark/scenarios/test_bigcodebench_scenario.py,sha256=q9FWJsxLJoFaB3PSMLjI_-YyPoZYusOsMPwn6X6NKXw,1304
|
|
510
|
-
helm/benchmark/scenarios/test_bluex_scenario.py,sha256=
|
|
512
|
+
helm/benchmark/scenarios/test_bluex_scenario.py,sha256=QCIqq0GRRrjb55lwLpBiEkDwSFzEZxBKbCQHvyYO_Fk,1928
|
|
511
513
|
helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
|
|
512
514
|
helm/benchmark/scenarios/test_czech_bank_qa_scenario.py,sha256=bZNLEGu58iHmutGlSp-2uVC2931TO6Rxw7giqFh9RHY,828
|
|
513
515
|
helm/benchmark/scenarios/test_enem_challenge_scenario.py,sha256=XfPkYaSwdGa63ToC_BLuVKTRSldWNBlKsZYK6CFzL3w,2000
|
|
@@ -538,18 +540,18 @@ helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-F
|
|
|
538
540
|
helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
|
|
539
541
|
helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
|
|
540
542
|
helm/benchmark/scenarios/thai_exam_scenario.py,sha256=YjFsom1yiu-xBZ3SGenNuczVCwQcmyoITTMavGv-QEk,6069
|
|
541
|
-
helm/benchmark/scenarios/the_pile_scenario.py,sha256=
|
|
542
|
-
helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=
|
|
543
|
+
helm/benchmark/scenarios/the_pile_scenario.py,sha256=Dz51JxxazqPiX_fk6viOav8hQ2n6Iw0LIPhouquu6aw,5632
|
|
544
|
+
helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=0U7q8E9XB0H9oSN3OzhfsiZ-8PJrYXCCC04dffjicB8,6822
|
|
543
545
|
helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
|
|
544
|
-
helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=
|
|
546
|
+
helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=ydG8MvBF3v6TXHScMK0_-HPAhmPhMWh5G4foBEHDp84,2905
|
|
545
547
|
helm/benchmark/scenarios/unitxt_scenario.py,sha256=uL8Gni-Uw_eIp9xKQefp4J7XtKSttjJHzJE4USyoC2U,1930
|
|
546
548
|
helm/benchmark/scenarios/verifiability_judgment_scenario.py,sha256=2iCJplnxdR7NNKjhsLR5o51pL55Q0bcbjjWlvrk5lw4,6067
|
|
547
|
-
helm/benchmark/scenarios/vicuna_scenario.py,sha256=
|
|
548
|
-
helm/benchmark/scenarios/wikifact_scenario.py,sha256=
|
|
549
|
+
helm/benchmark/scenarios/vicuna_scenario.py,sha256=OWwbFkhgEMHd5YH2G3v2E_p22DmYmPVsDbKKhBbyTDY,2478
|
|
550
|
+
helm/benchmark/scenarios/wikifact_scenario.py,sha256=AHHZz_trtGf8HRoCnE6vukqrTD_Of9XQ1GcrqyctgR0,6702
|
|
549
551
|
helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
|
|
550
|
-
helm/benchmark/scenarios/wildbench_scenario.py,sha256=
|
|
552
|
+
helm/benchmark/scenarios/wildbench_scenario.py,sha256=dWJSqF06ZWAyZhaejNmrZ0Uu4Vlh5HMdTaMLNkMfe8Q,3668
|
|
551
553
|
helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
|
|
552
|
-
helm/benchmark/scenarios/wmt_14_scenario.py,sha256=
|
|
554
|
+
helm/benchmark/scenarios/wmt_14_scenario.py,sha256=TNIYBXnbuvaOcpfmKqRZF6-yta1pTZSLA4Fd_XHhjCY,5159
|
|
553
555
|
helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
|
|
554
556
|
helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
555
557
|
helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=NtTEHzmbeCicbjTRxPBUueZrBGOPwF6RVc2Yftc-VKs,5634
|
|
@@ -600,6 +602,8 @@ helm/benchmark/scenarios/image_generation/radiology_scenario.py,sha256=7JN8OYap8
|
|
|
600
602
|
helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,sha256=DoabanZhd-2MHFDZeR9EoPit0T2TvbVwZGUR0RfJyW0,2362
|
|
601
603
|
helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
|
|
602
604
|
helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
|
|
605
|
+
helm/benchmark/scenarios/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
606
|
+
helm/benchmark/scenarios/medhelm/judges.yaml,sha256=g_O6zVgOMSL4_f1yNz8muDuUUBzcsM8e5gpfe56eI4Y,663
|
|
603
607
|
helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
604
608
|
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
|
|
605
609
|
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
|
|
@@ -644,7 +648,7 @@ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,s
|
|
|
644
648
|
helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
|
|
645
649
|
helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
|
|
646
650
|
helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
|
|
647
|
-
helm/benchmark/static/schema_arabic.yaml,sha256=
|
|
651
|
+
helm/benchmark/static/schema_arabic.yaml,sha256=Iui-4_M4tV45Xzs3bz0diI3UZwVAuaLAxD5uNhjurgs,8925
|
|
648
652
|
helm/benchmark/static/schema_audio.yaml,sha256=lVslZX7JmFo0ZgLU4n6amrs9DK8y43Ux0I9QyDUG-14,29119
|
|
649
653
|
helm/benchmark/static/schema_autobencher.yaml,sha256=yb-NkF5w5R2YOg7RIsadNHJ_5G7lG1gbcDVq_25luEk,5716
|
|
650
654
|
helm/benchmark/static/schema_call_center.yaml,sha256=i30aFzWqdOJRyAHN8vAzyHEX1v95DEK0TI1SMKTN4TE,9106
|
|
@@ -662,13 +666,14 @@ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0
|
|
|
662
666
|
helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
|
|
663
667
|
helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
|
|
664
668
|
helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
|
|
665
|
-
helm/benchmark/static/schema_long_context.yaml,sha256=
|
|
666
|
-
helm/benchmark/static/schema_medhelm.yaml,sha256=
|
|
669
|
+
helm/benchmark/static/schema_long_context.yaml,sha256=p01u7yPN75ZNmJhQodCRJo4q4Zb4vBieHKYqp4fD9Jg,11520
|
|
670
|
+
helm/benchmark/static/schema_medhelm.yaml,sha256=e3vVHdEXcS-joOUMUoIoFA3x9hEE__svDoajbjfqpLE,51793
|
|
667
671
|
helm/benchmark/static/schema_melt.yaml,sha256=mmPqwDa26DVZXsRJkmKQSyD0OStvjlxaMoSPM25SpD4,47494
|
|
668
672
|
helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
|
|
669
673
|
helm/benchmark/static/schema_mmlu_winogrande_afr.yaml,sha256=YIVYf-mOFPq82UVBdMhnCWNOr4sV8Oi3-ozOszJ2tWQ,40143
|
|
670
674
|
helm/benchmark/static/schema_safety.yaml,sha256=7RfZDX4wr8Xr1BJ149ZwmplPzPkNL0-BKbEZuzUsl_0,9278
|
|
671
675
|
helm/benchmark/static/schema_seahelm.yaml,sha256=9XF9Rlr7I-g-uW6R0LNh7Xg52Xs3_058QybXEiN-hnM,28296
|
|
676
|
+
helm/benchmark/static/schema_slp.yaml,sha256=5AV2leKoSBZwP3rIfXcwiqqpXPQbyWjXKE5kU73IAt4,7122
|
|
672
677
|
helm/benchmark/static/schema_slphelm.yaml,sha256=3avOfp-ZEmVRGei3_M_WX6cSP5hQjbfHsDr1XrjayMY,5294
|
|
673
678
|
helm/benchmark/static/schema_social_audio.yaml,sha256=Nj3ORXDT4RHD52cyo1RHfueWwbhqp1qW06TaVJ2lUfE,8653
|
|
674
679
|
helm/benchmark/static/schema_sql.yaml,sha256=8rRff6p_i1CsH7oDbUjau2qRWbLGspuM1Hy-g5pOQiU,6047
|
|
@@ -680,15 +685,15 @@ helm/benchmark/static/schema_vhelm.yaml,sha256=0slYep2eepUefgtK_m4iSS785sHdJzljm
|
|
|
680
685
|
helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
|
|
681
686
|
helm/benchmark/static/schema_video.yaml,sha256=FkpI5Slc4w-ty4hns82ArXIvTdqppWDnkJSpIp74QN4,9713
|
|
682
687
|
helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
|
|
683
|
-
helm/benchmark/static_build/index.html,sha256=
|
|
688
|
+
helm/benchmark/static_build/index.html,sha256=MRRycZym58h-5KW7aKyiqGxIpRB8DV5OHkND5JL5aDk,1178
|
|
684
689
|
helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
|
|
685
690
|
helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
|
|
686
691
|
helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
|
|
687
692
|
helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
688
693
|
helm/benchmark/static_build/assets/helm-safety-2907a7b6.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
|
|
689
694
|
helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
|
|
690
|
-
helm/benchmark/static_build/assets/index-
|
|
691
|
-
helm/benchmark/static_build/assets/index-
|
|
695
|
+
helm/benchmark/static_build/assets/index-671a5e06.js,sha256=XEa85-IyP6ZeHfsWGoPno-Qj9pSxlnHsjLYmaqzdzqg,124954
|
|
696
|
+
helm/benchmark/static_build/assets/index-9352595e.css,sha256=k1JZXkXPFsUerOZ37oDhxjcb1ypOFEdDogJUP6H-NAQ,491553
|
|
692
697
|
helm/benchmark/static_build/assets/medhelm-overview-eac29843.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
|
|
693
698
|
helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
|
|
694
699
|
helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
|
|
@@ -758,8 +763,9 @@ helm/clients/mistral_client.py,sha256=ceM8KLAcniAqK1BNVdUGzqy4av2SEEau6PVmPivxc0
|
|
|
758
763
|
helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
|
|
759
764
|
helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
|
|
760
765
|
helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
|
|
761
|
-
helm/clients/openai_client.py,sha256=
|
|
762
|
-
helm/clients/openai_responses_client.py,sha256=
|
|
766
|
+
helm/clients/openai_client.py,sha256=4Q4LVMqvPo-37MV_BhsMydpwmMLfo-2kftRZH9lGtZs,28538
|
|
767
|
+
helm/clients/openai_responses_client.py,sha256=FhQcOcXNZc5AuDMh1KBD3ZoRdEREy73dIeFBjUg9YDo,8444
|
|
768
|
+
helm/clients/openrouter_client.py,sha256=oK8gXBhBs1y0AriZ9tVp8kx5lSY7gUgQJv-mfywSTfI,980
|
|
763
769
|
helm/clients/palmyra_client.py,sha256=4AaZcV2tPHU4HJ9FWSkOY8_C9ndEckH3PH715QxJQ8E,7086
|
|
764
770
|
helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
|
|
765
771
|
helm/clients/reka_client.py,sha256=hA0tq3Hc9669q2sYa4Jr5yWy2NAbvoFDnVqQ6vds62w,8334
|
|
@@ -773,12 +779,13 @@ helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy
|
|
|
773
779
|
helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
|
|
774
780
|
helm/clients/test_client.py,sha256=T27UsIPWsbE1JK_8DN_DW9LkEcIGRbgDjio14YOIAb0,3854
|
|
775
781
|
helm/clients/test_huggingface_client.py,sha256=8Shzrf1Pad1UsiUAdeOSqsTPQaay0CrWXmdNeIfrJ2Y,3418
|
|
782
|
+
helm/clients/test_openrouter_client.py,sha256=gCzchJMQZi4kkgtpGe1Ma0xF2nsP1uDevJcqbprZ6RE,2414
|
|
776
783
|
helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
|
|
777
784
|
helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
|
|
778
|
-
helm/clients/together_client.py,sha256=
|
|
785
|
+
helm/clients/together_client.py,sha256=kEa6z54zPWlcLHCb2g2PCxLRpdJ8aE9zvG5Yzkaeun4,25518
|
|
779
786
|
helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
|
|
780
787
|
helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
|
|
781
|
-
helm/clients/vertexai_client.py,sha256=
|
|
788
|
+
helm/clients/vertexai_client.py,sha256=Qm-EkbpXnwiwZzB592-FPBuSlxKIkVH7tWBFFvOBvCY,23631
|
|
782
789
|
helm/clients/vllm_client.py,sha256=xmXf35WX2oOZhpQnRxeooXGshENySOHZCUQ1E4pbQbA,2647
|
|
783
790
|
helm/clients/vllm_granite_thinking_client.py,sha256=fds2i8LUG78OJYke1uYdDy6XRFqE3rZgSornFjzu4Sk,2172
|
|
784
791
|
helm/clients/writer_client.py,sha256=flKLeMbFkyGfNmv1ozZGU4dxNy-QF5bFJF0mGHqpU3c,4467
|
|
@@ -790,6 +797,30 @@ helm/clients/audio_language/qwen2_5_omni_client.py,sha256=ftAVtOG0azvRQEcFjkSSBM
|
|
|
790
797
|
helm/clients/audio_language/qwen2_audiolm_client.py,sha256=s9eH8fnVgw5xV39b_8AGt6IyNN3q9Uhcx6HZVxt7TM8,8981
|
|
791
798
|
helm/clients/audio_language/qwen_audiolm_client.py,sha256=RvYweXANEyzhHYDx38H10F0ZEFaL8kj7n7TZ-UrRmZs,6338
|
|
792
799
|
helm/clients/audio_language/test.py,sha256=FrKpirOwJW1__E2egq4VPgsTrgiSHZHBwfUCvxNjC0o,1969
|
|
800
|
+
helm/clients/audio_language/llama_omni/arguments.py,sha256=MxzZKE8sNsOe5eUse96gejOsmu_MfTJGiuOwR87xiSA,2334
|
|
801
|
+
helm/clients/audio_language/llama_omni/constants.py,sha256=IjFS9EUI5p1DLtGcX0B1lSxESkxcx5dMbuMkMm1UaSs,183
|
|
802
|
+
helm/clients/audio_language/llama_omni/conversation.py,sha256=SgoMEf1Roi_8ZxiIM6DXwY3ozw0ExOCYdFFX-5rRA0g,6881
|
|
803
|
+
helm/clients/audio_language/llama_omni/preprocess.py,sha256=2-YA6czgO1Zr-C1ChHvqVEfYa8qHhHp6n1Lb1Uw67qg,10764
|
|
804
|
+
helm/clients/audio_language/llama_omni/utils.py,sha256=GycpuTkNEZtMNG2ZTZ7cYVjPEilyC4o2itT9K9kwJFI,7556
|
|
805
|
+
helm/clients/audio_language/llama_omni/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
806
|
+
helm/clients/audio_language/llama_omni/model/builder.py,sha256=-y7amgUyPMEMknVutSSb_W3Zsm09r3K7u08jgEMinYA,3875
|
|
807
|
+
helm/clients/audio_language/llama_omni/model/omni_speech_arch.py,sha256=-Sgo9fEGHRBfkZrR63i3-uXZ19wkqYbGLqAiDqevRr0,11465
|
|
808
|
+
helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py,sha256=CqtEURdHlk6_29iM8WZnsmd7DMrUcnULGD2U2inWIxw,8426
|
|
809
|
+
helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py,sha256=ZmtQY7JT74O4OH78UYSuBnmxq5Hi4-86kEY5-svfU-M,4564
|
|
810
|
+
helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py,sha256=TwSVGfSOA5N82pB2_P4cElN7w_4w2XHBXr9qicluM2w,389
|
|
811
|
+
helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py,sha256=LF8znt1puoExQ87ovtoyc1-pzO9kWNqTu_CvUWr3nBE,965
|
|
812
|
+
helm/clients/audio_language/llama_omni/model/speech_generator/builder.py,sha256=nIjOSYgJTrdnqDvy5jnYjMcHvpOirAyvMNLuUbnL9pY,358
|
|
813
|
+
helm/clients/audio_language/llama_omni/model/speech_generator/generation.py,sha256=Rka7iVephHHj0z0mPPQLfe-3Tt_UsWbTI7VRevs1ek4,30644
|
|
814
|
+
helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py,sha256=mllXYemRl-laMRntRsKtak8SIWEbVfWk0EpxPqs-su0,4612
|
|
815
|
+
helm/clients/audio_language/llama_omni/model/speech_projector/builder.py,sha256=rmzWg4yZIfGpYD7VhfSrRNN7t5U4xNq8TVugq0KLYWc,372
|
|
816
|
+
helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py,sha256=naunMdDZXzK8VpASZJYsY6TwvuxQn3Uw9r_MUouUG5k,950
|
|
817
|
+
helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py,sha256=oIaVRv1KlFYPqbT1nPtATgTcVomfNvtHmxnIZ2wcTC4,19088
|
|
818
|
+
helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py,sha256=s08H7EY_TzHqVk1b6DZv_gI4VVwP_ub_FwF6JJu0z-c,180552
|
|
819
|
+
helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py,sha256=n8by91xA1xTYz8BfsbYAwCL5G0x1FuLhSGDAP33Qyyw,12216
|
|
820
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
821
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py,sha256=ZZ5I9X_p1-ttDbYsLBxImO_CxbC5LESLI8AAIe9kKv0,365
|
|
822
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py,sha256=VKATc5W9kl0fo9TuU19MaXYSObGxX2V2Fo1NlD4GC4I,2516
|
|
823
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py,sha256=TFvQvPiP0X8Zt-agQR84o75LUZp0uXDZAUqUl0vhPM8,14635
|
|
793
824
|
helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
794
825
|
helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
|
|
795
826
|
helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
|
|
@@ -813,7 +844,7 @@ helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrH
|
|
|
813
844
|
helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
|
|
814
845
|
helm/clients/image_generation/cogview2/sr_pipeline/__init__.py,sha256=qWuNwKlcvGwEFcw5932wk_t0_baNwUILIJzQWJjgh2A,488
|
|
815
846
|
helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py,sha256=1DwcUw9Tb563JpKpkPNIB5Ew1djozvPiGASShffiABk,3716
|
|
816
|
-
helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=
|
|
847
|
+
helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=IUTvHpIaaYrH00CQZZX9L45JMRb-twYir99K7LLnOzQ,10819
|
|
817
848
|
helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py,sha256=OonYjdtNKJo12cNb-t-gFHLXRFxItCXjKgS9YxWAI-k,7718
|
|
818
849
|
helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py,sha256=LSvAHRupsOqk3yb4GxyTsubRxrnPOEfObFym2j4eiKc,5120
|
|
819
850
|
helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py,sha256=5D1QWyAcY0CpwITk7EBN6ylUtc7mvZaE9iHG628AqMQ,10390
|
|
@@ -837,7 +868,7 @@ helm/clients/image_generation/mindalle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
|
|
|
837
868
|
helm/clients/image_generation/mindalle/models/__init__.py,sha256=1UieFJ0LGinYSB-idy3atl-gFAmS_ouiiGX6TM2Mh-I,8372
|
|
838
869
|
helm/clients/image_generation/mindalle/models/tokenizer.py,sha256=NFFdLUhoxEkv9SZqU3QIFk0ukaCcn6w_xFWQIRGhZJ4,1190
|
|
839
870
|
helm/clients/image_generation/mindalle/models/stage1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
840
|
-
helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=
|
|
871
|
+
helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=Q-yZeB8ZIxwOdQaKpEeBVbwF9nXeQJ2xJhiD6KjqRi4,11046
|
|
841
872
|
helm/clients/image_generation/mindalle/models/stage1/vqgan.py,sha256=KcarvKoMuPBpP0H8F8W67FogdvHaAQuo9jP3rFRxc5E,4035
|
|
842
873
|
helm/clients/image_generation/mindalle/models/stage2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
843
874
|
helm/clients/image_generation/mindalle/models/stage2/layers.py,sha256=LvDADun5nMaencaRT0pm-dq78xHpPPkpi8rlu7RLHco,5306
|
|
@@ -904,16 +935,16 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
|
|
|
904
935
|
helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
|
|
905
936
|
helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
|
|
906
937
|
helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
907
|
-
helm/config/model_deployments.yaml,sha256=
|
|
908
|
-
helm/config/model_metadata.yaml,sha256=
|
|
909
|
-
helm/config/tokenizer_configs.yaml,sha256=
|
|
938
|
+
helm/config/model_deployments.yaml,sha256=JGM4eLHXv3KgndTu2ZqnMH5rwvoXNvKAoTAnmfZDs7A,174425
|
|
939
|
+
helm/config/model_metadata.yaml,sha256=8W9u04RugI_L6Kj3ipGqQlWLeXAd_FQwcw-2usKm5y4,274605
|
|
940
|
+
helm/config/tokenizer_configs.yaml,sha256=KZ6nReCV6AoActBoQYfi9BH4eGYkSx4OmSa2gzWh0uo,41039
|
|
910
941
|
helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
911
942
|
helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
|
|
912
943
|
helm/proxy/cli.py,sha256=kEDoHpisFO0EJ0Wfm1FLpJdP9sXk9j8WCILEq42RKb0,8317
|
|
913
|
-
helm/proxy/example_queries.py,sha256=
|
|
944
|
+
helm/proxy/example_queries.py,sha256=A4JKvLwkHQIprsgMFhGvruW1-Ud4YKNqwUWhv6iWfzw,4449
|
|
914
945
|
helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
|
|
915
946
|
helm/proxy/retry.py,sha256=o64BZsW2vwu2iewRA18wdsru2xC3eNBQ7WUw3IjC_5g,3698
|
|
916
|
-
helm/proxy/server.py,sha256=
|
|
947
|
+
helm/proxy/server.py,sha256=PYG8oMb-lq8eGR3Kad2ZTudJxgY4QH4jVbyoOgjes7I,10904
|
|
917
948
|
helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
|
|
918
949
|
helm/proxy/test_retry.py,sha256=db0owyGTThmIMhYWU_Eh1U-AJvQ-Wa9j_kRmC9DNjOA,1059
|
|
919
950
|
helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -933,9 +964,9 @@ helm/proxy/services/test_remote_service.py,sha256=xzkyptctXw3y5d1fgbidBMyw8B4rIL
|
|
|
933
964
|
helm/proxy/services/test_service.py,sha256=oDYen-71iwZ6YMNBVbVSdEFsH6GMvZYw5tS5Eg4YHjY,8987
|
|
934
965
|
helm/proxy/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
|
|
935
966
|
helm/proxy/static/help.html,sha256=2Rn_lGZspqrZhNfLQ4wIAvYO_BK9q67Q_AS2-3WsMpY,6231
|
|
936
|
-
helm/proxy/static/index.css,sha256=
|
|
967
|
+
helm/proxy/static/index.css,sha256=3z_JuWVuJFngWtHI4T5-EVyk4LyaCPDcSzlalvUYhmQ,754
|
|
937
968
|
helm/proxy/static/index.html,sha256=nUJf_hwBPokqrm_hDZsVfHcJrnhZLYhkVSoLdGOocf8,2009
|
|
938
|
-
helm/proxy/static/index.js,sha256
|
|
969
|
+
helm/proxy/static/index.js,sha256=bCjx29j88UnfoeYL4jRYGaqg7fd6o8IePZ0sTl-HRy8,15292
|
|
939
970
|
helm/proxy/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
|
|
940
971
|
helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
941
972
|
helm/proxy/token_counters/auto_token_counter.py,sha256=Ag368Sb-eLQUMLW7lmWc2EOKN3kgkiCTsYnHNrsf9kw,2071
|
|
@@ -967,8 +998,8 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
|
|
|
967
998
|
helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
|
|
968
999
|
helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
|
|
969
1000
|
helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
|
|
970
|
-
crfm_helm-0.5.
|
|
971
|
-
crfm_helm-0.5.
|
|
972
|
-
crfm_helm-0.5.
|
|
973
|
-
crfm_helm-0.5.
|
|
974
|
-
crfm_helm-0.5.
|
|
1001
|
+
crfm_helm-0.5.8.dist-info/METADATA,sha256=UCr1ojkpYEsbV8_KfuhviO1vhPRs0fnfz7ADVaqa32E,18414
|
|
1002
|
+
crfm_helm-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1003
|
+
crfm_helm-0.5.8.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
|
|
1004
|
+
crfm_helm-0.5.8.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
|
|
1005
|
+
crfm_helm-0.5.8.dist-info/RECORD,,
|