crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +7 -77
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +315 -282
- helm/benchmark/adaptation/adapter_spec.py +10 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
- helm/benchmark/annotation/aci_bench_annotator.py +11 -22
- helm/benchmark/annotation/alrage_annotator.py +90 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
- helm/benchmark/annotation/dischargeme_annotator.py +11 -22
- helm/benchmark/annotation/med_dialog_annotator.py +11 -22
- helm/benchmark/annotation/medalign_annotator.py +11 -22
- helm/benchmark/annotation/medi_qa_annotator.py +11 -22
- helm/benchmark/annotation/medication_qa_annotator.py +11 -22
- helm/benchmark/annotation/mental_health_annotator.py +11 -22
- helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
- helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
- helm/benchmark/annotation/model_as_judge.py +23 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
- helm/benchmark/metrics/air_bench_metrics.py +3157 -1
- helm/benchmark/metrics/alrage_metric.py +35 -0
- helm/benchmark/metrics/basic_metrics.py +267 -2
- helm/benchmark/metrics/bbq_metrics.py +12 -0
- helm/benchmark/metrics/classification_metrics.py +19 -1
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
- helm/benchmark/metrics/dry_run_metrics.py +30 -1
- helm/benchmark/metrics/efficiency_metrics.py +74 -0
- helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
- helm/benchmark/metrics/evaluate_reference_metrics.py +311 -0
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
- helm/benchmark/metrics/ifeval_metrics.py +13 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
- helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
- helm/benchmark/metrics/language_modeling_metrics.py +13 -1
- helm/benchmark/metrics/live_qa_metrics.py +13 -1
- helm/benchmark/metrics/llm_jury_metrics.py +13 -1
- helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
- helm/benchmark/metrics/medec_metrics.py +25 -2
- helm/benchmark/metrics/metric.py +25 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
- helm/benchmark/metrics/omni_math_metrics.py +13 -1
- helm/benchmark/metrics/safety_metrics.py +13 -1
- helm/benchmark/metrics/seahelm_metrics.py +14 -1
- helm/benchmark/metrics/summac/model_summac.py +2 -2
- helm/benchmark/metrics/summarization_metrics.py +129 -1
- helm/benchmark/metrics/toxicity_metrics.py +31 -1
- helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
- helm/benchmark/metrics/wildbench_metrics.py +21 -1
- helm/benchmark/presentation/run_display.py +13 -3
- helm/benchmark/presentation/run_entry.py +2 -2
- helm/benchmark/presentation/schema.py +5 -22
- helm/benchmark/presentation/summarize.py +180 -11
- helm/benchmark/presentation/taxonomy_info.py +20 -0
- helm/benchmark/run.py +1 -1
- helm/benchmark/run_expander.py +4 -0
- helm/benchmark/run_specs/arabic_run_specs.py +140 -16
- helm/benchmark/run_specs/bluex_run_specs.py +1 -1
- helm/benchmark/run_specs/classic_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +2 -2
- helm/benchmark/run_specs/medhelm/__init__.py +0 -0
- helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +362 -52
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
- helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
- helm/benchmark/scenarios/air_bench_scenario.py +21 -0
- helm/benchmark/scenarios/alrage_scenario.py +54 -0
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
- helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
- helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
- helm/benchmark/scenarios/aratrust_scenario.py +19 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
- helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
- helm/benchmark/scenarios/banking77_scenario.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +15 -0
- helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
- helm/benchmark/scenarios/bluex_scenario.py +6 -2
- helm/benchmark/scenarios/bold_scenario.py +15 -0
- helm/benchmark/scenarios/boolq_scenario.py +20 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
- helm/benchmark/scenarios/clear_scenario.py +23 -0
- helm/benchmark/scenarios/cleva_scenario.py +479 -0
- helm/benchmark/scenarios/code_scenario.py +28 -0
- helm/benchmark/scenarios/commonsense_scenario.py +32 -0
- helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
- helm/benchmark/scenarios/copyright_scenario.py +35 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
- helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
- helm/benchmark/scenarios/disinformation_scenario.py +22 -0
- helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
- helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
- helm/benchmark/scenarios/financebench_scenario.py +21 -0
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
- helm/benchmark/scenarios/gpqa_scenario.py +18 -0
- helm/benchmark/scenarios/grammar_scenario.py +20 -1
- helm/benchmark/scenarios/gsm_scenario.py +21 -0
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
- helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
- helm/benchmark/scenarios/headqa_scenario.py +22 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
- helm/benchmark/scenarios/ice_scenario.py +21 -1
- helm/benchmark/scenarios/ifeval_scenario.py +18 -0
- helm/benchmark/scenarios/imdb_scenario.py +15 -0
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
- helm/benchmark/scenarios/koala_scenario.py +21 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
- helm/benchmark/scenarios/legal_support_scenario.py +13 -0
- helm/benchmark/scenarios/legalbench_scenario.py +19 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
- helm/benchmark/scenarios/lextreme_scenario.py +11 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
- helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
- helm/benchmark/scenarios/math_scenario.py +33 -0
- helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
- helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
- helm/benchmark/scenarios/med_qa_scenario.py +20 -0
- helm/benchmark/scenarios/medalign_scenario.py +23 -0
- helm/benchmark/scenarios/medbullets_scenario.py +22 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
- helm/benchmark/scenarios/medec_scenario.py +23 -0
- helm/benchmark/scenarios/medhallu_scenario.py +23 -0
- helm/benchmark/scenarios/medhelm/__init__.py +0 -0
- helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
- helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
- helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
- helm/benchmark/scenarios/mental_health_scenario.py +23 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
- helm/benchmark/scenarios/mmlu_scenario.py +21 -0
- helm/benchmark/scenarios/msmarco_scenario.py +30 -0
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
- helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
- helm/benchmark/scenarios/omni_math_scenario.py +18 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
- helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
- helm/benchmark/scenarios/quac_scenario.py +14 -0
- helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
- helm/benchmark/scenarios/raft_scenario.py +15 -0
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
- helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
- helm/benchmark/scenarios/scenario.py +31 -0
- helm/benchmark/scenarios/seahelm_scenario.py +348 -0
- helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
- helm/benchmark/scenarios/situation_prompts.yaml +49 -0
- helm/benchmark/scenarios/spider_scenario.py +18 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
- helm/benchmark/scenarios/summarization_scenario.py +37 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
- helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
- helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
- helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
- helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
- helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
- helm/benchmark/scenarios/the_pile_scenario.py +13 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
- helm/benchmark/scenarios/vicuna_scenario.py +21 -1
- helm/benchmark/scenarios/wikifact_scenario.py +20 -0
- helm/benchmark/scenarios/wildbench_scenario.py +18 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
- helm/benchmark/static/schema_arabic.yaml +55 -12
- helm/benchmark/static/schema_long_context.yaml +11 -30
- helm/benchmark/static/schema_medhelm.yaml +36 -0
- helm/benchmark/static/schema_slp.yaml +219 -0
- helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
- helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
- helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
- helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
- helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
- helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
- helm/benchmark/static_build/index.html +5 -6
- helm/clients/ai21_client.py +2 -0
- helm/clients/aleph_alpha_client.py +2 -0
- helm/clients/anthropic_client.py +7 -1
- helm/clients/audio_language/diva_llama_client.py +2 -0
- helm/clients/audio_language/llama_omni/arguments.py +61 -0
- helm/clients/audio_language/llama_omni/constants.py +9 -0
- helm/clients/audio_language/llama_omni/conversation.py +213 -0
- helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
- helm/clients/audio_language/llama_omni/model/builder.py +88 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
- helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
- helm/clients/audio_language/llama_omni/preprocess.py +295 -0
- helm/clients/audio_language/llama_omni/utils.py +202 -0
- helm/clients/audio_language/llama_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
- helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
- helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
- helm/clients/bedrock_client.py +2 -0
- helm/clients/cohere_client.py +3 -0
- helm/clients/google_client.py +2 -0
- helm/clients/http_model_client.py +2 -0
- helm/clients/huggingface_client.py +2 -1
- helm/clients/ibm_client.py +3 -1
- helm/clients/image_generation/adobe_vision_client.py +2 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
- helm/clients/image_generation/cogview2_client.py +2 -1
- helm/clients/image_generation/dalle2_client.py +2 -0
- helm/clients/image_generation/dalle_mini_client.py +2 -1
- helm/clients/image_generation/deep_floyd_client.py +2 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
- helm/clients/image_generation/lexica_client.py +2 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
- helm/clients/image_generation/mindalle_client.py +2 -1
- helm/clients/image_generation/together_image_generation_client.py +2 -0
- helm/clients/megatron_client.py +2 -0
- helm/clients/mistral_client.py +2 -0
- helm/clients/moderation_api_client.py +2 -0
- helm/clients/openai_client.py +36 -20
- helm/clients/openai_responses_client.py +27 -3
- helm/clients/openrouter_client.py +31 -0
- helm/clients/palmyra_client.py +2 -1
- helm/clients/reka_client.py +2 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
- helm/clients/stanfordhealthcare_http_model_client.py +2 -0
- helm/clients/test_openrouter_client.py +69 -0
- helm/clients/together_client.py +52 -11
- helm/clients/vertexai_client.py +12 -2
- helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
- helm/clients/vision_language/huggingface_vlm_client.py +2 -0
- helm/clients/vision_language/idefics_client.py +2 -1
- helm/clients/vision_language/open_flamingo_client.py +2 -1
- helm/clients/vision_language/paligemma_client.py +2 -1
- helm/clients/vision_language/palmyra_vision_client.py +2 -0
- helm/clients/vision_language/qwen2_vlm_client.py +2 -1
- helm/clients/vision_language/qwen_vlm_client.py +2 -1
- helm/clients/writer_client.py +2 -0
- helm/common/hierarchical_logger.py +20 -0
- helm/common/optional_dependencies.py +1 -1
- helm/common/test_general.py +4 -0
- helm/config/model_deployments.yaml +300 -1
- helm/config/model_metadata.yaml +302 -9
- helm/config/tokenizer_configs.yaml +92 -4
- helm/proxy/example_queries.py +8 -8
- helm/proxy/server.py +2 -1
- helm/proxy/static/index.css +4 -0
- helm/proxy/static/index.js +7 -1
- helm/benchmark/metrics/aci_bench_metrics.py +0 -14
- helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
- helm/benchmark/metrics/dischargeme_metrics.py +0 -14
- helm/benchmark/metrics/med_dialog_metrics.py +0 -14
- helm/benchmark/metrics/medalign_metrics.py +0 -14
- helm/benchmark/metrics/medi_qa_metrics.py +0 -14
- helm/benchmark/metrics/medication_qa_metrics.py +0 -14
- helm/benchmark/metrics/mental_health_metrics.py +0 -14
- helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
- helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
- helm/benchmark/static_build/assets/index-b9779128.css +0 -1
- helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
- helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
- helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
- helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
- /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
- /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
- /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
- /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
- /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
crfm_helm-0.5.
|
|
1
|
+
crfm_helm-0.5.9.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
|
|
2
2
|
helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -12,8 +12,8 @@ helm/benchmark/model_metadata_registry.py,sha256=7XisV0an_edM8hvP8LSoCnTeUN2QLJr
|
|
|
12
12
|
helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
|
|
13
13
|
helm/benchmark/reeval_run.py,sha256=vImL8JNhveEOftZbRQ6JAxF0L-XCKIwh65M6fIYo4RU,7198
|
|
14
14
|
helm/benchmark/reeval_runner.py,sha256=bJPl7XVOVwK2fUA7voOVQYwVFEOfKVnrT2tbSGQzQY8,15584
|
|
15
|
-
helm/benchmark/run.py,sha256=
|
|
16
|
-
helm/benchmark/run_expander.py,sha256=
|
|
15
|
+
helm/benchmark/run.py,sha256=n6FYrAOkdCLVzPK-HqNx0MWkdqazviUVqMBAVcpSUUk,14004
|
|
16
|
+
helm/benchmark/run_expander.py,sha256=IMPhg16Yd3diaFRLGYcLCXGO4L_B2WXW69oZP0fx6lE,56857
|
|
17
17
|
helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
|
|
18
18
|
helm/benchmark/run_spec_factory.py,sha256=Hxeft3fXoWNz9yGo-2nIfb5pd3GDWlwYWc6YYvAkTjM,7785
|
|
19
19
|
helm/benchmark/runner.py,sha256=O-91eRRrNgE4_tlCVeLq9_0QsRfNELvaQT-KWtJw894,14618
|
|
@@ -25,7 +25,7 @@ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5
|
|
|
25
25
|
helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
|
|
26
26
|
helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
|
|
27
27
|
helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
-
helm/benchmark/adaptation/adapter_spec.py,sha256=
|
|
28
|
+
helm/benchmark/adaptation/adapter_spec.py,sha256=ONFbOdQiVbffP48_VonC3QgkwtJVObfG7j4wsCNGiJQ,6407
|
|
29
29
|
helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
|
|
30
30
|
helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
|
|
31
31
|
helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
|
|
@@ -40,7 +40,7 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=LI7uWpKIHvTUjGiy
|
|
|
40
40
|
helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=8LepCkI5b0MOL70pRPGb7vEH0KFMxIlpCQIVIzQT_vE,15030
|
|
41
41
|
helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=u_GFEgg5wmpate-s5U5aMsmcHuFmreJcA8J0TO1kPCc,14907
|
|
42
42
|
helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=-fY4mvzoGCCoR0HesT_xf2U2m2arVjgDuj59lm07_tg,1923
|
|
43
|
-
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=
|
|
43
|
+
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=nOCuX9lFKb3BHpznhTwpNCO0YsZBNhcMYuFnsLT_u-s,4579
|
|
44
44
|
helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py,sha256=RV6B3i5juBbJCtPDWzSfma49YXeDq3vQAQ5xQwnH-cA,3282
|
|
45
45
|
helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=hhH9ehK092j1WdUwrKYSy5PvNJ73gsIu6-5W8aLoYVI,2190
|
|
46
46
|
helm/benchmark/adaptation/adapters/test_adapter.py,sha256=7Nr6kMK3JN0UjMjjZ6P1fsD5xhOeaqh0D1xI6LFKCos,641
|
|
@@ -51,12 +51,13 @@ helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_T
|
|
|
51
51
|
helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=MvE7YdIt8Y0nefXLskY9gPmXp7QWi2b8cqg8fxUpzbM,1980
|
|
52
52
|
helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=KXP9MzDdmUao3uVjPgZYKjZQ_LvGHgZvI-86o3E87xA,6404
|
|
53
53
|
helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py,sha256=jyL61UxBsIr68hUz-jtjBUnyB2HBp5ESNyECGp_Gf6Q,2129
|
|
54
|
-
helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=
|
|
54
|
+
helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=GP2Fg1kW0-5jCkjgzVkhuN7YBQFyFgQpPTfpSgfbAvk,5178
|
|
55
55
|
helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=mjjyn9p31V-yt6S8BX7SvqvkQ56D9cKSff6d-daM6HM,10250
|
|
56
56
|
helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=6nuz0Vn89A1mOedutsiq2SwTOG3qn8dUZTiaXhKffiw,3587
|
|
57
57
|
helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
helm/benchmark/annotation/aci_bench_annotator.py,sha256=
|
|
58
|
+
helm/benchmark/annotation/aci_bench_annotator.py,sha256=aAzXqbjj_3bv0-ATCrFu4JvrsqORE5lkYpgxtXAEGSA,2777
|
|
59
59
|
helm/benchmark/annotation/air_bench_annotator.py,sha256=CDyHVwD4eoymfLduJC5WvvhDX1DOgYBqgjvqBjoCfU8,3501
|
|
60
|
+
helm/benchmark/annotation/alrage_annotator.py,sha256=3DcHbD8WXTg5PN3feipHTsFls0v5owMyb_rqpNWokls,3531
|
|
60
61
|
helm/benchmark/annotation/annotator.py,sha256=__BkMVpAEpSs1pbwPK5sVWLdCAXnjsHcPYgmOqmNPu0,1843
|
|
61
62
|
helm/benchmark/annotation/annotator_factory.py,sha256=8uo5uz1UpIVCHUd7CRvmy6b9XB1gspdHmgxH5UZMPVI,2335
|
|
62
63
|
helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
|
|
@@ -65,28 +66,28 @@ helm/benchmark/annotation/autobencher_safety_annotator.py,sha256=w_xjZmY1zuLjVvV
|
|
|
65
66
|
helm/benchmark/annotation/bigcodebench_annotator.py,sha256=CJG2pn1DeHJCp3yHETRquNIkCHfd6ZNuOiUjG1cQ_JY,4448
|
|
66
67
|
helm/benchmark/annotation/bird_sql_annotator.py,sha256=FQDZs1-O1jfJOET0eDeU7lf5xLaiMPohC5BdmQ4XkzI,2436
|
|
67
68
|
helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
|
|
68
|
-
helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=
|
|
69
|
+
helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=R6Hexh20T6WBBRBhwLhQv_IQvW7Z55Pf9IYBCWxUTaQ,2517
|
|
69
70
|
helm/benchmark/annotation/czech_bank_qa_annotator.py,sha256=YIH5g4zHe3BQF2Y-6uRVw7g9u_SPBncqBobdvZdIzyA,3096
|
|
70
|
-
helm/benchmark/annotation/dischargeme_annotator.py,sha256=
|
|
71
|
+
helm/benchmark/annotation/dischargeme_annotator.py,sha256=blP76BgwmbHDDDRdaaGwtTHfukCvXXLN72vjGj_LI_U,3225
|
|
71
72
|
helm/benchmark/annotation/ehr_sql_annotator.py,sha256=Izpq0biZ9lkJOPk6NwTuv2wk8Bg88vj56BKZrY8XhT4,4021
|
|
72
73
|
helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
|
|
73
74
|
helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
|
|
74
75
|
helm/benchmark/annotation/helpdesk_call_summarization_annotator.py,sha256=I7TjpN502Sa-Z4uUKemJXSAdOiVA3MMO92YIAAXeDBg,6034
|
|
75
76
|
helm/benchmark/annotation/live_qa_annotator.py,sha256=PSff59mU_t3ypmptYsYRKU3m1vMLF0dMyUySIOxBrPw,3553
|
|
76
|
-
helm/benchmark/annotation/med_dialog_annotator.py,sha256=
|
|
77
|
-
helm/benchmark/annotation/medalign_annotator.py,sha256=
|
|
78
|
-
helm/benchmark/annotation/medi_qa_annotator.py,sha256=
|
|
79
|
-
helm/benchmark/annotation/medication_qa_annotator.py,sha256=
|
|
80
|
-
helm/benchmark/annotation/mental_health_annotator.py,sha256=
|
|
81
|
-
helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=
|
|
82
|
-
helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=
|
|
83
|
-
helm/benchmark/annotation/model_as_judge.py,sha256=
|
|
84
|
-
helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=
|
|
85
|
-
helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=
|
|
77
|
+
helm/benchmark/annotation/med_dialog_annotator.py,sha256=uGp8d74WGgOOiexpoKj5CMdr5jOvAnfe-ZLKGSHT6ng,2711
|
|
78
|
+
helm/benchmark/annotation/medalign_annotator.py,sha256=glAPpVdIfebm39GhrBY3BE2hdofVBIBXUxPU3_qqZOw,2789
|
|
79
|
+
helm/benchmark/annotation/medi_qa_annotator.py,sha256=bLXxXe-obPvud15sPrqp9i-wSq1QqguCPt_UJaXRz_I,2623
|
|
80
|
+
helm/benchmark/annotation/medication_qa_annotator.py,sha256=98XU2VVSoQ8XlAkuVKWnNBOS76X_lIviq_A-nyrlqcw,2639
|
|
81
|
+
helm/benchmark/annotation/mental_health_annotator.py,sha256=08b_XqgfSpIhutDUaaSgVRdiZB6metAQQ_WHF8U2-c0,2824
|
|
82
|
+
helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=a9AHMFY2shV4I2qVUfKnOvZFbmQjL5vPKsbytTBfU0A,2723
|
|
83
|
+
helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=eu9rZhRAXVbo0j7BP7vuAKwGkuwhTCvVRvJ4dPbcR4I,2753
|
|
84
|
+
helm/benchmark/annotation/model_as_judge.py,sha256=eZZlyCrW6U9a8bHhaPrbV1AJ23q3uP0ho1NbVErGBXs,12160
|
|
85
|
+
helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=ZgJVtNpab3BrMs0ZXFW6L0CNp1Hcqfgv7FHP4rpxFPg,2750
|
|
86
|
+
helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=VtHiEGFZLUsd3zkgnSoti5itZnDPgERMPZlORkEp7ok,2865
|
|
86
87
|
helm/benchmark/annotation/omni_math_annotator.py,sha256=PvZZb1oGw60qT-oHRIs93AZbh5wTbpsmD8BforudFhA,6144
|
|
87
88
|
helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
|
|
88
89
|
helm/benchmark/annotation/spider_annotator.py,sha256=B48ylGg5J7xuTSUio7VztdXk3lI6ilMqrUvAD-ve0sE,621
|
|
89
|
-
helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=
|
|
90
|
+
helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=Te9rQhcUV-T2I4oBCBzInAZW65EV3lv0LXLPgGzLd8c,2735
|
|
90
91
|
helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
|
|
91
92
|
helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
|
|
92
93
|
helm/benchmark/annotation/wildbench_annotator.py,sha256=OXR59zdKw9W7v3Q_sFnt1cEPN3nOzQDVqSbh4jDbEUs,5457
|
|
@@ -126,17 +127,16 @@ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8
|
|
|
126
127
|
helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
|
|
127
128
|
helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
|
|
128
129
|
helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
|
-
helm/benchmark/metrics/
|
|
130
|
-
helm/benchmark/metrics/
|
|
130
|
+
helm/benchmark/metrics/air_bench_metrics.py,sha256=WvfjjHLSE567Y7BC8tGlMINBwP-d1URRUZcMUF1yf1g,171277
|
|
131
|
+
helm/benchmark/metrics/alrage_metric.py,sha256=4QHtL00aEIRYQx2QkDs5uldu7ZAkbFYMALH6DL9LSJg,1233
|
|
131
132
|
helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
|
|
132
|
-
helm/benchmark/metrics/basic_metrics.py,sha256=
|
|
133
|
-
helm/benchmark/metrics/bbq_metrics.py,sha256=
|
|
133
|
+
helm/benchmark/metrics/basic_metrics.py,sha256=3y1M0mFJL8FlkMkQWWs4ZV2NiriaMGydddbeY3F-vXk,30547
|
|
134
|
+
helm/benchmark/metrics/bbq_metrics.py,sha256=oHd4U6Q5sv2h0UtVnAJ2_cf32XiISWaDvc-2y0fU-gk,6574
|
|
134
135
|
helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
|
|
135
136
|
helm/benchmark/metrics/bias_word_lists.py,sha256=eyk6we2J4SW8ZaZxQUWLB7Yapn92uM5TCekhFB5vg-U,13908
|
|
136
137
|
helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
|
|
137
138
|
helm/benchmark/metrics/bird_sql_metrics.py,sha256=ooCuXW5nPpRs_-4seCONQmn25DzTbcUgGXznXTK9y0Y,1153
|
|
138
|
-
helm/benchmark/metrics/
|
|
139
|
-
helm/benchmark/metrics/classification_metrics.py,sha256=1Xa_bO4PqIAV2iZitE69kc4VKS4A7PloG5ElZAgvmh8,8851
|
|
139
|
+
helm/benchmark/metrics/classification_metrics.py,sha256=CfkyMiiWo74VbIB7eEhNxIcPbGA_imbzETrAExqn5WM,9498
|
|
140
140
|
helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIefAoi_edUwKpp-XxYbmeU,2740
|
|
141
141
|
helm/benchmark/metrics/cleva_harms_metrics.py,sha256=xVubv2pG3iinVs3namoVHWAmV9oUPywZwFB_0JGhP_w,11277
|
|
142
142
|
helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
|
|
@@ -149,57 +149,47 @@ helm/benchmark/metrics/codeinsights_edge_case_metrics.py,sha256=B7EEELwwH67Vxmgr
|
|
|
149
149
|
helm/benchmark/metrics/codeinsights_metric_specs.py,sha256=BkKWII9yTkChdZVsGeeeCbiWQDYvvcAKo0nxi_RTTUk,1798
|
|
150
150
|
helm/benchmark/metrics/comet_metric.py,sha256=EJWZ9x8CGeDDQlfxYrY-np_NVJBt5gun0XLJvtpjXVI,4798
|
|
151
151
|
helm/benchmark/metrics/common_metric_specs.py,sha256=JKqmO4ovBdfOYKC-00OSzOMv--g9NTCVfUHLaz-1Uns,6025
|
|
152
|
-
helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=
|
|
152
|
+
helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=F2bfg8XbjH3WOQ0O_c5S7UUxgpzu7AD5wRtNdNcJlUs,2997
|
|
153
153
|
helm/benchmark/metrics/copyright_metrics.py,sha256=RYOWKFN97UCD2Vj51gzKGbnnY9wAq6KJgiRt2cecVfs,7824
|
|
154
154
|
helm/benchmark/metrics/czech_bank_qa_metrics.py,sha256=bKoooK2T5v_fFKNbUnsuW6Mv9muAirJD5lTrzuHfpz8,1113
|
|
155
155
|
helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=x66XP0iQGk4ThT7ddmrlLCA0XF4arRbQMDT42LHf2kE,3297
|
|
156
156
|
helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py,sha256=TxTkkWdx6d6ym0MirZTiucl_TWFdn4uJLnlTfLjQvgk,2925
|
|
157
157
|
helm/benchmark/metrics/decodingtrust_privacy_metrics.py,sha256=OU7lka-hm6PubR5Gjj4uNyrqhjlfhe0mmjBCAz9vlRs,3456
|
|
158
158
|
helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py,sha256=bW4zafRyKFa__8fGrdiTPUu848ovNnvakLCfqcMrcHk,6461
|
|
159
|
-
helm/benchmark/metrics/dischargeme_metrics.py,sha256=D8LI52E17hNSPDpEvb2tw1za4QWDE3p9xgx7Nm9l7_Y,454
|
|
160
159
|
helm/benchmark/metrics/disinformation_metrics.py,sha256=5n8wgRBb6FaDjqe1nR3Cj9aS48esmMsIUq4KpBHoQoU,7870
|
|
161
|
-
helm/benchmark/metrics/dry_run_metrics.py,sha256=
|
|
162
|
-
helm/benchmark/metrics/efficiency_metrics.py,sha256=
|
|
163
|
-
helm/benchmark/metrics/ehr_sql_metrics.py,sha256=
|
|
160
|
+
helm/benchmark/metrics/dry_run_metrics.py,sha256=ouS6_8lESuCGSQgegN4xKKyoGr7Rb1K-dufHPT1fDwc,4886
|
|
161
|
+
helm/benchmark/metrics/efficiency_metrics.py,sha256=VnM5PgxxK6UKk9MzPprnN_7d-t6xVlIgFMQYrFh8dwY,15262
|
|
162
|
+
helm/benchmark/metrics/ehr_sql_metrics.py,sha256=yyz-2tsk4Fu6D5ELp3cbLaAWGjqtDGrUdvFvgHvxevg,7418
|
|
164
163
|
helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
|
|
165
|
-
helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=
|
|
164
|
+
helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=kVYKCFX6LiG8ucA12Ib3RAkDd1kFaSONEtvgIatvIrE,31884
|
|
166
165
|
helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
|
|
167
166
|
helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
|
|
168
|
-
helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=
|
|
167
|
+
helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=Lkil9DRtO3NS3zr5Ef_qqGxZBL-ObCNpbKoJvMhCrb8,4762
|
|
169
168
|
helm/benchmark/metrics/gpt4_audio_critique_metrics.py,sha256=L9tGFwvl1-Ew3MdInQ7KPa8OlI5YexIB2KuCYVYsuPY,7023
|
|
170
169
|
helm/benchmark/metrics/gpt4_audio_refusal_metrics.py,sha256=vYPRJq-4uNhUWUWMrDkpHmfIBkhEyAgaMNEI6RKPP80,5896
|
|
171
170
|
helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
|
|
172
|
-
helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=
|
|
173
|
-
helm/benchmark/metrics/ifeval_metrics.py,sha256=
|
|
174
|
-
helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=
|
|
175
|
-
helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=
|
|
176
|
-
helm/benchmark/metrics/language_modeling_metrics.py,sha256=
|
|
177
|
-
helm/benchmark/metrics/live_qa_metrics.py,sha256=
|
|
178
|
-
helm/benchmark/metrics/llm_jury_metrics.py,sha256
|
|
171
|
+
helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=5Z43F9ZI9OHBxeZENBGSE4fB1YTo1NKOquPt_Sw-F5s,1835
|
|
172
|
+
helm/benchmark/metrics/ifeval_metrics.py,sha256=33IqTVdYlX9ZI6sR-FfFAKbVJ9tAGDNqZpLHS5yInio,3036
|
|
173
|
+
helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=AK_ZpayimVZ9MxX8CJG-K1uPKo2j1dNJ_H9uSz1CWiY,11612
|
|
174
|
+
helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=rnvVlvFgWwaavaIu9n8iVlODhkk2g3liOiK7kwfGbN8,5474
|
|
175
|
+
helm/benchmark/metrics/language_modeling_metrics.py,sha256=NK8vYLFyFAidDG8UXVkP242zbg_6W6EZ4xZPNbokGlw,5001
|
|
176
|
+
helm/benchmark/metrics/live_qa_metrics.py,sha256=YGodrQ-b9ucQTK3ICKXRla5r26RR0wxC4iPOTcYrV1k,1195
|
|
177
|
+
helm/benchmark/metrics/llm_jury_metrics.py,sha256=-5w8tFG4JE0cMcH3KS7xQ1z6mbdtDf7reCMz6u5vtag,2158
|
|
179
178
|
helm/benchmark/metrics/lmkt_metric_specs.py,sha256=0Fa0xLjQDXwsRCE5VqGzEfb5ZdzKsDoSCwR_zHogFcc,376
|
|
180
179
|
helm/benchmark/metrics/lmkt_metrics.py,sha256=GaZTfl-NQXa1YSzcJUGlZ5wZURH1CnJxGkPFBj8ydTQ,1856
|
|
181
180
|
helm/benchmark/metrics/machine_translation_metrics.py,sha256=22vaGBCSw12uM1wmtDG-MBBZW8OiTZwNPaerjckdtDE,3860
|
|
182
|
-
helm/benchmark/metrics/
|
|
183
|
-
helm/benchmark/metrics/
|
|
184
|
-
helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=9wZgg20-9QBNk0_XhuwR3LT940fqDPkCM4Kl0dPkbAs,5353
|
|
185
|
-
helm/benchmark/metrics/medec_metrics.py,sha256=hNBOGX52G_QOmgTCp9LnIMrmGSRxbb5vgjxKU069TMQ,4152
|
|
186
|
-
helm/benchmark/metrics/medi_qa_metrics.py,sha256=JWAEMuT0UXDZrb7qHn13W6W79ilbprk492V_9vWrB4s,432
|
|
187
|
-
helm/benchmark/metrics/medication_qa_metrics.py,sha256=wit3nKNWpGFfgauu6Xye2IDTePAS0VHAQI_7OO9HR6M,462
|
|
181
|
+
helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=2viECYEj8y65_w5MPH295Z1OgLTNrgP_iMzzYSgc2hQ,5895
|
|
182
|
+
helm/benchmark/metrics/medec_metrics.py,sha256=5z3HKZCEuQsOix-22PPzTHhWlYmjyHOAVFV-bgGUVJE,5137
|
|
188
183
|
helm/benchmark/metrics/melt_bias_metric.py,sha256=mHDCkRGLD-0pyJA_depi_KX3sn7g7Bgd3_m0XdLQahY,11520
|
|
189
184
|
helm/benchmark/metrics/melt_bias_word_lists.py,sha256=xA0araUdszAIOqfxiTi6MIJhKYwr_Gwsc1L9qinZx9U,27891
|
|
190
185
|
helm/benchmark/metrics/melt_metric_specs.py,sha256=zaeV57LQEl8qK7be36NaojiUJlzmkoKY8JyOkOVuPqs,1619
|
|
191
186
|
helm/benchmark/metrics/melt_toxicity_metric.py,sha256=ni6bb_QC51NM5jQpbFYLWtsQy3tNOLwQ_5b3PDV5vVk,4193
|
|
192
|
-
helm/benchmark/metrics/
|
|
193
|
-
helm/benchmark/metrics/metric.py,sha256=jqQyiKDq_pQv-ulGqfZI56ydRDQs3N3XhfHIPysUhrk,14311
|
|
187
|
+
helm/benchmark/metrics/metric.py,sha256=gF7KlWPoPIGUvbvqDeXagBNBZnl8rclh8JfgCPvuXvs,15065
|
|
194
188
|
helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
|
|
195
189
|
helm/benchmark/metrics/metric_service.py,sha256=bJaM7GisEgSWR3vPTcg7b67XF9X2K5viODacIgbGb24,1692
|
|
196
|
-
helm/benchmark/metrics/
|
|
197
|
-
helm/benchmark/metrics/mimic_rrs_metrics.py,sha256=x3vSj1VG1UkNF3gbgJYDeA4z-crxfGIkK7iZo0xjq8c,442
|
|
198
|
-
helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=Pu9efXoBrhsvxSeGHqwbUA5k365-pJTeXpMNhmcg0L0,3927
|
|
199
|
-
helm/benchmark/metrics/mtsamples_procedures_metrics.py,sha256=XrddVk-gnc8jF8amCI1RBa_XTS9yEXD2Y9Ld9W7Q-m8,497
|
|
200
|
-
helm/benchmark/metrics/mtsamples_replicate_metrics.py,sha256=rmH34aTX_wZWxLi4jrxf3sR1RIqNRF0QDANLRQUGhqM,492
|
|
190
|
+
helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=3kypTnrkbdG-Dpdbg_A_WQYVx35ylvZFjh2-R5wvhSE,5347
|
|
201
191
|
helm/benchmark/metrics/nltk_helper.py,sha256=QMEps-lqJZ_pCgvjlMf4BvC0pzDu3ez5jit5F4p8dAk,1313
|
|
202
|
-
helm/benchmark/metrics/omni_math_metrics.py,sha256=
|
|
192
|
+
helm/benchmark/metrics/omni_math_metrics.py,sha256=WF0cWpmJwduTdZw7c_O5QsXDNwet5GgHYV0Ww9PfKc8,1709
|
|
203
193
|
helm/benchmark/metrics/openai_mrcr_metrics.py,sha256=TAop7G50FKaR-Jyo2EGLqmMOfJRmS2vNRDFiifa6mhg,2313
|
|
204
194
|
helm/benchmark/metrics/output_processing_metric.py,sha256=ey9UBi2f3780OwFlp82ymzfjLR3MA2fpA9vW5R4W5TA,2581
|
|
205
195
|
helm/benchmark/metrics/output_processors.py,sha256=ULZlDBOf6NupAXzDKBKyTDdgPZ5PSxOAlOYTbrQEek8,472
|
|
@@ -209,24 +199,24 @@ helm/benchmark/metrics/ranking_metrics.py,sha256=hSNKy4h7zRkGYSgo6RWt4PXQztA5ZX1
|
|
|
209
199
|
helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9_Gywuvb1r9FLE,6092
|
|
210
200
|
helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
|
|
211
201
|
helm/benchmark/metrics/ruler_qa_metrics.py,sha256=OuiA0ksByl0Tw1Oal7zbedhKjTrhJgQJDLXAgoTLXuc,1473
|
|
212
|
-
helm/benchmark/metrics/safety_metrics.py,sha256=
|
|
213
|
-
helm/benchmark/metrics/seahelm_metrics.py,sha256=
|
|
202
|
+
helm/benchmark/metrics/safety_metrics.py,sha256=PZjyNsxiBe4VTdIujsqrLUtsQfLUpcm8snlAk3g9zWA,3870
|
|
203
|
+
helm/benchmark/metrics/seahelm_metrics.py,sha256=GlNoK1O7kcuiuEOJEgTsnrfK9TcGwH7-tPj6Qe6JV90,7493
|
|
214
204
|
helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
|
|
215
205
|
helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
|
|
216
|
-
helm/benchmark/metrics/starr_patient_instructions_metrics.py,sha256=YHdTeIFdZxRbvqBnlWpAyIsWzZyWAjjDFuKOXhHYiSM,525
|
|
217
206
|
helm/benchmark/metrics/statistic.py,sha256=ATuOm0jU3L-0ELiZaF2GVMNF22W66-rMvzxRtlfqcII,3446
|
|
218
207
|
helm/benchmark/metrics/summarization_critique_metrics.py,sha256=-mki8-zvZx54dQg8X0BG2Y6wmfypQhkIuD_9ZjNBl78,4782
|
|
219
|
-
helm/benchmark/metrics/summarization_metrics.py,sha256=
|
|
208
|
+
helm/benchmark/metrics/summarization_metrics.py,sha256=S99uhtvBtH0UQS-gDEuQLLTPYNG-dNUV1n3OnaOP7p8,22647
|
|
220
209
|
helm/benchmark/metrics/test_bias_metrics.py,sha256=qEZsCULvwjVdIyfNgJSc2L7Xp9suKKW7L5OuQmGrwZ8,6393
|
|
221
210
|
helm/benchmark/metrics/test_classification_metrics.py,sha256=CRDMGmVmzEUnNaM0C02qUTOU2AS11Mt2-GdEl89y7lw,9541
|
|
222
211
|
helm/benchmark/metrics/test_disinformation_metrics.py,sha256=U3ZmS9s33oimTQbKO-7pgWeX_WiDB9chlOCtf_vslXw,2249
|
|
223
212
|
helm/benchmark/metrics/test_evaluate_reference_metrics.py,sha256=B7xtDDWPAxF7d-vcUx_R51hFMae-DD52nUwbu_eWt6Y,1601
|
|
224
213
|
helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLpwut37DmA,771
|
|
225
214
|
helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
|
|
226
|
-
helm/benchmark/metrics/toxicity_metrics.py,sha256=
|
|
215
|
+
helm/benchmark/metrics/toxicity_metrics.py,sha256=s5Ypodu4cBmIc_fCbbQ9kCqcvVJf-OQ6zAvb85r8Cv8,5509
|
|
227
216
|
helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
|
|
217
|
+
helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py,sha256=dSJXAS7--n2sxRaajDo20Omzwx4LY5x0gd8nTxX3DAE,2317
|
|
228
218
|
helm/benchmark/metrics/unitxt_metrics.py,sha256=8fawxnrg0xsAe0xO2wbL7S_yisj8RzJnrn6xtk8C6q8,4852
|
|
229
|
-
helm/benchmark/metrics/wildbench_metrics.py,sha256=
|
|
219
|
+
helm/benchmark/metrics/wildbench_metrics.py,sha256=THOguxE6GUun0zTr-pITXfQGEd664sScrfIzFGdNPXk,2163
|
|
230
220
|
helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
231
221
|
helm/benchmark/metrics/ifeval/instructions.py,sha256=qNoa1vMPDNz6ORWfyMv_efwKZ4U5zkI-cf4aApyfSqU,53247
|
|
232
222
|
helm/benchmark/metrics/ifeval/instructions_registry.py,sha256=NprvkRQz0QWaIpJsFp95CQCWsnuY_57ZSqFn2IISDP8,7555
|
|
@@ -267,7 +257,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
|
|
|
267
257
|
helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=Ir4u8blJWTRtEBogb6u22qCy3JXAIzvx-Th6dSBLfdw,698
|
|
268
258
|
helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
|
|
269
259
|
helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
|
-
helm/benchmark/metrics/summac/model_summac.py,sha256=
|
|
260
|
+
helm/benchmark/metrics/summac/model_summac.py,sha256=zheAPIJAz5MH6GU1gXpWSc9Q9gouhNzYx92PDd5PUXU,17447
|
|
271
261
|
helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
|
|
272
262
|
helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
273
263
|
helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=XDZGK8h84F2w_pK8Zjko8ssKZmVxKFqTOuHL0mLBzMY,694
|
|
@@ -286,11 +276,12 @@ helm/benchmark/metrics/vision_language/image_utils.py,sha256=xwtydR8-s23cJacIGXD
|
|
|
286
276
|
helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
287
277
|
helm/benchmark/presentation/contamination.py,sha256=07IuIP92vfuI0GwfeNC-i_NZUlF8N1azzagC19YHOMQ,2802
|
|
288
278
|
helm/benchmark/presentation/create_plots.py,sha256=bM6UNzH0Bx8Bv2iKcyMoYp7IwfCZSQob-w_XOOI6r1M,29090
|
|
289
|
-
helm/benchmark/presentation/run_display.py,sha256=
|
|
290
|
-
helm/benchmark/presentation/run_entry.py,sha256=
|
|
291
|
-
helm/benchmark/presentation/schema.py,sha256=
|
|
292
|
-
helm/benchmark/presentation/summarize.py,sha256=
|
|
279
|
+
helm/benchmark/presentation/run_display.py,sha256=byOcVknL6UgwSBGWUPiWEdSBRbW6PYwmo7vJ1Ms50iY,12890
|
|
280
|
+
helm/benchmark/presentation/run_entry.py,sha256=_hgsKMpZ-WpgaK7nta68GohXe07JCyaWD6jRjINujXk,1182
|
|
281
|
+
helm/benchmark/presentation/schema.py,sha256=AMGmEwqxkHoZFkOKD-UVZ8aXwgbafG6KYASsWo6YEw8,11005
|
|
282
|
+
helm/benchmark/presentation/summarize.py,sha256=m3RSw6ogUFasdeZ8xSUh4wKV-nYzVi3iQv-KrrwtDFM,67828
|
|
293
283
|
helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
|
|
284
|
+
helm/benchmark/presentation/taxonomy_info.py,sha256=pPIFOicis9H1sWeXApfsHHcqZpus1ezukxLQO7Lj2Vg,473
|
|
294
285
|
helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
|
|
295
286
|
helm/benchmark/presentation/test_create_plots.py,sha256=1FrJZnPW-5QUQKt_pf4y47uDha4B8wHyY1o5hqhKWhc,1293
|
|
296
287
|
helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
|
|
@@ -299,12 +290,12 @@ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9i
|
|
|
299
290
|
helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
|
|
300
291
|
helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
301
292
|
helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
|
|
302
|
-
helm/benchmark/run_specs/arabic_run_specs.py,sha256=
|
|
293
|
+
helm/benchmark/run_specs/arabic_run_specs.py,sha256=x3pBNbUcYfx6f0APXroLBQodOgv6oWuJNb301c_QUhg,7768
|
|
303
294
|
helm/benchmark/run_specs/audio_run_specs.py,sha256=baJz5LZiwWZP3KD0hluKgpidtswzdorQnshX0CoqKAc,23383
|
|
304
|
-
helm/benchmark/run_specs/bluex_run_specs.py,sha256=
|
|
295
|
+
helm/benchmark/run_specs/bluex_run_specs.py,sha256=jwrH33YeXqoAex11071XMUwTCKNkoJTQQS7iNoJDLmg,1797
|
|
305
296
|
helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
|
|
306
297
|
helm/benchmark/run_specs/capabilities_run_specs.py,sha256=sbqhIj4AoujV45erwoVK61lWdlkjg4qssmGlu0eSr1U,12067
|
|
307
|
-
helm/benchmark/run_specs/classic_run_specs.py,sha256=
|
|
298
|
+
helm/benchmark/run_specs/classic_run_specs.py,sha256=fe98HhzMkfloKpOZbi_mIMp1Hi-clv22rgWT-EdS0e4,53743
|
|
308
299
|
helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
|
|
309
300
|
helm/benchmark/run_specs/codeinsights_run_specs.py,sha256=lz3yysrPjCIiObzrIkRjJsWzkABh9qIXn-o7FSqZPl0,9207
|
|
310
301
|
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
|
|
@@ -318,8 +309,8 @@ helm/benchmark/run_specs/imdb_ptbr_run_specs.py,sha256=nkW5A_xeD5kCKeJVxsL8RFS8r
|
|
|
318
309
|
helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
|
|
319
310
|
helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
|
|
320
311
|
helm/benchmark/run_specs/lmkt_run_specs.py,sha256=tNZvlA4mXUX-NBC9enRR90qFLeh8SNGFq701rXmXc18,5376
|
|
321
|
-
helm/benchmark/run_specs/long_context_run_specs.py,sha256=
|
|
322
|
-
helm/benchmark/run_specs/medhelm_run_specs.py,sha256
|
|
312
|
+
helm/benchmark/run_specs/long_context_run_specs.py,sha256=wn7yY9rMIBJY30SN-275qg9U49aGPUl4hVZphKYFkBI,6442
|
|
313
|
+
helm/benchmark/run_specs/medhelm_run_specs.py,sha256=CGFHmoQB58gpqi4b4BbffuHzRBs12aeq8suUkaaFbqc,53262
|
|
323
314
|
helm/benchmark/run_specs/melt_run_specs.py,sha256=729MkALud2wG07yulx9zqAzejdXW_eVGkfF5cQWeGGY,32031
|
|
324
315
|
helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
|
|
325
316
|
helm/benchmark/run_specs/multilingual_run_specs.py,sha256=umf8e6ZDgRXiU0G_BPoovj1UZ_dxyrXtIQ7i9WC6USg,2296
|
|
@@ -327,41 +318,46 @@ helm/benchmark/run_specs/oab_exams_specs.py,sha256=ws7Vppo_zJvxKqQ_sNhm9N7-5eQbX
|
|
|
327
318
|
helm/benchmark/run_specs/safety_run_specs.py,sha256=3X6tYaq2SlRsZs9q6SCtBUgjNEpOwUtV6M7iY2Kowm0,6807
|
|
328
319
|
helm/benchmark/run_specs/seahelm_run_specs.py,sha256=R3mg4_OoaRizZ5n0FHcUQpJLny3j-ulBlHzOyF0a0Ok,23904
|
|
329
320
|
helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
|
|
330
|
-
helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=
|
|
321
|
+
helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=Hx0BxdzORXU8cyEGFYJJWs60Ssuny6tIpWqCR6fFSfI,7464
|
|
331
322
|
helm/benchmark/run_specs/sql_run_specs.py,sha256=JWCICELKi81m11MggyR6CJNl3vpWPwk4kr8DZSsWvj4,1965
|
|
332
323
|
helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak52ycvu8DAnhabw9rc7yA,1129
|
|
333
324
|
helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
|
|
334
325
|
helm/benchmark/run_specs/vlm_run_specs.py,sha256=v-eWuDYc8u5HO46isLONPfAWv5zdA1ZOQrdyOvX3vlU,37512
|
|
335
326
|
helm/benchmark/run_specs/winogrande_afr_run_specs.py,sha256=dhOm8z6Q_ZpnzYKrsS0nEbRQPWs_phkXxmL5pxCJzQA,1853
|
|
327
|
+
helm/benchmark/run_specs/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
328
|
+
helm/benchmark/run_specs/medhelm/benchmark_config.py,sha256=O1D5N4q1QwzrI1ioAQK815cch6hNoJoaIzzAlJo6GXk,7860
|
|
336
329
|
helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
337
|
-
helm/benchmark/scenarios/aci_bench_scenario.py,sha256=
|
|
338
|
-
helm/benchmark/scenarios/air_bench_scenario.py,sha256=
|
|
330
|
+
helm/benchmark/scenarios/aci_bench_scenario.py,sha256=ry22AJdd3lvQuEFdzNf6wXzMyPFn46b0kScrYdpj-nA,6783
|
|
331
|
+
helm/benchmark/scenarios/air_bench_scenario.py,sha256=Ufcpxm5KaXHI2FfK4tdQsURaCSdcWNcXVaNmYkE4bo4,2820
|
|
339
332
|
helm/benchmark/scenarios/alghafa_scenario.py,sha256=FJXO3W6qYzCgLJMSiJEhpddNcFyR3N5Brh8pATW_9GM,5217
|
|
340
|
-
helm/benchmark/scenarios/
|
|
341
|
-
helm/benchmark/scenarios/
|
|
342
|
-
helm/benchmark/scenarios/
|
|
343
|
-
helm/benchmark/scenarios/
|
|
333
|
+
helm/benchmark/scenarios/alrage_scenario.py,sha256=MN-gMQboAaJCasYNg_rLJVgcrk5KZ1WCBN9R_lyRrhE,1499
|
|
334
|
+
helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=EzS8td1lJE1yxEwFtuwTbjHtHm1hGIaur93BKAL_Hm4,6212
|
|
335
|
+
helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=3axwlXnKI0-mRDaYfD-hcCNZwtw_1jte_5THuyuV7Ok,3683
|
|
336
|
+
helm/benchmark/scenarios/arabic_exams_scenario.py,sha256=hv28A2pM66ejrO6oFOgmCx3JIP_nqwdUYvIsfGc0Kew,5359
|
|
337
|
+
helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=xMRWPA16Wn8ONgAeyyHOB95X2SQca7tKUpUP8L5ZNJc,3018
|
|
338
|
+
helm/benchmark/scenarios/aratrust_scenario.py,sha256=ismiWLm1M6JmBgVZ0SoVglaOyFbAlyOHsSsiAv8Np8Y,3125
|
|
344
339
|
helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=fOCHumFWZa4OJZcTZefJiJbdWsb3zjQnWLJYd10Cctw,2496
|
|
345
340
|
helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
|
|
346
|
-
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=
|
|
347
|
-
helm/benchmark/scenarios/banking77_scenario.py,sha256=
|
|
348
|
-
helm/benchmark/scenarios/bbq_scenario.py,sha256=
|
|
341
|
+
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=CAmh3GfFjB9Xsuh9K-PUu-2xIFTV0v0YNgWbSuv09Y0,5711
|
|
342
|
+
helm/benchmark/scenarios/banking77_scenario.py,sha256=cYM5Itksjy-tufjC9mmIKy_FG0kqPuDkDIhPhDZUX2I,2773
|
|
343
|
+
helm/benchmark/scenarios/bbq_scenario.py,sha256=VAlwXpAegpmvb3Zf-oMaBE7HGnfKVbhprCn2SE8ayKM,10355
|
|
344
|
+
helm/benchmark/scenarios/best_chatgpt_prompts.yaml,sha256=KZdXj4KUbkwFzgIEXVakMpZLTqJ7rldxNuXVDIdlk-A,31304
|
|
349
345
|
helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
|
|
350
346
|
helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
|
|
351
|
-
helm/benchmark/scenarios/bird_sql_scenario.py,sha256=
|
|
347
|
+
helm/benchmark/scenarios/bird_sql_scenario.py,sha256=KoCcy4enWJzrwK1X405EWnK8E0kjpmcwhVFfBrKSRRQ,4235
|
|
352
348
|
helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
|
|
353
349
|
helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
|
|
354
|
-
helm/benchmark/scenarios/bluex_scenario.py,sha256=
|
|
355
|
-
helm/benchmark/scenarios/bold_scenario.py,sha256=
|
|
356
|
-
helm/benchmark/scenarios/boolq_scenario.py,sha256=
|
|
350
|
+
helm/benchmark/scenarios/bluex_scenario.py,sha256=K4ob5_rd1hTOzlPJjuEvujcOdt_Ybgxj3jqj_BYjA9o,2599
|
|
351
|
+
helm/benchmark/scenarios/bold_scenario.py,sha256=MsXwUiJgZgFyVxh-E5gAagi4aPGicDe2C0xct5lQYwA,4882
|
|
352
|
+
helm/benchmark/scenarios/boolq_scenario.py,sha256=qQyJ0BdljChX9U_eEETdFyWLCSQvI0D4NrY6zOCXPh8,8824
|
|
357
353
|
helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
|
|
358
354
|
helm/benchmark/scenarios/casehold_scenario.py,sha256=QSe0D3KQJhlTOo6kM9OHwdKy6NlclsFGRVCAB3mTG7s,3174
|
|
359
|
-
helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=
|
|
355
|
+
helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=PE4vbj0y39674UIIdH6mgUwSKe4wW_XqRrNsksrwQRs,5104
|
|
360
356
|
helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
|
|
361
|
-
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=
|
|
362
|
-
helm/benchmark/scenarios/clear_scenario.py,sha256=
|
|
363
|
-
helm/benchmark/scenarios/cleva_scenario.py,sha256=
|
|
364
|
-
helm/benchmark/scenarios/code_scenario.py,sha256=
|
|
357
|
+
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=N1ZmQyKXkRjRXKPTyEHOpbDhBkjcY8WyHPKMWaBl2qo,5481
|
|
358
|
+
helm/benchmark/scenarios/clear_scenario.py,sha256=cLFlcWKUT1Uy6bYDnAjf1ySR06mK16NhN1AtsaEBZs0,7226
|
|
359
|
+
helm/benchmark/scenarios/cleva_scenario.py,sha256=WQDiDCVo6bhtI926_p3uvr1WhIAkBU1gLNLA5viEwMw,78127
|
|
360
|
+
helm/benchmark/scenarios/code_scenario.py,sha256=tdki0m59NzN4YOm1pMfaSkUP5uUDeTNMqUAB84p5QGI,13953
|
|
365
361
|
helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py,sha256=KC-5MQ-d8Nn46aDN4FaPxmd6yk1DtVUmVR-CIZsNCp4,1738
|
|
366
362
|
helm/benchmark/scenarios/code_scenario_helper.py,sha256=TnXAlY-wdAFwIDylFItf0z7HOu93WD6dNThwzZYe330,5904
|
|
367
363
|
helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py,sha256=PK4wtuBXs4cPPwOoGfhBA4J4cGLQYC_MvRWuvWrkrv8,9068
|
|
@@ -369,84 +365,88 @@ helm/benchmark/scenarios/codeinsights_correct_code_scenario.py,sha256=7BpcezugYH
|
|
|
369
365
|
helm/benchmark/scenarios/codeinsights_edge_case_scenario.py,sha256=csTwe-mv1f6Tyvnj9uZ0SYuj1GRVvgjzukV28gIhNpk,8703
|
|
370
366
|
helm/benchmark/scenarios/codeinsights_student_coding_scenario.py,sha256=wc5Fefn4jpCw03dQ6WswCztJ8AO5j0Vrn6omcOVUq2k,7409
|
|
371
367
|
helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py,sha256=qX3yckZdMojYhiwvokvEuQpRXOzmN2zmzKjQb96Ljg8,9651
|
|
372
|
-
helm/benchmark/scenarios/commonsense_scenario.py,sha256=
|
|
373
|
-
helm/benchmark/scenarios/
|
|
374
|
-
helm/benchmark/scenarios/
|
|
368
|
+
helm/benchmark/scenarios/commonsense_scenario.py,sha256=f1E94zQAArwha730LcdJFo75KrX50lqcaFRn9sq85Yw,10855
|
|
369
|
+
helm/benchmark/scenarios/compositional_instructions.yaml,sha256=mPsFzPU6uaAD0xghzv-QD5Wk4uhoLY2sF3Fw_lunAsI,1822
|
|
370
|
+
helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=sR3UzObloLUzgjNwTbSHLGGkeA0g9-Aq_utpBPT2u_4,4757
|
|
371
|
+
helm/benchmark/scenarios/copyright_scenario.py,sha256=GWRCJdLlnWZcz6ztB5XIASGMPNd2o8EZNR2GueP8xuc,5035
|
|
375
372
|
helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
|
|
376
|
-
helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=
|
|
373
|
+
helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=FM6ty-JSFTDqdKLzfwgfhl3zV2oh_DWjRw4qI4-IrI0,11169
|
|
377
374
|
helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
|
|
378
|
-
helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=
|
|
379
|
-
helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=
|
|
380
|
-
helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=
|
|
381
|
-
helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=
|
|
382
|
-
helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=
|
|
383
|
-
helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=
|
|
384
|
-
helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=
|
|
385
|
-
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=
|
|
386
|
-
helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=
|
|
375
|
+
helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=jnBIx4RPnTCGfWIcKNTTCyzBZWqUidWGTzTleOm958A,5020
|
|
376
|
+
helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=pZK3dbKKNfNOHvNaGMkN9pjFznu4raNyLe4fWkxNHSo,8604
|
|
377
|
+
helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=hBKRRYIHegOrhIo_i7-1RPtbxmuhXcg29DkUIep0x_o,6304
|
|
378
|
+
helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=KzBz8nkrvPUTw5WmEoivtl0lLJ-mORek-IVKYmct2Pk,3460
|
|
379
|
+
helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=OvJ3pfxbxtJRxeSfeK-uoYFZ4ZIDSqE7ZbqZBuO93DE,14743
|
|
380
|
+
helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=zWhQWEE9Aa1O9ASLE5IAw55lzNLJ1ifGsBKZFk-jiXM,8942
|
|
381
|
+
helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=XO--1Rxb6kyLDRUQw-GhzLG-aTagVyN7ktWriAbBTAE,20756
|
|
382
|
+
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=vIkAgy4LysSSIm553bnts3CEN6NVIDKr3xeGkZ2GNyk,3520
|
|
383
|
+
helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=5l0lRRNNJ8nAb1R4bMxq3lakMF-P3XFvVpnT1PrwMms,3556
|
|
387
384
|
helm/benchmark/scenarios/dialogue_scenarios.py,sha256=yXCMZegzlgL0CXTY1W5lXdkFFHicUvq4z7_284MfRpw,5778
|
|
388
|
-
helm/benchmark/scenarios/dischargeme_scenario.py,sha256=
|
|
389
|
-
helm/benchmark/scenarios/disinformation_scenario.py,sha256=
|
|
390
|
-
helm/benchmark/scenarios/dyck_language_scenario.py,sha256=
|
|
385
|
+
helm/benchmark/scenarios/dischargeme_scenario.py,sha256=WTlqFnM76DFVGOUSLWv-g--vHWR71UWZ9VFXoEec3fo,9026
|
|
386
|
+
helm/benchmark/scenarios/disinformation_scenario.py,sha256=lq9Aj-DDpPJeFVk99wXEd2Qv3kahiBe9c8-RoBieCDM,9581
|
|
387
|
+
helm/benchmark/scenarios/dyck_language_scenario.py,sha256=HZEXetj5BkXrNJbAvg9HidrkxDgi2UUGIAVphNiN-jg,10052
|
|
391
388
|
helm/benchmark/scenarios/echr_judgment_classification_scenario.py,sha256=IqODoUY1-zJD1KW4Qkg3VwJcUeeLgGUKThr62bW-wx8,4915
|
|
392
389
|
helm/benchmark/scenarios/ehr_sql_scenario.py,sha256=Gm7Kw_TSUUxHW8ns-2e4E_tTBVX7h6Ta273VOpkMCQ8,5480
|
|
393
|
-
helm/benchmark/scenarios/ehrshot_scenario.py,sha256=
|
|
394
|
-
helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=
|
|
395
|
-
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=
|
|
396
|
-
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=
|
|
390
|
+
helm/benchmark/scenarios/ehrshot_scenario.py,sha256=OzZrgi-UZrMH70ZnHSeUWPCOesUue5vxPqnNOaN45dE,68830
|
|
391
|
+
helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=gceJqjxX-RxvOqPDANEwOrbHwKxtddpMz-FcsBfby0k,2854
|
|
392
|
+
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=03Ju45Sju2r4A_Peq2EsOyg5Ik99lMUv-6X--ejB9fk,7332
|
|
393
|
+
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=83F017FPFED_106IOawJN1jdY6IfREGJPNRvCokKGNk,7761
|
|
397
394
|
helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
|
|
398
395
|
helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
|
|
399
396
|
helm/benchmark/scenarios/exams_multilingual_scenario.py,sha256=c9zMGGL8EbCeNogTm-88g_5wWUiX1Zr7z_tsyjUq2h0,5404
|
|
400
|
-
helm/benchmark/scenarios/fin_qa_scenario.py,sha256=
|
|
401
|
-
helm/benchmark/scenarios/financebench_scenario.py,sha256=
|
|
402
|
-
helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=
|
|
403
|
-
helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256
|
|
404
|
-
helm/benchmark/scenarios/gpqa_scenario.py,sha256=
|
|
397
|
+
helm/benchmark/scenarios/fin_qa_scenario.py,sha256=kwjdhO6_09csdZJ7KqMKXpnpOy6necDDZVOkiSW1lhY,6807
|
|
398
|
+
helm/benchmark/scenarios/financebench_scenario.py,sha256=_DompAP_3JzR6sGkvaBe_qubz7fS0BHB4wV0Gt8jGrQ,2900
|
|
399
|
+
helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=I7eoymZfxu4gky3YjyLnZgaFIJcMkprxQxiCLM7wJV8,5455
|
|
400
|
+
helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=Qw8OJzvp12716GRW5kIxxX--f92OFRcaP0oEy-gakjM,5674
|
|
401
|
+
helm/benchmark/scenarios/gpqa_scenario.py,sha256=MsMsBqgxz6jKt2-ys98XAslGWkxZgzpYOws0b9e4Uj8,3520
|
|
405
402
|
helm/benchmark/scenarios/grammar.py,sha256=58tQYKPj013V9jIpW7fXUqZBLuboqEi_WLlDjx74spM,5590
|
|
406
|
-
helm/benchmark/scenarios/grammar_scenario.py,sha256=
|
|
407
|
-
helm/benchmark/scenarios/gsm_scenario.py,sha256=
|
|
408
|
-
helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=
|
|
409
|
-
helm/benchmark/scenarios/harm_bench_scenario.py,sha256=
|
|
410
|
-
helm/benchmark/scenarios/headqa_scenario.py,sha256=
|
|
403
|
+
helm/benchmark/scenarios/grammar_scenario.py,sha256=c3ATPkHM0WkA9QZEf2VNfThhuEUXD448uOuW6CAeVFw,2309
|
|
404
|
+
helm/benchmark/scenarios/gsm_scenario.py,sha256=dwIHWplfz0wCxD8BasRaIoCmG9cfMt3fRF3KhfhjyH0,3579
|
|
405
|
+
helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=hBh0ci-WXlAbhiPpsA96XEIkJPC9w_A2DAGRAHrIf9Q,2511
|
|
406
|
+
helm/benchmark/scenarios/harm_bench_scenario.py,sha256=ZFuVusNrbYxLwi57Dx2ACgLY3ydZySFB6PDwh8XP3XU,2949
|
|
407
|
+
helm/benchmark/scenarios/headqa_scenario.py,sha256=0hJewHkF9IKQfW6NUJ0DPjlwQmr7N90a2eSXrBQiFNA,6635
|
|
411
408
|
helm/benchmark/scenarios/healthqa_br_scenario.py,sha256=YneXTfp8V6k8rYCF3BTX6bxN2ASxdG3qrBr7uH_IFWc,3406
|
|
412
|
-
helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=
|
|
413
|
-
helm/benchmark/scenarios/ice_scenario.py,sha256=
|
|
409
|
+
helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=5R9En7lTNirZCVsMNqNB2metw0dIEPa9usoFB9W11B4,1855
|
|
410
|
+
helm/benchmark/scenarios/ice_scenario.py,sha256=tEkXqRtvtXaoC6JfbJOcY0E8xWyYKGMOvsSYJGjM_9Q,17674
|
|
414
411
|
helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
|
|
415
|
-
helm/benchmark/scenarios/ifeval_scenario.py,sha256=
|
|
412
|
+
helm/benchmark/scenarios/ifeval_scenario.py,sha256=v2Q1uYCd5i1jO4_gcIlTrbZdPZ27tJrCXi9e0sqcm8s,2308
|
|
416
413
|
helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
|
|
417
|
-
helm/benchmark/scenarios/imdb_scenario.py,sha256=
|
|
414
|
+
helm/benchmark/scenarios/imdb_scenario.py,sha256=H9iHmKK-q4a5edSMcS166f1fjkNbOS5BEIgR3md3k7M,6887
|
|
418
415
|
helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
|
|
419
|
-
helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=
|
|
416
|
+
helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=L_ii0n5vWoLGkwrBcqaAyaaigX-7y6Quu6piXflX8EI,3979
|
|
420
417
|
helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py,sha256=5fJHFonb7Ko7exHFtoUtvHar_7PhK2HjW9uDlU8Ljj0,2872
|
|
421
|
-
helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=
|
|
418
|
+
helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=qs3ID1f9Nobba2Mv3DxXzVVJmSU6RxtZW-DMJEAbkRc,3427
|
|
422
419
|
helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
|
|
423
|
-
helm/benchmark/scenarios/koala_scenario.py,sha256=
|
|
424
|
-
helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=
|
|
425
|
-
helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=
|
|
420
|
+
helm/benchmark/scenarios/koala_scenario.py,sha256=h-dTHQrNVoi7p7sTXZDqWcpjlznfUgxNrgr4nW8Hrk0,2212
|
|
421
|
+
helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=DE8efUmcPW5R62tZ46Rdsjv-EQs4lXm403O5XxM9heQ,7303
|
|
422
|
+
helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=JTm4Zkwqed7PijdeHzSbQ2l4YZY037OYF_fbnKmlpKg,6185
|
|
426
423
|
helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
|
|
427
|
-
helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=
|
|
428
|
-
helm/benchmark/scenarios/legal_support_scenario.py,sha256=
|
|
429
|
-
helm/benchmark/scenarios/legalbench_scenario.py,sha256=
|
|
430
|
-
helm/benchmark/scenarios/lex_glue_scenario.py,sha256
|
|
431
|
-
helm/benchmark/scenarios/lextreme_scenario.py,sha256=
|
|
424
|
+
helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=0DraJdQebbl8tv7S3WmLos98wnQFGJOzY6suGPoxR40,10954
|
|
425
|
+
helm/benchmark/scenarios/legal_support_scenario.py,sha256=cM98PnIAfjQzciUYGtgHqHYnWIdbdJfh3uy6uEIo488,4567
|
|
426
|
+
helm/benchmark/scenarios/legalbench_scenario.py,sha256=TaFwrzJzhPrnHrOV3GF7PYETRR5ywmMmn7oOJtZokZU,5604
|
|
427
|
+
helm/benchmark/scenarios/lex_glue_scenario.py,sha256=H7f3F7gK7bgf6FXvqXGTQrecTE6RtZaitIKmwQLksck,10736
|
|
428
|
+
helm/benchmark/scenarios/lextreme_scenario.py,sha256=dR5UUIymth3J3RInoNybygZg0rNZ-8wwzVHneuTTOGE,20843
|
|
432
429
|
helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
|
|
433
430
|
helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNySpna7aTY7K7RPD2x4,9109
|
|
434
431
|
helm/benchmark/scenarios/lmkt_scenarios.py,sha256=K51CdOZqMOMOozUmADjrJuNCpUtXVEZwcOeIY-EZrwM,11162
|
|
435
|
-
helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=
|
|
436
|
-
helm/benchmark/scenarios/
|
|
432
|
+
helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=ZtheFEcsBMSqGIPw5UPOO_b3v93mPFar1yqxVnsLq4E,6785
|
|
433
|
+
helm/benchmark/scenarios/madinah_qa_scenario.py,sha256=W7YEQTHyNWUJD8sKFmXU9e-ubzvleWQs7Cj_1zdq2bk,2482
|
|
434
|
+
helm/benchmark/scenarios/math_scenario.py,sha256=4BBhEvgfqPDrXxxW-4x4I0v3lWjscoLCf9vCURXs7hA,16043
|
|
435
|
+
helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py,sha256=Gtc9DgV2bLPIDngROmizTWQHbTftnwVodi9CYT0_P2A,2146
|
|
437
436
|
helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
|
|
438
|
-
helm/benchmark/scenarios/med_dialog_scenario.py,sha256=
|
|
439
|
-
helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=
|
|
437
|
+
helm/benchmark/scenarios/med_dialog_scenario.py,sha256=MKDlZLJEUq1nDRzlkHlpTWOxHwgghWMXcQvHJcM2LP0,8615
|
|
438
|
+
helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=tvF6d6e4WQi_mUIlZoLQvbOpVIfHR4nyMVVR8z4AkAE,5752
|
|
440
439
|
helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
|
|
441
|
-
helm/benchmark/scenarios/med_qa_scenario.py,sha256=
|
|
442
|
-
helm/benchmark/scenarios/medalign_scenario.py,sha256=
|
|
440
|
+
helm/benchmark/scenarios/med_qa_scenario.py,sha256=w7xTavAi7v-xmQXpxXM3Z071qR-eVbj_0yxwILAcLHk,5294
|
|
441
|
+
helm/benchmark/scenarios/medalign_scenario.py,sha256=5ALak5Hq2XQbqwTF3fQYKg-QPtL_vjY7J1UsMm9SOFk,4481
|
|
443
442
|
helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=fKXJFVLGnLcZKRBLsbjJA6YA4WqMaQAjkEU-i6YzSTQ,11626
|
|
444
|
-
helm/benchmark/scenarios/medbullets_scenario.py,sha256=
|
|
445
|
-
helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=
|
|
446
|
-
helm/benchmark/scenarios/medec_scenario.py,sha256=
|
|
447
|
-
helm/benchmark/scenarios/medhallu_scenario.py,sha256=
|
|
448
|
-
helm/benchmark/scenarios/
|
|
449
|
-
helm/benchmark/scenarios/
|
|
443
|
+
helm/benchmark/scenarios/medbullets_scenario.py,sha256=oMqnF3Ri9dghEWpGQYzfcTnYGMK5b2cJNVpJoqdtdUo,7694
|
|
444
|
+
helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=EDeeBKmbosUaMo3dg2MNVs_Cb_ws6WfnBYk15_B3lkU,6608
|
|
445
|
+
helm/benchmark/scenarios/medec_scenario.py,sha256=sLx6tcFXcvhDIThGNVi-425znECAn5pkUgRk83CM-Q8,6343
|
|
446
|
+
helm/benchmark/scenarios/medhallu_scenario.py,sha256=0EgeIxGuYMyBzM8xIOF4WcxfCOVqCp-oOuZe4Ai-CRM,3660
|
|
447
|
+
helm/benchmark/scenarios/medhelm_configurable_scenario.py,sha256=vxvvAaIFW4cWaMez1xbEOZBh6S2wEH6Ws8KcGpnaZbs,3852
|
|
448
|
+
helm/benchmark/scenarios/medi_qa_scenario.py,sha256=iv4_GUZJ9mGS7JGOMaPL747ujjrvnmeFjg1LbCpeMLo,5210
|
|
449
|
+
helm/benchmark/scenarios/medication_qa_scenario.py,sha256=uyYxtCm_dX9Jt6X-3ha2gAUyxF55wKn3_k95g7VAzHQ,3636
|
|
450
450
|
helm/benchmark/scenarios/melt_ir_scenario.py,sha256=d88DEGKVJZCeGnbrXrQZO_W4VJeqW8XNaYc8wIUiJtA,5978
|
|
451
451
|
helm/benchmark/scenarios/melt_knowledge_scenario.py,sha256=FDG4OGYEV6Ac40VC7KAeikzbFKAK2XXFhH1-QUTw8jo,7923
|
|
452
452
|
helm/benchmark/scenarios/melt_lm_scenarios.py,sha256=kSm0lRRixhnXctMprPnzi09PLOmgfs-C7TAW3QI8RmE,8969
|
|
@@ -454,60 +454,63 @@ helm/benchmark/scenarios/melt_scenarios.py,sha256=_WShDpmPaKrujGbZcazCqleDn0TKDh
|
|
|
454
454
|
helm/benchmark/scenarios/melt_srn_scenario.py,sha256=EQSOZIXbfvVWCJMJ4H2e_CiBz6wc8THJndnbK2WwTHM,14674
|
|
455
455
|
helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py,sha256=ptMQWgNn6R-XpAVAAjutSdZg_9ZUqG6fVotzAgeead4,7945
|
|
456
456
|
helm/benchmark/scenarios/melt_translation_scenario.py,sha256=j9YrY60DQHZz4m1MJZaGLzyI6FERlHRx2wy9auyAVB8,5415
|
|
457
|
-
helm/benchmark/scenarios/mental_health_scenario.py,sha256=
|
|
458
|
-
helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=
|
|
459
|
-
helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=
|
|
460
|
-
helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=
|
|
457
|
+
helm/benchmark/scenarios/mental_health_scenario.py,sha256=dwirS093vIdS1VG5yKqUw863TJoCF_keO-pr7ysTIxA,6066
|
|
458
|
+
helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=OIDB-f8wyn0ApsPqwpP11yJEpEtSpf3aYc6VVap6Jr8,5275
|
|
459
|
+
helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=pG_NK1Et0QZosQAOLAxbciyNSq_wIdOT7hkXsBb4mTg,4902
|
|
460
|
+
helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=KRl1lYX-ITWTGxWS_NNQ0o3I4E__jlzNDhAYvI1by7g,3749
|
|
461
461
|
helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
|
|
462
|
-
helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=
|
|
463
|
-
helm/benchmark/scenarios/mmlu_scenario.py,sha256=
|
|
462
|
+
helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=2FVL-6Umn0BufFpJ0e405q1ZgeeP8Np1kCvsE61GaOE,4686
|
|
463
|
+
helm/benchmark/scenarios/mmlu_scenario.py,sha256=uHJny3NXaqqUfBav30T7ip0FJJ1hxqcUk8spEpUq818,4772
|
|
464
464
|
helm/benchmark/scenarios/mmmlu_scenario.py,sha256=CyOISLOsXF9IEYGfeqWyYYkWGvrUvGivlWSJ5ttN9qY,2762
|
|
465
|
-
helm/benchmark/scenarios/msmarco_scenario.py,sha256
|
|
466
|
-
helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=
|
|
467
|
-
helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=
|
|
468
|
-
helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256
|
|
469
|
-
helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=
|
|
470
|
-
helm/benchmark/scenarios/natural_qa_scenario.py,sha256=
|
|
465
|
+
helm/benchmark/scenarios/msmarco_scenario.py,sha256=p9YNL5oTa9isCGVvmqHHVofKmiwitjPQd28ElXmRAN4,35601
|
|
466
|
+
helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=gtVSZxrs321tOolyD0gOoLzc0--uTc--3_HdlBVIuHo,6607
|
|
467
|
+
helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=FIdI509nn0LN9opC4yJ8UsvWmh6-KECUMZF88duIEq0,6395
|
|
468
|
+
helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=o7RydazvQkYK90epvuXsdEyE02fmpsDEwS6253fNptk,14365
|
|
469
|
+
helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=I4hjn0czmygPLB3tE-VTgCHWC28PaB6BdxL6eSBLL_I,6431
|
|
470
|
+
helm/benchmark/scenarios/natural_qa_scenario.py,sha256=3wkXvYm7m0Isxv2EW6SIuIEwZEV2lihsSLQZaANsKZo,14017
|
|
471
471
|
helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
|
|
472
472
|
helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
|
|
473
|
-
helm/benchmark/scenarios/omni_math_scenario.py,sha256=
|
|
474
|
-
helm/benchmark/scenarios/open_assistant_scenario.py,sha256=
|
|
475
|
-
helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=
|
|
473
|
+
helm/benchmark/scenarios/omni_math_scenario.py,sha256=nB2miRRQ-cWwhpqUkypOZibYugD56wZ299nxE5bty9Q,2582
|
|
474
|
+
helm/benchmark/scenarios/open_assistant_scenario.py,sha256=Z9eyaaHGRtFZTogIkOe1Pr6d70lqSe80tMsNPWR_jog,6577
|
|
475
|
+
helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=MTzTZVGN-5c6wASKIK5kBFiGywjvAzRR2rjSbgiELhw,4064
|
|
476
476
|
helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
|
|
477
|
-
helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=
|
|
478
|
-
helm/benchmark/scenarios/quac_scenario.py,sha256=
|
|
479
|
-
helm/benchmark/scenarios/race_based_med_scenario.py,sha256=
|
|
480
|
-
helm/benchmark/scenarios/raft_scenario.py,sha256=
|
|
481
|
-
helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=
|
|
477
|
+
helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=Z8gR19kiTIugBTvBj6g9LiBXicfAxZ1AFh_GF_axgQc,9043
|
|
478
|
+
helm/benchmark/scenarios/quac_scenario.py,sha256=y5bm1LXHIICqPIkWOg3sibnH_sC15b2zYUfT-_Y0V4E,7349
|
|
479
|
+
helm/benchmark/scenarios/race_based_med_scenario.py,sha256=pyeOUjWlQ30WgNr48BuV7kP7fhqZljLfizbTfWjyV_k,6862
|
|
480
|
+
helm/benchmark/scenarios/raft_scenario.py,sha256=BQ-faIiWBuUYmHTMCRbI8XpymtWvKK8DN6oNejjNi7M,5443
|
|
481
|
+
helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=USsjBVzoL-Bgq8B2clQvl3d-g4XlOlt8gvBje9VD7Dk,3077
|
|
482
482
|
helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=jgVf1D4eTSxwxQsW0GBou5hfSo2dnlEJvHpVJqk3BxM,6327
|
|
483
|
-
helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=
|
|
484
|
-
helm/benchmark/scenarios/scenario.py,sha256=
|
|
485
|
-
helm/benchmark/scenarios/seahelm_scenario.py,sha256=
|
|
486
|
-
helm/benchmark/scenarios/self_instruct_scenario.py,sha256=
|
|
487
|
-
helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=
|
|
488
|
-
helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=
|
|
489
|
-
helm/benchmark/scenarios/shc_conf_scenario.py,sha256=
|
|
490
|
-
helm/benchmark/scenarios/shc_ent_scenario.py,sha256=
|
|
491
|
-
helm/benchmark/scenarios/shc_gip_scenario.py,sha256=
|
|
492
|
-
helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=
|
|
493
|
-
helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=
|
|
494
|
-
helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=
|
|
483
|
+
helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=sUJs9eocWUrnBBOEFK4pUq4KgozL-QTra67zkcTHn1s,5048
|
|
484
|
+
helm/benchmark/scenarios/scenario.py,sha256=6zYT0heGPh1HXmv9l2g360Y3CwcV4xjA6jUq5snNF5I,9482
|
|
485
|
+
helm/benchmark/scenarios/seahelm_scenario.py,sha256=Pgw05ZT9NByV7GL0031vGImbhGOZPrHv8aOR5DmP7sA,94098
|
|
486
|
+
helm/benchmark/scenarios/self_instruct_scenario.py,sha256=uPVclF96zh0P2VJ163nLa7XuTKlMKGaTDFN-6IcdbXQ,3164
|
|
487
|
+
helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=kLnoSmpNaPKUcHDPhS6sDP38TC0YII5dlvEKpiUZYKY,3787
|
|
488
|
+
helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=Fg6PKKLLtmVxuu8pTOAmmoRpPIlFhxWl4VzIUNr7w6Y,3519
|
|
489
|
+
helm/benchmark/scenarios/shc_conf_scenario.py,sha256=605KB8lTHlJh44XwbkilKXXAfJQGD2XVnZJmFoaV4Vw,3948
|
|
490
|
+
helm/benchmark/scenarios/shc_ent_scenario.py,sha256=Sr4E3z0keK69b0DIZ1QFISvG0TsEQ6S567h84eSEHcc,3737
|
|
491
|
+
helm/benchmark/scenarios/shc_gip_scenario.py,sha256=MhQ4mdKMJOtcZJ0gKxoVCg2RVyM8OKfjW_EA3wna_2c,3564
|
|
492
|
+
helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=OTYdD5mifaEZeI84RF5fz3Q10M8cE74H0GR3a7QisAE,3974
|
|
493
|
+
helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=bM_qSCv5Qp_03TiDezgl1gUSSs49IZ_M1L4xZnMzToc,3915
|
|
494
|
+
helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=BttMbH39uai4qg621W0ySAFX-UtoRLuyEi-f4bfSrFo,4461
|
|
495
495
|
helm/benchmark/scenarios/shc_sei_scenario.py,sha256=pTcb7n97VkesyRuqUqe5JGed1jDsQEd19udciDras8E,4532
|
|
496
|
-
helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=
|
|
497
|
-
helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=
|
|
496
|
+
helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=IPOuJ74AIWOLDVIQ5lNUjMswcU9zeB_gOXg-K9HLTO4,3703
|
|
497
|
+
helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=IjBVVLUG4muHvU_wd-12ML-YZqN4Qe39TLwb7hiYT4c,1743
|
|
498
498
|
helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
|
|
499
|
-
helm/benchmark/scenarios/
|
|
500
|
-
helm/benchmark/scenarios/
|
|
501
|
-
helm/benchmark/scenarios/
|
|
499
|
+
helm/benchmark/scenarios/situation_prompts.yaml,sha256=nJA3X_I67PIpXgd7LTekWwEr5zn1ryqIHgvqCpAwoGQ,1790
|
|
500
|
+
helm/benchmark/scenarios/spider_scenario.py,sha256=qN1TTLfJmsOdRwf6a-sL9cMzXmJsu09nQPvSqn9L0hk,3932
|
|
501
|
+
helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=ZiXGXeKelEm9NrFsHQS5ft1L4oL6a_IlAJm_flRv-Z4,5228
|
|
502
|
+
helm/benchmark/scenarios/summarization_scenario.py,sha256=wry6hAO_YXk56gS79jJ6HP6VhrRjpExvEZSsl2vM910,8883
|
|
502
503
|
helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
|
|
503
|
-
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=
|
|
504
|
-
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=
|
|
504
|
+
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=HbCeVUzPm3miSZoIDivTcAkP-fwi6X4TnyaAx0jUumk,3737
|
|
505
|
+
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=Ir8CVC0aD7Cy7H_ZKGMd1c0iLK-dWbkuMuUl2D7kcR4,17048
|
|
505
506
|
helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=7STCSHiHGIQ2aaN_PwDE5jXUJ-qcu8PaS4pC-pbOceE,8410
|
|
506
507
|
helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
|
|
507
508
|
helm/benchmark/scenarios/test_alghafa_scenario.py,sha256=ARQyzjmEpX_FpN2QLnIB7P-ToAeMtE4dqsolzlq8KPQ,1696
|
|
508
|
-
helm/benchmark/scenarios/
|
|
509
|
+
helm/benchmark/scenarios/test_alrage_scenario.py,sha256=9ofFc05Sy1mdfU9VgHdL_SNp8olJ4ComnZllkMU6itU,6711
|
|
510
|
+
helm/benchmark/scenarios/test_arabic_exams_scenario.py,sha256=nD221WpNE3Ddy-VOdLQGYOHiYVBAcyJxeMc__lVNRLo,985
|
|
511
|
+
helm/benchmark/scenarios/test_aratrust_scenario.py,sha256=6Ks4DA13gU4BAP46qKwPISkqIJw-RiZt4ZhyviXdrUY,918
|
|
509
512
|
helm/benchmark/scenarios/test_bigcodebench_scenario.py,sha256=q9FWJsxLJoFaB3PSMLjI_-YyPoZYusOsMPwn6X6NKXw,1304
|
|
510
|
-
helm/benchmark/scenarios/test_bluex_scenario.py,sha256=
|
|
513
|
+
helm/benchmark/scenarios/test_bluex_scenario.py,sha256=QCIqq0GRRrjb55lwLpBiEkDwSFzEZxBKbCQHvyYO_Fk,1928
|
|
511
514
|
helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
|
|
512
515
|
helm/benchmark/scenarios/test_czech_bank_qa_scenario.py,sha256=bZNLEGu58iHmutGlSp-2uVC2931TO6Rxw7giqFh9RHY,828
|
|
513
516
|
helm/benchmark/scenarios/test_enem_challenge_scenario.py,sha256=XfPkYaSwdGa63ToC_BLuVKTRSldWNBlKsZYK6CFzL3w,2000
|
|
@@ -537,19 +540,19 @@ helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsH
|
|
|
537
540
|
helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-FVN7Z4nGH8ZLiFfh_F9OPXZjw,789
|
|
538
541
|
helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
|
|
539
542
|
helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
|
|
540
|
-
helm/benchmark/scenarios/thai_exam_scenario.py,sha256=
|
|
541
|
-
helm/benchmark/scenarios/the_pile_scenario.py,sha256=
|
|
542
|
-
helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=
|
|
543
|
+
helm/benchmark/scenarios/thai_exam_scenario.py,sha256=7FALls8tnT5QxC1TT8A0Mce9kmRT-icsQ7SPU4oqWPs,10461
|
|
544
|
+
helm/benchmark/scenarios/the_pile_scenario.py,sha256=Dz51JxxazqPiX_fk6viOav8hQ2n6Iw0LIPhouquu6aw,5632
|
|
545
|
+
helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=0U7q8E9XB0H9oSN3OzhfsiZ-8PJrYXCCC04dffjicB8,6822
|
|
543
546
|
helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
|
|
544
|
-
helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=
|
|
547
|
+
helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=ydG8MvBF3v6TXHScMK0_-HPAhmPhMWh5G4foBEHDp84,2905
|
|
545
548
|
helm/benchmark/scenarios/unitxt_scenario.py,sha256=uL8Gni-Uw_eIp9xKQefp4J7XtKSttjJHzJE4USyoC2U,1930
|
|
546
549
|
helm/benchmark/scenarios/verifiability_judgment_scenario.py,sha256=2iCJplnxdR7NNKjhsLR5o51pL55Q0bcbjjWlvrk5lw4,6067
|
|
547
|
-
helm/benchmark/scenarios/vicuna_scenario.py,sha256=
|
|
548
|
-
helm/benchmark/scenarios/wikifact_scenario.py,sha256=
|
|
550
|
+
helm/benchmark/scenarios/vicuna_scenario.py,sha256=OWwbFkhgEMHd5YH2G3v2E_p22DmYmPVsDbKKhBbyTDY,2478
|
|
551
|
+
helm/benchmark/scenarios/wikifact_scenario.py,sha256=AHHZz_trtGf8HRoCnE6vukqrTD_Of9XQ1GcrqyctgR0,6702
|
|
549
552
|
helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
|
|
550
|
-
helm/benchmark/scenarios/wildbench_scenario.py,sha256=
|
|
553
|
+
helm/benchmark/scenarios/wildbench_scenario.py,sha256=dWJSqF06ZWAyZhaejNmrZ0Uu4Vlh5HMdTaMLNkMfe8Q,3668
|
|
551
554
|
helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
|
|
552
|
-
helm/benchmark/scenarios/wmt_14_scenario.py,sha256=
|
|
555
|
+
helm/benchmark/scenarios/wmt_14_scenario.py,sha256=7V2AFfd_LlbYTyi-tLXi5YvE8b1zrTxQJ6Z6e1xONso,5401
|
|
553
556
|
helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
|
|
554
557
|
helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
555
558
|
helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=NtTEHzmbeCicbjTRxPBUueZrBGOPwF6RVc2Yftc-VKs,5634
|
|
@@ -573,11 +576,11 @@ helm/benchmark/scenarios/audio_language/mustard_scenario.py,sha256=7YHgfSpua5OdE
|
|
|
573
576
|
helm/benchmark/scenarios/audio_language/mutox_scenario.py,sha256=bDCQbhsRDR6iQGNlCu_35kjmjGjuzjOIoraSncfOlOY,10277
|
|
574
577
|
helm/benchmark/scenarios/audio_language/parade_scenario.py,sha256=UuOa5cSrHh5n3VF_SuJp4cy1MxlI3uEKHLrNEhGuyuw,4186
|
|
575
578
|
helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py,sha256=oN4vBkElVzjccaEK2JFqoXMCGFTTHD0gcYwSDhvHTpQ,5438
|
|
576
|
-
helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py,sha256=
|
|
577
|
-
helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py,sha256=
|
|
578
|
-
helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=
|
|
579
|
-
helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256
|
|
580
|
-
helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=
|
|
579
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py,sha256=Xw256FUD3mNZRtrnR1N9q5oSbHwGCP9KzLlcNjb5vn4,2740
|
|
580
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py,sha256=MrjlgmeVFhdmvVZclFmOGK0wZDQBFK5E2wBG8bVyj-c,2565
|
|
581
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=xBrqLDVU-94NNRsByLa8BovFc7fblWa3BO3eftcw-TU,3603
|
|
582
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256=-3ZKJVoNRLpnooI9Nl_cMH250r7Pg0hxtgcjYN4LbSE,3993
|
|
583
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=2w1RuNMO2I9zhphO0LI5wgYVH9K7gbos_DeBilucakM,3960
|
|
581
584
|
helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py,sha256=wkKyTCtx4isQSMufap_6DsNdGkHi7L8FQ2p7n58kKYI,3124
|
|
582
585
|
helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py,sha256=4M_gTWs4CoJ1Ce9dDFBTAe9dzSovpsve_sN1eco2V2A,3155
|
|
583
586
|
helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py,sha256=L04ee5bM5E0UNNmkwEzVwug4HJXQoIcVjujPgxtU2h0,4366
|
|
@@ -600,6 +603,8 @@ helm/benchmark/scenarios/image_generation/radiology_scenario.py,sha256=7JN8OYap8
|
|
|
600
603
|
helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,sha256=DoabanZhd-2MHFDZeR9EoPit0T2TvbVwZGUR0RfJyW0,2362
|
|
601
604
|
helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
|
|
602
605
|
helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
|
|
606
|
+
helm/benchmark/scenarios/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
607
|
+
helm/benchmark/scenarios/medhelm/judges.yaml,sha256=g_O6zVgOMSL4_f1yNz8muDuUUBzcsM8e5gpfe56eI4Y,663
|
|
603
608
|
helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
604
609
|
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
|
|
605
610
|
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
|
|
@@ -644,7 +649,7 @@ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,s
|
|
|
644
649
|
helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
|
|
645
650
|
helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
|
|
646
651
|
helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
|
|
647
|
-
helm/benchmark/static/schema_arabic.yaml,sha256=
|
|
652
|
+
helm/benchmark/static/schema_arabic.yaml,sha256=Iui-4_M4tV45Xzs3bz0diI3UZwVAuaLAxD5uNhjurgs,8925
|
|
648
653
|
helm/benchmark/static/schema_audio.yaml,sha256=lVslZX7JmFo0ZgLU4n6amrs9DK8y43Ux0I9QyDUG-14,29119
|
|
649
654
|
helm/benchmark/static/schema_autobencher.yaml,sha256=yb-NkF5w5R2YOg7RIsadNHJ_5G7lG1gbcDVq_25luEk,5716
|
|
650
655
|
helm/benchmark/static/schema_call_center.yaml,sha256=i30aFzWqdOJRyAHN8vAzyHEX1v95DEK0TI1SMKTN4TE,9106
|
|
@@ -662,13 +667,14 @@ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0
|
|
|
662
667
|
helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
|
|
663
668
|
helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
|
|
664
669
|
helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
|
|
665
|
-
helm/benchmark/static/schema_long_context.yaml,sha256=
|
|
666
|
-
helm/benchmark/static/schema_medhelm.yaml,sha256=
|
|
670
|
+
helm/benchmark/static/schema_long_context.yaml,sha256=NH7poEOCpmbqOZvbHeWrnSB7MWavh7EX2NU-Yl-nXNg,10829
|
|
671
|
+
helm/benchmark/static/schema_medhelm.yaml,sha256=e3vVHdEXcS-joOUMUoIoFA3x9hEE__svDoajbjfqpLE,51793
|
|
667
672
|
helm/benchmark/static/schema_melt.yaml,sha256=mmPqwDa26DVZXsRJkmKQSyD0OStvjlxaMoSPM25SpD4,47494
|
|
668
673
|
helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
|
|
669
674
|
helm/benchmark/static/schema_mmlu_winogrande_afr.yaml,sha256=YIVYf-mOFPq82UVBdMhnCWNOr4sV8Oi3-ozOszJ2tWQ,40143
|
|
670
675
|
helm/benchmark/static/schema_safety.yaml,sha256=7RfZDX4wr8Xr1BJ149ZwmplPzPkNL0-BKbEZuzUsl_0,9278
|
|
671
676
|
helm/benchmark/static/schema_seahelm.yaml,sha256=9XF9Rlr7I-g-uW6R0LNh7Xg52Xs3_058QybXEiN-hnM,28296
|
|
677
|
+
helm/benchmark/static/schema_slp.yaml,sha256=5AV2leKoSBZwP3rIfXcwiqqpXPQbyWjXKE5kU73IAt4,7122
|
|
672
678
|
helm/benchmark/static/schema_slphelm.yaml,sha256=3avOfp-ZEmVRGei3_M_WX6cSP5hQjbfHsDr1XrjayMY,5294
|
|
673
679
|
helm/benchmark/static/schema_social_audio.yaml,sha256=Nj3ORXDT4RHD52cyo1RHfueWwbhqp1qW06TaVJ2lUfE,8653
|
|
674
680
|
helm/benchmark/static/schema_sql.yaml,sha256=8rRff6p_i1CsH7oDbUjau2qRWbLGspuM1Hy-g5pOQiU,6047
|
|
@@ -680,25 +686,26 @@ helm/benchmark/static/schema_vhelm.yaml,sha256=0slYep2eepUefgtK_m4iSS785sHdJzljm
|
|
|
680
686
|
helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
|
|
681
687
|
helm/benchmark/static/schema_video.yaml,sha256=FkpI5Slc4w-ty4hns82ArXIvTdqppWDnkJSpIp74QN4,9713
|
|
682
688
|
helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
|
|
683
|
-
helm/benchmark/static_build/index.html,sha256=
|
|
684
|
-
helm/benchmark/static_build/assets/air-overview-
|
|
685
|
-
helm/benchmark/static_build/assets/
|
|
686
|
-
helm/benchmark/static_build/assets/
|
|
687
|
-
helm/benchmark/static_build/assets/
|
|
688
|
-
helm/benchmark/static_build/assets/helm-
|
|
689
|
-
helm/benchmark/static_build/assets/
|
|
690
|
-
helm/benchmark/static_build/assets/
|
|
691
|
-
helm/benchmark/static_build/assets/index-
|
|
692
|
-
helm/benchmark/static_build/assets/
|
|
693
|
-
helm/benchmark/static_build/assets/medhelm-
|
|
694
|
-
helm/benchmark/static_build/assets/overview-
|
|
695
|
-
helm/benchmark/static_build/assets/
|
|
696
|
-
helm/benchmark/static_build/assets/
|
|
697
|
-
helm/benchmark/static_build/assets/
|
|
698
|
-
helm/benchmark/static_build/assets/
|
|
699
|
-
helm/benchmark/static_build/assets/
|
|
700
|
-
helm/benchmark/static_build/assets/vhelm-
|
|
701
|
-
helm/benchmark/static_build/assets/vhelm-
|
|
689
|
+
helm/benchmark/static_build/index.html,sha256=BaMObuai-TufVapXx7P4wX8ZGvoQuyQh4bdD2ZDukoE,1185
|
|
690
|
+
helm/benchmark/static_build/assets/air-overview-DpBbyagA.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
|
|
691
|
+
helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png,sha256=_I8OI_2Fy_Vkmxl74qbSKtOb-C5mbHMye4JaC6LylDk,377331
|
|
692
|
+
helm/benchmark/static_build/assets/crfm-logo-Du4T1uWZ.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
|
|
693
|
+
helm/benchmark/static_build/assets/heim-logo-BJtQlEbV.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
|
|
694
|
+
helm/benchmark/static_build/assets/helm-logo-simple-DzOhNN41.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
695
|
+
helm/benchmark/static_build/assets/helm-safety-COfndXuS.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
|
|
696
|
+
helm/benchmark/static_build/assets/helmhero-D9TvmJsp.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
|
|
697
|
+
helm/benchmark/static_build/assets/index-oIeiQW2g.css,sha256=k1JZXkXPFsUerOZ37oDhxjcb1ypOFEdDogJUP6H-NAQ,491553
|
|
698
|
+
helm/benchmark/static_build/assets/index-qOFpOyHb.js,sha256=AwlcCxBOsMoY19-58uUNxyZC9llXwzDheC5ARsE98kM,129215
|
|
699
|
+
helm/benchmark/static_build/assets/medhelm-overview-CND0EIsy.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
|
|
700
|
+
helm/benchmark/static_build/assets/medhelm-v1-overview-Cu2tphBB.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
|
|
701
|
+
helm/benchmark/static_build/assets/overview-BwypNWnk.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
|
|
702
|
+
helm/benchmark/static_build/assets/process-flow-DWDJC733.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
|
|
703
|
+
helm/benchmark/static_build/assets/react-BteFIppM.js,sha256=rtvePuxI4R_ecUu6MekBI3bolSJCKhriCQYdFqg6HuI,275079
|
|
704
|
+
helm/benchmark/static_build/assets/recharts-DxuQtTOs.js,sha256=h1N20jF_qA400VP6AQSdA1GhlNsFEuXqQk5hMpInUjg,430871
|
|
705
|
+
helm/benchmark/static_build/assets/tremor-DR4fE7ko.js,sha256=xL2aEMpCmZYl2FDAA6G2MOjOIjfwp4v40hxilO10j1I,288436
|
|
706
|
+
helm/benchmark/static_build/assets/vhelm-aspects-NiDQofvP.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
|
|
707
|
+
helm/benchmark/static_build/assets/vhelm-framework-NxJE4fdA.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
|
|
708
|
+
helm/benchmark/static_build/assets/vhelm-model-ypCL5Yvq.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
|
|
702
709
|
helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
703
710
|
helm/benchmark/window_services/default_window_service.py,sha256=HlLI3be8s-GNxDygNGrvo9exEhbrO8Vtr3w0rnSIx7M,181
|
|
704
711
|
helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=wfdydJY6AmpYCfAv5PQu9D6nFXbuxIRum7Tsv0DemJE,2148
|
|
@@ -731,89 +738,115 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
|
|
|
731
738
|
helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
|
|
732
739
|
helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
|
|
733
740
|
helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
734
|
-
helm/clients/ai21_client.py,sha256=
|
|
741
|
+
helm/clients/ai21_client.py,sha256=wDhdlPvmRDqY4v5bLzL1TDu-HwZ9vuqEy2FbmH-jg7A,8239
|
|
735
742
|
helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
|
|
736
|
-
helm/clients/aleph_alpha_client.py,sha256=
|
|
737
|
-
helm/clients/anthropic_client.py,sha256=
|
|
743
|
+
helm/clients/aleph_alpha_client.py,sha256=yqVBGDg5N_py0CB02GezK3lwJ2j2bFLm5qATO_d7R5M,5062
|
|
744
|
+
helm/clients/anthropic_client.py,sha256=XEHfZL8jc8ii8RC4ZTnxUdLi6c5gk9TYHMLOS4laJDI,36492
|
|
738
745
|
helm/clients/auto_client.py,sha256=J5bCxIDZJUdV1dCv_EtbvwPzd1p2Ogtg207vpb3PhgI,11624
|
|
739
746
|
helm/clients/azure_openai_client.py,sha256=mZ0udOAjadp7ZyE2KEtq8XuQp45eHlX_qM_getyzbA0,2009
|
|
740
|
-
helm/clients/bedrock_client.py,sha256=
|
|
747
|
+
helm/clients/bedrock_client.py,sha256=CswF6TOq4M3We-ukDY4kjTUBiPUkdyczinZEYeZAdVI,12352
|
|
741
748
|
helm/clients/bedrock_utils.py,sha256=8ZZfyOuZkgxL_naJ-wwBnH4GKv425fu3MfyakGHxeb4,3764
|
|
742
749
|
helm/clients/client.py,sha256=fWJ_Eg4NyhPqlvpDvM7AjWN7cr2LU2uWdsnENLJXlTs,8963
|
|
743
750
|
helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
|
|
744
|
-
helm/clients/cohere_client.py,sha256=
|
|
751
|
+
helm/clients/cohere_client.py,sha256=HQ8MeQKZVa-A3zZhpGK3sGzxJ8uE7EsJWCINzZus-kI,11082
|
|
745
752
|
helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
|
|
746
753
|
helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
|
|
747
|
-
helm/clients/google_client.py,sha256=
|
|
754
|
+
helm/clients/google_client.py,sha256=8O-98kt7SLSZ-_bkPMTBdQPsxTb_UgfFrKV8tXidUuU,2993
|
|
748
755
|
helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
|
|
749
756
|
helm/clients/grok_client.py,sha256=SbVB6AduTwfElzUgEMnQW2kQUFVTCv4TpPPJvElQEe0,1127
|
|
750
|
-
helm/clients/http_model_client.py,sha256=
|
|
751
|
-
helm/clients/huggingface_client.py,sha256=
|
|
757
|
+
helm/clients/http_model_client.py,sha256=rDCzfugWoZegOSt-ZimKePYs4YA3SZ9CJ5Xuycjuccc,2899
|
|
758
|
+
helm/clients/huggingface_client.py,sha256=a4QV2h-voZ5MC0agrM4AibLdVUbyNWp1Pk4XUFMVc04,17709
|
|
752
759
|
helm/clients/huggingface_pipeline_client.py,sha256=ivFTMNHBwwIUjkeOHkl-veZi5nNAjtnkYvneRFWs-6Q,6154
|
|
753
|
-
helm/clients/ibm_client.py,sha256=
|
|
760
|
+
helm/clients/ibm_client.py,sha256=9pQh0Ho5DN27bHYt6NrUHVdgD-iQyP8m_-OlraYWxok,9765
|
|
754
761
|
helm/clients/lit_gpt_client.py,sha256=pgLfSvusNpdj8F5DVxzQdHxTDRNX4RVt6unegao803U,6229
|
|
755
762
|
helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
|
|
756
|
-
helm/clients/megatron_client.py,sha256=
|
|
757
|
-
helm/clients/mistral_client.py,sha256=
|
|
758
|
-
helm/clients/moderation_api_client.py,sha256=
|
|
763
|
+
helm/clients/megatron_client.py,sha256=Zk80yeDFWSFDy2ILtuOC5hs9ruH-AUDhxZiMWw_IJi8,4188
|
|
764
|
+
helm/clients/mistral_client.py,sha256=Nlh97asTsMSHo7-m1JgbYdqnUjSeQ83spaNeHVNkvzg,8454
|
|
765
|
+
helm/clients/moderation_api_client.py,sha256=1lB875B5F2I32u8j-Q8DoA9CQA5-kMJ6RnPfkOqS6AQ,4800
|
|
759
766
|
helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
|
|
760
767
|
helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
|
|
761
|
-
helm/clients/openai_client.py,sha256=
|
|
762
|
-
helm/clients/openai_responses_client.py,sha256=
|
|
763
|
-
helm/clients/
|
|
768
|
+
helm/clients/openai_client.py,sha256=f_RY84FPNt04vfR7py4iXDr9i6cB7824v9PYwTna-Q0,28650
|
|
769
|
+
helm/clients/openai_responses_client.py,sha256=FhQcOcXNZc5AuDMh1KBD3ZoRdEREy73dIeFBjUg9YDo,8444
|
|
770
|
+
helm/clients/openrouter_client.py,sha256=oK8gXBhBs1y0AriZ9tVp8kx5lSY7gUgQJv-mfywSTfI,980
|
|
771
|
+
helm/clients/palmyra_client.py,sha256=_dUeVY-64C94aJdbgzpGWy6b2AbmIxCG4ZqTSgRRLcg,7128
|
|
764
772
|
helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
|
|
765
|
-
helm/clients/reka_client.py,sha256=
|
|
773
|
+
helm/clients/reka_client.py,sha256=6FNiH7b8ADO8NHS7759rDeIGGbgVFagpDZ7_u_rYgaA,8376
|
|
766
774
|
helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
|
|
767
|
-
helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=
|
|
775
|
+
helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=EhgDbDoDNwTow65jea4dJNqnBn5CjYUl_N1MueeB33g,2057
|
|
768
776
|
helm/clients/stanfordhealthcare_claude_client.py,sha256=ShhbLttPDRa-Pnvr35_2WmVx5s0XpsJMGzu5qhzLoLI,1020
|
|
769
777
|
helm/clients/stanfordhealthcare_google_client.py,sha256=cJK_uH-YBQpBJsltNuiUi0x77bh0eCM5UNBaJQ1zai4,1475
|
|
770
|
-
helm/clients/stanfordhealthcare_http_model_client.py,sha256=
|
|
778
|
+
helm/clients/stanfordhealthcare_http_model_client.py,sha256=2ppahR35twHqxDMb7Dzy2rfjoFVuHZTB05MZJeKOy_8,3128
|
|
771
779
|
helm/clients/stanfordhealthcare_openai_client.py,sha256=Qyl8voGz1hJPqT6g4PunMuN99EYaW8U-NXQQSgJbiiM,2169
|
|
772
780
|
helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy6ifJ8pUiNa3vBcWiDsIwXFI,1343
|
|
773
781
|
helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
|
|
774
782
|
helm/clients/test_client.py,sha256=T27UsIPWsbE1JK_8DN_DW9LkEcIGRbgDjio14YOIAb0,3854
|
|
775
783
|
helm/clients/test_huggingface_client.py,sha256=8Shzrf1Pad1UsiUAdeOSqsTPQaay0CrWXmdNeIfrJ2Y,3418
|
|
784
|
+
helm/clients/test_openrouter_client.py,sha256=gCzchJMQZi4kkgtpGe1Ma0xF2nsP1uDevJcqbprZ6RE,2414
|
|
776
785
|
helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
|
|
777
786
|
helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
|
|
778
|
-
helm/clients/together_client.py,sha256=
|
|
787
|
+
helm/clients/together_client.py,sha256=Nj1FY1nMN5pYiHOG6lKPinVwgqBCTBT-9kHdgq953KU,25667
|
|
779
788
|
helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
|
|
780
789
|
helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
|
|
781
|
-
helm/clients/vertexai_client.py,sha256=
|
|
790
|
+
helm/clients/vertexai_client.py,sha256=Hf8ncfCrpoG8ZLl_TRTX5vdxjf0kyzwIaFR8V-qfSEc,23768
|
|
782
791
|
helm/clients/vllm_client.py,sha256=xmXf35WX2oOZhpQnRxeooXGshENySOHZCUQ1E4pbQbA,2647
|
|
783
792
|
helm/clients/vllm_granite_thinking_client.py,sha256=fds2i8LUG78OJYke1uYdDy6XRFqE3rZgSornFjzu4Sk,2172
|
|
784
|
-
helm/clients/writer_client.py,sha256=
|
|
793
|
+
helm/clients/writer_client.py,sha256=Eyae245YkOcW1yHtVMqLPRRvzddr9IcXZLstr8UO5iw,4552
|
|
785
794
|
helm/clients/yi_client.py,sha256=nC60d2HiUL2W59FTne9tWmZ9bGGY1OvI7Ob3Ng4wSPE,750
|
|
786
795
|
helm/clients/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
787
|
-
helm/clients/audio_language/diva_llama_client.py,sha256=
|
|
788
|
-
helm/clients/audio_language/llama_omni_client.py,sha256=
|
|
789
|
-
helm/clients/audio_language/qwen2_5_omni_client.py,sha256=
|
|
790
|
-
helm/clients/audio_language/qwen2_audiolm_client.py,sha256=
|
|
791
|
-
helm/clients/audio_language/qwen_audiolm_client.py,sha256=
|
|
796
|
+
helm/clients/audio_language/diva_llama_client.py,sha256=NoBeG74AqDLxJXZuyBtdEwXmAprj3cbTjon-_-_C4oY,4945
|
|
797
|
+
helm/clients/audio_language/llama_omni_client.py,sha256=JIOxGd1iCyCP8LPqA5KupwSsXKXb64GYmllZ5UtKgJc,8774
|
|
798
|
+
helm/clients/audio_language/qwen2_5_omni_client.py,sha256=3rWuCKfrvwpRzQoDxWJMhThS0cX1hCg9fCWiyVstXSk,9740
|
|
799
|
+
helm/clients/audio_language/qwen2_audiolm_client.py,sha256=AZKLQY6ipfmpPm2Kg-Ecmm0Nut_Ni6syx1P57XCmYLw,9037
|
|
800
|
+
helm/clients/audio_language/qwen_audiolm_client.py,sha256=j2OQaLnHU5v4IaVZrpzV7D5JFzXYXYHxcLvO1rnO23E,6394
|
|
792
801
|
helm/clients/audio_language/test.py,sha256=FrKpirOwJW1__E2egq4VPgsTrgiSHZHBwfUCvxNjC0o,1969
|
|
802
|
+
helm/clients/audio_language/llama_omni/arguments.py,sha256=MxzZKE8sNsOe5eUse96gejOsmu_MfTJGiuOwR87xiSA,2334
|
|
803
|
+
helm/clients/audio_language/llama_omni/constants.py,sha256=IjFS9EUI5p1DLtGcX0B1lSxESkxcx5dMbuMkMm1UaSs,183
|
|
804
|
+
helm/clients/audio_language/llama_omni/conversation.py,sha256=SgoMEf1Roi_8ZxiIM6DXwY3ozw0ExOCYdFFX-5rRA0g,6881
|
|
805
|
+
helm/clients/audio_language/llama_omni/preprocess.py,sha256=2-YA6czgO1Zr-C1ChHvqVEfYa8qHhHp6n1Lb1Uw67qg,10764
|
|
806
|
+
helm/clients/audio_language/llama_omni/utils.py,sha256=GycpuTkNEZtMNG2ZTZ7cYVjPEilyC4o2itT9K9kwJFI,7556
|
|
807
|
+
helm/clients/audio_language/llama_omni/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
808
|
+
helm/clients/audio_language/llama_omni/model/builder.py,sha256=-y7amgUyPMEMknVutSSb_W3Zsm09r3K7u08jgEMinYA,3875
|
|
809
|
+
helm/clients/audio_language/llama_omni/model/omni_speech_arch.py,sha256=-Sgo9fEGHRBfkZrR63i3-uXZ19wkqYbGLqAiDqevRr0,11465
|
|
810
|
+
helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py,sha256=CqtEURdHlk6_29iM8WZnsmd7DMrUcnULGD2U2inWIxw,8426
|
|
811
|
+
helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py,sha256=ZmtQY7JT74O4OH78UYSuBnmxq5Hi4-86kEY5-svfU-M,4564
|
|
812
|
+
helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py,sha256=TwSVGfSOA5N82pB2_P4cElN7w_4w2XHBXr9qicluM2w,389
|
|
813
|
+
helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py,sha256=LF8znt1puoExQ87ovtoyc1-pzO9kWNqTu_CvUWr3nBE,965
|
|
814
|
+
helm/clients/audio_language/llama_omni/model/speech_generator/builder.py,sha256=nIjOSYgJTrdnqDvy5jnYjMcHvpOirAyvMNLuUbnL9pY,358
|
|
815
|
+
helm/clients/audio_language/llama_omni/model/speech_generator/generation.py,sha256=Rka7iVephHHj0z0mPPQLfe-3Tt_UsWbTI7VRevs1ek4,30644
|
|
816
|
+
helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py,sha256=mllXYemRl-laMRntRsKtak8SIWEbVfWk0EpxPqs-su0,4612
|
|
817
|
+
helm/clients/audio_language/llama_omni/model/speech_projector/builder.py,sha256=rmzWg4yZIfGpYD7VhfSrRNN7t5U4xNq8TVugq0KLYWc,372
|
|
818
|
+
helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py,sha256=naunMdDZXzK8VpASZJYsY6TwvuxQn3Uw9r_MUouUG5k,950
|
|
819
|
+
helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py,sha256=oIaVRv1KlFYPqbT1nPtATgTcVomfNvtHmxnIZ2wcTC4,19088
|
|
820
|
+
helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py,sha256=s08H7EY_TzHqVk1b6DZv_gI4VVwP_ub_FwF6JJu0z-c,180552
|
|
821
|
+
helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py,sha256=n8by91xA1xTYz8BfsbYAwCL5G0x1FuLhSGDAP33Qyyw,12216
|
|
822
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
823
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py,sha256=ZZ5I9X_p1-ttDbYsLBxImO_CxbC5LESLI8AAIe9kKv0,365
|
|
824
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py,sha256=VKATc5W9kl0fo9TuU19MaXYSObGxX2V2Fo1NlD4GC4I,2516
|
|
825
|
+
helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py,sha256=TFvQvPiP0X8Zt-agQR84o75LUZp0uXDZAUqUl0vhPM8,14635
|
|
793
826
|
helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
794
827
|
helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
|
|
795
828
|
helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
|
|
796
829
|
helm/clients/clip_scorers/multilingual_clip_scorer.py,sha256=LgV1hN6y2FiFQ30UakxRmlwtLs_LCMxrOCewriN1nkk,2066
|
|
797
830
|
helm/clients/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
798
|
-
helm/clients/image_generation/adobe_vision_client.py,sha256=
|
|
799
|
-
helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=
|
|
800
|
-
helm/clients/image_generation/cogview2_client.py,sha256=
|
|
801
|
-
helm/clients/image_generation/dalle2_client.py,sha256=
|
|
831
|
+
helm/clients/image_generation/adobe_vision_client.py,sha256=x8nOWO4oQLy8vp_iiZN0dAKQz2QxEfcDKFgSETH9hHQ,2973
|
|
832
|
+
helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=91--D-nax3rzcfBYdinMxtH0xa0uwWZs_4jH_HgOet8,4109
|
|
833
|
+
helm/clients/image_generation/cogview2_client.py,sha256=hRNkJjw9DbqEioiA1PKtg5-GX5zqidSabw-M5lvr57U,8493
|
|
834
|
+
helm/clients/image_generation/dalle2_client.py,sha256=58JACUUO0d9EETqyM7k2eA-YsnmNFhhqtVrAlExrNq4,8515
|
|
802
835
|
helm/clients/image_generation/dalle3_client.py,sha256=sabS7AJ6O5ewmTkGmHr4cK14tlMcmh-xrGgj7J-xa3k,4639
|
|
803
|
-
helm/clients/image_generation/dalle_mini_client.py,sha256=
|
|
804
|
-
helm/clients/image_generation/deep_floyd_client.py,sha256=
|
|
805
|
-
helm/clients/image_generation/huggingface_diffusers_client.py,sha256=
|
|
836
|
+
helm/clients/image_generation/dalle_mini_client.py,sha256=axO4mmBZQ22juEwqYFdiFBtH6cbqweXbwmLKy5d-03Y,8213
|
|
837
|
+
helm/clients/image_generation/deep_floyd_client.py,sha256=scEifSsu2fRD08rHzHhSBjHRbaYnKDSC_Z8I2VQXO3E,3109
|
|
838
|
+
helm/clients/image_generation/huggingface_diffusers_client.py,sha256=atj0YBQYHgrTzCkHFZVhNC2SXdgvWEc0Yg-62475xQo,12352
|
|
806
839
|
helm/clients/image_generation/image_generation_client_utils.py,sha256=N130PbHLLvE9Q1iVefPvTCJzs3hG3osZCeYdJyjLjCw,437
|
|
807
|
-
helm/clients/image_generation/lexica_client.py,sha256=
|
|
808
|
-
helm/clients/image_generation/mindalle_client.py,sha256=
|
|
840
|
+
helm/clients/image_generation/lexica_client.py,sha256=7uM9Zq5JXbsjriJyYnVA_S6_3xCKKyGw-lMZAKtfENo,3762
|
|
841
|
+
helm/clients/image_generation/mindalle_client.py,sha256=6YWzCjyV5ELRvmIiq-WjHO-rVdOulcC9PH7ughy-H8s,4692
|
|
809
842
|
helm/clients/image_generation/nudity_check_client.py,sha256=TeFga6HvBKgdX7LitBoioXUD4BQGavVwzr5BFFE29x8,2599
|
|
810
|
-
helm/clients/image_generation/together_image_generation_client.py,sha256=
|
|
843
|
+
helm/clients/image_generation/together_image_generation_client.py,sha256=MkVwuK9iTGjMjtnqv3RFJpIm9_RHXb6Ys4te_WRfkO0,4474
|
|
811
844
|
helm/clients/image_generation/cogview2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
812
845
|
helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrHPtGDiCRq05kEh-KHjSTgxPDw6R8,3766
|
|
813
846
|
helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
|
|
814
847
|
helm/clients/image_generation/cogview2/sr_pipeline/__init__.py,sha256=qWuNwKlcvGwEFcw5932wk_t0_baNwUILIJzQWJjgh2A,488
|
|
815
848
|
helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py,sha256=1DwcUw9Tb563JpKpkPNIB5Ew1djozvPiGASShffiABk,3716
|
|
816
|
-
helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=
|
|
849
|
+
helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=IUTvHpIaaYrH00CQZZX9L45JMRb-twYir99K7LLnOzQ,10819
|
|
817
850
|
helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py,sha256=OonYjdtNKJo12cNb-t-gFHLXRFxItCXjKgS9YxWAI-k,7718
|
|
818
851
|
helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py,sha256=LSvAHRupsOqk3yb4GxyTsubRxrnPOEfObFym2j4eiKc,5120
|
|
819
852
|
helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py,sha256=5D1QWyAcY0CpwITk7EBN6ylUtc7mvZaE9iHG628AqMQ,10390
|
|
@@ -837,7 +870,7 @@ helm/clients/image_generation/mindalle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
|
|
|
837
870
|
helm/clients/image_generation/mindalle/models/__init__.py,sha256=1UieFJ0LGinYSB-idy3atl-gFAmS_ouiiGX6TM2Mh-I,8372
|
|
838
871
|
helm/clients/image_generation/mindalle/models/tokenizer.py,sha256=NFFdLUhoxEkv9SZqU3QIFk0ukaCcn6w_xFWQIRGhZJ4,1190
|
|
839
872
|
helm/clients/image_generation/mindalle/models/stage1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
840
|
-
helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=
|
|
873
|
+
helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=Q-yZeB8ZIxwOdQaKpEeBVbwF9nXeQJ2xJhiD6KjqRi4,11046
|
|
841
874
|
helm/clients/image_generation/mindalle/models/stage1/vqgan.py,sha256=KcarvKoMuPBpP0H8F8W67FogdvHaAQuo9jP3rFRxc5E,4035
|
|
842
875
|
helm/clients/image_generation/mindalle/models/stage2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
843
876
|
helm/clients/image_generation/mindalle/models/stage2/layers.py,sha256=LvDADun5nMaencaRT0pm-dq78xHpPPkpi8rlu7RLHco,5306
|
|
@@ -847,14 +880,14 @@ helm/clients/image_generation/mindalle/utils/config.py,sha256=lh8dXvL7ctKmuYEbeT
|
|
|
847
880
|
helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh9HMveJs6F49UMK57Xfa0ccnHqI8,5029
|
|
848
881
|
helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
|
|
849
882
|
helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
850
|
-
helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=
|
|
851
|
-
helm/clients/vision_language/huggingface_vlm_client.py,sha256=
|
|
852
|
-
helm/clients/vision_language/idefics_client.py,sha256=
|
|
853
|
-
helm/clients/vision_language/open_flamingo_client.py,sha256=
|
|
854
|
-
helm/clients/vision_language/paligemma_client.py,sha256=
|
|
855
|
-
helm/clients/vision_language/palmyra_vision_client.py,sha256=
|
|
856
|
-
helm/clients/vision_language/qwen2_vlm_client.py,sha256=
|
|
857
|
-
helm/clients/vision_language/qwen_vlm_client.py,sha256=
|
|
883
|
+
helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=dBf-tQJSwjHjZ3-eOaf2xfpltMRSWfczNLh7_OOPwVw,6640
|
|
884
|
+
helm/clients/vision_language/huggingface_vlm_client.py,sha256=3qQ4Ks2M-CWWn3h-Kl_xIO8Dyd_2Bc9wvvNkufyfMsA,5081
|
|
885
|
+
helm/clients/vision_language/idefics_client.py,sha256=7TNV7JTXeD6A5SZf9CtM7ugnd910B0-fFYhKxu2dA0Q,7836
|
|
886
|
+
helm/clients/vision_language/open_flamingo_client.py,sha256=w-bUzcSlwn_t_pX16HRFM9Vb3GhY3MhEPVShQKd0dKw,6558
|
|
887
|
+
helm/clients/vision_language/paligemma_client.py,sha256=9SW_QYJm8PDmQpT4iDkbIohbMv0f2-QacJpBRpDknN4,6919
|
|
888
|
+
helm/clients/vision_language/palmyra_vision_client.py,sha256=oUKfD_gJnWYs0l7u6axIt5xIoHwRkEMRK4agq50_JGc,4156
|
|
889
|
+
helm/clients/vision_language/qwen2_vlm_client.py,sha256=t_u36ZjZlORnCxaWsx8q5T2eAzKBW2VTRZAIYTHOf6s,8381
|
|
890
|
+
helm/clients/vision_language/qwen_vlm_client.py,sha256=VVhPVI0Xc2BuHQdWWc89jJTVXyscn62DxyzHpKmbmvs,7621
|
|
858
891
|
helm/clients/vision_language/open_flamingo/__init__.py,sha256=RTxnxjYnTmTZv-608o66_W74qmKLpEO6hx0cxaZaYv8,172
|
|
859
892
|
helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
860
893
|
helm/clients/vision_language/open_flamingo/src/factory.py,sha256=4KRXLV5mOEZ34-Foq2zVgTye3sQD-Buz6NZTSp2X9_A,5790
|
|
@@ -876,7 +909,7 @@ helm/common/critique_request.py,sha256=DZhJ_sY2IMluOxz-FeHvuEkA2Ujsx65HXT__7T3Ux
|
|
|
876
909
|
helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
|
|
877
910
|
helm/common/general.py,sha256=TcdPXn_bgPFvXtFP2lJhncz4Q8SdTXnKOinHOTBsegw,12027
|
|
878
911
|
helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
|
|
879
|
-
helm/common/hierarchical_logger.py,sha256=
|
|
912
|
+
helm/common/hierarchical_logger.py,sha256=iGVHqCSOlVijjPNvzQDHOdxP8-2ll2PGA2Y5n-u4_sQ,6827
|
|
880
913
|
helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
|
|
881
914
|
helm/common/images_utils.py,sha256=8BsN0fd8pc0rh_TSDvippWhTfwmJJXKNF2zqKLB8cps,3372
|
|
882
915
|
helm/common/key_value_store.py,sha256=D9ZBORzZncf3zHQOP4AuNbQnV8cZpO_kqHY1mDRugqQ,3174
|
|
@@ -887,7 +920,7 @@ helm/common/mongo_key_value_store.py,sha256=G0TIWQcvwMjyXh4TnN6xJ462HKHUAZtQJJYQ
|
|
|
887
920
|
helm/common/multimodal_request_utils.py,sha256=n6HgTyHNqfGmU9qmVK-wxQzrkPZ5Wdh-lO_y_ln6VYc,2184
|
|
888
921
|
helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
|
|
889
922
|
helm/common/object_spec.py,sha256=sKcEdggqRa3a8TovHAS4lf1LaahOFInvMl5DUF4tE6c,5186
|
|
890
|
-
helm/common/optional_dependencies.py,sha256=
|
|
923
|
+
helm/common/optional_dependencies.py,sha256=mM5qeuTq6-BiNJPjAsq29olq8_5TOVF-FIK0EeM25Po,618
|
|
891
924
|
helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
|
|
892
925
|
helm/common/reeval_parameters.py,sha256=exaEucXnSI8a076uq_qhO3CTBztMMRoRzL_7v1N4adE,300
|
|
893
926
|
helm/common/remote_context.py,sha256=DzFMii9AN03CoWp1J3k703-7oQJYHwEf9TDV5YzM6v4,2825
|
|
@@ -895,7 +928,7 @@ helm/common/request.py,sha256=HWj6IizIwJm9_NigO-geira_rI6aqhj5CevQB694m94,9161
|
|
|
895
928
|
helm/common/response_format.py,sha256=wIptA8FydZoRjMvO5SFIplgDXhwpZvZmFI-Bi-7mcGU,516
|
|
896
929
|
helm/common/test_cache.py,sha256=j19p-qzv_98X_TMW4b39ZHwSJ-MX3p91PrkYumarS6Y,4870
|
|
897
930
|
helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
|
|
898
|
-
helm/common/test_general.py,sha256=
|
|
931
|
+
helm/common/test_general.py,sha256=ZPuRRkMG0gA95GOVxfd4xvtSV-1T09rBj3Huwi72-Ks,1909
|
|
899
932
|
helm/common/test_logging.py,sha256=tkb_QDPkKBfaEQ5Y8Xip9PgMYhqOFakcENqyzO5Mj2o,2681
|
|
900
933
|
helm/common/test_media_object.py,sha256=SUWLfms_vkXNivRYM0ZT8AI3_2ru6GON5l-Hb-lk-t0,1661
|
|
901
934
|
helm/common/tokenization_request.py,sha256=NND9ESiiDE0H8QRNpfHVjXS7MQfKKIwtVRKDIjPnnJM,3344
|
|
@@ -904,16 +937,16 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
|
|
|
904
937
|
helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
|
|
905
938
|
helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
|
|
906
939
|
helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
907
|
-
helm/config/model_deployments.yaml,sha256=
|
|
908
|
-
helm/config/model_metadata.yaml,sha256=
|
|
909
|
-
helm/config/tokenizer_configs.yaml,sha256=
|
|
940
|
+
helm/config/model_deployments.yaml,sha256=DNY-6M7CVA0SSvS4nMNpK7zLAPhHDDR4NHimRrFrQjA,182596
|
|
941
|
+
helm/config/model_metadata.yaml,sha256=2IGQlmx6GWHveVjfSsSAn8WMdDLji5OD7d7BK4cDG7o,291347
|
|
942
|
+
helm/config/tokenizer_configs.yaml,sha256=TUuOUkORUVnpiHsPFO9BPcKTM3WPCaFcN8IzIBFsvkg,43555
|
|
910
943
|
helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
911
944
|
helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
|
|
912
945
|
helm/proxy/cli.py,sha256=kEDoHpisFO0EJ0Wfm1FLpJdP9sXk9j8WCILEq42RKb0,8317
|
|
913
|
-
helm/proxy/example_queries.py,sha256=
|
|
946
|
+
helm/proxy/example_queries.py,sha256=A4JKvLwkHQIprsgMFhGvruW1-Ud4YKNqwUWhv6iWfzw,4449
|
|
914
947
|
helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
|
|
915
948
|
helm/proxy/retry.py,sha256=o64BZsW2vwu2iewRA18wdsru2xC3eNBQ7WUw3IjC_5g,3698
|
|
916
|
-
helm/proxy/server.py,sha256=
|
|
949
|
+
helm/proxy/server.py,sha256=PYG8oMb-lq8eGR3Kad2ZTudJxgY4QH4jVbyoOgjes7I,10904
|
|
917
950
|
helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
|
|
918
951
|
helm/proxy/test_retry.py,sha256=db0owyGTThmIMhYWU_Eh1U-AJvQ-Wa9j_kRmC9DNjOA,1059
|
|
919
952
|
helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -933,9 +966,9 @@ helm/proxy/services/test_remote_service.py,sha256=xzkyptctXw3y5d1fgbidBMyw8B4rIL
|
|
|
933
966
|
helm/proxy/services/test_service.py,sha256=oDYen-71iwZ6YMNBVbVSdEFsH6GMvZYw5tS5Eg4YHjY,8987
|
|
934
967
|
helm/proxy/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
|
|
935
968
|
helm/proxy/static/help.html,sha256=2Rn_lGZspqrZhNfLQ4wIAvYO_BK9q67Q_AS2-3WsMpY,6231
|
|
936
|
-
helm/proxy/static/index.css,sha256=
|
|
969
|
+
helm/proxy/static/index.css,sha256=3z_JuWVuJFngWtHI4T5-EVyk4LyaCPDcSzlalvUYhmQ,754
|
|
937
970
|
helm/proxy/static/index.html,sha256=nUJf_hwBPokqrm_hDZsVfHcJrnhZLYhkVSoLdGOocf8,2009
|
|
938
|
-
helm/proxy/static/index.js,sha256
|
|
971
|
+
helm/proxy/static/index.js,sha256=bCjx29j88UnfoeYL4jRYGaqg7fd6o8IePZ0sTl-HRy8,15292
|
|
939
972
|
helm/proxy/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
|
|
940
973
|
helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
941
974
|
helm/proxy/token_counters/auto_token_counter.py,sha256=Ag368Sb-eLQUMLW7lmWc2EOKN3kgkiCTsYnHNrsf9kw,2071
|
|
@@ -967,8 +1000,8 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
|
|
|
967
1000
|
helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
|
|
968
1001
|
helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
|
|
969
1002
|
helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
|
|
970
|
-
crfm_helm-0.5.
|
|
971
|
-
crfm_helm-0.5.
|
|
972
|
-
crfm_helm-0.5.
|
|
973
|
-
crfm_helm-0.5.
|
|
974
|
-
crfm_helm-0.5.
|
|
1003
|
+
crfm_helm-0.5.9.dist-info/METADATA,sha256=LuiU5r_-9KfusWA04IwPffmA5KfGYXwZvRR-noA20Ns,18617
|
|
1004
|
+
crfm_helm-0.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1005
|
+
crfm_helm-0.5.9.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
|
|
1006
|
+
crfm_helm-0.5.9.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
|
|
1007
|
+
crfm_helm-0.5.9.dist-info/RECORD,,
|