crfm-helm 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- crfm_helm-0.5.6.dist-info/METADATA +427 -0
- crfm_helm-0.5.6.dist-info/RECORD +941 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +13 -1
- helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
- helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +4 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/common_adapter_specs.py +69 -4
- helm/benchmark/adaptation/prompt.py +1 -1
- helm/benchmark/annotation/aci_bench_annotator.py +95 -0
- helm/benchmark/annotation/air_bench_annotator.py +21 -6
- helm/benchmark/annotation/annotator.py +5 -0
- helm/benchmark/annotation/annotator_factory.py +3 -20
- helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
- helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
- helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
- helm/benchmark/annotation/bird_sql_annotator.py +58 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +93 -0
- helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
- helm/benchmark/annotation/dischargeme_annotator.py +107 -0
- helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
- helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/med_dialog_annotator.py +99 -0
- helm/benchmark/annotation/medalign_annotator.py +100 -0
- helm/benchmark/annotation/medi_qa_annotator.py +98 -0
- helm/benchmark/annotation/medication_qa_annotator.py +87 -63
- helm/benchmark/annotation/mental_health_annotator.py +98 -0
- helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
- helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +214 -6
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
- helm/benchmark/annotation/omni_math_annotator.py +131 -0
- helm/benchmark/annotation/spider_annotator.py +18 -0
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
- helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
- helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
- helm/benchmark/annotation/wildbench_annotator.py +119 -0
- helm/benchmark/annotation_executor.py +35 -15
- helm/benchmark/augmentations/cleva_perturbation.py +9 -8
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
- helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
- helm/benchmark/augmentations/dialect_perturbation.py +4 -5
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +2 -2
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +4 -5
- helm/benchmark/augmentations/perturbation.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +2 -2
- helm/benchmark/augmentations/synonym_perturbation.py +4 -3
- helm/benchmark/augmentations/test_perturbation.py +16 -13
- helm/benchmark/augmentations/translate_perturbation.py +2 -2
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/data_preprocessor.py +2 -2
- helm/benchmark/executor.py +11 -12
- helm/benchmark/huggingface_registration.py +2 -7
- helm/benchmark/metrics/aci_bench_metrics.py +14 -0
- helm/benchmark/metrics/basic_metrics.py +6 -6
- helm/benchmark/metrics/bbq_metrics.py +2 -2
- helm/benchmark/metrics/bias_metrics.py +12 -3
- helm/benchmark/metrics/bias_word_lists.py +1 -1
- helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
- helm/benchmark/metrics/bird_sql_metrics.py +28 -0
- helm/benchmark/metrics/chw_care_plan_metrics.py +14 -0
- helm/benchmark/metrics/classification_metrics.py +76 -12
- helm/benchmark/metrics/cleva_harms_metrics.py +10 -9
- helm/benchmark/metrics/code_metrics.py +5 -5
- helm/benchmark/metrics/comet_metric.py +125 -0
- helm/benchmark/metrics/common_metric_specs.py +9 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
- helm/benchmark/metrics/copyright_metrics.py +4 -4
- helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +14 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -4
- helm/benchmark/metrics/dry_run_metrics.py +5 -5
- helm/benchmark/metrics/efficiency_metrics.py +6 -6
- helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
- helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
- helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
- helm/benchmark/metrics/ifeval/__init__.py +0 -0
- helm/benchmark/metrics/ifeval/instructions.py +1574 -0
- helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
- helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
- helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
- helm/benchmark/metrics/ifeval_metrics.py +55 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
- helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
- helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
- helm/benchmark/metrics/language_modeling_metrics.py +4 -4
- helm/benchmark/metrics/llm_jury_metrics.py +46 -0
- helm/benchmark/metrics/machine_translation_metrics.py +2 -2
- helm/benchmark/metrics/med_dialog_metrics.py +14 -0
- helm/benchmark/metrics/medalign_metrics.py +14 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
- helm/benchmark/metrics/medec_metrics.py +101 -0
- helm/benchmark/metrics/medi_qa_metrics.py +14 -0
- helm/benchmark/metrics/medication_qa_metrics.py +10 -19
- helm/benchmark/metrics/melt_bias_metric.py +234 -0
- helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
- helm/benchmark/metrics/melt_metric_specs.py +43 -0
- helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
- helm/benchmark/metrics/mental_health_metrics.py +14 -0
- helm/benchmark/metrics/metric.py +3 -3
- helm/benchmark/metrics/metric_service.py +11 -11
- helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
- helm/benchmark/metrics/mimic_rrs_metrics.py +14 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +14 -0
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +14 -0
- helm/benchmark/metrics/nltk_helper.py +32 -0
- helm/benchmark/metrics/numeracy_metrics.py +4 -4
- helm/benchmark/metrics/omni_math_metrics.py +32 -0
- helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
- helm/benchmark/metrics/output_processing_metric.py +60 -0
- helm/benchmark/metrics/output_processors.py +15 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
- helm/benchmark/metrics/ranking_metrics.py +3 -3
- helm/benchmark/metrics/reference_metric.py +3 -3
- helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
- helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
- helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
- helm/benchmark/metrics/spider_metrics.py +7 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +14 -0
- helm/benchmark/metrics/statistic.py +1 -1
- helm/benchmark/metrics/summac/model_summac.py +2 -3
- helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
- helm/benchmark/metrics/summarization_metrics.py +20 -9
- helm/benchmark/metrics/test_bias_metrics.py +5 -1
- helm/benchmark/metrics/test_classification_metrics.py +140 -68
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
- helm/benchmark/metrics/test_metric.py +1 -1
- helm/benchmark/metrics/test_statistic.py +2 -2
- helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
- helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +3 -3
- helm/benchmark/metrics/toxicity_metrics.py +6 -6
- helm/benchmark/metrics/unitxt_metrics.py +7 -5
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
- helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
- helm/benchmark/metrics/vision_language/image_utils.py +2 -2
- helm/benchmark/metrics/wildbench_metrics.py +34 -0
- helm/benchmark/model_deployment_registry.py +6 -8
- helm/benchmark/model_metadata_registry.py +16 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +33 -12
- helm/benchmark/presentation/run_display.py +13 -0
- helm/benchmark/presentation/schema.py +2 -1
- helm/benchmark/presentation/summarize.py +97 -67
- helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
- helm/benchmark/reeval_run.py +202 -0
- helm/benchmark/reeval_runner.py +355 -0
- helm/benchmark/run.py +86 -90
- helm/benchmark/run_expander.py +90 -9
- helm/benchmark/run_spec_factory.py +13 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
- helm/benchmark/run_specs/audio_run_specs.py +657 -0
- helm/benchmark/run_specs/call_center_run_specs.py +49 -0
- helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
- helm/benchmark/run_specs/classic_run_specs.py +1 -69
- helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +280 -0
- helm/benchmark/run_specs/experimental_run_specs.py +142 -3
- helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
- helm/benchmark/run_specs/lite_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +141 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +1260 -0
- helm/benchmark/run_specs/melt_run_specs.py +783 -0
- helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
- helm/benchmark/run_specs/oab_exams_specs.py +32 -0
- helm/benchmark/run_specs/safety_run_specs.py +37 -0
- helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +169 -0
- helm/benchmark/run_specs/sql_run_specs.py +54 -0
- helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
- helm/benchmark/run_specs/vlm_run_specs.py +103 -2
- helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
- helm/benchmark/runner.py +5 -5
- helm/benchmark/scenarios/aci_bench_scenario.py +126 -0
- helm/benchmark/scenarios/air_bench_scenario.py +6 -1
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/__init__.py +0 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +130 -0
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
- helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
- helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
- helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
- helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
- helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
- helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
- helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
- helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
- helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +103 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +110 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +78 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +109 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +105 -0
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
- helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
- helm/benchmark/scenarios/banking77_scenario.py +6 -1
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/big_bench_scenario.py +11 -1
- helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
- helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
- helm/benchmark/scenarios/blimp_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +1 -1
- helm/benchmark/scenarios/boolq_scenario.py +1 -1
- helm/benchmark/scenarios/casehold_scenario.py +79 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +106 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
- helm/benchmark/scenarios/clear_scenario.py +157 -0
- helm/benchmark/scenarios/cleva_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +17 -4
- helm/benchmark/scenarios/commonsense_scenario.py +1 -1
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
- helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
- helm/benchmark/scenarios/dischargeme_scenario.py +172 -0
- helm/benchmark/scenarios/disinformation_scenario.py +10 -1
- helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
- helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
- helm/benchmark/scenarios/ehr_sql_scenario.py +137 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +1519 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
- helm/benchmark/scenarios/gpqa_scenario.py +80 -0
- helm/benchmark/scenarios/grammar.py +2 -2
- helm/benchmark/scenarios/grammar_scenario.py +2 -2
- helm/benchmark/scenarios/gsm_scenario.py +10 -1
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
- helm/benchmark/scenarios/headqa_scenario.py +136 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
- helm/benchmark/scenarios/ice_scenario.py +8 -4
- helm/benchmark/scenarios/ifeval_scenario.py +53 -0
- helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
- helm/benchmark/scenarios/imdb_scenario.py +11 -2
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +79 -0
- helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
- helm/benchmark/scenarios/koala_scenario.py +1 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
- helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
- helm/benchmark/scenarios/legal_support_scenario.py +11 -1
- helm/benchmark/scenarios/legalbench_scenario.py +22 -3
- helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
- helm/benchmark/scenarios/lextreme_scenario.py +11 -1
- helm/benchmark/scenarios/live_qa_scenario.py +1 -1
- helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
- helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
- helm/benchmark/scenarios/math_scenario.py +9 -1
- helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
- helm/benchmark/scenarios/med_dialog_scenario.py +25 -22
- helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
- helm/benchmark/scenarios/med_qa_scenario.py +10 -1
- helm/benchmark/scenarios/medalign_scenario.py +94 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +432 -0
- helm/benchmark/scenarios/medbullets_scenario.py +145 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +127 -0
- helm/benchmark/scenarios/medec_scenario.py +125 -0
- helm/benchmark/scenarios/medhallu_scenario.py +72 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +111 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +8 -2
- helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
- helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
- helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
- helm/benchmark/scenarios/melt_scenarios.py +793 -0
- helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
- helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
- helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
- helm/benchmark/scenarios/mental_health_scenario.py +123 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +103 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +98 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +77 -0
- helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
- helm/benchmark/scenarios/mmlu_scenario.py +11 -1
- helm/benchmark/scenarios/msmarco_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +144 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +142 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +277 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
- helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
- helm/benchmark/scenarios/newsqa_scenario.py +1 -1
- helm/benchmark/scenarios/numeracy_scenario.py +12 -2
- helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
- helm/benchmark/scenarios/omni_math_scenario.py +53 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
- helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
- helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
- helm/benchmark/scenarios/pubmed_qa_scenario.py +59 -43
- helm/benchmark/scenarios/quac_scenario.py +10 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +152 -0
- helm/benchmark/scenarios/raft_scenario.py +17 -2
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
- helm/benchmark/scenarios/scenario.py +9 -1
- helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
- helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +75 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +75 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +77 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +74 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +81 -0
- helm/benchmark/scenarios/shc_sei_scenario.py +94 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +77 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
- helm/benchmark/scenarios/spider_scenario.py +91 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +97 -0
- helm/benchmark/scenarios/summarization_scenario.py +11 -1
- helm/benchmark/scenarios/sumosum_scenario.py +157 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
- helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
- helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
- helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
- helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
- helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
- helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
- helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
- helm/benchmark/scenarios/test_math_scenario.py +1 -0
- helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
- helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
- helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
- helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
- helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
- helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
- helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +12 -2
- helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
- helm/benchmark/scenarios/unitxt_scenario.py +8 -2
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
- helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
- helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
- helm/benchmark/scenarios/wikifact_scenario.py +11 -1
- helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
- helm/benchmark/scenarios/wildbench_scenario.py +83 -0
- helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
- helm/benchmark/scenarios/xstest_scenario.py +1 -1
- helm/benchmark/server.py +13 -1
- helm/benchmark/slurm_runner.py +1 -1
- helm/benchmark/static/schema_audio.yaml +763 -0
- helm/benchmark/static/schema_autobencher.yaml +150 -0
- helm/benchmark/static/schema_call_center.yaml +97 -60
- helm/benchmark/static/{schema_medical.yaml → schema_capabilities.yaml} +100 -101
- helm/benchmark/static/schema_czech_bank.yaml +148 -0
- helm/benchmark/static/schema_enem_challenge.yaml +146 -0
- helm/benchmark/static/schema_enterprise.yaml +319 -0
- helm/benchmark/static/schema_finance.yaml +14 -12
- helm/benchmark/static/schema_heim.yaml +1389 -0
- helm/benchmark/static/schema_long_context.yaml +283 -0
- helm/benchmark/static/schema_medhelm.yaml +1140 -0
- helm/benchmark/static/schema_melt.yaml +1257 -0
- helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
- helm/benchmark/static/schema_safety.yaml +18 -1
- helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
- helm/benchmark/static/schema_slphelm.yaml +162 -0
- helm/benchmark/static/schema_social_audio.yaml +224 -0
- helm/benchmark/static/schema_sql.yaml +171 -0
- helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
- helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
- helm/benchmark/static/schema_vhelm.yaml +129 -56
- helm/benchmark/static/schema_video.yaml +219 -0
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
- helm/benchmark/static_build/assets/index-94295e78.js +10 -0
- helm/benchmark/static_build/assets/index-b9779128.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
- helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png +0 -0
- helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
- helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
- helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-38a10867.js} +2 -2
- helm/benchmark/static_build/config.js +1 -1
- helm/benchmark/static_build/index.html +6 -6
- helm/benchmark/window_services/default_window_service.py +1 -1
- helm/benchmark/window_services/encoder_decoder_window_service.py +4 -4
- helm/benchmark/window_services/ice_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
- helm/benchmark/window_services/local_window_service.py +2 -2
- helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
- helm/benchmark/window_services/test_bloom_window_service.py +3 -3
- helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
- helm/benchmark/window_services/test_gptj_window_service.py +8 -3
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
- helm/benchmark/window_services/test_openai_window_service.py +8 -3
- helm/benchmark/window_services/test_opt_window_service.py +3 -3
- helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
- helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
- helm/benchmark/window_services/test_t511b_window_service.py +3 -3
- helm/benchmark/window_services/test_ul2_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +4 -5
- helm/benchmark/window_services/test_yalm_window_service.py +3 -3
- helm/benchmark/window_services/tokenizer_service.py +7 -8
- helm/benchmark/window_services/yalm_window_service.py +1 -1
- helm/clients/ai21_client.py +3 -3
- helm/clients/aleph_alpha_client.py +1 -1
- helm/clients/anthropic_client.py +69 -29
- helm/clients/audio_language/__init__.py +0 -0
- helm/clients/audio_language/diva_llama_client.py +120 -0
- helm/clients/audio_language/llama_omni_client.py +198 -0
- helm/clients/audio_language/qwen2_5_omni_client.py +197 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +190 -0
- helm/clients/audio_language/qwen_audiolm_client.py +152 -0
- helm/clients/audio_language/test.py +62 -0
- helm/clients/auto_client.py +4 -2
- helm/clients/azure_openai_client.py +55 -0
- helm/clients/bedrock_client.py +203 -7
- helm/clients/bedrock_utils.py +33 -0
- helm/clients/client.py +7 -7
- helm/clients/clip_scorers/clip_scorer.py +1 -1
- helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
- helm/clients/cohere_client.py +3 -3
- helm/clients/google_client.py +1 -1
- helm/clients/grok_client.py +36 -0
- helm/clients/http_model_client.py +1 -1
- helm/clients/huggingface_client.py +52 -21
- helm/clients/huggingface_pipeline_client.py +138 -0
- helm/clients/ibm_client.py +267 -0
- helm/clients/image_generation/adobe_vision_client.py +1 -1
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
- helm/clients/image_generation/cogview2_client.py +1 -1
- helm/clients/image_generation/dalle2_client.py +1 -1
- helm/clients/image_generation/dalle3_client.py +2 -2
- helm/clients/image_generation/dalle_mini/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/data.py +1 -1
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
- helm/clients/image_generation/dalle_mini/model/configuration.py +2 -2
- helm/clients/image_generation/dalle_mini/model/modeling.py +3 -3
- helm/clients/image_generation/dalle_mini/model/processor.py +5 -5
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
- helm/clients/image_generation/dalle_mini_client.py +1 -1
- helm/clients/image_generation/deep_floyd_client.py +1 -1
- helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
- helm/clients/image_generation/lexica_client.py +1 -1
- helm/clients/image_generation/mindalle/models/__init__.py +6 -6
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
- helm/clients/image_generation/mindalle_client.py +1 -1
- helm/clients/image_generation/together_image_generation_client.py +1 -1
- helm/clients/lit_gpt_client.py +2 -2
- helm/clients/mistral_client.py +62 -18
- helm/clients/nvidia_nim_client.py +0 -3
- helm/clients/openai_client.py +308 -43
- helm/clients/openai_responses_client.py +174 -0
- helm/clients/palmyra_client.py +3 -9
- helm/clients/reka_client.py +3 -3
- helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
- helm/clients/stanfordhealthcare_claude_client.py +31 -0
- helm/clients/stanfordhealthcare_google_client.py +43 -0
- helm/clients/stanfordhealthcare_http_model_client.py +93 -0
- helm/clients/stanfordhealthcare_openai_client.py +62 -0
- helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
- helm/clients/test_client.py +1 -1
- helm/clients/test_together_client.py +6 -1
- helm/clients/together_client.py +76 -9
- helm/clients/upstage_client.py +23 -0
- helm/clients/vertexai_client.py +45 -13
- helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
- helm/clients/vision_language/huggingface_vlm_client.py +2 -2
- helm/clients/vision_language/idefics_client.py +6 -2
- helm/clients/vision_language/open_flamingo/__init__.py +2 -2
- helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
- helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
- helm/clients/vision_language/paligemma_client.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +188 -0
- helm/clients/vision_language/qwen_vlm_client.py +7 -5
- helm/clients/vllm_client.py +4 -6
- helm/clients/writer_client.py +102 -0
- helm/clients/yi_client.py +0 -3
- helm/common/audio_utils.py +111 -0
- helm/common/context.py +80 -0
- helm/common/credentials_utils.py +5 -5
- helm/common/file_caches/local_file_cache.py +1 -1
- helm/common/file_caches/test_local_file_cache.py +1 -1
- helm/common/general.py +9 -2
- helm/common/hierarchical_logger.py +46 -3
- helm/common/images_utils.py +2 -2
- helm/common/local_context.py +140 -0
- helm/common/media_object.py +2 -2
- helm/common/multimodal_request_utils.py +26 -0
- helm/common/reeval_parameters.py +12 -0
- helm/common/remote_context.py +61 -0
- helm/common/request.py +14 -2
- helm/common/response_format.py +18 -0
- helm/common/test_media_object.py +1 -1
- helm/config/model_deployments.yaml +1792 -28
- helm/config/model_metadata.yaml +1606 -51
- helm/config/tokenizer_configs.yaml +521 -4
- helm/proxy/cli.py +5 -3
- helm/proxy/critique/mechanical_turk_utils.py +1 -1
- helm/proxy/example_queries.py +1 -1
- helm/proxy/server.py +11 -4
- helm/proxy/services/remote_service.py +1 -1
- helm/proxy/services/server_service.py +22 -86
- helm/proxy/services/test_remote_service.py +2 -2
- helm/proxy/services/test_service.py +1 -1
- helm/proxy/static/general.js +122 -0
- helm/proxy/static/help.html +99 -0
- helm/proxy/static/index.css +57 -0
- helm/proxy/static/index.html +40 -0
- helm/proxy/static/index.js +456 -0
- helm/proxy/static/info-icon.png +0 -0
- helm/proxy/test_retry.py +1 -1
- helm/proxy/token_counters/auto_token_counter.py +1 -1
- helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
- helm/tokenizers/caching_tokenizer.py +2 -30
- helm/tokenizers/grok_tokenizer.py +53 -0
- helm/tokenizers/http_model_tokenizer.py +1 -1
- helm/tokenizers/huggingface_tokenizer.py +3 -3
- helm/tokenizers/lit_gpt_tokenizer.py +1 -1
- helm/tokenizers/test_anthropic_tokenizer.py +6 -2
- helm/tokenizers/test_grok_tokenizer.py +33 -0
- helm/tokenizers/test_huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_yalm_tokenizer.py +1 -1
- helm/tokenizers/tiktoken_tokenizer.py +1 -1
- helm/tokenizers/tokenizer.py +3 -1
- helm/tokenizers/yalm_tokenizer.py +3 -3
- helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
- crfm_helm-0.5.4.dist-info/METADATA +0 -350
- crfm_helm-0.5.4.dist-info/RECORD +0 -697
- helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
- helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/tokenizers/anthropic_tokenizer.py +0 -52
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info/licenses}/LICENSE +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,941 @@
|
|
|
1
|
+
crfm_helm-0.5.6.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
|
|
2
|
+
helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
helm/benchmark/annotation_executor.py,sha256=LEehcWmkmqV_bFFzzmdm3GqsObJGCqoAYi1ekwG-yQ4,5757
|
|
6
|
+
helm/benchmark/config_registry.py,sha256=Cd25a8FHriUzAgvGGU5sBAPyhisdSIjdUJR4YbYs6T4,1603
|
|
7
|
+
helm/benchmark/data_preprocessor.py,sha256=wqGzAiLwOYa4v6TVPe6ayrnuzdNbmfjeiofRQiO2uso,2201
|
|
8
|
+
helm/benchmark/executor.py,sha256=E7cF1vMXBn5eT1z5Le5ng4M9AaIMLjxfLgMmF1EfZy0,4843
|
|
9
|
+
helm/benchmark/huggingface_registration.py,sha256=DAiHffNmo4H90rBfvQ_LHADtUCnCk6dfpI7Wbat1DZA,4389
|
|
10
|
+
helm/benchmark/model_deployment_registry.py,sha256=zDpqsgjCvtesRan-z2TQA7G97g14UPgjG0Cbi9owWaY,9472
|
|
11
|
+
helm/benchmark/model_metadata_registry.py,sha256=7XisV0an_edM8hvP8LSoCnTeUN2QLJrQknOCA6-OE7M,8841
|
|
12
|
+
helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
|
|
13
|
+
helm/benchmark/reeval_run.py,sha256=vImL8JNhveEOftZbRQ6JAxF0L-XCKIwh65M6fIYo4RU,7198
|
|
14
|
+
helm/benchmark/reeval_runner.py,sha256=bJPl7XVOVwK2fUA7voOVQYwVFEOfKVnrT2tbSGQzQY8,15584
|
|
15
|
+
helm/benchmark/run.py,sha256=F65P6eG3S6dHDxRK8HMqDFGQjPBGIJouX80ANsHb0Y8,13806
|
|
16
|
+
helm/benchmark/run_expander.py,sha256=hKFLpmq8W2KBl_mBf-ahHEbt67qZFgu-VxjvidOeQuE,56543
|
|
17
|
+
helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
|
|
18
|
+
helm/benchmark/run_spec_factory.py,sha256=Hxeft3fXoWNz9yGo-2nIfb5pd3GDWlwYWc6YYvAkTjM,7785
|
|
19
|
+
helm/benchmark/runner.py,sha256=O-91eRRrNgE4_tlCVeLq9_0QsRfNELvaQT-KWtJw894,14618
|
|
20
|
+
helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
|
|
21
|
+
helm/benchmark/server.py,sha256=uphh9L0FQnVZVVoGx50MMb_jXh-uen6ouE3uDN5GKFE,6422
|
|
22
|
+
helm/benchmark/slurm_jobs.py,sha256=eNCAoaWDfT0Wk32ZJRIGo-x8kgjhDPnPB4Xrvw_eLB0,3225
|
|
23
|
+
helm/benchmark/slurm_runner.py,sha256=RjmwMqMdKwOzd9B2S6fkuSqB2UjybmiSRVjraiLtzgM,16567
|
|
24
|
+
helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5drG-4Y4UhIM,2219
|
|
25
|
+
helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
|
|
26
|
+
helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
|
|
27
|
+
helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
helm/benchmark/adaptation/adapter_spec.py,sha256=WrDOvQoeV5Ciw2bmvtnz6HTCAEfjCHTYgfTZwRZzkN8,5680
|
|
29
|
+
helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
|
|
30
|
+
helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
|
|
31
|
+
helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
|
|
32
|
+
helm/benchmark/adaptation/scenario_state.py,sha256=mWEhgzk18SVoMEuj2pSnc_r9JrGAHLdOlteHJKUMA5k,1961
|
|
33
|
+
helm/benchmark/adaptation/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
helm/benchmark/adaptation/adapters/adapter.py,sha256=NbsvNITD_xByHxwknkHS_vWrzvO1peA9T1rjWbz_cy0,1239
|
|
35
|
+
helm/benchmark/adaptation/adapters/adapter_factory.py,sha256=hAcjDUqFg496FNaz6jftcObfoTuX1Z15r3-oKVdiyWw,3629
|
|
36
|
+
helm/benchmark/adaptation/adapters/binary_ranking_adapter.py,sha256=dvwirvz4dRzJHo2VpX1uGA8e9LN6F1Iy_zPkerKzO9A,5816
|
|
37
|
+
helm/benchmark/adaptation/adapters/chat_adapter.py,sha256=1Pf2XgdtrqAxbZPkUfw7TUH2lrulYoDTkC8Q0sckQHA,1852
|
|
38
|
+
helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py,sha256=dhDZANH5lyL5VdR_Ks72cNlP-NHbJqThZVP6xKHmXaE,5034
|
|
39
|
+
helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=LI7uWpKIHvTUjGiygmjB_1HLk26vNkYYCBWIx0EEyL4,2180
|
|
40
|
+
helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=8LepCkI5b0MOL70pRPGb7vEH0KFMxIlpCQIVIzQT_vE,15030
|
|
41
|
+
helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=u_GFEgg5wmpate-s5U5aMsmcHuFmreJcA8J0TO1kPCc,14907
|
|
42
|
+
helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=-fY4mvzoGCCoR0HesT_xf2U2m2arVjgDuj59lm07_tg,1923
|
|
43
|
+
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=lzmHwvDOHWl9IWC3NTLGfJDbduXtK_zrS2_YoUQmdc8,4464
|
|
44
|
+
helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py,sha256=RV6B3i5juBbJCtPDWzSfma49YXeDq3vQAQ5xQwnH-cA,3282
|
|
45
|
+
helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=hhH9ehK092j1WdUwrKYSy5PvNJ73gsIu6-5W8aLoYVI,2190
|
|
46
|
+
helm/benchmark/adaptation/adapters/test_adapter.py,sha256=7Nr6kMK3JN0UjMjjZ6P1fsD5xhOeaqh0D1xI6LFKCos,641
|
|
47
|
+
helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=Iq5q0HpBHrI3d2SodI0OwQ-COXuM7KvCjlBk_zNguNI,12868
|
|
48
|
+
helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=HASZNtKXYWOOIMKVe16yokWNfCNJITJXoUhDLVkk-FQ,8048
|
|
49
|
+
helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=-412yPKMylDMDXpbG-SlssXEjZlr3dshecrTFZoE-wY,11942
|
|
50
|
+
helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
+
helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=MvE7YdIt8Y0nefXLskY9gPmXp7QWi2b8cqg8fxUpzbM,1980
|
|
52
|
+
helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=KXP9MzDdmUao3uVjPgZYKjZQ_LvGHgZvI-86o3E87xA,6404
|
|
53
|
+
helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py,sha256=jyL61UxBsIr68hUz-jtjBUnyB2HBp5ESNyECGp_Gf6Q,2129
|
|
54
|
+
helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=ftwSOTPugDuw8vh2WaQDJb0tQAeWR7S7qtD4yE_nOt4,4804
|
|
55
|
+
helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=mjjyn9p31V-yt6S8BX7SvqvkQ56D9cKSff6d-daM6HM,10250
|
|
56
|
+
helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=6nuz0Vn89A1mOedutsiq2SwTOG3qn8dUZTiaXhKffiw,3587
|
|
57
|
+
helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
+
helm/benchmark/annotation/aci_bench_annotator.py,sha256=SjXidlbpm5HOhdhNXg3HjabMEQvt3hq1iJ5GPajxt8M,3228
|
|
59
|
+
helm/benchmark/annotation/air_bench_annotator.py,sha256=Xvqzf-f29dzLGuAMeNiQe_kSkMbXEN1_U1LwCAn6nJQ,3500
|
|
60
|
+
helm/benchmark/annotation/annotator.py,sha256=__BkMVpAEpSs1pbwPK5sVWLdCAXnjsHcPYgmOqmNPu0,1843
|
|
61
|
+
helm/benchmark/annotation/annotator_factory.py,sha256=8uo5uz1UpIVCHUd7CRvmy6b9XB1gspdHmgxH5UZMPVI,2335
|
|
62
|
+
helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
|
|
63
|
+
helm/benchmark/annotation/autobencher_capabilities_annotator.py,sha256=TkW3xbcEuaPeGwuFrlu0YNSmj896WarmVT0WYL1it_E,4913
|
|
64
|
+
helm/benchmark/annotation/autobencher_safety_annotator.py,sha256=w_xjZmY1zuLjVvVbcbUygNvqcfn5dtwpXeV99yqm9aU,3914
|
|
65
|
+
helm/benchmark/annotation/bigcodebench_annotator.py,sha256=CJG2pn1DeHJCp3yHETRquNIkCHfd6ZNuOiUjG1cQ_JY,4448
|
|
66
|
+
helm/benchmark/annotation/bird_sql_annotator.py,sha256=FQDZs1-O1jfJOET0eDeU7lf5xLaiMPohC5BdmQ4XkzI,2436
|
|
67
|
+
helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
|
|
68
|
+
helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=6ybNBvJi59i0cpAhI_fLwXoSnqhAH6m7Lo6ad_PufBs,2966
|
|
69
|
+
helm/benchmark/annotation/czech_bank_qa_annotator.py,sha256=YIH5g4zHe3BQF2Y-6uRVw7g9u_SPBncqBobdvZdIzyA,3096
|
|
70
|
+
helm/benchmark/annotation/dischargeme_annotator.py,sha256=Z6xnUK1cNrFco9x0w8B_qhlLOEZrzXBwT6TKZPKoPBk,3676
|
|
71
|
+
helm/benchmark/annotation/ehr_sql_annotator.py,sha256=Izpq0biZ9lkJOPk6NwTuv2wk8Bg88vj56BKZrY8XhT4,4021
|
|
72
|
+
helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
|
|
73
|
+
helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
|
|
74
|
+
helm/benchmark/annotation/helpdesk_call_summarization_annotator.py,sha256=I7TjpN502Sa-Z4uUKemJXSAdOiVA3MMO92YIAAXeDBg,6034
|
|
75
|
+
helm/benchmark/annotation/live_qa_annotator.py,sha256=8DXsjwmeSyvC0kfp1uYds4cwpxqzF7FcskeZaXxXiOw,3552
|
|
76
|
+
helm/benchmark/annotation/med_dialog_annotator.py,sha256=OVTFIlvdhcOr_hdK0tnrDes9hYdN1mDWFTp4GDYY7O0,3162
|
|
77
|
+
helm/benchmark/annotation/medalign_annotator.py,sha256=8edAZh8oQgDKUT1bQ3Hp2NBE-QnBZ_-ZQjHkV7YKWhs,3240
|
|
78
|
+
helm/benchmark/annotation/medi_qa_annotator.py,sha256=v8e6hkHZX1x9KtTedCnpCseh-Y72z5kUgUrXHWPUkX8,3074
|
|
79
|
+
helm/benchmark/annotation/medication_qa_annotator.py,sha256=uZ3VpJ0nsDyF70_kn8kSSBPr4OlfiNdZC7q8wq_jJFE,3090
|
|
80
|
+
helm/benchmark/annotation/mental_health_annotator.py,sha256=JwgSeXtwf4KFZxNtAxsnqdLJQSvP-F-ZoCcCWdasrMQ,3275
|
|
81
|
+
helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=pwwniNlu5VTa1ZdyO0KFcMFZcpqM5CjguujgSpEGslw,3174
|
|
82
|
+
helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=zABO1FJH9pOFhUe5vc2B-c14Hf5RsuU9jQAGiMg6G0I,3204
|
|
83
|
+
helm/benchmark/annotation/model_as_judge.py,sha256=FIJOUzIhf2QpxqFf6hjgAM5hPEm0VlXzB-jiHJUrPDs,11985
|
|
84
|
+
helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=qqWHY2HfCwMP5GqvObS3JpMIYVs4yyITCsA1B7lcDks,3201
|
|
85
|
+
helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=TUxNzJcItErsw0gw76hiKZAWeQTNHGHnC0qf-_CGeF0,3316
|
|
86
|
+
helm/benchmark/annotation/omni_math_annotator.py,sha256=PvZZb1oGw60qT-oHRIs93AZbh5wTbpsmD8BforudFhA,6144
|
|
87
|
+
helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
|
|
88
|
+
helm/benchmark/annotation/spider_annotator.py,sha256=B48ylGg5J7xuTSUio7VztdXk3lI6ilMqrUvAD-ve0sE,621
|
|
89
|
+
helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=5jU-dK_0OvB_jXNLDZtQ5E3gaSUcAxFNzv6prA17eAg,3186
|
|
90
|
+
helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
|
|
91
|
+
helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
|
|
92
|
+
helm/benchmark/annotation/wildbench_annotator.py,sha256=OXR59zdKw9W7v3Q_sFnt1cEPN3nOzQDVqSbh4jDbEUs,5457
|
|
93
|
+
helm/benchmark/annotation/xstest_annotator.py,sha256=arL5DyA_nYkiSCAtl6G7MliZz5ZYRsyc7xQJNu0RBcA,3604
|
|
94
|
+
helm/benchmark/annotation/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
|
+
helm/benchmark/annotation/image2struct/image_compiler_annotator.py,sha256=iWqPDXscrXDkmzRGDg0o6ibmDVo5bQqvcWxZkr6P-d0,3620
|
|
96
|
+
helm/benchmark/annotation/image2struct/latex_compiler_annotator.py,sha256=drbxogMMGwGxgVFbhT7hxPGDh7uyhptlmEmeP1Gq2xM,2471
|
|
97
|
+
helm/benchmark/annotation/image2struct/lilypond_compiler_annotator.py,sha256=odIGciLX2oVq_O8_H15lWUZoSfVvY-jRb0ILjs7GCIg,4061
|
|
98
|
+
helm/benchmark/annotation/image2struct/webpage_compiler_annotator.py,sha256=w6RKv7Fz__j_abKXnsTn98kHPv9tWKipdLW3NVT55m8,6389
|
|
99
|
+
helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt,sha256=XtD4ysEHDHN1icMKSvBi7E69jG6NoVUkfGGdG0ccW4A,9223
|
|
100
|
+
helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt,sha256=KcSlBgagkCtY5A3boy4o4lsDdumLNDhwIET0vruGmhU,2050
|
|
101
|
+
helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md,sha256=zNV72MTHP0-Dz4lj7zwml8HHuekH6tkeDQUSzKSuehE,2380
|
|
102
|
+
helm/benchmark/annotation/wildbench/eval_template.score.v2.md,sha256=6mJzJHf56uSM8WCBs1V_12VRYLE6-5uXBFW72rDJf3s,2228
|
|
103
|
+
helm/benchmark/augmentations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
|
+
helm/benchmark/augmentations/cleva_perturbation.py,sha256=PJkJ_It_rvup_jWwVWmvCHHKR6csZDoCTFRsnZfEkTQ,29378
|
|
105
|
+
helm/benchmark/augmentations/contraction_expansion_perturbation.py,sha256=J097zFSqpwMwSvjwhkRT9BlAuq9ib6K-Ua1bkFkvvRg,4906
|
|
106
|
+
helm/benchmark/augmentations/contrast_sets_perturbation.py,sha256=frVnb8SaXtKu8ih37g2eGB00qspAIBgyl_m9YuxmunQ,3394
|
|
107
|
+
helm/benchmark/augmentations/correct_to_misspelling.json,sha256=L44RiJXlJCa6zQzTLf0MFHCOhFyRDRKfLQNXH-n3XIs,213429
|
|
108
|
+
helm/benchmark/augmentations/data_augmenter.py,sha256=Uk7rMDEgJGDoFyJLuOepjclBiNLB7Y3fATCH0HP_k_4,3847
|
|
109
|
+
helm/benchmark/augmentations/dialect_perturbation.py,sha256=Eas09Uo0435JnsgMdS4lBQ0hIC0aBnLZG5rg1Om1ef8,6303
|
|
110
|
+
helm/benchmark/augmentations/extra_space_perturbation.py,sha256=vDXptbwBzH31lNPgPBzNJ8GZVxA2Xpw_l1YA3XyUNic,899
|
|
111
|
+
helm/benchmark/augmentations/filler_words_perturbation.py,sha256=JTyciXOqHGw2e2TrUJlueFHUUyf4ORu053Yc3spd_bQ,2978
|
|
112
|
+
helm/benchmark/augmentations/gender_perturbation.py,sha256=D0t-o6w15QHJ8CvfzcB_KQcTsqhebpvL9dI4N8Oc7Cc,9443
|
|
113
|
+
helm/benchmark/augmentations/lowercase_perturbation.py,sha256=eCj8nt8mvNbLxHPZ7QOJuD9pzep09zXJaMnFXoqGIYY,575
|
|
114
|
+
helm/benchmark/augmentations/mild_mix_perturbation.py,sha256=nYbWNCuqh0uOAIYOQSiL-pO9MkiJSjUJ_13VwghmBOU,1955
|
|
115
|
+
helm/benchmark/augmentations/misspelling_perturbation.py,sha256=W3nARzIowF-fgWFeBF4fPgVLsjk-ewPgQTzf4LoTdiw,2200
|
|
116
|
+
helm/benchmark/augmentations/person_name_perturbation.py,sha256=EqxqhDfo5llXCq-QjEHIzfFWHRBFpeg8eBBPHu3jAHY,14408
|
|
117
|
+
helm/benchmark/augmentations/perturbation.py,sha256=vGQg8VHLv0qvd8rGqoSheuIwzv6kNFWiQqzmnMRsoBY,3908
|
|
118
|
+
helm/benchmark/augmentations/perturbation_description.py,sha256=VKOwBRPQY-0vuxhGvtac1Z5F10metPfpFnfs8ykFVmU,1184
|
|
119
|
+
helm/benchmark/augmentations/space_perturbation.py,sha256=6w7DjoyTZu5T0jWiAAs7OklAeOTQKRkTx4pjDy0U4RM,991
|
|
120
|
+
helm/benchmark/augmentations/suffix_perturbation.py,sha256=HGuxFHMsFyEdoz86X3Gx2dIHGuadKQaNbzaN3GljMn0,841
|
|
121
|
+
helm/benchmark/augmentations/synonym_perturbation.py,sha256=EHD9kOyG9CL5DoVjHhr_V3oTyF5xBc4h-Ve_Buuk-2E,4276
|
|
122
|
+
helm/benchmark/augmentations/test_perturbation.py,sha256=9V65K6mQKPq2DfK4qMaIq3kwFFOnuojJ5QpUKUQQ74Y,13562
|
|
123
|
+
helm/benchmark/augmentations/translate_perturbation.py,sha256=IgU8wHyQ748TyoYAeRv-0W1I5gT1WQUUCakelfsH7-0,1153
|
|
124
|
+
helm/benchmark/augmentations/typos_perturbation.py,sha256=C7N55rYHZxTgvcjOKLaQpYmeGENmwZdaUv5DBiMa4Bg,2854
|
|
125
|
+
helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8wtXvkVAx0iI2zwCxqHvk3XKTx31qHPalsI,4203
|
|
126
|
+
helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
|
|
127
|
+
helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
|
|
128
|
+
helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
|
+
helm/benchmark/metrics/aci_bench_metrics.py,sha256=fAuTm8Sr1vvyd7Tjcz9WWKrFkqrwCV-CiF6lqUO3dKU,442
|
|
130
|
+
helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
|
|
131
|
+
helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
|
|
132
|
+
helm/benchmark/metrics/basic_metrics.py,sha256=d0iwYnwrbF7w7CFtazx8vPIsZnj51U2PVVoscCb-HJA,20495
|
|
133
|
+
helm/benchmark/metrics/bbq_metrics.py,sha256=GeZhSSJzqGD0e5EAiRHitIC3XtPICF7rDI6GfeYQc8E,6201
|
|
134
|
+
helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
|
|
135
|
+
helm/benchmark/metrics/bias_word_lists.py,sha256=eyk6we2J4SW8ZaZxQUWLB7Yapn92uM5TCekhFB5vg-U,13908
|
|
136
|
+
helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
|
|
137
|
+
helm/benchmark/metrics/bird_sql_metrics.py,sha256=ooCuXW5nPpRs_-4seCONQmn25DzTbcUgGXznXTK9y0Y,1153
|
|
138
|
+
helm/benchmark/metrics/chw_care_plan_metrics.py,sha256=WOAdwuF4vusZhjaXSAB3r7PD_ZxeNmVu2oAmOqzVLtU,460
|
|
139
|
+
helm/benchmark/metrics/classification_metrics.py,sha256=1Xa_bO4PqIAV2iZitE69kc4VKS4A7PloG5ElZAgvmh8,8851
|
|
140
|
+
helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIefAoi_edUwKpp-XxYbmeU,2740
|
|
141
|
+
helm/benchmark/metrics/cleva_harms_metrics.py,sha256=xVubv2pG3iinVs3namoVHWAmV9oUPywZwFB_0JGhP_w,11277
|
|
142
|
+
helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
|
|
143
|
+
helm/benchmark/metrics/code_metrics.py,sha256=SebQ5MXJe_phTiMfGMfhgYago-hwh_g9ctBWEHGqCnU,5230
|
|
144
|
+
helm/benchmark/metrics/code_metrics_helper.py,sha256=UNai154RuhYRZM_YK-rveLct4Ui5iEBNPYmYdKq34Xs,22712
|
|
145
|
+
helm/benchmark/metrics/comet_metric.py,sha256=qOvwE0ov1plb6SwwT3CbX1XuSo4GJ-M3iRe98yMiMaM,4797
|
|
146
|
+
helm/benchmark/metrics/common_metric_specs.py,sha256=JKqmO4ovBdfOYKC-00OSzOMv--g9NTCVfUHLaz-1Uns,6025
|
|
147
|
+
helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=Zrf6HyH_WNe7gGFgW0j8FJlX5KZvbk-05iX8QFPJDyU,2656
|
|
148
|
+
helm/benchmark/metrics/copyright_metrics.py,sha256=_Lp7sKWgacY_13kFadNfnhrM2Ks8syBXnUW7zYuJkwo,7817
|
|
149
|
+
helm/benchmark/metrics/czech_bank_qa_metrics.py,sha256=bKoooK2T5v_fFKNbUnsuW6Mv9muAirJD5lTrzuHfpz8,1113
|
|
150
|
+
helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=x66XP0iQGk4ThT7ddmrlLCA0XF4arRbQMDT42LHf2kE,3297
|
|
151
|
+
helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py,sha256=TxTkkWdx6d6ym0MirZTiucl_TWFdn4uJLnlTfLjQvgk,2925
|
|
152
|
+
helm/benchmark/metrics/decodingtrust_privacy_metrics.py,sha256=OU7lka-hm6PubR5Gjj4uNyrqhjlfhe0mmjBCAz9vlRs,3456
|
|
153
|
+
helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py,sha256=BKDD3lblqT6Ebi5kEC4zbN_OvQwD1SdEtBv5Wf0kzWw,6460
|
|
154
|
+
helm/benchmark/metrics/dischargeme_metrics.py,sha256=D8LI52E17hNSPDpEvb2tw1za4QWDE3p9xgx7Nm9l7_Y,454
|
|
155
|
+
helm/benchmark/metrics/disinformation_metrics.py,sha256=5n8wgRBb6FaDjqe1nR3Cj9aS48esmMsIUq4KpBHoQoU,7870
|
|
156
|
+
helm/benchmark/metrics/dry_run_metrics.py,sha256=Ss0lzf944HIbL1CX6QuJpGFPqOzhBT0qVWLNR1BoEjk,3784
|
|
157
|
+
helm/benchmark/metrics/efficiency_metrics.py,sha256=SJqpA1d_GfBPl9H6moai8ra1GVe7tlaCfg3PeiWT54c,11845
|
|
158
|
+
helm/benchmark/metrics/ehr_sql_metrics.py,sha256=YRjvPIty7zlyoyGD6wo3HYOz7y_PThySOZzVRJ38iww,4797
|
|
159
|
+
helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
|
|
160
|
+
helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=t0251_2aA0CrXB8oUBKlPRgPl-xYjzdVhLcGjwuhOgo,19621
|
|
161
|
+
helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
|
|
162
|
+
helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
|
|
163
|
+
helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=HRRKkcTbCu5ScOVwmjzYaA7UAEGE_AJUZVOCDRuv4Po,4321
|
|
164
|
+
helm/benchmark/metrics/gpt4_audio_critique_metrics.py,sha256=L9tGFwvl1-Ew3MdInQ7KPa8OlI5YexIB2KuCYVYsuPY,7023
|
|
165
|
+
helm/benchmark/metrics/gpt4_audio_refusal_metrics.py,sha256=vYPRJq-4uNhUWUWMrDkpHmfIBkhEyAgaMNEI6RKPP80,5896
|
|
166
|
+
helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
|
|
167
|
+
helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=9-kB3NeBacI6nxs2oQ7Km_1SHyiz98UVZuR8PAlvCHM,1442
|
|
168
|
+
helm/benchmark/metrics/ifeval_metrics.py,sha256=4_Vp9bNnrctKtv6xZ1RpvBstPAZPwv1xiohH-ogs99U,2565
|
|
169
|
+
helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=RR9cMIG113oXUnBjU_denn7DaCGB11k1oGtQ5dQON3M,9874
|
|
170
|
+
helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=1GsW-nBz8TgP4wFIVEGA4_BhI17kihmk96zuLpD4NZc,4636
|
|
171
|
+
helm/benchmark/metrics/language_modeling_metrics.py,sha256=yS7k8iFjxfkckSBA0RVA7VdOivSEBtNzCjczK6We7y0,4598
|
|
172
|
+
helm/benchmark/metrics/live_qa_metrics.py,sha256=f2XFmQaohjQNqYqNg8NcDVavCzyP4cd8Cl8rLArn9EM,816
|
|
173
|
+
helm/benchmark/metrics/llm_jury_metrics.py,sha256=yzAsdacyX0MFJy2qKIjhI0y7JvtflELpCh6R14wuCgk,1704
|
|
174
|
+
helm/benchmark/metrics/machine_translation_metrics.py,sha256=22vaGBCSw12uM1wmtDG-MBBZW8OiTZwNPaerjckdtDE,3860
|
|
175
|
+
helm/benchmark/metrics/med_dialog_metrics.py,sha256=kzmrkQcmJ15zuOF9_Onk9N0oeNeyl9Rri1JEb1AqRT4,447
|
|
176
|
+
helm/benchmark/metrics/medalign_metrics.py,sha256=q6l8p5Pie-H9pxhaA-lQkSOnliJWXr6zUeN8syEQ91Q,439
|
|
177
|
+
helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=9wZgg20-9QBNk0_XhuwR3LT940fqDPkCM4Kl0dPkbAs,5353
|
|
178
|
+
helm/benchmark/metrics/medec_metrics.py,sha256=hNBOGX52G_QOmgTCp9LnIMrmGSRxbb5vgjxKU069TMQ,4152
|
|
179
|
+
helm/benchmark/metrics/medi_qa_metrics.py,sha256=JWAEMuT0UXDZrb7qHn13W6W79ilbprk492V_9vWrB4s,432
|
|
180
|
+
helm/benchmark/metrics/medication_qa_metrics.py,sha256=wit3nKNWpGFfgauu6Xye2IDTePAS0VHAQI_7OO9HR6M,462
|
|
181
|
+
helm/benchmark/metrics/melt_bias_metric.py,sha256=mHDCkRGLD-0pyJA_depi_KX3sn7g7Bgd3_m0XdLQahY,11520
|
|
182
|
+
helm/benchmark/metrics/melt_bias_word_lists.py,sha256=xA0araUdszAIOqfxiTi6MIJhKYwr_Gwsc1L9qinZx9U,27891
|
|
183
|
+
helm/benchmark/metrics/melt_metric_specs.py,sha256=zaeV57LQEl8qK7be36NaojiUJlzmkoKY8JyOkOVuPqs,1619
|
|
184
|
+
helm/benchmark/metrics/melt_toxicity_metric.py,sha256=8HxViwOJCAZ-luE_Br55xUfJn5XAVXg6lqcAUsP0GT8,4187
|
|
185
|
+
helm/benchmark/metrics/mental_health_metrics.py,sha256=4HXCXl2GxFPn6wDzHptHeBTuP4BJVLUzEUKffpd5R_k,462
|
|
186
|
+
helm/benchmark/metrics/metric.py,sha256=jqQyiKDq_pQv-ulGqfZI56ydRDQs3N3XhfHIPysUhrk,14311
|
|
187
|
+
helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
|
|
188
|
+
helm/benchmark/metrics/metric_service.py,sha256=bJaM7GisEgSWR3vPTcg7b67XF9X2K5viODacIgbGb24,1692
|
|
189
|
+
helm/benchmark/metrics/mimic_bhc_metrics.py,sha256=da1YYrE8fL3YHeIJ9hf4WCKZtuj_8cksm3rJ24rcy70,442
|
|
190
|
+
helm/benchmark/metrics/mimic_rrs_metrics.py,sha256=x3vSj1VG1UkNF3gbgJYDeA4z-crxfGIkK7iZo0xjq8c,442
|
|
191
|
+
helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=Pu9efXoBrhsvxSeGHqwbUA5k365-pJTeXpMNhmcg0L0,3927
|
|
192
|
+
helm/benchmark/metrics/mtsamples_procedures_metrics.py,sha256=XrddVk-gnc8jF8amCI1RBa_XTS9yEXD2Y9Ld9W7Q-m8,497
|
|
193
|
+
helm/benchmark/metrics/mtsamples_replicate_metrics.py,sha256=rmH34aTX_wZWxLi4jrxf3sR1RIqNRF0QDANLRQUGhqM,492
|
|
194
|
+
helm/benchmark/metrics/nltk_helper.py,sha256=QMEps-lqJZ_pCgvjlMf4BvC0pzDu3ez5jit5F4p8dAk,1313
|
|
195
|
+
helm/benchmark/metrics/numeracy_metrics.py,sha256=3E-CMmB2wuGW5tLjmEm8wFMf85DJ1ZDUANfh84SQuP0,2906
|
|
196
|
+
helm/benchmark/metrics/omni_math_metrics.py,sha256=Gqih87UrE93-a0hbRhTBkjmfGLNTkuKQGaG-sTQeuG8,1287
|
|
197
|
+
helm/benchmark/metrics/openai_mrcr_metrics.py,sha256=TAop7G50FKaR-Jyo2EGLqmMOfJRmS2vNRDFiifa6mhg,2313
|
|
198
|
+
helm/benchmark/metrics/output_processing_metric.py,sha256=ey9UBi2f3780OwFlp82ymzfjLR3MA2fpA9vW5R4W5TA,2581
|
|
199
|
+
helm/benchmark/metrics/output_processors.py,sha256=ULZlDBOf6NupAXzDKBKyTDdgPZ5PSxOAlOYTbrQEek8,472
|
|
200
|
+
helm/benchmark/metrics/paraphrase_generation_metrics.py,sha256=771CjpW5Ek00OCaCFfEsO6Cdy9eZb1fMlgWASvQgiK4,2025
|
|
201
|
+
helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bzWoPWNFuVs-3fm7XJw2EC4xgiSb3gSa4,8508
|
|
202
|
+
helm/benchmark/metrics/ranking_metrics.py,sha256=hSNKy4h7zRkGYSgo6RWt4PXQztA5ZX1PCJorVqpCvpA,17457
|
|
203
|
+
helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9_Gywuvb1r9FLE,6092
|
|
204
|
+
helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
|
|
205
|
+
helm/benchmark/metrics/ruler_qa_metrics.py,sha256=OuiA0ksByl0Tw1Oal7zbedhKjTrhJgQJDLXAgoTLXuc,1473
|
|
206
|
+
helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
|
|
207
|
+
helm/benchmark/metrics/seahelm_metrics.py,sha256=egRkeXnnb8Nqi9qJJMDXJRSl4NK6WvdUxAc_LffBips,6964
|
|
208
|
+
helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
|
|
209
|
+
helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
|
|
210
|
+
helm/benchmark/metrics/starr_patient_instructions_metrics.py,sha256=YHdTeIFdZxRbvqBnlWpAyIsWzZyWAjjDFuKOXhHYiSM,525
|
|
211
|
+
helm/benchmark/metrics/statistic.py,sha256=ATuOm0jU3L-0ELiZaF2GVMNF22W66-rMvzxRtlfqcII,3446
|
|
212
|
+
helm/benchmark/metrics/summarization_critique_metrics.py,sha256=-mki8-zvZx54dQg8X0BG2Y6wmfypQhkIuD_9ZjNBl78,4782
|
|
213
|
+
helm/benchmark/metrics/summarization_metrics.py,sha256=FJCdGRmlCJX5A-AmbtpGGlGRfNgg5Z8Bo0d9yFiE33E,16876
|
|
214
|
+
helm/benchmark/metrics/test_bias_metrics.py,sha256=qEZsCULvwjVdIyfNgJSc2L7Xp9suKKW7L5OuQmGrwZ8,6393
|
|
215
|
+
helm/benchmark/metrics/test_classification_metrics.py,sha256=CRDMGmVmzEUnNaM0C02qUTOU2AS11Mt2-GdEl89y7lw,9541
|
|
216
|
+
helm/benchmark/metrics/test_disinformation_metrics.py,sha256=U3ZmS9s33oimTQbKO-7pgWeX_WiDB9chlOCtf_vslXw,2249
|
|
217
|
+
helm/benchmark/metrics/test_evaluate_reference_metrics.py,sha256=B7xtDDWPAxF7d-vcUx_R51hFMae-DD52nUwbu_eWt6Y,1601
|
|
218
|
+
helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLpwut37DmA,771
|
|
219
|
+
helm/benchmark/metrics/test_numeracy_metrics.py,sha256=ls1ZIHDePKpHMoqAbf4HmJ1SIBjLFuLIzGbfg6OiZvM,4162
|
|
220
|
+
helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
|
|
221
|
+
helm/benchmark/metrics/toxicity_metrics.py,sha256=ZLOzxDlMgbljl-9y6vT2ZgwdhsBZ4MfV-T66VpKk00U,4114
|
|
222
|
+
helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
|
|
223
|
+
helm/benchmark/metrics/unitxt_metrics.py,sha256=8fawxnrg0xsAe0xO2wbL7S_yisj8RzJnrn6xtk8C6q8,4852
|
|
224
|
+
helm/benchmark/metrics/wildbench_metrics.py,sha256=sY7MNTzRlJJK3yph3rCijgbMaajtLyCCquThlsoE5wU,1380
|
|
225
|
+
helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
226
|
+
helm/benchmark/metrics/ifeval/instructions.py,sha256=qNoa1vMPDNz6ORWfyMv_efwKZ4U5zkI-cf4aApyfSqU,53247
|
|
227
|
+
helm/benchmark/metrics/ifeval/instructions_registry.py,sha256=NprvkRQz0QWaIpJsFp95CQCWsnuY_57ZSqFn2IISDP8,7555
|
|
228
|
+
helm/benchmark/metrics/ifeval/instructions_registry.pyi,sha256=ryH3Jimbvk9T0PtxTN6TPXv476ukLVJtTcQWYXYYtp0,63
|
|
229
|
+
helm/benchmark/metrics/ifeval/instructions_util.py,sha256=VhkJfZLCaHi094rZSoeQbok7-Q-IH5gHfAYnOs7geeo,19787
|
|
230
|
+
helm/benchmark/metrics/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
231
|
+
helm/benchmark/metrics/image_generation/aesthetics_metrics.py,sha256=UqjBgAi1ylegvHBjALJ8vxINhHEqqr2fSvN9lXgyIZk,2140
|
|
232
|
+
helm/benchmark/metrics/image_generation/aesthetics_scorer.py,sha256=ISdThDKMrx-SHQe69dCcr8qUrMCa_GsxX3BeZnd0WPA,2538
|
|
233
|
+
helm/benchmark/metrics/image_generation/clip_score_metrics.py,sha256=tUnAoew24jjjbjPaoE2-4iyRTq6YNW8Xfk1p5JWZkAU,3338
|
|
234
|
+
helm/benchmark/metrics/image_generation/denoised_runtime_metric.py,sha256=Nom_yw15ePU7wUuV2DFHpLnEAqaZQjlkW9LowRElOAI,1646
|
|
235
|
+
helm/benchmark/metrics/image_generation/detection_metrics.py,sha256=mfYoPbLCmqWxqMSXbcX6TM0niNnpCeipcHImuV3mZ3c,2160
|
|
236
|
+
helm/benchmark/metrics/image_generation/efficiency_metrics.py,sha256=neeNJNtHAVUMWqr5rvRIRlPKl225cXUGCURLB0z-rKQ,1459
|
|
237
|
+
helm/benchmark/metrics/image_generation/fidelity_metrics.py,sha256=Vewml_NOcM2jK-yyKHWsHB0KC3NVG8HfweA4rGZ9RAQ,7583
|
|
238
|
+
helm/benchmark/metrics/image_generation/fractal_dimension_metric.py,sha256=-WtHsMKiUolekyBBLKtONF8NdwCpIPSNxeGS6CEZxHI,2135
|
|
239
|
+
helm/benchmark/metrics/image_generation/gender_metrics.py,sha256=j_sHhAkq1fA2VL483OX80cC9EQjzOIWGHQAeGVEN8fY,2371
|
|
240
|
+
helm/benchmark/metrics/image_generation/image_critique_metrics.py,sha256=fVgb-GdA9QmNywq7byNlG1TTxb_zvzP_bcURruZ54Wo,12958
|
|
241
|
+
helm/benchmark/metrics/image_generation/lpips_metrics.py,sha256=naVxg-yXdXum-yQD6MgRcemVr0L567Y2drGjfehravQ,3582
|
|
242
|
+
helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py,sha256=z3xaiKrfarF-lfQ8Aa-Dveaun3LfMMpQIWR8bHbQIrY,3507
|
|
243
|
+
helm/benchmark/metrics/image_generation/nsfw_detector.py,sha256=X1hsWRBa-1KOhT_TTfCk9_jsXXMXHadAxddsFmTKQHc,3914
|
|
244
|
+
helm/benchmark/metrics/image_generation/nsfw_metrics.py,sha256=ZAyd5n0yt2fj30vBiXDPHmEQBiMgr-5G8JeCeZnwrvY,4665
|
|
245
|
+
helm/benchmark/metrics/image_generation/nudity_metrics.py,sha256=KQG-jybgaWuXB5rL-kLBWBD7gdZKWdTJYYrAEEMnZAU,1553
|
|
246
|
+
helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py,sha256=0bwfJZ5pOAQFec4TEf_eUd9qQxwximwzDrYFkZnO9Yk,6951
|
|
247
|
+
helm/benchmark/metrics/image_generation/psnr_metrics.py,sha256=VLq9gOkaoIZNAGII9fMI3tOCMpKAzbfLuqGbWo_mOV8,3126
|
|
248
|
+
helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py,sha256=veb_QJdWiwm8HRElSBI4qwqthyMW_QNO0f3z7xHFoLs,1963
|
|
249
|
+
helm/benchmark/metrics/image_generation/skin_tone_metrics.py,sha256=Ikxq4nsDhfaGcIZTblT1pCRxMfCt0G8NNDnCDJohsnU,6023
|
|
250
|
+
helm/benchmark/metrics/image_generation/uiqi_metrics.py,sha256=NaBxJvZJdNw8g7Z6jxUC3mtUhdPXsib45TSK2fr5DUM,3904
|
|
251
|
+
helm/benchmark/metrics/image_generation/watermark_metrics.py,sha256=pK_076GaxMoqG6-SvQG60uKQ3z5n84OwG__gK0GYs6s,1924
|
|
252
|
+
helm/benchmark/metrics/image_generation/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
253
|
+
helm/benchmark/metrics/image_generation/detectors/base_detector.py,sha256=e4c8vPfioGzl2ftYzWOFIBDJcZJxBmpjU13n4fXaSvY,226
|
|
254
|
+
helm/benchmark/metrics/image_generation/detectors/vitdet.py,sha256=kxXS8uNBC0pQ7LatuN85CXU8pJHZn0pJXY0rOLd_39g,7526
|
|
255
|
+
helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
256
|
+
helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py,sha256=vFO6s8QHo6Pt1QfbOKAI0m3mJrc0BeH1Hcf7u2uWMIk,2116
|
|
257
|
+
helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py,sha256=5qKL-gHnEVmzSDW2GKDq6Uox_EJMDLe0QA55Nrl4H6s,1472
|
|
258
|
+
helm/benchmark/metrics/image_generation/q16/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
259
|
+
helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py,sha256=8Y5h-6RMjivm50RnNbNwV7wCug4RhKT5g8R_YeEp54I,3467
|
|
260
|
+
helm/benchmark/metrics/image_generation/q16/test_q16.py,sha256=aDas2UJ6N8Mqq7jISXkMkrypDTKyAUL-6qO9paervCw,828
|
|
261
|
+
helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
262
|
+
helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=Ir4u8blJWTRtEBogb6u22qCy3JXAIzvx-Th6dSBLfdw,698
|
|
263
|
+
helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
|
|
264
|
+
helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
265
|
+
helm/benchmark/metrics/summac/model_summac.py,sha256=82S9BpPJENr_jiY-cNubECEhniu5Y3Arzv7AXK93PmE,17442
|
|
266
|
+
helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
|
|
267
|
+
helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
268
|
+
helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=XDZGK8h84F2w_pK8Zjko8ssKZmVxKFqTOuHL0mLBzMY,694
|
|
269
|
+
helm/benchmark/metrics/tokens/auto_token_cost_estimator.py,sha256=HtL3FtgDK1KPjs2FhH-FbmarT5jMbfx7ZQODrmRFA9k,2148
|
|
270
|
+
helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py,sha256=i715T4OW9yng-eJjsb8Qip5JFuEl4x-k9adnq7O164w,552
|
|
271
|
+
helm/benchmark/metrics/tokens/free_token_cost_estimator.py,sha256=PiraoV3WtAYtcF5NM9sFEGHrFSxMqasdJDVgDIgk80U,490
|
|
272
|
+
helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py,sha256=sa7Cu0S9IPF35puSVU-gYnLg1uXEZYAdRyKmCc-_5ss,1549
|
|
273
|
+
helm/benchmark/metrics/tokens/openai_token_cost_estimator.py,sha256=CovkJ4zeVn89bjno2gP0K8ix_Ie0EC2tUJLHLCEl378,1427
|
|
274
|
+
helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=n9f2rcgaNHROORvSYjULXC_LEA4KZZjs8wASk0vAG7o,1100
|
|
275
|
+
helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=eDooaAAtkmIGGbK672Db9simp2soXXr5GiEG3hEQBq8,2649
|
|
276
|
+
helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
|
|
277
|
+
helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
278
|
+
helm/benchmark/metrics/vision_language/emd_utils.py,sha256=nVqQ7oosjKjhpR5YPPvO4ssB92bGChgODOtsqMYVEpU,15230
|
|
279
|
+
helm/benchmark/metrics/vision_language/image_metrics.py,sha256=RgKAn7ftl4KCZ86V3zO_LUstNbc6Lla-0hdQq77JDXw,23841
|
|
280
|
+
helm/benchmark/metrics/vision_language/image_utils.py,sha256=xwtydR8-s23cJacIGXDXL_pUhAqi6O5CbhM4XNEFlDo,3787
|
|
281
|
+
helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
282
|
+
helm/benchmark/presentation/contamination.py,sha256=07IuIP92vfuI0GwfeNC-i_NZUlF8N1azzagC19YHOMQ,2802
|
|
283
|
+
helm/benchmark/presentation/create_plots.py,sha256=m51mFsYD51Y1rbEQgwTbKZjCI3xQir437WyOS5z5k64,28916
|
|
284
|
+
helm/benchmark/presentation/run_display.py,sha256=LmY2HES4dU94kRYuUxt-c9LTMDN6MU5CspWTF6rZwDo,12419
|
|
285
|
+
helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
|
|
286
|
+
helm/benchmark/presentation/schema.py,sha256=gYlMysq_rIzQTE9I1K3mIC1fFjBdDe1yHqgwb4EIciU,10989
|
|
287
|
+
helm/benchmark/presentation/summarize.py,sha256=Xk5FJRnWz7xAbPu6JQ96TJ6Fvb1-xWUGBdfetrTsmbA,59882
|
|
288
|
+
helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
|
|
289
|
+
helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
|
|
290
|
+
helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
|
|
291
|
+
helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
|
|
292
|
+
helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
|
|
293
|
+
helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9iCNrUIzrdjOfUolu0,6317
|
|
294
|
+
helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
|
|
295
|
+
helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
296
|
+
helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
|
|
297
|
+
helm/benchmark/run_specs/audio_run_specs.py,sha256=baJz5LZiwWZP3KD0hluKgpidtswzdorQnshX0CoqKAc,23383
|
|
298
|
+
helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
|
|
299
|
+
helm/benchmark/run_specs/capabilities_run_specs.py,sha256=sbqhIj4AoujV45erwoVK61lWdlkjg4qssmGlu0eSr1U,12067
|
|
300
|
+
helm/benchmark/run_specs/classic_run_specs.py,sha256=1NYeYIwC2F7EjkPEPxNoFb3Ap6BUcUJK_hxBKq4lzt0,56144
|
|
301
|
+
helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
|
|
302
|
+
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
|
|
303
|
+
helm/benchmark/run_specs/enem_challenge_specs.py,sha256=5UWeP2bsnwCHMMXI3DFRMUPKcnJ9_EL01qPUthbWIvE,1351
|
|
304
|
+
helm/benchmark/run_specs/enterprise_run_specs.py,sha256=ul2YMPpvThOmi7yIc6xR3W0rtE-8tUIaIzuhGlMg2rY,9598
|
|
305
|
+
helm/benchmark/run_specs/experimental_run_specs.py,sha256=tIgAdK3cm4t6ZBGkcPcPkxx0XAslKShYA1i3QxWVJEY,7675
|
|
306
|
+
helm/benchmark/run_specs/finance_run_specs.py,sha256=5mwb7GbAcSLVZiumqCiAr9dr8qBYApkEt5Oben5CFXs,4371
|
|
307
|
+
helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
|
|
308
|
+
helm/benchmark/run_specs/imdb_ptbr_run_specs.py,sha256=nkW5A_xeD5kCKeJVxsL8RFS8r3UpP_WCcwSdMh2s850,1215
|
|
309
|
+
helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
|
|
310
|
+
helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
|
|
311
|
+
helm/benchmark/run_specs/long_context_run_specs.py,sha256=mxgFgjdHnatOif4-xmTicGmpr4U720mfkhPIigeTrGQ,4773
|
|
312
|
+
helm/benchmark/run_specs/medhelm_run_specs.py,sha256=--KgkjVwKt4uyiTebalrbeGV4FB-jGqPciYjFZED7zA,43407
|
|
313
|
+
helm/benchmark/run_specs/melt_run_specs.py,sha256=729MkALud2wG07yulx9zqAzejdXW_eVGkfF5cQWeGGY,32031
|
|
314
|
+
helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
|
|
315
|
+
helm/benchmark/run_specs/oab_exams_specs.py,sha256=ws7Vppo_zJvxKqQ_sNhm9N7-5eQbX2CBkcDI5c_sRG4,1658
|
|
316
|
+
helm/benchmark/run_specs/safety_run_specs.py,sha256=3X6tYaq2SlRsZs9q6SCtBUgjNEpOwUtV6M7iY2Kowm0,6807
|
|
317
|
+
helm/benchmark/run_specs/seahelm_run_specs.py,sha256=R3mg4_OoaRizZ5n0FHcUQpJLny3j-ulBlHzOyF0a0Ok,23904
|
|
318
|
+
helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
|
|
319
|
+
helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=FvigS7LXxKkg9ipTaIPXDN47qFk__Vrv47hb46_cR3Y,7441
|
|
320
|
+
helm/benchmark/run_specs/sql_run_specs.py,sha256=JWCICELKi81m11MggyR6CJNl3vpWPwk4kr8DZSsWvj4,1965
|
|
321
|
+
helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak52ycvu8DAnhabw9rc7yA,1129
|
|
322
|
+
helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
|
|
323
|
+
helm/benchmark/run_specs/vlm_run_specs.py,sha256=v-eWuDYc8u5HO46isLONPfAWv5zdA1ZOQrdyOvX3vlU,37512
|
|
324
|
+
helm/benchmark/run_specs/winogrande_afr_run_specs.py,sha256=dhOm8z6Q_ZpnzYKrsS0nEbRQPWs_phkXxmL5pxCJzQA,1853
|
|
325
|
+
helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
326
|
+
helm/benchmark/scenarios/aci_bench_scenario.py,sha256=W8h7eWz9mjR0kRAffKWSnA1Fs8t2l83sPyW8fjPOxWQ,5670
|
|
327
|
+
helm/benchmark/scenarios/air_bench_scenario.py,sha256=B6_WMowLFe4gWfnoFA_yrHe0kagbIkZabEnK4kGGqSU,1884
|
|
328
|
+
helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=jnUGbppDGEsbe5xoJjmv7nW_RvwPIYm6cwSULeqk2Fk,5133
|
|
329
|
+
helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=_OWE33eVRaZI0gmfP7bLd572uOi_6jb39z_J6nkcvfg,3182
|
|
330
|
+
helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=fOCHumFWZa4OJZcTZefJiJbdWsb3zjQnWLJYd10Cctw,2496
|
|
331
|
+
helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
|
|
332
|
+
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=SyM6RP4v08B1PjumkdQnuKrM9L8SyK0bXbx-LgmyTPo,5067
|
|
333
|
+
helm/benchmark/scenarios/banking77_scenario.py,sha256=dtiM-Q_pMDWqkLi-hgl0tH-aGuDdgHkXgweE1JqrPYs,1883
|
|
334
|
+
helm/benchmark/scenarios/bbq_scenario.py,sha256=2A7MX6iMAZHuPpH9RePi9rVBeFRmGdiE6GlqZ5uNdAM,9603
|
|
335
|
+
helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
|
|
336
|
+
helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
|
|
337
|
+
helm/benchmark/scenarios/bird_sql_scenario.py,sha256=n5elzanKEX9YclAl2l1y33aCjihTmaw1VF_ZsAU5IaM,3613
|
|
338
|
+
helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
|
|
339
|
+
helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
|
|
340
|
+
helm/benchmark/scenarios/bold_scenario.py,sha256=iE9drB9IeXfRn3xvLnaQi3-nJAp-bV1RE0GJGnp9dJc,4130
|
|
341
|
+
helm/benchmark/scenarios/boolq_scenario.py,sha256=wPETIu5jcI4jgP5GoFa_xi4SsvHtS9gxQ5TD8neHmdk,8037
|
|
342
|
+
helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
|
|
343
|
+
helm/benchmark/scenarios/casehold_scenario.py,sha256=QSe0D3KQJhlTOo6kM9OHwdKy6NlclsFGRVCAB3mTG7s,3174
|
|
344
|
+
helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=BbEjDqa4C5wpdil5jIb1nzj16CCZ29hKoZVsfapSfho,4005
|
|
345
|
+
helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
|
|
346
|
+
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=pnZU2U_cYFYOJmlmwTehHU5oLIPx_Yg8Ayxinroh4IQ,4875
|
|
347
|
+
helm/benchmark/scenarios/clear_scenario.py,sha256=yGdPxWO6vY4JHNa4xywtvD-9lOn6s5cr3njpZyFA0D0,6183
|
|
348
|
+
helm/benchmark/scenarios/cleva_scenario.py,sha256=yPIiToKow76YMc0EDYeqQEPx-9a_6Bm3w4S1IsRRV5E,57987
|
|
349
|
+
helm/benchmark/scenarios/code_scenario.py,sha256=lSbZWw67ie9osOjXDZukj3EEZGa3L6TrMvTg--IbuxE,12520
|
|
350
|
+
helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py,sha256=KC-5MQ-d8Nn46aDN4FaPxmd6yk1DtVUmVR-CIZsNCp4,1738
|
|
351
|
+
helm/benchmark/scenarios/code_scenario_helper.py,sha256=TnXAlY-wdAFwIDylFItf0z7HOu93WD6dNThwzZYe330,5904
|
|
352
|
+
helm/benchmark/scenarios/commonsense_scenario.py,sha256=yZ6n9aqOi7UWY3q4uTDNc2JRNZxaBZPIp7n_Snt_8g8,9511
|
|
353
|
+
helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=gKEwqHDD8KlKmW8z3xAxSIGmALTXrRRPcoDUzbv_IXg,3854
|
|
354
|
+
helm/benchmark/scenarios/copyright_scenario.py,sha256=FHzUYEabj-BTKl90fgq7jSCq5_Yf9cO9MA9djn50B1Q,3697
|
|
355
|
+
helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
|
|
356
|
+
helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=pfHAteKXLNUrhKyAYk6m9j-d7iuEgz58o15xukp_GFQ,10260
|
|
357
|
+
helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
|
|
358
|
+
helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=ZBfkUYlIa-BagRVBf97RoyLfEloAjnM0RPv5wmEWueQ,4406
|
|
359
|
+
helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=vOUE5-rj_Wr6m7n76knte-kCMsphb-SSq9LraYf-Dh8,7933
|
|
360
|
+
helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=9qo3l44aby1EfQqohh1M2DVtHXqY1fuvj1XT3_n4hBk,5588
|
|
361
|
+
helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENOvcNz688P_f3Y7NzdwiWgoYTNvAaw3A,2866
|
|
362
|
+
helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
|
|
363
|
+
helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
|
|
364
|
+
helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=zaXn4sRPUEZiqPoudiDT1xHMV2DaiEXOOTz3qB5q7Go,20143
|
|
365
|
+
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=NjutVTOVVze-IJniRFecz8gqh_BUpuJG3-BUboTGKRw,2933
|
|
366
|
+
helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=EYKoXDWMesbY5dCNY-N0eYMRL0rjEfGsuS_TkeD3Suo,2952
|
|
367
|
+
helm/benchmark/scenarios/dialogue_scenarios.py,sha256=yXCMZegzlgL0CXTY1W5lXdkFFHicUvq4z7_284MfRpw,5778
|
|
368
|
+
helm/benchmark/scenarios/dischargeme_scenario.py,sha256=rBzagg0JVVN3o0VUfmHy2cN7gutV_RAJAo5Fa_El0GY,7842
|
|
369
|
+
helm/benchmark/scenarios/disinformation_scenario.py,sha256=0T7LhXguzBP645Fruc2udfTaMuy7XGtOEMJKpFMIFRk,8565
|
|
370
|
+
helm/benchmark/scenarios/dyck_language_scenario.py,sha256=hygFPTcICGUEPwjtxULLKBSbuBOXLYpozIgiGcT__W0,9379
|
|
371
|
+
helm/benchmark/scenarios/echr_judgment_classification_scenario.py,sha256=IqODoUY1-zJD1KW4Qkg3VwJcUeeLgGUKThr62bW-wx8,4915
|
|
372
|
+
helm/benchmark/scenarios/ehr_sql_scenario.py,sha256=Gm7Kw_TSUUxHW8ns-2e4E_tTBVX7h6Ta273VOpkMCQ8,5480
|
|
373
|
+
helm/benchmark/scenarios/ehrshot_scenario.py,sha256=MWcTejCtwohBPbZYWei_WNZ-Hdnhml7ovTVbJAgUetU,67770
|
|
374
|
+
helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=sxYXKvf-mGNqctTkemwI9rrA_Rg2xA8mz3_W3TIfzUE,2147
|
|
375
|
+
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4V426oOuexGg59q0djHCTQjQmqYgyLT191Z5fayubmU,6681
|
|
376
|
+
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=kzzDaoVikL2P7Z-17EkLIVR_W7IHcNVerUts2oXDKLA,7111
|
|
377
|
+
helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
|
|
378
|
+
helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
|
|
379
|
+
helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
|
|
380
|
+
helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
|
|
381
|
+
helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=dMTfI9MRHKXnECsXOIY8xvX6w5vAPEIa6A7TYyIu2Fw,4457
|
|
382
|
+
helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=-O4ilLwNcycmpQG5h_5WtQP7yJEr4mjWjKBe2eNP0uY,4806
|
|
383
|
+
helm/benchmark/scenarios/gpqa_scenario.py,sha256=369E0JvaR12EcgcEFKKRcDw1iztt4sb8ghIsk9Brzi4,2884
|
|
384
|
+
helm/benchmark/scenarios/grammar.py,sha256=58tQYKPj013V9jIpW7fXUqZBLuboqEi_WLlDjx74spM,5590
|
|
385
|
+
helm/benchmark/scenarios/grammar_scenario.py,sha256=Hz59gp5ivH3tIP5UAcHZbnk8pBX6GhIABSQlG33gIRI,1502
|
|
386
|
+
helm/benchmark/scenarios/gsm_scenario.py,sha256=QIj0QK5ncF31ES0GUlxbdBk6SIiJJnj5wzamj0do0tQ,2674
|
|
387
|
+
helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=8_ShEuOoEGu7iRE2b0tgi-cfBrCPF9k1L-Pgb__n3Bg,2005
|
|
388
|
+
helm/benchmark/scenarios/harm_bench_scenario.py,sha256=CBo_AfbtHTlvJdsiquP0EDTKApVmDZc7EW0VTENNAfQ,2478
|
|
389
|
+
helm/benchmark/scenarios/headqa_scenario.py,sha256=m6Kqt16JeqA1-OLJvmBPZzhVOVt7O6rbJGAwG9C7FZs,5658
|
|
390
|
+
helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=iv1khpdiWW0Z7lshyWOhhjRfYFdAU6etN8X5EDEQCrc,1302
|
|
391
|
+
helm/benchmark/scenarios/ice_scenario.py,sha256=NCbeqvpDFIIG7kSCrJrS-Z9S3iG2THZ7HpAqghpi_y4,16725
|
|
392
|
+
helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
|
|
393
|
+
helm/benchmark/scenarios/ifeval_scenario.py,sha256=SYn9itpFG0tlWSayf6v0P8bRgdtc-BmOV1dF-4TEm-0,1675
|
|
394
|
+
helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
|
|
395
|
+
helm/benchmark/scenarios/imdb_scenario.py,sha256=qHXd-QIXTCBq8rWW3N5I2Rvg6Pz9v1zFhZkwc73w9io,6259
|
|
396
|
+
helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
|
|
397
|
+
helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py,sha256=RxK5T6Nu_KP3rLMaKkJWiI_3Sqpskgqwgn4Zj95lEvI,2854
|
|
398
|
+
helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=6z3VlcucrwK2B30artWiSpo-mOTr9tiwYV6Fu8XD0VY,2657
|
|
399
|
+
helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
|
|
400
|
+
helm/benchmark/scenarios/koala_scenario.py,sha256=A5M6SD7Jjg7r9QlbHCtMaydBe-wpOtB6oc6gFXuZ47o,1389
|
|
401
|
+
helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=23rZM3IA-phf2VnuPY9QWd64scE6eaJks49apDUNfic,6355
|
|
402
|
+
helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=xjw3iKRf8P50Wo58n7ssnFiWHR2QFehzHlZhh9P1XKs,5374
|
|
403
|
+
helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
|
|
404
|
+
helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=BFK524H7uLfz_ZURuRS7KrhzRCP-WyhIcOgdcBrsldA,8709
|
|
405
|
+
helm/benchmark/scenarios/legal_support_scenario.py,sha256=Ty93M8yTr_VEHomJ_36htqqBDZZKWI7PHtXA4qkSJGk,3990
|
|
406
|
+
helm/benchmark/scenarios/legalbench_scenario.py,sha256=Yfyouxb4ir16zlBea77Xzjc0BlcYPkiXoBoVI38FXwM,4779
|
|
407
|
+
helm/benchmark/scenarios/lex_glue_scenario.py,sha256=-3fsSjTXjgRN96Hl4GzDIMB_dlxSR9NR0ATUb-CiU3w,10357
|
|
408
|
+
helm/benchmark/scenarios/lextreme_scenario.py,sha256=gVTHoMYX6Q_Itt5rOVO5lYmqWfAtuuf63CnKAF8b_ak,20461
|
|
409
|
+
helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
|
|
410
|
+
helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNySpna7aTY7K7RPD2x4,9109
|
|
411
|
+
helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=2VUJ36vHUZp6fZuLfRIuPSsU_K6Z3Im2ums06sZENqo,6153
|
|
412
|
+
helm/benchmark/scenarios/math_scenario.py,sha256=UtNj0UaCxt0RjM-uwD_Evm7SjKnvMlfCt6K0HQOAVC0,14377
|
|
413
|
+
helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
|
|
414
|
+
helm/benchmark/scenarios/med_dialog_scenario.py,sha256=AE10W1UWhOrgKUnz7e2brKSaQR1WJkQUcPoo4s6n0Fs,7553
|
|
415
|
+
helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=XEipvuIA-QoyZrtlm8nnaPuyZzdDaeTskAhnseD3Q68,5096
|
|
416
|
+
helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
|
|
417
|
+
helm/benchmark/scenarios/med_qa_scenario.py,sha256=m0W-FgFi58psLglZyQy_ouMQIDP-2j3aL7uInkdVtms,4478
|
|
418
|
+
helm/benchmark/scenarios/medalign_scenario.py,sha256=mhd8REXpPwxftH48-KKb0ZURJ1mdOlvPRmvN4g4M9Ho,3383
|
|
419
|
+
helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=itxQxG0igEr-8PY3cXmUafM45bqxtov-iHEIy_ZuQYQ,15612
|
|
420
|
+
helm/benchmark/scenarios/medbullets_scenario.py,sha256=8O0UsPWw-ESkrgiuWz4f8gR99jH5-wS5HtCKYwZ1ycs,6713
|
|
421
|
+
helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=vwmEQZ119tOVeZtl6Zt-nXKwkA8Qt4WRiH2HogIkV0w,5560
|
|
422
|
+
helm/benchmark/scenarios/medec_scenario.py,sha256=Lo7iVkek7C9omJ5LX-C83pA_Q5OrAfdNhJY4rslJWTQ,5270
|
|
423
|
+
helm/benchmark/scenarios/medhallu_scenario.py,sha256=d4HlEi1cQtvh1a39jvIHezDDmjuIEsSPdqDLLkDTzw4,2544
|
|
424
|
+
helm/benchmark/scenarios/medi_qa_scenario.py,sha256=FmXI3UwfbL8zinFPtSyTyw4X5VIe2d32HAg93vbXR94,4118
|
|
425
|
+
helm/benchmark/scenarios/medication_qa_scenario.py,sha256=StQmfHTYi8pZLP9FMPzyS-VB9gilZS0XBme7MzAL2QA,2583
|
|
426
|
+
helm/benchmark/scenarios/melt_ir_scenario.py,sha256=d88DEGKVJZCeGnbrXrQZO_W4VJeqW8XNaYc8wIUiJtA,5978
|
|
427
|
+
helm/benchmark/scenarios/melt_knowledge_scenario.py,sha256=FDG4OGYEV6Ac40VC7KAeikzbFKAK2XXFhH1-QUTw8jo,7923
|
|
428
|
+
helm/benchmark/scenarios/melt_lm_scenarios.py,sha256=kSm0lRRixhnXctMprPnzi09PLOmgfs-C7TAW3QI8RmE,8969
|
|
429
|
+
helm/benchmark/scenarios/melt_scenarios.py,sha256=Zg_Uyq-e9Y-Er4IpWU1o29YC07Q9rOxxhokPyKq57Ik,30140
|
|
430
|
+
helm/benchmark/scenarios/melt_srn_scenario.py,sha256=EQSOZIXbfvVWCJMJ4H2e_CiBz6wc8THJndnbK2WwTHM,14674
|
|
431
|
+
helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py,sha256=ptMQWgNn6R-XpAVAAjutSdZg_9ZUqG6fVotzAgeead4,7945
|
|
432
|
+
helm/benchmark/scenarios/melt_translation_scenario.py,sha256=j9YrY60DQHZz4m1MJZaGLzyI6FERlHRx2wy9auyAVB8,5415
|
|
433
|
+
helm/benchmark/scenarios/mental_health_scenario.py,sha256=O1Lfd0MxqawLZLKUDSynaqqbaGHRjDglmePIqepnJI4,4961
|
|
434
|
+
helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=PGa0Nvbad_wH3qRSMPHgg9CgicOi7n25qLDnEucXapo,4097
|
|
435
|
+
helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=bxwVWjE_z4I_Nk5eD78g3QAGyjpsNg7DVWpkp8IGWXM,3841
|
|
436
|
+
helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=tZBUZEaUMZvfSlsU6hcPs-pxQ0kDIL6qebGd7JmpDbk,2699
|
|
437
|
+
helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
|
|
438
|
+
helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=pwpp0wqNhsGc8v2V11aUyEWbwdkmIm-42N676j1T3Ws,4031
|
|
439
|
+
helm/benchmark/scenarios/mmlu_scenario.py,sha256=_5cX2uI7CxD7K_GvO3MD8CRJLuN4EzS2o_EFvbrfjSU,3855
|
|
440
|
+
helm/benchmark/scenarios/msmarco_scenario.py,sha256=-l7_rIMQjMWcpTyn6dGotmNJ5XxN_Ze8dEJyv5ftWFA,34050
|
|
441
|
+
helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=13pXjs9lFduM-QL03mpM10hU0iA8Vr2jJG2FVBQdKOI,5577
|
|
442
|
+
helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=RlyWrlI9e5MLsGbkQWpO2WRsIOZJi39xHskOIBypHdo,5399
|
|
443
|
+
helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=-Et7hJnQJOGl1U9Xdb5mLckYTpU_Ve1sCe450M-5haw,13513
|
|
444
|
+
helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=MiSq0UnUllJxHFU2gO7m4vr_vmulavJxc4ruZhsAt2U,5632
|
|
445
|
+
helm/benchmark/scenarios/natural_qa_scenario.py,sha256=g-fP8L1lXs7zwNVQOc0ZUnbYkCyElQtLVt5fe5dtvSE,12564
|
|
446
|
+
helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
|
|
447
|
+
helm/benchmark/scenarios/numeracy_scenario.py,sha256=lgTGzZc81RyL8iB4K67PAHbyYz6BM2ieub8RSFi2aRc,30895
|
|
448
|
+
helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
|
|
449
|
+
helm/benchmark/scenarios/omni_math_scenario.py,sha256=5qb2cO-Ibb3kDbwYvkzsoU_aOsoKV3ROLgZbi83OyGU,1955
|
|
450
|
+
helm/benchmark/scenarios/open_assistant_scenario.py,sha256=zd8T6eLOlYMZiFyKrRjc-EPwk5_KpbBedAcKDbZ-TdI,5609
|
|
451
|
+
helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=XbO8Wpjjq2e8OsC2s_ZScV4TcZg3hlpVGy56hgxXY9w,3253
|
|
452
|
+
helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
|
|
453
|
+
helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=73D9D_q4Zw904qfd3tVPPhHxpGN4IZcWjlA6ZHEfp2s,8070
|
|
454
|
+
helm/benchmark/scenarios/quac_scenario.py,sha256=RpJpOPbvhB0jv3R91Odc20LcNyZsny9J4IF24GNEygQ,6689
|
|
455
|
+
helm/benchmark/scenarios/race_based_med_scenario.py,sha256=vZB43jtM47PWrl9L4HYOf1i7orpscKcHX01m0oVmk2g,5778
|
|
456
|
+
helm/benchmark/scenarios/raft_scenario.py,sha256=Yk56dUMqDGXpp6SxoGWhyxa4lAIniSQfivjkoPqMuFA,4644
|
|
457
|
+
helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=zpQthgDi-AyEgOUFO5F0qaWCctLEI5WGHBEGlPEVpqc,2424
|
|
458
|
+
helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=jgVf1D4eTSxwxQsW0GBou5hfSo2dnlEJvHpVJqk3BxM,6327
|
|
459
|
+
helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=Dy0INRMzxSiIs9Pm3fa0hYodN-W--WPSv4kcmeQhucM,3270
|
|
460
|
+
helm/benchmark/scenarios/scenario.py,sha256=kSy7tmtFeC6-QSEsBuvlrMTA1PB6fOY9jycMld-vBVM,8592
|
|
461
|
+
helm/benchmark/scenarios/seahelm_scenario.py,sha256=GA46ShNGUjVdMLK0ZbN4vPuGEWFQsDPJXEGHQbs1qf8,78150
|
|
462
|
+
helm/benchmark/scenarios/self_instruct_scenario.py,sha256=3Kvi3pLL6eGOEezjoQoGv9c1UxKiRVlFmILKzqst4pI,2309
|
|
463
|
+
helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=wF_sD61IZ4RDznBVQ1HYbGh3Vc2qjbcBuU0jdmp1aD8,2803
|
|
464
|
+
helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=5aVEiRgFCutEWW9yMcJBxEo11FlwW0SiZTaOyXY6ioc,2693
|
|
465
|
+
helm/benchmark/scenarios/shc_conf_scenario.py,sha256=3LDB2pT6yi-ubSooGAD_0Ao7sYLo_MMAHNfm5Ux9Yvk,2889
|
|
466
|
+
helm/benchmark/scenarios/shc_ent_scenario.py,sha256=PS_O_keZ5s5_nSKxAC1k_WV2W8umEbyyKmlFtxvaReI,2855
|
|
467
|
+
helm/benchmark/scenarios/shc_gip_scenario.py,sha256=cxMpMmS05QpZ4xW2eogPH1hcDv6GzA6UQoAi9OSFO_Q,2702
|
|
468
|
+
helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=dbQI_pDqXepV6EyxMUNumIpyQ8oDwnu37qyQ29rxZfY,2998
|
|
469
|
+
helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=edepzg5qrN_GKa7u1W0RRhkpmfUi2vFHCvI1ma205WQ,2908
|
|
470
|
+
helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=QOQdz21s_YaRyGz-ciCPHH-fCy6hiGIrHUZz0SWPm5o,3391
|
|
471
|
+
helm/benchmark/scenarios/shc_sei_scenario.py,sha256=pTcb7n97VkesyRuqUqe5JGed1jDsQEd19udciDras8E,4532
|
|
472
|
+
helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=vjDyRZXP9UjkQzmA6u7SmKtMBuUwwn6KRQ4rT3vZqqc,2796
|
|
473
|
+
helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=sjIHT5NZlHv_IcXr_15-pOiBUPKKwykyH-QpMfvrHAY,1247
|
|
474
|
+
helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
|
|
475
|
+
helm/benchmark/scenarios/spider_scenario.py,sha256=mhiV3XWGwpnIQkaHFM_rvZlrwE7nqS12-F9t1eB8kdI,3306
|
|
476
|
+
helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=zdokiMy2Lrg5mS3V2QEakcZyJxIkqcoT5CqVCAtyoKU,4146
|
|
477
|
+
helm/benchmark/scenarios/summarization_scenario.py,sha256=WZnqhMQED6UBmRjHSboygdenLecOqIhvgdYVXzy6Q-I,6912
|
|
478
|
+
helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
|
|
479
|
+
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=OaxEvT1H9VjOjBSw_yKs3dcYt33vFE_UARr-UIP9pBY,3120
|
|
480
|
+
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=pt2Aln_dX1YMSl-9hV1HJmwW90MC3fWwGsMxZg-Q-UY,16391
|
|
481
|
+
helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=7STCSHiHGIQ2aaN_PwDE5jXUJ-qcu8PaS4pC-pbOceE,8410
|
|
482
|
+
helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
|
|
483
|
+
helm/benchmark/scenarios/test_bigcodebench_scenario.py,sha256=q9FWJsxLJoFaB3PSMLjI_-YyPoZYusOsMPwn6X6NKXw,1304
|
|
484
|
+
helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
|
|
485
|
+
helm/benchmark/scenarios/test_czech_bank_qa_scenario.py,sha256=bZNLEGu58iHmutGlSp-2uVC2931TO6Rxw7giqFh9RHY,828
|
|
486
|
+
helm/benchmark/scenarios/test_enem_challenge_scenario.py,sha256=XfPkYaSwdGa63ToC_BLuVKTRSldWNBlKsZYK6CFzL3w,2000
|
|
487
|
+
helm/benchmark/scenarios/test_ewok_scenario.py,sha256=WY2vqbHF1120ht4PER0uviKMb2jnoPM3ff4KwvwcU4I,1291
|
|
488
|
+
helm/benchmark/scenarios/test_financebench_scenario.py,sha256=EFZLJXXBoyjlTiMQFaQ6MiYkve1lfQDjQWjn4BjqgAQ,1184
|
|
489
|
+
helm/benchmark/scenarios/test_gold_commodity_news_scenario.py,sha256=RO0NcIkJuujdPVO6tDygmDxhZ5YlmIIYlhwx9LeXlQs,731
|
|
490
|
+
helm/benchmark/scenarios/test_gpqa_scenario.py,sha256=QQJ_-nmujZBSmhBhikRUWznFJ4jHPbGDnUVCP_17poI,1884
|
|
491
|
+
helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
|
|
492
|
+
helm/benchmark/scenarios/test_gsm_scenario.py,sha256=I-Sl8Sg8kmFd7u0zZbwbNmeFV1mQLuOHoQ1cQDDwovs,1123
|
|
493
|
+
helm/benchmark/scenarios/test_ifeval_scenario.py,sha256=h3CBg13VKwyb1Xaddwg2GWOzAXz4stK5lXdQtHenAw0,1646
|
|
494
|
+
helm/benchmark/scenarios/test_imdb_ptbr_scenario.py,sha256=8kfCkMRUMU7N4WIrWawFDoxaLB2iTvQ-sPj4RoE2Osg,887
|
|
495
|
+
helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py,sha256=qZE-fi1tdNOybpvEQZJUpq9fHsyrPW7NYqj_RTwsv2A,746
|
|
496
|
+
helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py,sha256=t7BJ7ouT83oNtMFFoBvdyQRu2vWW15I1HUdtmzzQKLI,1221
|
|
497
|
+
helm/benchmark/scenarios/test_legalbench_scenario.py,sha256=FqbgwBAhHWyTIUYSzI5FOnTDx0A3u1o2ANKa_6bfA4g,1212
|
|
498
|
+
helm/benchmark/scenarios/test_math_scenario.py,sha256=8Raix_ykxUENh7UREw1RhpM287oav1p59P1Dn2gXktI,829
|
|
499
|
+
helm/benchmark/scenarios/test_med_qa_scenario.py,sha256=Ekp6r5eYPkCxV3FCzVvLemKxlhENhelqdO0Mdhg5yFo,1515
|
|
500
|
+
helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py,sha256=Jp3XXq6rL62CJSmSX8rimjq0QMjxT2d92PMUF8lzdac,1118
|
|
501
|
+
helm/benchmark/scenarios/test_mmlu_pro_scenario.py,sha256=v7A5CK5bUm-YTQK1kmqL8OGM-qsk5Hxu1ououH_rzNY,2696
|
|
502
|
+
helm/benchmark/scenarios/test_mmlu_scenario.py,sha256=mxEsTydKUOt8OD1Ei82nPgUFV1Tlvu5Z6drEMToEURM,1593
|
|
503
|
+
helm/benchmark/scenarios/test_narrativeqa_scenario.py,sha256=Rac_OrUpd2ruT95YvSrmoVz2Jpycgq3Roiyogm_0aAc,6420
|
|
504
|
+
helm/benchmark/scenarios/test_oab_exams_scenario.py,sha256=6iBsG_wkG1bpWY2vS4dw0zROTJkdzGxRtNeM60WfZlI,2071
|
|
505
|
+
helm/benchmark/scenarios/test_omni_math_scenario.py,sha256=vpK1OxWrgRHbP8hfYtEdR49nSdJHg_xFAfOApVfQ_xQ,1275
|
|
506
|
+
helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
|
|
507
|
+
helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsHzMZSzOe8j0FrUQmMyZM4,1736
|
|
508
|
+
helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-FVN7Z4nGH8ZLiFfh_F9OPXZjw,789
|
|
509
|
+
helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
|
|
510
|
+
helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
|
|
511
|
+
helm/benchmark/scenarios/thai_exam_scenario.py,sha256=YjFsom1yiu-xBZ3SGenNuczVCwQcmyoITTMavGv-QEk,6069
|
|
512
|
+
helm/benchmark/scenarios/the_pile_scenario.py,sha256=X3GWABiJ5cSoZzeNpgNUVAz7_A9SyM5MhgpJseKpZow,5019
|
|
513
|
+
helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=kUQ-Bpu1N1s525EP3pa7v3sp9Wybl0RuJv2pVu0pAGQ,6155
|
|
514
|
+
helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
|
|
515
|
+
helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=wnP-zH38J62zmbdeOLzdU-E3iclbQPApgEk4AGyhdoo,2120
|
|
516
|
+
helm/benchmark/scenarios/unitxt_scenario.py,sha256=uL8Gni-Uw_eIp9xKQefp4J7XtKSttjJHzJE4USyoC2U,1930
|
|
517
|
+
helm/benchmark/scenarios/verifiability_judgment_scenario.py,sha256=2iCJplnxdR7NNKjhsLR5o51pL55Q0bcbjjWlvrk5lw4,6067
|
|
518
|
+
helm/benchmark/scenarios/vicuna_scenario.py,sha256=RFLUXx4zTfVPl5nT5j_DZ9TuHzk216PQcktomXqqR50,1685
|
|
519
|
+
helm/benchmark/scenarios/wikifact_scenario.py,sha256=tTIHk7-xEsi-CGTobcEdbsjVrtAXTZOeWXRVj0hOeWA,5856
|
|
520
|
+
helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
|
|
521
|
+
helm/benchmark/scenarios/wildbench_scenario.py,sha256=Qd9b1SC9ZtY1spf1vVuuFXXVxSJ0FlmR_DP7mIvAs8I,2981
|
|
522
|
+
helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
|
|
523
|
+
helm/benchmark/scenarios/wmt_14_scenario.py,sha256=1YYjz4x2RbYfJAXBTux9X30dxYTSC-YNngCCLhEiNfI,4646
|
|
524
|
+
helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
|
|
525
|
+
helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
526
|
+
helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=NtTEHzmbeCicbjTRxPBUueZrBGOPwF6RVc2Yftc-VKs,5634
|
|
527
|
+
helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py,sha256=IJlM1I0MxtBX5bhvwPPrsBfUwEm_ZqqVmPze8UH_tl4,6622
|
|
528
|
+
helm/benchmark/scenarios/audio_language/ami_scenario.py,sha256=SH4r2YyW2kQ8r6-nSRI_F4unJC-l-lzikr2O7hMKgEM,4371
|
|
529
|
+
helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py,sha256=kiUngeoAVOXfuKgqo96RgK_volpJUPFziu-cYDqT8WM,2685
|
|
530
|
+
helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py,sha256=oLOeBGjQCa3hpzjhX2bNS6637VD9VF1KbSJri9BJ3PI,2698
|
|
531
|
+
helm/benchmark/scenarios/audio_language/audiocaps_scenario.py,sha256=PkVqQM1zX6ecXYk-Pz4YWlST3Hnla8NyeBHbuHvhSlY,2447
|
|
532
|
+
helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py,sha256=uoiB3mnkudRH_rY1qeZRgobYYZ0xDn93F1Mn6Avl24Q,6724
|
|
533
|
+
helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py,sha256=CbcoGPW65xXRRkrDthotDfoVn51ozANG9s3LCsjxkLA,3706
|
|
534
|
+
helm/benchmark/scenarios/audio_language/corebench_scenario.py,sha256=R8RAUtdRAQcUAN0PFXybQUekdQFNtT8hXtoR1A1hMGk,3155
|
|
535
|
+
helm/benchmark/scenarios/audio_language/covost2_scenario.py,sha256=3YiaQXuLGfths2XswRw30Vf26bO9jEW_kAj5wZQSOSI,5119
|
|
536
|
+
helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py,sha256=OKawk6Mq6ONOxcttkk-qodeFkNet7nvP0UbeEu5EgJw,3079
|
|
537
|
+
helm/benchmark/scenarios/audio_language/fleurs_scenario.py,sha256=k8AFujDJYtH37Zaquy4TH8xYcxE62cvOK6DVDfp1TKA,9235
|
|
538
|
+
helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py,sha256=an4z2Ve7CpZwBoQEvuR7e7h0_Jbfor_Itj1FQtb4Od8,3538
|
|
539
|
+
helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py,sha256=Gx6ITS2hblIa_KpNbYeP6GBAZxU54DkVKLtgk_LsjG4,3996
|
|
540
|
+
helm/benchmark/scenarios/audio_language/librispeech_scenario.py,sha256=ogMXxnyTG05tCyJ2d4hiuiVsbQvf4TbndksYeaJXl1s,3475
|
|
541
|
+
helm/benchmark/scenarios/audio_language/meld_audio_scenario.py,sha256=j1JFX0jGfcqX0QZBKSjYjDWo1jHJbW5Q9jHyOs6Kgls,4903
|
|
542
|
+
helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py,sha256=Jo_-3zC226iKGT-ac0JNMhlEccazMMiHbomx_qU0rxg,3098
|
|
543
|
+
helm/benchmark/scenarios/audio_language/mustard_scenario.py,sha256=9bpcvFtWq5Pd9i9X8iaY9jod3YcRqk88xnXfjwcNMoY,6130
|
|
544
|
+
helm/benchmark/scenarios/audio_language/mutox_scenario.py,sha256=bDCQbhsRDR6iQGNlCu_35kjmjGjuzjOIoraSncfOlOY,10277
|
|
545
|
+
helm/benchmark/scenarios/audio_language/parade_scenario.py,sha256=UuOa5cSrHh5n3VF_SuJp4cy1MxlI3uEKHLrNEhGuyuw,4186
|
|
546
|
+
helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py,sha256=oN4vBkElVzjccaEK2JFqoXMCGFTTHD0gcYwSDhvHTpQ,5438
|
|
547
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py,sha256=TfMTdQ_D4foKO4NRPXygDgdF0ST2LYiOcV3gXO3WEYE,3691
|
|
548
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=OUPFMOpRCTLN0o_lo7JJ7oOHxp9VuwC0fz4abWVS7hA,4713
|
|
549
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256=7-M5HXNE-YDM44f6LO4aYKBeFQxa3PfvN7q4u4BBYxU,4089
|
|
550
|
+
helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=c36E2RkeSDumLZgN6dBGzGz1ltgPdcBSqx8XD0qNH-U,5078
|
|
551
|
+
helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py,sha256=wkKyTCtx4isQSMufap_6DsNdGkHi7L8FQ2p7n58kKYI,3124
|
|
552
|
+
helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py,sha256=4M_gTWs4CoJ1Ce9dDFBTAe9dzSovpsve_sN1eco2V2A,3155
|
|
553
|
+
helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py,sha256=L04ee5bM5E0UNNmkwEzVwug4HJXQoIcVjujPgxtU2h0,4366
|
|
554
|
+
helm/benchmark/scenarios/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
555
|
+
helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py,sha256=c8zcoGCOFqBGE4TAEx1uLsUmGXw_jIS8alI99ubGeDA,5477
|
|
556
|
+
helm/benchmark/scenarios/image_generation/cub200_scenario.py,sha256=7p3G4mJRc8QHR4Mw2GLsfAFuJcEe6OeZbezVhbyc55E,4103
|
|
557
|
+
helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py,sha256=yiaX_2Aut3hZdfggCsTCcTPOdraaX-cOjPHyE5D1lYg,9045
|
|
558
|
+
helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py,sha256=yAFh8Kn84b3RpHZVsVMYnk4GvbMgxB7oIPUzv5sp78I,3216
|
|
559
|
+
helm/benchmark/scenarios/image_generation/detection_scenario.py,sha256=E4tqQXmPSLutPYdk9Ngil7LteUwRe7p55MXSURxk_rU,3188
|
|
560
|
+
helm/benchmark/scenarios/image_generation/draw_bench_scenario.py,sha256=b4WcbttcjoIY5gcOMk6e7c_mqfjlQsfowo9-D_-wccY,3179
|
|
561
|
+
helm/benchmark/scenarios/image_generation/i2p_scenario.py,sha256=8VXVyWZmW6k4Q43GBDwqzYDj1pCKuAraGKaulZ_0MPk,2233
|
|
562
|
+
helm/benchmark/scenarios/image_generation/landing_page_scenario.py,sha256=0R1sAuS7txLxpaJOuvojg6dZEcRFGo4WabjX2ieo0FA,1361
|
|
563
|
+
helm/benchmark/scenarios/image_generation/logos_scenario.py,sha256=Sbc_vlMdHAr6AhJ6rr4TjBIMEbzcw_hdGhYGufHSlJc,10245
|
|
564
|
+
helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py,sha256=swcdlPAYYwtidMUIHWCW1-xxHH2QF3wm3IKDBkr6-DU,3599
|
|
565
|
+
helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py,sha256=kybgeSjY0nm3Wya2vQeoBtJGtHZra4XhT4VY7Izjxrk,1879
|
|
566
|
+
helm/benchmark/scenarios/image_generation/mscoco_scenario.py,sha256=KMzu0II1F8ajL7NrYBWe74UXKcLugg7bhubpSZR4TQw,4201
|
|
567
|
+
helm/benchmark/scenarios/image_generation/paint_skills_scenario.py,sha256=6nm_uo6rmkyBzbU0BntvLq9jhWlfunBEy9Vc1d7_RVU,2967
|
|
568
|
+
helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py,sha256=pTSd1XidvWwTPFpVinl28s6150a2w3iK4d1Ce81lijA,5165
|
|
569
|
+
helm/benchmark/scenarios/image_generation/radiology_scenario.py,sha256=7JN8OYap8kA36Od1_bZTkhZd-H9Qjw_dh4TIzr9UTms,1701
|
|
570
|
+
helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,sha256=DoabanZhd-2MHFDZeR9EoPit0T2TvbVwZGUR0RfJyW0,2362
|
|
571
|
+
helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
|
|
572
|
+
helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
|
|
573
|
+
helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
574
|
+
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
|
|
575
|
+
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
|
|
576
|
+
helm/benchmark/scenarios/vision_language/blink_scenario.py,sha256=4UuUP704OYiR7RKw6p3eYjAYOVlSk5KtMJ5RuyEmYIg,5644
|
|
577
|
+
helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=lfRHjhhXCo0YeDQe4_gfSHCzVKtqQVZ6DALLABcCmtI,4637
|
|
578
|
+
helm/benchmark/scenarios/vision_language/exams_v_scenario.py,sha256=pLD--gtL5q7jLSWQ8iwAdsiOrTJ_rBsLbwWMWKRhPbs,3853
|
|
579
|
+
helm/benchmark/scenarios/vision_language/fair_face_scenario.py,sha256=V6_1Kl2nWDRyHvwnKcSxkP0DChzKDBW0i_-t9oAxps0,4721
|
|
580
|
+
helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=CDutFh1PHLyeMdJ9HojzYKE1zJidL9ktcsfn9uHNLZY,2612
|
|
581
|
+
helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=k4E6JAN8a_KT1jjV2Ch3K5YhWKJ0f-9iCXLO-_2Xl8M,3535
|
|
582
|
+
helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=qiLLdiSzhnSyjmqCAvMxjhcZ7yBiX37L1cdsZvHL4ds,3845
|
|
583
|
+
helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py,sha256=7GK_jAOfCgRIGiN_GInDePwuT2wZqmWHp1rqdx18xQg,4994
|
|
584
|
+
helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=HnzA0L1Mm9rw9uyK-hnCGrxo33z_U_86TLnlELjDV6E,4738
|
|
585
|
+
helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=7ZHpRD7TdQQ-Mp5XQV5yyiLUE0k1KpgbLSYKLBJMxs0,4343
|
|
586
|
+
helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=cM7eTE4bpcIzLyEDye86Ud3rD4Id-0ju73EXjg0DYoI,4340
|
|
587
|
+
helm/benchmark/scenarios/vision_language/mm_star_scenario.py,sha256=cN17oBJmLHcQUPO5GpDPLc1pM13bSNqmVoLIK281zR0,3624
|
|
588
|
+
helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=7Aa3y0TWGZH3QrPDiqIMkj83LU2Klrzgcb46jv5uytY,5498
|
|
589
|
+
helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=deDMdg2-ORZPV623ngncDPlRn6z6cq_QbQtMu-z0Ydo,7665
|
|
590
|
+
helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HUO09uM2rBXOfCsxzwovmwtihq53xjuzDOtQO_S3J4I,4161
|
|
591
|
+
helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
|
|
592
|
+
helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py,sha256=qWz71kAlH4TxFSTBgAmZ7DLMVA8ir4X7jXnS4cArpZo,3024
|
|
593
|
+
helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
|
|
594
|
+
helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
|
|
595
|
+
helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=D3nNu3uU87eMDiMZZafuRTntXjwbqPaSDygUgQm45F8,9943
|
|
596
|
+
helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=gWrBG5U8uoU92JPGNm5kuzo1GekoJo1rKQaNhv6MYGA,3996
|
|
597
|
+
helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py,sha256=OJtiGhSN_KYgEz0VGXjCjQik_Xihtgiali70Z00XOzk,2083
|
|
598
|
+
helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=YNwuIMJBo7wwftx-T5tCYmGo2oy_794fZ330lkDyqb0,5171
|
|
599
|
+
helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=DxGZ7EL22SzxpAkuiA5twuGVTm96wG_RBg3dU3Vh_c4,4241
|
|
600
|
+
helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=wRa_OuOdyf-qcy9hml-Kj6YtVP5MDzeTbGcqva6LqdA,3707
|
|
601
|
+
helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=zCnkiSya-PHc3ywAhmw03bFdsvLCxAUwGfE6OviEXDQ,4153
|
|
602
|
+
helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py,sha256=7bFu6CYU9bNNuFAlNjdmsmuNlDp-YkLWD1EJuoZuNAc,2597
|
|
603
|
+
helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=cC8_Vyqw2f4K4hJY-eo9ptj6ANfWgiFAK7b6OOTIPLI,5239
|
|
604
|
+
helm/benchmark/scenarios/vision_language/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
605
|
+
helm/benchmark/scenarios/vision_language/image2struct/chart2csv_scenario.py,sha256=qcs3o9dPsXoeaP0bu9UVZ6P0GPEcRLoaqABxysLN6VY,1802
|
|
606
|
+
helm/benchmark/scenarios/vision_language/image2struct/image2struct_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
|
|
607
|
+
helm/benchmark/scenarios/vision_language/image2struct/latex_scenario.py,sha256=SnZuHATg5i764MAdgaGwjIGdjCZNrOqP83Y5jE_fkHs,1153
|
|
608
|
+
helm/benchmark/scenarios/vision_language/image2struct/musicsheet_scenario.py,sha256=c08cquz2IALY7PlpOoEfAjupKZmn5GDVZ1H8Gbj4r8s,831
|
|
609
|
+
helm/benchmark/scenarios/vision_language/image2struct/utils_latex.py,sha256=jW3_c63a6u39PJGJw6lM9pIa3dnF8CQgZlPNZdH0sfs,15001
|
|
610
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage_scenario.py,sha256=DJQIa8NaKV-nhkXEBuY97MJ8a1O3x-Yr6hACVa-67Ns,11117
|
|
611
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
612
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/driver.py,sha256=WBFbb3N_eHIa7OFvHQS3Pmwbmkl6r9VyobxlIEKhty8,2823
|
|
613
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
|
|
614
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
|
|
615
|
+
helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
|
|
616
|
+
helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
|
|
617
|
+
helm/benchmark/static/schema_audio.yaml,sha256=lVslZX7JmFo0ZgLU4n6amrs9DK8y43Ux0I9QyDUG-14,29119
|
|
618
|
+
helm/benchmark/static/schema_autobencher.yaml,sha256=yb-NkF5w5R2YOg7RIsadNHJ_5G7lG1gbcDVq_25luEk,5716
|
|
619
|
+
helm/benchmark/static/schema_call_center.yaml,sha256=i30aFzWqdOJRyAHN8vAzyHEX1v95DEK0TI1SMKTN4TE,9106
|
|
620
|
+
helm/benchmark/static/schema_capabilities.yaml,sha256=HHy0aafhOaqL0C4TZw2mMt1Dce2_wuN062ORNZIbwYg,8733
|
|
621
|
+
helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
|
|
622
|
+
helm/benchmark/static/schema_cleva.yaml,sha256=TDh-zcCzzTTs7bu0IWlY5dXYaTFhxly8sJIBGQdBvug,25401
|
|
623
|
+
helm/benchmark/static/schema_czech_bank.yaml,sha256=jkTRQVmmbKkbB0zPH9AtYh6Lt33ymMInRBQnHE5lIOo,5462
|
|
624
|
+
helm/benchmark/static/schema_decodingtrust.yaml,sha256=2VPxzcyKYea7mx-qmswyVRjPfVatjVH4Rs3OU82mgII,15670
|
|
625
|
+
helm/benchmark/static/schema_enem_challenge.yaml,sha256=ZDcOfonL0z-ehsW5OkwaQOeiG1jLPk_toN8s2jhVIdM,5540
|
|
626
|
+
helm/benchmark/static/schema_enterprise.yaml,sha256=W6eP79bBhKsvsxD8ve-lC-ELDtPXyGmRJ2Z35uK9pLo,11969
|
|
627
|
+
helm/benchmark/static/schema_ewok.yaml,sha256=MluPnZSy22wZLFB2pR7ycBRgUSvIUsqvq4qM0Vk2ur4,12113
|
|
628
|
+
helm/benchmark/static/schema_finance.yaml,sha256=I5-rcZmYpfwS9jVsZM53h6Iv6Um33IhQqt-LUrc4_GU,7165
|
|
629
|
+
helm/benchmark/static/schema_heim.yaml,sha256=EK5F51C6vDZtbVFKqo5GDIi4tG-sfdVm3XcYpfthqNA,44396
|
|
630
|
+
helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0yPsYgu_MB7uu5pwHE,19894
|
|
631
|
+
helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
|
|
632
|
+
helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
|
|
633
|
+
helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
|
|
634
|
+
helm/benchmark/static/schema_long_context.yaml,sha256=0xcyw8WI4SiLM1QPnjhTM-1SMGIyA5IDwWKpJzfQt9g,10795
|
|
635
|
+
helm/benchmark/static/schema_medhelm.yaml,sha256=84BrIengbq0m42ICWvyEWoYtdERR-8J8-8QbPOqUzvA,50747
|
|
636
|
+
helm/benchmark/static/schema_melt.yaml,sha256=mmPqwDa26DVZXsRJkmKQSyD0OStvjlxaMoSPM25SpD4,47494
|
|
637
|
+
helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
|
|
638
|
+
helm/benchmark/static/schema_mmlu_winogrande_afr.yaml,sha256=YIVYf-mOFPq82UVBdMhnCWNOr4sV8Oi3-ozOszJ2tWQ,40143
|
|
639
|
+
helm/benchmark/static/schema_safety.yaml,sha256=7RfZDX4wr8Xr1BJ149ZwmplPzPkNL0-BKbEZuzUsl_0,9278
|
|
640
|
+
helm/benchmark/static/schema_seahelm.yaml,sha256=9XF9Rlr7I-g-uW6R0LNh7Xg52Xs3_058QybXEiN-hnM,28296
|
|
641
|
+
helm/benchmark/static/schema_slphelm.yaml,sha256=3avOfp-ZEmVRGei3_M_WX6cSP5hQjbfHsDr1XrjayMY,5294
|
|
642
|
+
helm/benchmark/static/schema_social_audio.yaml,sha256=Nj3ORXDT4RHD52cyo1RHfueWwbhqp1qW06TaVJ2lUfE,8653
|
|
643
|
+
helm/benchmark/static/schema_sql.yaml,sha256=8rRff6p_i1CsH7oDbUjau2qRWbLGspuM1Hy-g5pOQiU,6047
|
|
644
|
+
helm/benchmark/static/schema_thai.yaml,sha256=yJUrevvgTJ46TpyXfNecW_B9urh7LPwSbBi_mT4ZngA,8348
|
|
645
|
+
helm/benchmark/static/schema_torr.yaml,sha256=9R6HgT9ZuCnbMdhYB-pFect9apwEVuLEr3R1fx-Txd0,14583
|
|
646
|
+
helm/benchmark/static/schema_tweetsentbr.yaml,sha256=DwHE5Y2STJPDT0fFNm-GPFXq_n3DStQ1ubzhSu4xsoI,5453
|
|
647
|
+
helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
|
|
648
|
+
helm/benchmark/static/schema_vhelm.yaml,sha256=0slYep2eepUefgtK_m4iSS785sHdJzljmO-kwDRriK0,34262
|
|
649
|
+
helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
|
|
650
|
+
helm/benchmark/static/schema_video.yaml,sha256=FkpI5Slc4w-ty4hns82ArXIvTdqppWDnkJSpIp74QN4,9713
|
|
651
|
+
helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
|
|
652
|
+
helm/benchmark/static_build/index.html,sha256=kpJ5Riw0YUmOOo2lSyWPgWx5XOwxxiLvPmG3wHwn2tM,1178
|
|
653
|
+
helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
|
|
654
|
+
helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
|
|
655
|
+
helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
|
|
656
|
+
helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
657
|
+
helm/benchmark/static_build/assets/helm-safety-2907a7b6.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
|
|
658
|
+
helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
|
|
659
|
+
helm/benchmark/static_build/assets/index-94295e78.js,sha256=yvo6hRwNE6Ns7NxJHOdVfUOhc8HsW8eZVadLMW0Wn0w,124386
|
|
660
|
+
helm/benchmark/static_build/assets/index-b9779128.css,sha256=uXeRKCUzQAC32ofNoaK3-WC7kRWR--KnR6--1m9NdQA,491471
|
|
661
|
+
helm/benchmark/static_build/assets/medhelm-overview-eac29843.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
|
|
662
|
+
helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
|
|
663
|
+
helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
|
|
664
|
+
helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
|
|
665
|
+
helm/benchmark/static_build/assets/react-f82877fd.js,sha256=ijg4n6eANaZKXPWIVTQITqrtf-zzicjslJMm6DniDkA,275149
|
|
666
|
+
helm/benchmark/static_build/assets/recharts-4037aff0.js,sha256=SP08CFvsw8cMMMMdqcXvsLviuOxkAhXGwvUIMvYUdxk,432466
|
|
667
|
+
helm/benchmark/static_build/assets/tremor-38a10867.js,sha256=prOrg5S4EeKHSd6RkgnBIbVfXIUq3xjeVE0MRdqvenI,293019
|
|
668
|
+
helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
|
|
669
|
+
helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
|
|
670
|
+
helm/benchmark/static_build/assets/vhelm-model-8afb7616.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
|
|
671
|
+
helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
672
|
+
helm/benchmark/window_services/default_window_service.py,sha256=HlLI3be8s-GNxDygNGrvo9exEhbrO8Vtr3w0rnSIx7M,181
|
|
673
|
+
helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=wfdydJY6AmpYCfAv5PQu9D6nFXbuxIRum7Tsv0DemJE,2148
|
|
674
|
+
helm/benchmark/window_services/ice_window_service.py,sha256=snyIWVeeknf202_pzBUmvPcA7UcN_FKyIpCGpO2CmFU,1100
|
|
675
|
+
helm/benchmark/window_services/local_window_service.py,sha256=-6wlg8gN_dN80lptRWJQsPALCK6W80-KHA7gghs2-5M,5292
|
|
676
|
+
helm/benchmark/window_services/no_decoding_window_service.py,sha256=s_i_cqIuU9p0GDRIBApaOHzjH7gHrBPTJ2X5NEcN33Y,1375
|
|
677
|
+
helm/benchmark/window_services/test_anthropic_window_service.py,sha256=6LHPP-_FwhyWiQ1mfeEimddLehJu_rQm4WdT7Vr8Kio,4286
|
|
678
|
+
helm/benchmark/window_services/test_bloom_window_service.py,sha256=81xYNYmyuENOJensbdjbKIoTwZ5SKXwQif0boXsnwSw,4378
|
|
679
|
+
helm/benchmark/window_services/test_flan_t5_window_service.py,sha256=IhQMWBq2d39O3uNKGwbaMWJkz8585Zc-J_yqvPJfwu4,695
|
|
680
|
+
helm/benchmark/window_services/test_gpt2_window_service.py,sha256=RC1dP17V3BrPvHTD1xtDIzlYkX2gdFtokmCY_lfm7UM,2752
|
|
681
|
+
helm/benchmark/window_services/test_gpt4_window_service.py,sha256=9OqZni8aI4204QRrm0C1KprKkJuPFmmANyo1082xvyA,1163
|
|
682
|
+
helm/benchmark/window_services/test_gptj_window_service.py,sha256=bDMmgguDtgeWUzmIs59FczBW00VedElMoHWDSavRgm8,2485
|
|
683
|
+
helm/benchmark/window_services/test_gptneox_window_service.py,sha256=Bjk8h7Ddg6KPuG_1Qv5XcPZQlkzNw4sO94FLnNU_wGM,4300
|
|
684
|
+
helm/benchmark/window_services/test_openai_window_service.py,sha256=6TZw4AGZ6kG5BIuCtRXDStjgk-JGAhZJYX6JG3aiHCU,2425
|
|
685
|
+
helm/benchmark/window_services/test_opt_window_service.py,sha256=Hmh5Kt1yjI-PkhCPiabqu6eFCreCkMgcTQCv_YRvM7Y,4305
|
|
686
|
+
helm/benchmark/window_services/test_palmyra_window_service.py,sha256=u7xb7syXCxjvQeevWtSFPaOJy40VPk2yfvduNtFTtdw,4302
|
|
687
|
+
helm/benchmark/window_services/test_t0pp_window_service.py,sha256=rmoMW8YsNpD_zC-GBi6M5GugT_lT9lfn5CbwNbr7d7I,4088
|
|
688
|
+
helm/benchmark/window_services/test_t511b_window_service.py,sha256=zmFGL4Nwg3xQ7nRe-IEkl37wx59C33xBUS8qKHqBQeU,4091
|
|
689
|
+
helm/benchmark/window_services/test_ul2_window_service.py,sha256=RhIK4i9XaUfgeqTZEEXxyqaIxdyu29BRKb0pBl7orKk,4151
|
|
690
|
+
helm/benchmark/window_services/test_utils.py,sha256=O1jHGB0Dn0h03ayuosF_8AtikIe8p50d5HcfzT99rBU,3301
|
|
691
|
+
helm/benchmark/window_services/test_yalm_window_service.py,sha256=PJqw2ySLOMg_iiAzJGzj-1YOrDbxFkmP6wjiDcj1RWA,4391
|
|
692
|
+
helm/benchmark/window_services/tokenizer_service.py,sha256=rf6VAZkPRkwH-KKxXoQnfQ2uozC0_A_9egGPyk1P0E4,755
|
|
693
|
+
helm/benchmark/window_services/window_service.py,sha256=y6BthPY1V-ugmYfaJElm5Wfy3PSgoJLj10vHcXZZGNA,4727
|
|
694
|
+
helm/benchmark/window_services/window_service_factory.py,sha256=T55F0Y2jiOYxUHHZxT4YX4fFXY5gfFhn56zIwUBhc7s,3423
|
|
695
|
+
helm/benchmark/window_services/yalm_window_service.py,sha256=EwwCoMpr9WVLhCI7OI_7tmZHQfTUwn9FFWjbhIBFRfA,1089
|
|
696
|
+
helm/benchmark/window_services/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
697
|
+
helm/benchmark/window_services/image_generation/clip_window_service.py,sha256=2JHld8GiR_eIQyHMPSN8K2VOswmKJEPMPJLsxlLpU-Q,631
|
|
698
|
+
helm/benchmark/window_services/image_generation/lexica_search_window_service.py,sha256=uDCUclHvo8toxSTMztK3zG7Eb-hjueobGQaBqPqVJlk,454
|
|
699
|
+
helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,sha256=8U2qDrUB1QJHRy5STV5FywkeVm6qfNOaeVBkMQhyMGc,453
|
|
700
|
+
helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
|
|
701
|
+
helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
|
|
702
|
+
helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
703
|
+
helm/clients/ai21_client.py,sha256=RAXQufajYnxr3b_1Hl-wAZkeE_j6O8zX-vngWEits6c,8158
|
|
704
|
+
helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
|
|
705
|
+
helm/clients/aleph_alpha_client.py,sha256=BK2eQIHYMxLMsZNWld85ZCj17JAoy5lU7rHuSBa4fOM,4981
|
|
706
|
+
helm/clients/anthropic_client.py,sha256=R85gLYrheN2YWSGTnf3pkYTjCkTl300ktdlGLe1_1-o,36181
|
|
707
|
+
helm/clients/auto_client.py,sha256=J5bCxIDZJUdV1dCv_EtbvwPzd1p2Ogtg207vpb3PhgI,11624
|
|
708
|
+
helm/clients/azure_openai_client.py,sha256=mZ0udOAjadp7ZyE2KEtq8XuQp45eHlX_qM_getyzbA0,2009
|
|
709
|
+
helm/clients/bedrock_client.py,sha256=sXxzNTs3pwVIwvir5lyJWLRajI9p2lMiJq21XsZ_FZo,12267
|
|
710
|
+
helm/clients/bedrock_utils.py,sha256=8ZZfyOuZkgxL_naJ-wwBnH4GKv425fu3MfyakGHxeb4,3764
|
|
711
|
+
helm/clients/client.py,sha256=fWJ_Eg4NyhPqlvpDvM7AjWN7cr2LU2uWdsnENLJXlTs,8963
|
|
712
|
+
helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
|
|
713
|
+
helm/clients/cohere_client.py,sha256=edQO5raoJYmYzfVREqHhNvjTcqPevG0M8EPMLOANqXY,10975
|
|
714
|
+
helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
|
|
715
|
+
helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
|
|
716
|
+
helm/clients/google_client.py,sha256=mIaUzK7GHCa9pqK1BEVhdt6dZsJfHv1Qdsf3I0Ayq8A,2912
|
|
717
|
+
helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
|
|
718
|
+
helm/clients/grok_client.py,sha256=SbVB6AduTwfElzUgEMnQW2kQUFVTCv4TpPPJvElQEe0,1127
|
|
719
|
+
helm/clients/http_model_client.py,sha256=_F3_y2UWqbzESQdzV0FMEsECIKjporVSAW6iUQhJ35c,2818
|
|
720
|
+
helm/clients/huggingface_client.py,sha256=FYrg8XoCHXi5eUWjS0S_n-eiva-Ri0g1oaaeT_ky-tE,17615
|
|
721
|
+
helm/clients/huggingface_pipeline_client.py,sha256=ivFTMNHBwwIUjkeOHkl-veZi5nNAjtnkYvneRFWs-6Q,6154
|
|
722
|
+
helm/clients/ibm_client.py,sha256=4W4fbjnDNjXrP4gVwSfBHPus0QcqFOQzFvfaST1BE1Y,9701
|
|
723
|
+
helm/clients/lit_gpt_client.py,sha256=pgLfSvusNpdj8F5DVxzQdHxTDRNX4RVt6unegao803U,6229
|
|
724
|
+
helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
|
|
725
|
+
helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
|
|
726
|
+
helm/clients/mistral_client.py,sha256=ceM8KLAcniAqK1BNVdUGzqy4av2SEEau6PVmPivxc0o,8369
|
|
727
|
+
helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
|
|
728
|
+
helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
|
|
729
|
+
helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
|
|
730
|
+
helm/clients/openai_client.py,sha256=s62_qafDVbDu5pzIkfQsflIwRzc4sXkSiDNkmZz68Ow,27775
|
|
731
|
+
helm/clients/openai_responses_client.py,sha256=zua7DZWLeOdpb1yY8YV10gmuGdqvvo_9YQPW3OIGPDU,7219
|
|
732
|
+
helm/clients/palmyra_client.py,sha256=4AaZcV2tPHU4HJ9FWSkOY8_C9ndEckH3PH715QxJQ8E,7086
|
|
733
|
+
helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
|
|
734
|
+
helm/clients/reka_client.py,sha256=hA0tq3Hc9669q2sYa4Jr5yWy2NAbvoFDnVqQ6vds62w,8334
|
|
735
|
+
helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
|
|
736
|
+
helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=NGbeI6sMenmgqPQTWxYF3C1Aen29LybRcHcsmS3Jqmg,2059
|
|
737
|
+
helm/clients/stanfordhealthcare_claude_client.py,sha256=ShhbLttPDRa-Pnvr35_2WmVx5s0XpsJMGzu5qhzLoLI,1020
|
|
738
|
+
helm/clients/stanfordhealthcare_google_client.py,sha256=cJK_uH-YBQpBJsltNuiUi0x77bh0eCM5UNBaJQ1zai4,1475
|
|
739
|
+
helm/clients/stanfordhealthcare_http_model_client.py,sha256=LEq1fIxHnTnwCsvkF1AUlw6L4Gwv4egx0-PUvNiMNTo,3047
|
|
740
|
+
helm/clients/stanfordhealthcare_openai_client.py,sha256=Qyl8voGz1hJPqT6g4PunMuN99EYaW8U-NXQQSgJbiiM,2169
|
|
741
|
+
helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy6ifJ8pUiNa3vBcWiDsIwXFI,1343
|
|
742
|
+
helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
|
|
743
|
+
helm/clients/test_client.py,sha256=T27UsIPWsbE1JK_8DN_DW9LkEcIGRbgDjio14YOIAb0,3854
|
|
744
|
+
helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
|
|
745
|
+
helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
|
|
746
|
+
helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
|
|
747
|
+
helm/clients/together_client.py,sha256=tgjMlWscrauLFfMxDenh14oEBfLWyP9XYhz--YlvKVw,24264
|
|
748
|
+
helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
|
|
749
|
+
helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
|
|
750
|
+
helm/clients/vertexai_client.py,sha256=PjMnz4u5YQdpIbfLLBFsrPuHCNrj0_fatf1rY89d-nQ,23113
|
|
751
|
+
helm/clients/vllm_client.py,sha256=YLIxGoQ_ZXejA4nfVpmFE4tmHROEFxEbFsV8Ba25Eac,1658
|
|
752
|
+
helm/clients/writer_client.py,sha256=flKLeMbFkyGfNmv1ozZGU4dxNy-QF5bFJF0mGHqpU3c,4467
|
|
753
|
+
helm/clients/yi_client.py,sha256=nC60d2HiUL2W59FTne9tWmZ9bGGY1OvI7Ob3Ng4wSPE,750
|
|
754
|
+
helm/clients/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
755
|
+
helm/clients/audio_language/diva_llama_client.py,sha256=Bvcf4wE7yMZlqETgKEMtCug8-2fQI8QCDdaGWSeQ2X8,4864
|
|
756
|
+
helm/clients/audio_language/llama_omni_client.py,sha256=OCak716q97uEk9CBXQqnmUsbLFR-dddMzg5eyIZ4gzE,8718
|
|
757
|
+
helm/clients/audio_language/qwen2_5_omni_client.py,sha256=lbv6Hr22p0ReyR1bnN-dR8BzdPgilvGES7G03of8BWA,9090
|
|
758
|
+
helm/clients/audio_language/qwen2_audiolm_client.py,sha256=s9eH8fnVgw5xV39b_8AGt6IyNN3q9Uhcx6HZVxt7TM8,8981
|
|
759
|
+
helm/clients/audio_language/qwen_audiolm_client.py,sha256=RvYweXANEyzhHYDx38H10F0ZEFaL8kj7n7TZ-UrRmZs,6338
|
|
760
|
+
helm/clients/audio_language/test.py,sha256=FrKpirOwJW1__E2egq4VPgsTrgiSHZHBwfUCvxNjC0o,1969
|
|
761
|
+
helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
762
|
+
helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
|
|
763
|
+
helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
|
|
764
|
+
helm/clients/clip_scorers/multilingual_clip_scorer.py,sha256=LgV1hN6y2FiFQ30UakxRmlwtLs_LCMxrOCewriN1nkk,2066
|
|
765
|
+
helm/clients/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
766
|
+
helm/clients/image_generation/adobe_vision_client.py,sha256=eFEeuyyQlas9Oz-w7cdQ_mRIwbPJFECwmAu5xkvt3wA,2892
|
|
767
|
+
helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=6HXLS_kxJouNyLVLfhoepMf5CNIQukWzxkVNV71PJ6Y,4028
|
|
768
|
+
helm/clients/image_generation/cogview2_client.py,sha256=ssIrM6-7bbBQq_w6SCE4DtPWnF_S2x_FqMhSjomlBK4,8455
|
|
769
|
+
helm/clients/image_generation/dalle2_client.py,sha256=amsgWBzP266UmOYM6QFIxEV4xAybw_OpPVdtH748REk,8430
|
|
770
|
+
helm/clients/image_generation/dalle3_client.py,sha256=sabS7AJ6O5ewmTkGmHr4cK14tlMcmh-xrGgj7J-xa3k,4639
|
|
771
|
+
helm/clients/image_generation/dalle_mini_client.py,sha256=wTXW79KJ5tCku46AiotA2x0vQAqKi6g4aX05n-Qm_pQ,8175
|
|
772
|
+
helm/clients/image_generation/deep_floyd_client.py,sha256=P566sdeBvLLv6vi9Y4IbtEfmoQykMDh0viUeRXY7Ik0,3028
|
|
773
|
+
helm/clients/image_generation/huggingface_diffusers_client.py,sha256=OgkWWNGquu0xTKVSAduAMz8T_pWG-SwwG9iZegPrXEw,12313
|
|
774
|
+
helm/clients/image_generation/image_generation_client_utils.py,sha256=N130PbHLLvE9Q1iVefPvTCJzs3hG3osZCeYdJyjLjCw,437
|
|
775
|
+
helm/clients/image_generation/lexica_client.py,sha256=zyyfxZdTiBopPLZJ-uu5ewQYf9tUhtrEIHZddq8gCBI,3681
|
|
776
|
+
helm/clients/image_generation/mindalle_client.py,sha256=IH9XM6n-ZHXyidTG66ew-pAgb9XaXLu-_AD_-YBydU8,4653
|
|
777
|
+
helm/clients/image_generation/nudity_check_client.py,sha256=TeFga6HvBKgdX7LitBoioXUD4BQGavVwzr5BFFE29x8,2599
|
|
778
|
+
helm/clients/image_generation/together_image_generation_client.py,sha256=onvBeRFuuuzpAVg5lZAbnUzmv5L5HwsPwRcflDPqWUI,4393
|
|
779
|
+
helm/clients/image_generation/cogview2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
780
|
+
helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrHPtGDiCRq05kEh-KHjSTgxPDw6R8,3766
|
|
781
|
+
helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
|
|
782
|
+
helm/clients/image_generation/cogview2/sr_pipeline/__init__.py,sha256=qWuNwKlcvGwEFcw5932wk_t0_baNwUILIJzQWJjgh2A,488
|
|
783
|
+
helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py,sha256=1DwcUw9Tb563JpKpkPNIB5Ew1djozvPiGASShffiABk,3716
|
|
784
|
+
helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=xYn3acxU4BRdDeRjk98Vj0qq8qqty93kPCLdz-bOMKs,10818
|
|
785
|
+
helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py,sha256=OonYjdtNKJo12cNb-t-gFHLXRFxItCXjKgS9YxWAI-k,7718
|
|
786
|
+
helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py,sha256=LSvAHRupsOqk3yb4GxyTsubRxrnPOEfObFym2j4eiKc,5120
|
|
787
|
+
helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py,sha256=5D1QWyAcY0CpwITk7EBN6ylUtc7mvZaE9iHG628AqMQ,10390
|
|
788
|
+
helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py,sha256=d8voNZnXfqh-LzIzAqcMD4h4XJx_u_mvu9mewMrJC5k,4392
|
|
789
|
+
helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py,sha256=IpDLoPBDA-ck-IP4YUqQu8NM5W7_1f3Tg-sWhU3pB6k,1557
|
|
790
|
+
helm/clients/image_generation/dalle_mini/__init__.py,sha256=4RmnjfGTmgYaWsQmaDkOHxgo0Wxr9qqwtpMBC_5XeGg,112
|
|
791
|
+
helm/clients/image_generation/dalle_mini/data.py,sha256=1unTc4lkUZ-6A2DfcbcglGtnE2KP3OuL4YWFROlsEQo,17622
|
|
792
|
+
helm/clients/image_generation/dalle_mini/model/__init__.py,sha256=fyMDjpuzHxWjF5Fk9Rkfyn7KpvFAwxyRCJFoA2RDPdM,428
|
|
793
|
+
helm/clients/image_generation/dalle_mini/model/configuration.py,sha256=AAeqmSiGOPd831VrytkWMbSSAv-4uEGk190svHsUGNU,7859
|
|
794
|
+
helm/clients/image_generation/dalle_mini/model/modeling.py,sha256=w9TSQYBjOygqj-QCQSqjzujahGicXRtnJObtXrCpCEQ,69700
|
|
795
|
+
helm/clients/image_generation/dalle_mini/model/partitions.py,sha256=_fDpk34GL6NhNecHuP78y_gmKpWjbfw3fxMCWVEO4pc,2721
|
|
796
|
+
helm/clients/image_generation/dalle_mini/model/processor.py,sha256=2JvF8XmYMiFrxxi4YcGDF1JrTFQPqBXfzYmb_ylCRls,2404
|
|
797
|
+
helm/clients/image_generation/dalle_mini/model/text.py,sha256=Kfba8JdO2LrSmCVlQtgc7J2kSordCgjeg7WV9V45B80,7302
|
|
798
|
+
helm/clients/image_generation/dalle_mini/model/tokenizer.py,sha256=fggtXzlh8HHHgT0T0d78KX6i16zFApnpkp7xOMAuD6c,243
|
|
799
|
+
helm/clients/image_generation/dalle_mini/model/utils.py,sha256=clu2IiIpAT0DzTc2HvmI0ySnETFsJtpi7tocPkqOreY,1171
|
|
800
|
+
helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py,sha256=01pV_QWUmcIpj5kBVihle_VGrJyw2AmV3QuhWASds2M,66
|
|
801
|
+
helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py,sha256=4q39kdTUxeW55SN8NNkA9MdFZtH6rWssN8XauuOwyi0,1213
|
|
802
|
+
helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py,sha256=7OKxVD7eJG7TmyPc9RdbKqz6SAXqJlZ21D-ENlLlqZE,4612
|
|
803
|
+
helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py,sha256=l-01MAjdbCiaaZoLycV7BcpeYwKOaN-GeZUn3mcsmhg,21067
|
|
804
|
+
helm/clients/image_generation/mindalle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
805
|
+
helm/clients/image_generation/mindalle/models/__init__.py,sha256=1UieFJ0LGinYSB-idy3atl-gFAmS_ouiiGX6TM2Mh-I,8372
|
|
806
|
+
helm/clients/image_generation/mindalle/models/tokenizer.py,sha256=NFFdLUhoxEkv9SZqU3QIFk0ukaCcn6w_xFWQIRGhZJ4,1190
|
|
807
|
+
helm/clients/image_generation/mindalle/models/stage1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
808
|
+
helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=cg5c6KShCMbMmHFuzDBIG-WgIBBkDrG9XvXC1DxqowA,11044
|
|
809
|
+
helm/clients/image_generation/mindalle/models/stage1/vqgan.py,sha256=KcarvKoMuPBpP0H8F8W67FogdvHaAQuo9jP3rFRxc5E,4035
|
|
810
|
+
helm/clients/image_generation/mindalle/models/stage2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
811
|
+
helm/clients/image_generation/mindalle/models/stage2/layers.py,sha256=LvDADun5nMaencaRT0pm-dq78xHpPPkpi8rlu7RLHco,5306
|
|
812
|
+
helm/clients/image_generation/mindalle/models/stage2/transformer.py,sha256=MjcFrbOgtwc6zL5izNAPEOwXFhasyZajwczaXFLunZg,10387
|
|
813
|
+
helm/clients/image_generation/mindalle/utils/__init__.py,sha256=qNxJFCN97656FlGo2UDLubtvVaArHHK3MDdWitzFPnw,199
|
|
814
|
+
helm/clients/image_generation/mindalle/utils/config.py,sha256=lh8dXvL7ctKmuYEbeTQZfXN-_DkHQLjGuFuvo53u5pM,3234
|
|
815
|
+
helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh9HMveJs6F49UMK57Xfa0ccnHqI8,5029
|
|
816
|
+
helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
|
|
817
|
+
helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
818
|
+
helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=S4FDbSO917bUw3cK64xnxwH5HFH_Eb-w2zQ8ZL4eSSk,6588
|
|
819
|
+
helm/clients/vision_language/huggingface_vlm_client.py,sha256=OHV41AA-WZo_CnsHymwslgjDcVK0uHmIrvGbrxBDK5w,5000
|
|
820
|
+
helm/clients/vision_language/idefics_client.py,sha256=DURync-8rh2ccdlGDPl3NMgryBcMn5yCrrmFZisf5m0,7784
|
|
821
|
+
helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
|
|
822
|
+
helm/clients/vision_language/paligemma_client.py,sha256=K9MzXlgjXoiVafA8bbu-mKNt3Z9kq8v8AJL286DyQqI,6867
|
|
823
|
+
helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
|
|
824
|
+
helm/clients/vision_language/qwen2_vlm_client.py,sha256=jvh_-jyvFL4r3LPX-gWPCYHT503JtJ73FVHQS2KyQ2c,8325
|
|
825
|
+
helm/clients/vision_language/qwen_vlm_client.py,sha256=wNxEuYOrhjaW5s4vtdRxKvJ-LCTTGyKqiqD84j7H1Do,7565
|
|
826
|
+
helm/clients/vision_language/open_flamingo/__init__.py,sha256=RTxnxjYnTmTZv-608o66_W74qmKLpEO6hx0cxaZaYv8,172
|
|
827
|
+
helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
828
|
+
helm/clients/vision_language/open_flamingo/src/factory.py,sha256=4KRXLV5mOEZ34-Foq2zVgTye3sQD-Buz6NZTSp2X9_A,5790
|
|
829
|
+
helm/clients/vision_language/open_flamingo/src/flamingo.py,sha256=g4ZtQX-ZBauF6UADDGiRlJdB7rlA_gb37pJzxluPXrQ,14753
|
|
830
|
+
helm/clients/vision_language/open_flamingo/src/flamingo_lm.py,sha256=n6eaH9OBhpjIHH822mNE8WIPi0pChNQBx4pRXhAjsPw,6317
|
|
831
|
+
helm/clients/vision_language/open_flamingo/src/helpers.py,sha256=pq_BgkUflYBDw8gxTO2evuiqvjw3bE9rx06iYHp0kTw,8595
|
|
832
|
+
helm/clients/vision_language/open_flamingo/src/utils.py,sha256=6FYU0NgshZadF3QYWQkPW8jyEFiOd6jyb8p5rv_vOj0,1444
|
|
833
|
+
helm/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
834
|
+
helm/common/audio_utils.py,sha256=iMMS0nC6EYKXpqHMewKYhi8M3J5J9A15pNxDEQzHI-E,3676
|
|
835
|
+
helm/common/authentication.py,sha256=RlMx29_TSrfU7ujE7dJkxmFub5EqLj2NswV5lAVFFDk,179
|
|
836
|
+
helm/common/cache.py,sha256=0gXq97M9JgSO5aO2puEV1WRpEy8jdc_wRsKL4rVVoY0,6725
|
|
837
|
+
helm/common/cache_backend_config.py,sha256=4u5A6BHNBmGnnrDNhCVgrdwhXQtyAbWcUeoo7hdgZSo,1530
|
|
838
|
+
helm/common/clip_score_request.py,sha256=WnNg89owDCmG7tyy8nnQL0RdKQLsUdMWiYH9XqqbGw8,840
|
|
839
|
+
helm/common/codec.py,sha256=gTh6AwIQ0Bbul_QSnIO7eItwMZmYtnkIrG1jkc4GOL4,7100
|
|
840
|
+
helm/common/concurrency.py,sha256=8THtHlCtXo5c8iCuz_UcBBdzZX6aiEALLc4u0M4SYL0,856
|
|
841
|
+
helm/common/context.py,sha256=0U5KNNKLHiiqjb8JVq03mninagEp9zTzFKP0He8o7A8,2788
|
|
842
|
+
helm/common/credentials_utils.py,sha256=BX_P6wUpLKA7Bg3Dztm7jVI2j4ls7H-h38UbmGMBt3A,1101
|
|
843
|
+
helm/common/critique_request.py,sha256=yo4aRe-DEjudUmydthtpTj6LdhRXfZ3JZptxTkWzZ3U,3068
|
|
844
|
+
helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
|
|
845
|
+
helm/common/general.py,sha256=TcdPXn_bgPFvXtFP2lJhncz4Q8SdTXnKOinHOTBsegw,12027
|
|
846
|
+
helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
|
|
847
|
+
helm/common/hierarchical_logger.py,sha256=KR5R7tjUJN-hTFdnfzEyfwAhvgTFH3JJCH-LSiilqLk,4192
|
|
848
|
+
helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
|
|
849
|
+
helm/common/images_utils.py,sha256=8BsN0fd8pc0rh_TSDvippWhTfwmJJXKNF2zqKLB8cps,3372
|
|
850
|
+
helm/common/key_value_store.py,sha256=D9ZBORzZncf3zHQOP4AuNbQnV8cZpO_kqHY1mDRugqQ,3174
|
|
851
|
+
helm/common/local_context.py,sha256=lpQSLqybZda7LDg5drYQrT8blWORvOOB4yXyCU9d8Ts,6493
|
|
852
|
+
helm/common/media_object.py,sha256=1SlilnsrfZVVpfci1atin8hbREnGoNQwjBcNAH8RgBU,5151
|
|
853
|
+
helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
|
|
854
|
+
helm/common/mongo_key_value_store.py,sha256=G0TIWQcvwMjyXh4TnN6xJ462HKHUAZtQJJYQOrHK-K8,3887
|
|
855
|
+
helm/common/multimodal_request_utils.py,sha256=n6HgTyHNqfGmU9qmVK-wxQzrkPZ5Wdh-lO_y_ln6VYc,2184
|
|
856
|
+
helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
|
|
857
|
+
helm/common/object_spec.py,sha256=_usgTDQULBF6_jy7C6m-9ZNVvNxbGoTE_CdGcSvBASU,4327
|
|
858
|
+
helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
|
|
859
|
+
helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
|
|
860
|
+
helm/common/reeval_parameters.py,sha256=exaEucXnSI8a076uq_qhO3CTBztMMRoRzL_7v1N4adE,300
|
|
861
|
+
helm/common/remote_context.py,sha256=DzFMii9AN03CoWp1J3k703-7oQJYHwEf9TDV5YzM6v4,2825
|
|
862
|
+
helm/common/request.py,sha256=HWj6IizIwJm9_NigO-geira_rI6aqhj5CevQB694m94,9161
|
|
863
|
+
helm/common/response_format.py,sha256=wIptA8FydZoRjMvO5SFIplgDXhwpZvZmFI-Bi-7mcGU,516
|
|
864
|
+
helm/common/test_cache.py,sha256=j19p-qzv_98X_TMW4b39ZHwSJ-MX3p91PrkYumarS6Y,4870
|
|
865
|
+
helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
|
|
866
|
+
helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
|
|
867
|
+
helm/common/test_media_object.py,sha256=SUWLfms_vkXNivRYM0ZT8AI3_2ru6GON5l-Hb-lk-t0,1661
|
|
868
|
+
helm/common/tokenization_request.py,sha256=NND9ESiiDE0H8QRNpfHVjXS7MQfKKIwtVRKDIjPnnJM,3344
|
|
869
|
+
helm/common/file_caches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
870
|
+
helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFqx4ftRuPA,359
|
|
871
|
+
helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
|
|
872
|
+
helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
|
|
873
|
+
helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
874
|
+
helm/config/model_deployments.yaml,sha256=ec7CZLii6mpJeNC93J4gMgh1YrkU6Fj2XpXJaes01xY,160890
|
|
875
|
+
helm/config/model_metadata.yaml,sha256=JvvKKEePcGCQf_cHGQv-k_Yj4GmB71lvRY2Is176a9s,263155
|
|
876
|
+
helm/config/tokenizer_configs.yaml,sha256=Xju6-GcWJD-nmS5U0dUgkOexHWVWCd-J59EiVufoOCs,37687
|
|
877
|
+
helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
878
|
+
helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
|
|
879
|
+
helm/proxy/cli.py,sha256=apG3ByfyMciZFXV5wX2177p1B5eqkxCY6VoRgwJ81Kk,8316
|
|
880
|
+
helm/proxy/example_queries.py,sha256=EB2vVpAryOUAFiLrwsMiFz0zGl_UAQ8TJ9SkWngvsu4,4389
|
|
881
|
+
helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
|
|
882
|
+
helm/proxy/retry.py,sha256=iLZmKATEJQa9jsSpOIx6YDRhmrA8G1Qm21cUxCuo2Ug,3490
|
|
883
|
+
helm/proxy/server.py,sha256=Q4Mzts8mketktGVJ5AoOEA-_SGCue5QeOlK8dqPUuHI,10853
|
|
884
|
+
helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
|
|
885
|
+
helm/proxy/test_retry.py,sha256=db0owyGTThmIMhYWU_Eh1U-AJvQ-Wa9j_kRmC9DNjOA,1059
|
|
886
|
+
helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
887
|
+
helm/proxy/critique/critique_client.py,sha256=ATZuXw77lejwtpgLg3Soy3VDyv8D8xetl0o4guxDM2M,1764
|
|
888
|
+
helm/proxy/critique/mechanical_turk_critique_client.py,sha256=OcppmFOMweBSfVTiLIICIwjvPpHHTkdu9fFUTaubitQ,574
|
|
889
|
+
helm/proxy/critique/mechanical_turk_critique_exporter.py,sha256=taULrc_cIP0O9c5UpGz3l9DmWQadTVzN_v-qzTgMoyo,8470
|
|
890
|
+
helm/proxy/critique/mechanical_turk_critique_importer.py,sha256=NL97joO5pRkcICRdVyG4kf9JhfYRaySsxRoZ7KWDYv0,5581
|
|
891
|
+
helm/proxy/critique/mechanical_turk_utils.py,sha256=MUMcxMA08OXJTtgCX7ejGQQivMNF3Xfu4AAHkvuft9s,1766
|
|
892
|
+
helm/proxy/critique/model_critique_client.py,sha256=QMFiMpALXnneumKbJpXOZDEb3lPPdkIaSCasmdXHB8o,12806
|
|
893
|
+
helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5Vy4jMrHXcsXc_5m4,15889
|
|
894
|
+
helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
|
|
895
|
+
helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
896
|
+
helm/proxy/services/remote_service.py,sha256=zehXO0JYIR6fIgqSZ1p7icPBITYPYfjgTX1ZbxiN1dI,8806
|
|
897
|
+
helm/proxy/services/server_service.py,sha256=VTDkULezp2vniGKfH2fP7PHf_DAtsh4qXwKQ0tD_Wxc,7357
|
|
898
|
+
helm/proxy/services/service.py,sha256=YFG5ZlBYBz3IdSVRKDIKVlAmA-oLjFCeBHE3iIe_SU8,6020
|
|
899
|
+
helm/proxy/services/test_remote_service.py,sha256=xzkyptctXw3y5d1fgbidBMyw8B4rILZStC_C-hLgLUc,6643
|
|
900
|
+
helm/proxy/services/test_service.py,sha256=oDYen-71iwZ6YMNBVbVSdEFsH6GMvZYw5tS5Eg4YHjY,8987
|
|
901
|
+
helm/proxy/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
|
|
902
|
+
helm/proxy/static/help.html,sha256=2Rn_lGZspqrZhNfLQ4wIAvYO_BK9q67Q_AS2-3WsMpY,6231
|
|
903
|
+
helm/proxy/static/index.css,sha256=1OBOJ87LhwI2PtpoIyZoGQbSxQK2dz2vxk8BVmAybWY,717
|
|
904
|
+
helm/proxy/static/index.html,sha256=nUJf_hwBPokqrm_hDZsVfHcJrnhZLYhkVSoLdGOocf8,2009
|
|
905
|
+
helm/proxy/static/index.js,sha256=-OXgf2rUYI49vg4KhwdL2VygKgzAGoYHKngaWgMb4E0,14996
|
|
906
|
+
helm/proxy/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
|
|
907
|
+
helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
908
|
+
helm/proxy/token_counters/auto_token_counter.py,sha256=Ag368Sb-eLQUMLW7lmWc2EOKN3kgkiCTsYnHNrsf9kw,2071
|
|
909
|
+
helm/proxy/token_counters/test_auto_token_counter.py,sha256=LO3H_NbVeoeaMmEuFNCmhoEWKjWVvxeW5U4yTKfE-84,8590
|
|
910
|
+
helm/proxy/token_counters/token_counter.py,sha256=TCij1Cp08RoFTLLLdjNPoaeDGHpA1A2hQsrRV775Kf4,425
|
|
911
|
+
helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
912
|
+
helm/tokenizers/ai21_tokenizer.py,sha256=CE-u39ZY5Y4XQHONpiPHKK7uvEmySYLBQi2n70OV004,2059
|
|
913
|
+
helm/tokenizers/aleph_alpha_tokenizer.py,sha256=Ofc5thTfW_eb5ztiU-y_0p6e2PIGbHMb2wz169sy1fc,3833
|
|
914
|
+
helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
|
|
915
|
+
helm/tokenizers/caching_tokenizer.py,sha256=BwcyVzG7vy3R2O0UgbNxNP2nN4wBnsvpG_9mXQuDYfw,7300
|
|
916
|
+
helm/tokenizers/cohere_tokenizer.py,sha256=6WwHIt7SsICmYR2QQpwDJ7pfNF8VWrFHFxF5Kynq6aY,2116
|
|
917
|
+
helm/tokenizers/grok_tokenizer.py,sha256=Ms7QFYNookeq29AIfHUIXfKhrpRrPOPsNs0zBzWdLKA,2084
|
|
918
|
+
helm/tokenizers/http_model_tokenizer.py,sha256=J5Myg6JVDNgHMN7XOHwGV3WrhilUZ9Sw_FrgO4frYuY,3124
|
|
919
|
+
helm/tokenizers/huggingface_tokenizer.py,sha256=P2ri4n-SUWB9ShMlxlJ9kO-mPmbSTizMGwAf41JE5ds,8734
|
|
920
|
+
helm/tokenizers/lit_gpt_tokenizer.py,sha256=0c6KDeLNHPd6h27SXQvkUfmrCSLYa1kQY1GqCHVfhvw,1675
|
|
921
|
+
helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
|
|
922
|
+
helm/tokenizers/test_ai21_tokenizer.py,sha256=V8orjdKxmEV44VYoZ9Sq5E7CIq2caNnr6vjdk0T_w1A,1646
|
|
923
|
+
helm/tokenizers/test_anthropic_tokenizer.py,sha256=h7sJMRv_O2yAuEzbrXLJJIo9Gy8wkTycc4gu6UFvDaw,3937
|
|
924
|
+
helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
|
|
925
|
+
helm/tokenizers/test_grok_tokenizer.py,sha256=b094C_M2a1zNM3SsGzp9cNNm8aDmmoz1kFbPkubbVTQ,1212
|
|
926
|
+
helm/tokenizers/test_huggingface_tokenizer.py,sha256=7OB2d0PaCp-qmGXVt0V3yf0ciilN3Kd2qnAYprWRl64,6324
|
|
927
|
+
helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
|
|
928
|
+
helm/tokenizers/test_yalm_tokenizer.py,sha256=8IeJM3X61p3ygBfK_bJtPh_xOJ83IluaZ3UM2xTtbEY,2492
|
|
929
|
+
helm/tokenizers/tiktoken_tokenizer.py,sha256=u10haWtRHiSSj11MDIYIK_wpT8loQVJY2WJxIEPg0Vc,1280
|
|
930
|
+
helm/tokenizers/tokenizer.py,sha256=5dsxihHWA-SRSYwx2AlsLZR0L2MSMwfKRbBQy5rz_Zs,1639
|
|
931
|
+
helm/tokenizers/vertexai_tokenizer.py,sha256=lf-xckyeachaJI86ujNC2Cp_AVZ-BBcFgLZQy4lg7aA,4105
|
|
932
|
+
helm/tokenizers/yalm_tokenizer.py,sha256=u1n_zB5FMSXBU02JrCkipNLpkbmF0LA2i_53xID5d0w,1496
|
|
933
|
+
helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
934
|
+
helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
|
|
935
|
+
helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
|
|
936
|
+
helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
|
|
937
|
+
crfm_helm-0.5.6.dist-info/METADATA,sha256=QlR8qMFpWzt_gIs6aCdrEEUuOS5uCdg1kbRMoI7YGYc,23069
|
|
938
|
+
crfm_helm-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
939
|
+
crfm_helm-0.5.6.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
|
|
940
|
+
crfm_helm-0.5.6.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
|
|
941
|
+
crfm_helm-0.5.6.dist-info/RECORD,,
|