crfm-helm 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crfm_helm-0.5.5.dist-info/METADATA +413 -0
- crfm_helm-0.5.5.dist-info/RECORD +894 -0
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +13 -1
- helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
- helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/common_adapter_specs.py +69 -4
- helm/benchmark/adaptation/prompt.py +1 -1
- helm/benchmark/annotation/aci_bench_annotator.py +95 -0
- helm/benchmark/annotation/air_bench_annotator.py +20 -5
- helm/benchmark/annotation/annotator.py +5 -0
- helm/benchmark/annotation/annotator_factory.py +3 -20
- helm/benchmark/annotation/anthropic_red_team_annotator.py +11 -24
- helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
- helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
- helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
- helm/benchmark/annotation/bird_sql_annotator.py +58 -0
- helm/benchmark/annotation/call_center_annotator.py +22 -11
- helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
- helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
- helm/benchmark/annotation/dischargeme_annotator.py +107 -0
- helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
- helm/benchmark/annotation/harm_bench_annotator.py +11 -24
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
- helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
- helm/benchmark/annotation/live_qa_annotator.py +10 -5
- helm/benchmark/annotation/med_dialog_annotator.py +99 -0
- helm/benchmark/annotation/medalign_annotator.py +100 -0
- helm/benchmark/annotation/medi_qa_annotator.py +98 -0
- helm/benchmark/annotation/medication_qa_annotator.py +90 -61
- helm/benchmark/annotation/mental_health_annotator.py +98 -0
- helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +281 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
- helm/benchmark/annotation/omni_math_annotator.py +132 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +11 -25
- helm/benchmark/annotation/spider_annotator.py +18 -0
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
- helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
- helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
- helm/benchmark/annotation/wildbench_annotator.py +119 -0
- helm/benchmark/annotation/xstest_annotator.py +20 -30
- helm/benchmark/annotation_executor.py +35 -15
- helm/benchmark/augmentations/cleva_perturbation.py +9 -8
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
- helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
- helm/benchmark/augmentations/dialect_perturbation.py +4 -5
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +2 -2
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +4 -5
- helm/benchmark/augmentations/perturbation.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +2 -2
- helm/benchmark/augmentations/synonym_perturbation.py +4 -3
- helm/benchmark/augmentations/test_perturbation.py +16 -13
- helm/benchmark/augmentations/translate_perturbation.py +2 -2
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/data_preprocessor.py +2 -2
- helm/benchmark/huggingface_registration.py +2 -7
- helm/benchmark/metrics/aci_bench_metrics.py +34 -0
- helm/benchmark/metrics/basic_metrics.py +6 -6
- helm/benchmark/metrics/bbq_metrics.py +2 -2
- helm/benchmark/metrics/bias_metrics.py +12 -3
- helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
- helm/benchmark/metrics/bird_sql_metrics.py +28 -0
- helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
- helm/benchmark/metrics/classification_metrics.py +76 -12
- helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
- helm/benchmark/metrics/code_metrics.py +5 -5
- helm/benchmark/metrics/comet_metric.py +125 -0
- helm/benchmark/metrics/common_metric_specs.py +9 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
- helm/benchmark/metrics/copyright_metrics.py +4 -4
- helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +34 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -4
- helm/benchmark/metrics/dry_run_metrics.py +5 -5
- helm/benchmark/metrics/efficiency_metrics.py +3 -3
- helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
- helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
- helm/benchmark/metrics/ifeval/instructions.py +1574 -0
- helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
- helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
- helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
- helm/benchmark/metrics/ifeval_metrics.py +55 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
- helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
- helm/benchmark/metrics/language_modeling_metrics.py +4 -4
- helm/benchmark/metrics/machine_translation_metrics.py +2 -2
- helm/benchmark/metrics/med_dialog_metrics.py +34 -0
- helm/benchmark/metrics/medalign_metrics.py +34 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
- helm/benchmark/metrics/medec_metrics.py +101 -0
- helm/benchmark/metrics/medi_qa_metrics.py +34 -0
- helm/benchmark/metrics/medication_qa_metrics.py +15 -4
- helm/benchmark/metrics/mental_health_metrics.py +34 -0
- helm/benchmark/metrics/metric.py +3 -3
- helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
- helm/benchmark/metrics/nltk_helper.py +32 -0
- helm/benchmark/metrics/numeracy_metrics.py +4 -4
- helm/benchmark/metrics/omni_math_metrics.py +32 -0
- helm/benchmark/metrics/output_processing_metric.py +60 -0
- helm/benchmark/metrics/output_processors.py +15 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
- helm/benchmark/metrics/ranking_metrics.py +3 -3
- helm/benchmark/metrics/reference_metric.py +3 -3
- helm/benchmark/metrics/safety_metrics.py +39 -17
- helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
- helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
- helm/benchmark/metrics/spider_metrics.py +7 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
- helm/benchmark/metrics/statistic.py +1 -1
- helm/benchmark/metrics/summac/model_summac.py +1 -1
- helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
- helm/benchmark/metrics/summarization_metrics.py +19 -9
- helm/benchmark/metrics/test_bias_metrics.py +5 -1
- helm/benchmark/metrics/test_classification_metrics.py +140 -68
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
- helm/benchmark/metrics/test_metric.py +1 -1
- helm/benchmark/metrics/test_statistic.py +2 -2
- helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
- helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/toxicity_metrics.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +21 -4
- helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
- helm/benchmark/metrics/wildbench_metrics.py +34 -0
- helm/benchmark/model_metadata_registry.py +16 -0
- helm/benchmark/presentation/create_plots.py +1 -1
- helm/benchmark/presentation/schema.py +3 -0
- helm/benchmark/presentation/summarize.py +119 -256
- helm/benchmark/presentation/test_summarize.py +145 -3
- helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
- helm/benchmark/reeval_run.py +203 -0
- helm/benchmark/reeval_runner.py +355 -0
- helm/benchmark/run.py +8 -17
- helm/benchmark/run_expander.py +105 -8
- helm/benchmark/run_spec_factory.py +12 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
- helm/benchmark/run_specs/audio_run_specs.py +613 -0
- helm/benchmark/run_specs/call_center_run_specs.py +49 -0
- helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
- helm/benchmark/run_specs/classic_run_specs.py +1 -69
- helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
- helm/benchmark/run_specs/experimental_run_specs.py +112 -3
- helm/benchmark/run_specs/finance_run_specs.py +6 -2
- helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
- helm/benchmark/run_specs/lite_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +89 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
- helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
- helm/benchmark/run_specs/oab_exams_specs.py +32 -0
- helm/benchmark/run_specs/safety_run_specs.py +37 -0
- helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +66 -52
- helm/benchmark/run_specs/sql_run_specs.py +54 -0
- helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
- helm/benchmark/run_specs/vlm_run_specs.py +83 -5
- helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
- helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
- helm/benchmark/scenarios/air_bench_scenario.py +6 -1
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/__init__.py +0 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
- helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
- helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
- helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
- helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
- helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
- helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
- helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
- helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
- helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
- helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
- helm/benchmark/scenarios/banking77_scenario.py +6 -1
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/big_bench_scenario.py +11 -1
- helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
- helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
- helm/benchmark/scenarios/blimp_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +1 -1
- helm/benchmark/scenarios/boolq_scenario.py +1 -1
- helm/benchmark/scenarios/casehold_scenario.py +79 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
- helm/benchmark/scenarios/clear_scenario.py +153 -0
- helm/benchmark/scenarios/cleva_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +17 -4
- helm/benchmark/scenarios/commonsense_scenario.py +1 -1
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
- helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
- helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
- helm/benchmark/scenarios/disinformation_scenario.py +10 -1
- helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
- helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
- helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
- helm/benchmark/scenarios/gpqa_scenario.py +80 -0
- helm/benchmark/scenarios/grammar_scenario.py +2 -2
- helm/benchmark/scenarios/gsm_scenario.py +10 -1
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
- helm/benchmark/scenarios/headqa_scenario.py +131 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
- helm/benchmark/scenarios/ice_scenario.py +8 -4
- helm/benchmark/scenarios/ifeval_scenario.py +53 -0
- helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
- helm/benchmark/scenarios/imdb_scenario.py +11 -2
- helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
- helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
- helm/benchmark/scenarios/koala_scenario.py +1 -1
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
- helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
- helm/benchmark/scenarios/legal_support_scenario.py +11 -1
- helm/benchmark/scenarios/legalbench_scenario.py +22 -3
- helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
- helm/benchmark/scenarios/lextreme_scenario.py +11 -1
- helm/benchmark/scenarios/live_qa_scenario.py +1 -1
- helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
- helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
- helm/benchmark/scenarios/math_scenario.py +9 -1
- helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
- helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
- helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
- helm/benchmark/scenarios/med_qa_scenario.py +10 -1
- helm/benchmark/scenarios/medalign_scenario.py +88 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
- helm/benchmark/scenarios/medbullets_scenario.py +140 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
- helm/benchmark/scenarios/medec_scenario.py +120 -0
- helm/benchmark/scenarios/medhallu_scenario.py +66 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
- helm/benchmark/scenarios/mental_health_scenario.py +112 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
- helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
- helm/benchmark/scenarios/mmlu_scenario.py +11 -1
- helm/benchmark/scenarios/msmarco_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
- helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
- helm/benchmark/scenarios/newsqa_scenario.py +1 -1
- helm/benchmark/scenarios/numeracy_scenario.py +10 -1
- helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
- helm/benchmark/scenarios/omni_math_scenario.py +53 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
- helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
- helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
- helm/benchmark/scenarios/quac_scenario.py +10 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
- helm/benchmark/scenarios/raft_scenario.py +18 -3
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
- helm/benchmark/scenarios/scenario.py +9 -1
- helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +233 -84
- helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
- helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
- helm/benchmark/scenarios/spider_scenario.py +91 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
- helm/benchmark/scenarios/summarization_scenario.py +11 -1
- helm/benchmark/scenarios/sumosum_scenario.py +157 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
- helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
- helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
- helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
- helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
- helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
- helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
- helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
- helm/benchmark/scenarios/test_math_scenario.py +1 -0
- helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
- helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
- helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
- helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
- helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
- helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
- helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
- helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
- helm/benchmark/scenarios/unitxt_scenario.py +8 -2
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
- helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
- helm/benchmark/scenarios/wikifact_scenario.py +11 -1
- helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
- helm/benchmark/scenarios/wildbench_scenario.py +83 -0
- helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
- helm/benchmark/scenarios/xstest_scenario.py +1 -1
- helm/benchmark/server.py +11 -0
- helm/benchmark/slurm_runner.py +1 -1
- helm/benchmark/static/schema_audio.yaml +752 -0
- helm/benchmark/static/schema_autobencher.yaml +150 -0
- helm/benchmark/static/schema_call_center.yaml +97 -60
- helm/benchmark/static/schema_capabilities.yaml +254 -0
- helm/benchmark/static/schema_czech_bank.yaml +148 -0
- helm/benchmark/static/schema_enem_challenge.yaml +146 -0
- helm/benchmark/static/schema_enterprise.yaml +298 -0
- helm/benchmark/static/schema_finance.yaml +14 -12
- helm/benchmark/static/schema_heim.yaml +1389 -0
- helm/benchmark/static/schema_legal.yaml +566 -0
- helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
- helm/benchmark/static/schema_medhelm.yaml +1081 -0
- helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
- helm/benchmark/static/schema_safety.yaml +42 -6
- helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +40 -26
- helm/benchmark/static/schema_social_audio.yaml +224 -0
- helm/benchmark/static/schema_sql.yaml +171 -0
- helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +187 -30
- helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
- helm/benchmark/static/schema_vhelm.yaml +151 -47
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
- helm/benchmark/static_build/assets/index-262903c1.js +10 -0
- helm/benchmark/static_build/assets/index-42060d71.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
- helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
- helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
- helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
- helm/benchmark/static_build/config.js +1 -1
- helm/benchmark/static_build/index.html +5 -5
- helm/benchmark/window_services/default_window_service.py +1 -1
- helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
- helm/benchmark/window_services/ice_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
- helm/benchmark/window_services/local_window_service.py +2 -2
- helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
- helm/benchmark/window_services/test_bloom_window_service.py +3 -3
- helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
- helm/benchmark/window_services/test_gptj_window_service.py +8 -3
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
- helm/benchmark/window_services/test_openai_window_service.py +8 -3
- helm/benchmark/window_services/test_opt_window_service.py +3 -3
- helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
- helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
- helm/benchmark/window_services/test_t511b_window_service.py +3 -3
- helm/benchmark/window_services/test_ul2_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +1 -1
- helm/benchmark/window_services/test_yalm_window_service.py +3 -3
- helm/benchmark/window_services/tokenizer_service.py +0 -5
- helm/benchmark/window_services/yalm_window_service.py +1 -1
- helm/clients/ai21_client.py +3 -3
- helm/clients/aleph_alpha_client.py +1 -1
- helm/clients/audio_language/__init__.py +0 -0
- helm/clients/audio_language/diva_llama_client.py +118 -0
- helm/clients/audio_language/llama_omni_client.py +198 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
- helm/clients/audio_language/qwen_audiolm_client.py +150 -0
- helm/clients/auto_client.py +4 -2
- helm/clients/azure_openai_client.py +55 -0
- helm/clients/bedrock_client.py +201 -7
- helm/clients/bedrock_utils.py +33 -0
- helm/clients/clip_scorers/clip_scorer.py +1 -1
- helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
- helm/clients/cohere_client.py +3 -3
- helm/clients/google_client.py +1 -1
- helm/clients/http_model_client.py +1 -1
- helm/clients/huggingface_client.py +10 -18
- helm/clients/ibm_client.py +267 -0
- helm/clients/image_generation/adobe_vision_client.py +1 -1
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
- helm/clients/image_generation/cogview2_client.py +1 -1
- helm/clients/image_generation/dalle2_client.py +1 -1
- helm/clients/image_generation/dalle3_client.py +2 -2
- helm/clients/image_generation/dalle_mini/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/data.py +1 -1
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
- helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
- helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
- helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
- helm/clients/image_generation/dalle_mini_client.py +1 -1
- helm/clients/image_generation/deep_floyd_client.py +1 -1
- helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
- helm/clients/image_generation/lexica_client.py +1 -1
- helm/clients/image_generation/mindalle/models/__init__.py +6 -6
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
- helm/clients/image_generation/mindalle_client.py +1 -1
- helm/clients/image_generation/together_image_generation_client.py +1 -1
- helm/clients/lit_gpt_client.py +2 -2
- helm/clients/mistral_client.py +62 -18
- helm/clients/nvidia_nim_client.py +0 -3
- helm/clients/openai_client.py +255 -21
- helm/clients/palmyra_client.py +2 -6
- helm/clients/reka_client.py +1 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
- helm/clients/stanfordhealthcare_claude_client.py +31 -0
- helm/clients/stanfordhealthcare_google_client.py +43 -0
- helm/clients/stanfordhealthcare_http_model_client.py +93 -0
- helm/clients/stanfordhealthcare_openai_client.py +62 -0
- helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
- helm/clients/test_client.py +1 -1
- helm/clients/test_together_client.py +6 -1
- helm/clients/together_client.py +69 -7
- helm/clients/upstage_client.py +23 -0
- helm/clients/vertexai_client.py +39 -13
- helm/clients/vision_language/open_flamingo/__init__.py +2 -2
- helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
- helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +175 -0
- helm/clients/vllm_client.py +4 -6
- helm/clients/yi_client.py +0 -3
- helm/common/audio_utils.py +111 -0
- helm/common/cache.py +8 -30
- helm/common/file_caches/local_file_cache.py +1 -1
- helm/common/file_caches/test_local_file_cache.py +1 -1
- helm/common/images_utils.py +2 -2
- helm/common/key_value_store.py +9 -9
- helm/common/media_object.py +2 -2
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +26 -0
- helm/common/reeval_parameters.py +12 -0
- helm/common/request.py +6 -2
- helm/common/response_format.py +18 -0
- helm/common/test_cache.py +1 -48
- helm/common/test_media_object.py +1 -1
- helm/common/tokenization_request.py +0 -9
- helm/config/model_deployments.yaml +1258 -33
- helm/config/model_metadata.yaml +1110 -41
- helm/config/tokenizer_configs.yaml +403 -3
- helm/proxy/cli.py +2 -2
- helm/proxy/example_queries.py +1 -1
- helm/proxy/server.py +11 -13
- helm/proxy/services/remote_service.py +1 -7
- helm/proxy/services/server_service.py +6 -19
- helm/proxy/services/service.py +0 -6
- helm/proxy/services/test_remote_service.py +2 -2
- helm/proxy/services/test_service.py +1 -1
- helm/proxy/static/general.js +122 -0
- helm/proxy/static/help.html +99 -0
- helm/proxy/static/index.css +57 -0
- helm/proxy/static/index.html +40 -0
- helm/proxy/static/index.js +456 -0
- helm/proxy/static/info-icon.png +0 -0
- helm/proxy/test_retry.py +1 -1
- helm/proxy/token_counters/auto_token_counter.py +1 -1
- helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
- helm/tokenizers/caching_tokenizer.py +2 -30
- helm/tokenizers/http_model_tokenizer.py +1 -1
- helm/tokenizers/huggingface_tokenizer.py +2 -2
- helm/tokenizers/lit_gpt_tokenizer.py +1 -1
- helm/tokenizers/test_anthropic_tokenizer.py +6 -2
- helm/tokenizers/test_huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_yalm_tokenizer.py +1 -1
- helm/tokenizers/tiktoken_tokenizer.py +1 -1
- helm/tokenizers/tokenizer.py +3 -1
- helm/tokenizers/yalm_tokenizer.py +3 -3
- helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
- crfm_helm-0.5.3.dist-info/METADATA +0 -355
- crfm_helm-0.5.3.dist-info/RECORD +0 -699
- helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
- helm/benchmark/data_overlap/export_scenario_text.py +0 -119
- helm/benchmark/data_overlap/light_scenario.py +0 -60
- helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
- helm/benchmark/static_build/assets/index-58f97dcd.js +0 -10
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/tokenizers/anthropic_tokenizer.py +0 -52
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
- /helm/benchmark/{data_overlap → metrics/ifeval}/__init__.py +0 -0
helm/clients/stanfordhealthcare_google_client.py ADDED

@@ -0,0 +1,43 @@
+from typing import Any, Dict, List
+
+from helm.common.request import (
+    Request,
+    GeneratedOutput,
+)
+from helm.clients.stanfordhealthcare_http_model_client import StanfordHealthCareHTTPModelClient
+
+
+class StanfordHealthCareGoogleClient(StanfordHealthCareHTTPModelClient):
+    """
+    Client for accessing Google models hosted on Stanford Health Care's model API.
+
+    Configure by setting the following in prod_env/credentials.conf:
+
+    ```
+    stanfordhealthcareEndpoint: https://your-domain-name/
+    stanfordhealthcareApiKey: your-private-key
+    ```
+    """
+
+    def get_request(self, request: Request) -> Dict[str, Any]:
+        return {
+            "contents": {
+                "role": "user",
+                "parts": {"text": request.prompt},
+            },
+            "generation_config": {
+                "temperature": request.temperature,
+                "topP": request.top_p,
+                "topK": request.top_k_per_token,
+                "candidateCount": 1,
+                "maxOutputTokens": request.max_tokens,
+                "stopSequences": request.stop_sequences,
+            },
+        }
+
+    def parse_response(self, response: Dict[str, Any]) -> List[GeneratedOutput]:
+        completion = ""
+        for item in response["content"]:
+            if "content" in item["candidates"][0]:
+                completion += item["candidates"][0]["content"]["parts"][0]["text"]
+        return [GeneratedOutput(text=completion, logprob=0, tokens=[])]
helm/clients/stanfordhealthcare_http_model_client.py ADDED

@@ -0,0 +1,93 @@
+import requests
+
+from abc import ABC, abstractmethod
+from dataclasses import asdict
+from typing import Any, Dict, List, Optional
+
+from helm.common.cache import CacheConfig
+from helm.common.request import (
+    wrap_request_time,
+    Request,
+    RequestResult,
+    GeneratedOutput,
+    EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
+)
+from helm.clients.client import CachingClient
+
+
+class StanfordHealthCareHTTPModelClient(CachingClient, ABC):
+    """
+    Client for accessing Stanford Health Care models via HTTP requests.
+
+    Configure by setting the following in prod_env/credentials.conf:
+
+    ```
+    stanfordhealthcareEndpoint: https://your-domain-name/
+    stanfordhealthcareApiKey: your-private-key
+    ```
+    """
+
+    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"
+
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        deployment: str,
+        endpoint: str = "http://localhost:8080",
+        do_cache: bool = False,
+        timeout: int = 3000,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+    ):
+        super().__init__(cache_config=cache_config)
+        assert api_key, "API key must be provided"
+        self.endpoint = endpoint
+        self.timeout = timeout
+        self.do_cache = do_cache
+        self.deployment = deployment
+        self.model = model
+        self.default_headers = {StanfordHealthCareHTTPModelClient.CREDENTIAL_HEADER_NAME: api_key}
+
+    def make_request(self, request: Request) -> RequestResult:
+        cache_key = asdict(request)
+        if request.embedding:
+            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT
+
+        raw_request = self.get_request(request)
+
+        try:
+
+            def do_it() -> Dict[str, Any]:
+                url = f"{self.endpoint}/{self.deployment}"
+                response = requests.post(url, json=raw_request, headers=self.default_headers, timeout=self.timeout)
+                response.raise_for_status()
+                response_json = response.json()
+                if type(response_json) == list:
+                    response_json = {"content": response_json}
+                return response_json
+
+            if self.do_cache:
+                response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+            else:
+                response, cached = wrap_request_time(do_it)(), False
+
+            completions = self.parse_response(response)
+
+            return RequestResult(
+                success=True,
+                cached=cached,
+                error=None,
+                completions=completions,
+                embedding=[],
+                request_time=response["request_time"],
+            )
+        except requests.exceptions.RequestException as e:
+            return RequestResult(success=False, cached=False, error=f"Request error: {e}", completions=[], embedding=[])
+
+    @abstractmethod
+    def get_request(self, request: Request) -> Dict[str, Any]:
+        pass
+
+    @abstractmethod
+    def parse_response(self, response: Dict[str, Any]) -> List[GeneratedOutput]:
+        pass
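The two added files above pair a concrete Google client with the abstract HTTP base class it builds on: subclasses only supply `get_request` and `parse_response`, while `make_request` in the base handles caching, the POST call, and error wrapping. A minimal usage sketch follows (not part of the diff; the deployment name, endpoint, and prompt are placeholders, and in HELM these arguments would normally come from model_deployments.yaml and prod_env/credentials.conf):

```python
# Hypothetical sketch of exercising the new client directly; all values are placeholders.
from helm.common.cache import BlackHoleCacheConfig
from helm.common.request import Request
from helm.clients.stanfordhealthcare_google_client import StanfordHealthCareGoogleClient

client = StanfordHealthCareGoogleClient(
    cache_config=BlackHoleCacheConfig(),  # skip caching for this sketch
    deployment="gemini-deployment",       # placeholder path segment appended to the endpoint
    endpoint="https://your-domain-name",  # stanfordhealthcareEndpoint in credentials.conf
    api_key="your-private-key",           # stanfordhealthcareApiKey in credentials.conf
)
result = client.make_request(Request(prompt="Say hello.", max_tokens=16))
print(result.completions[0].text if result.success else result.error)
```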
helm/clients/stanfordhealthcare_openai_client.py ADDED

@@ -0,0 +1,62 @@
+from typing import Optional
+
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.proxy.retry import NonRetriableException
+from helm.tokenizers.tokenizer import Tokenizer
+
+try:
+    from openai import OpenAI
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["openai"])
+
+
+class StanfordHealthCareOpenAIClient(OpenAIClient):
+    """
+    Client for accessing OpenAI models hosted on Stanford Health Care's model API.
+
+    Configure by setting the following in prod_env/credentials.conf:
+
+    ```
+    stanfordhealthcareEndpoint: https://your-domain-name/
+    stanfordhealthcareApiKey: your-private-key
+    ```
+    """
+
+    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        model_name: str,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+        base_url: Optional[str] = None,
+        openai_model_name: Optional[str] = None,
+        output_processor: Optional[str] = None,
+    ):
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key="unused",
+            openai_model_name=openai_model_name,
+            output_processor=output_processor,
+        )
+        if not endpoint:
+            raise NonRetriableException("Must provide endpoint through credentials.conf")
+        if not api_key:
+            raise NonRetriableException("Must provide API key through credentials.conf")
+        if not base_url:
+            raise NonRetriableException("Must provide base url through model_deployments")
+        # Guess the base URL part based on the model name
+        # Maybe make this configurable instead?
+        base_url = base_url.format(endpoint=endpoint)
+        self.client = OpenAI(
+            api_key="dummy",
+            base_url=base_url,
+            default_headers={StanfordHealthCareOpenAIClient.CREDENTIAL_HEADER_NAME: api_key},
+        )
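One detail worth noting in the client above: `base_url` is treated as a template into which the credential endpoint is substituted. A small standalone illustration (the template string below is an assumed example, not taken from the diff):

```python
# Assumed example of the base_url templating performed in __init__ above.
# The template comes from model_deployments; the endpoint comes from credentials.conf.
endpoint = "https://your-domain-name/"
base_url_template = "{endpoint}openai-deployment-path/"  # placeholder template
print(base_url_template.format(endpoint=endpoint))
# https://your-domain-name/openai-deployment-path/
```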
helm/clients/stanfordhealthcare_shc_openai_client.py ADDED

@@ -0,0 +1,42 @@
+from typing import Dict, Optional
+
+from helm.clients.azure_openai_client import AzureOpenAIClient
+from helm.common.cache import CacheConfig
+from helm.proxy.retry import NonRetriableException
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class StanfordHealthCareSHCOpenAIClient(AzureOpenAIClient):
+    """
+    Client for accessing OpenAI models hosted on Stanford Health Care's model API.
+
+    Configure by setting the following in prod_env/credentials.conf:
+
+    ```
+    stanfordhealthcareEndpoint: https://your-domain-name/
+    stanfordhealthcareApiKey: your-private-key
+    ```
+    """
+
+    API_VERSION = "2024-08-01-preview"
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+        default_headers: Optional[Dict[str, str]] = None,
+    ):
+        if not api_key:
+            raise NonRetriableException("Must provide API key through credentials.conf")
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key=api_key,
+            endpoint=endpoint,
+            api_version=StanfordHealthCareSHCOpenAIClient.API_VERSION,
+            default_headers=default_headers,
+        )
helm/clients/test_client.py CHANGED

@@ -1,6 +1,6 @@
 from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from helm.tokenizers.auto_tokenizer import AutoTokenizer
-from .client import truncate_sequence, truncate_and_tokenize_response_text
+from helm.clients.client import truncate_sequence, truncate_and_tokenize_response_text
 from typing import List
 from helm.common.request import Request, GeneratedOutput, Token
 

helm/clients/test_together_client.py CHANGED

@@ -5,7 +5,12 @@ import tempfile
 from helm.common.cache import BlackHoleCacheConfig, SqliteCacheConfig
 from helm.common.request import Request
 
-from .together_client import
+from helm.clients.together_client import (
+    TogetherClient,
+    TogetherChatClient,
+    TogetherCompletionClient,
+    TogetherClientError,
+)
 
 
 class TestTogetherClient:
helm/clients/together_client.py CHANGED

@@ -1,12 +1,15 @@
 from copy import deepcopy
 from itertools import zip_longest
 import threading
-from typing import List, Dict, Any, Mapping, Optional, TypedDict, Union
+from typing import Callable, List, Dict, Any, Mapping, Optional, TypedDict, Union
+from typing_extensions import NotRequired
 
 import requests
 from retrying import retry
 
 from helm.common.cache import CacheConfig
+from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
+from helm.common.object_spec import get_class_by_name
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
 from helm.clients.client import CachingClient, truncate_sequence, cleanup_str
@@ -312,26 +315,65 @@ class TogetherRawChatRequest(TypedDict):
     logprobs: int
     echo: bool
     n: int
+    response_format: NotRequired[Dict[str, Any]]
 
 
 class TogetherChatClient(CachingClient):
     """Client that uses the Python Together library for chat models."""
 
-    def __init__(
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        api_key: Optional[str],
+        together_model: Optional[str] = None,
+        disable_logprobs: Optional[bool] = None,
+        output_processor: Optional[str] = None,
+    ):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
         self._together_model = together_model
+        self._disable_logprobs = bool(disable_logprobs)
+        # self.output_processor is actually a function, not a class
+
+        self.output_processor: Optional[Callable[[str], str]] = (
+            get_class_by_name(output_processor) if output_processor else None
+        )
 
     def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
+        request.validate()
+        messages: List[Dict[str, Any]]
         if request.messages:
             messages = request.messages
+        elif request.multimodal_prompt:
+            message_contents = []
+            for media_object in request.multimodal_prompt.media_objects:
+                if media_object.is_type(IMAGE_TYPE) and media_object.location:
+                    assert media_object.location
+                    if media_object.is_local_file:
+                        from helm.common.images_utils import encode_base64
+
+                        base64_image: str = encode_base64(media_object.location)
+                        image_url = f"data:image/jpeg;base64,{base64_image}"
+                    else:
+                        image_url = media_object.location
+                    message_contents.append({"type": "image_url", "image_url": {"url": image_url}})
+                elif media_object.is_type(TEXT_TYPE):
+                    assert media_object.text
+                    message_contents.append({"type": "text", "text": media_object.text})
+                else:
+                    raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+            messages = [{"role": "user", "content": message_contents}]
         else:
             messages = [{"role": "user", "content": request.prompt}]
         if self._together_model is not None:
            model = self._together_model
         else:
            model = request.model
-
+        if self._disable_logprobs:
+            logprobs = 0
+        else:
+            logprobs = min(request.top_k_per_token, 1)
+        raw_chat_request: TogetherRawChatRequest = {
             "messages": messages,
             "model": model,
             "max_tokens": request.max_tokens,
@@ -339,10 +381,16 @@ class TogetherChatClient(CachingClient):
             "temperature": request.temperature,
             "top_p": request.top_p,
             "top_k": request.top_k_per_token,
-            "logprobs":
+            "logprobs": logprobs,
             "echo": request.echo_prompt,
             "n": request.num_completions,
         }
+        if request.response_format and request.response_format.json_schema:
+            raw_chat_request["response_format"] = {
+                "type": "json_object",
+                "schema": request.response_format.json_schema,
+            }
+        return raw_chat_request
 
     def make_request(self, request: Request) -> RequestResult:
         raw_request = self.convert_to_raw_chat_request(request)
@@ -377,7 +425,10 @@ class TogetherChatClient(CachingClient):
                    break
                tokens.append(Token(text=token_text, logprob=token_logprob or 0.0))
            assert choice.message.role == "assistant"
-
+            output_text = choice.message.content
+            if self.output_processor:
+                output_text = self.output_processor(output_text)
+            generated_outputs.append(GeneratedOutput(text=output_text, logprob=0.0, tokens=tokens))
        return RequestResult(
            success=True,
            cached=cached,
@@ -404,16 +455,27 @@ class TogetherRawCompletionRequest(TypedDict):
 class TogetherCompletionClient(CachingClient):
     """Client that uses the Python Together library for text completion models."""
 
-    def __init__(
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        api_key: Optional[str],
+        together_model: Optional[str] = None,
+        disable_logprobs: Optional[bool] = None,
+    ):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
         self._together_model = together_model
+        self._disable_logprobs = bool(disable_logprobs)
 
     def convert_to_raw_completion_request(self, request: Request) -> TogetherRawCompletionRequest:
         if self._together_model is not None:
             model = self._together_model
         else:
             model = request.model
+        if self._disable_logprobs:
+            logprobs = 0
+        else:
+            logprobs = min(request.top_k_per_token, 1)
         return {
             "prompt": request.prompt,
             "model": model,
@@ -422,7 +484,7 @@ class TogetherCompletionClient(CachingClient):
             "temperature": request.temperature,
             "top_p": request.top_p,
             "top_k": request.top_k_per_token,
-            "logprobs":
+            "logprobs": logprobs,
             "echo": request.echo_prompt,
             "n": request.num_completions,
         }
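The chat client above now forwards a JSON schema to Together's structured-output mode whenever `Request.response_format` carries one. A rough sketch of how such a request might be built, assuming the `ResponseFormat` class added in `helm/common/response_format.py` exposes `json_schema` as a constructor argument (only the attribute, not the constructor, is visible in this diff):

```python
# Hedged sketch: ResponseFormat's exact constructor is an assumption; the schema,
# model name, and prompt below are placeholders.
from helm.common.request import Request
from helm.common.response_format import ResponseFormat

schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
}
request = Request(
    model="together/some-chat-model",  # placeholder model name
    prompt="Answer in JSON with an 'answer' key.",
    response_format=ResponseFormat(json_schema=schema),  # assumed constructor
)
# TogetherChatClient.convert_to_raw_chat_request(request) would then include:
#   {"response_format": {"type": "json_object", "schema": schema}}
```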
helm/clients/upstage_client.py ADDED

@@ -0,0 +1,23 @@
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class UpstageChatClient(OpenAIClient):
+    """Sends request to a Upstage model using a OpenAI-compatible Chat API."""
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: str,
+    ):
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key=api_key,
+            org_id=None,
+            base_url="https://api.upstage.ai/v1/solar",
+        )
helm/clients/vertexai_client.py CHANGED

@@ -4,6 +4,7 @@ from threading import Lock
 from typing import Any, Dict, Mapping, Optional, List, Union
 
 from helm.common.cache import CacheConfig
+from helm.common.multimodal_request_utils import get_contents_as_bytes
 from helm.common.media_object import TEXT_TYPE
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, ErrorFlags
@@ -12,7 +13,14 @@ from helm.clients.client import CachingClient, truncate_sequence, generate_uid_f
 try:
     import vertexai
     from vertexai.language_models import TextGenerationModel, TextGenerationResponse  # PaLM2
-    from vertexai.preview.generative_models import
+    from vertexai.preview.generative_models import (
+        GenerativeModel,
+        GenerationResponse,
+        Candidate,
+        Content,
+        Part,
+        Image,
+    )  # Gemini
     from google.cloud.aiplatform_v1beta1.types import SafetySetting, HarmCategory
 except ModuleNotFoundError as e:
     handle_module_not_found_error(e, ["google"])
@@ -48,17 +56,16 @@ def _get_safety_settings_for_preset(
         raise ValueError(f"Unknown safety_settings_preset: {safety_settings_preset}")
 
 
-def _get_model_name_for_request(request: Request) -> str:
-    # We have to strip "-safety-" suffixes from model names because they are not part of the Vertex AI model name
-    # TODO: Clean up this hack
-    return request.model_engine.split("-safety-")[0]
-
-
 class VertexAIClient(CachingClient, ABC):
     """Client for Vertex AI models"""
 
     def __init__(
-        self,
+        self,
+        cache_config: CacheConfig,
+        project_id: str,
+        location: str,
+        safety_settings_preset: Optional[str] = None,
+        vertexai_model: Optional[str] = None,
     ) -> None:
         super().__init__(cache_config=cache_config)
         self.project_id = project_id
@@ -67,8 +74,15 @@ class VertexAIClient(CachingClient, ABC):
         self.safety_settings_preset = safety_settings_preset
         self.safety_settings = _get_safety_settings_for_preset(safety_settings_preset)
 
+        self.vertexai_model = vertexai_model
+
         vertexai.init(project=self.project_id, location=self.location)
 
+    def _get_model_name_for_request(self, request: Request) -> str:
+        if self.vertexai_model is not None:
+            return self.vertexai_model
+        return request.model_engine
+
     def make_cache_key_with_safety_settings_preset(self, raw_request: Mapping, request: Request) -> Mapping:
         """Construct the key for the cache using the raw request.
 
@@ -111,7 +125,7 @@ class VertexAITextClient(VertexAIClient):
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = _get_model_name_for_request(request)
+        model_name: str = self._get_model_name_for_request(request)
 
         try:
 
@@ -193,12 +207,20 @@ class VertexAIChatClient(VertexAIClient):
 
     def make_request(self, request: Request) -> RequestResult:
         """Make a request"""
-        contents
+        contents = [request.prompt]
 
         # For the multimodal case, build up the content with the media objects of `request.multimodal_prompt`
         if request.multimodal_prompt is not None:
             return self._make_multimodal_request(request)
 
+        if request.messages is not None:
+            contents = []
+            role_mapping = {"user": "user", "assistant": "model"}
+            for msg in request.messages:
+                contents.append(
+                    Content(role=role_mapping.get(msg["role"], "user"), parts=[Part.from_text(msg["content"])])
+                )
+
         parameters = {
             "temperature": request.temperature,
             "max_output_tokens": request.max_tokens,
@@ -217,7 +239,7 @@ class VertexAIChatClient(VertexAIClient):
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = _get_model_name_for_request(request)
+        model_name: str = self._get_model_name_for_request(request)
         model = self.get_model(model_name)
 
         try:
@@ -263,7 +285,7 @@ class VertexAIChatClient(VertexAIClient):
                 cache_key = self.make_cache_key_with_safety_settings_preset(
                     {
                         "model_name": model_name,
-                        "prompt": request.prompt,
+                        "prompt": request.messages or request.prompt,
                         **parameters,
                     },
                     request,
@@ -338,6 +360,10 @@ class VertexAIChatClient(VertexAIClient):
         for media_object in request.multimodal_prompt.media_objects:
             if media_object.is_type("image") and media_object.location:
                 contents.append(Part.from_image(Image.load_from_file(media_object.location)))
+            elif media_object.is_type("audio") and media_object.location:
+                contents.append(
+                    Part.from_data(get_contents_as_bytes(media_object.location), mime_type=media_object.content_type)
+                )
             elif media_object.is_type(TEXT_TYPE):
                 if media_object.text is None:
                     raise ValueError("MediaObject of text type has missing text field value")
@@ -355,7 +381,7 @@ class VertexAIChatClient(VertexAIClient):
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = _get_model_name_for_request(request)
+        model_name: str = self._get_model_name_for_request(request)
         model = self.get_model(model_name)
 
         request_time = 0
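The chat client change above adds support for multi-turn `request.messages`, mapping HELM roles onto Vertex AI roles ("assistant" becomes "model"). A standalone illustration of that mapping (assumes the `vertexai` SDK is installed; the conversation is made up):

```python
# Standalone illustration of the role mapping introduced above; not part of the diff.
from vertexai.preview.generative_models import Content, Part

role_mapping = {"user": "user", "assistant": "model"}
messages = [
    {"role": "user", "content": "What does HELM stand for?"},
    {"role": "assistant", "content": "Holistic Evaluation of Language Models."},
    {"role": "user", "content": "Summarize that in five words."},
]
contents = [
    Content(role=role_mapping.get(msg["role"], "user"), parts=[Part.from_text(msg["content"])])
    for msg in messages
]
print([c.role for c in contents])  # ['user', 'model', 'user']
```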
helm/clients/vision_language/open_flamingo/__init__.py CHANGED

@@ -1,2 +1,2 @@
-from .src.flamingo import Flamingo
-from .src.factory import create_model_and_transforms
+from helm.clients.vision_language.open_flamingo.src.flamingo import Flamingo
+from helm.clients.vision_language.open_flamingo.src.factory import create_model_and_transforms

helm/clients/vision_language/open_flamingo/src/factory.py CHANGED

@@ -7,9 +7,9 @@ from typing import Optional
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from helm.common.general import handle_module_not_found_error
-from .flamingo import Flamingo
-from .flamingo_lm import FlamingoLMMixin
-from .utils import extend_instance
+from helm.clients.vision_language.open_flamingo.src.flamingo import Flamingo
+from helm.clients.vision_language.open_flamingo.src.flamingo_lm import FlamingoLMMixin
+from helm.clients.vision_language.open_flamingo.src.utils import extend_instance
 
 
 def create_model_and_transforms(

helm/clients/vision_language/open_flamingo/src/flamingo.py CHANGED

@@ -5,7 +5,7 @@ Source: https://github.com/mlfoundations/open_flamingo
 import torch
 from einops import rearrange
 from torch import nn
-from .helpers import PerceiverResampler
+from helm.clients.vision_language.open_flamingo.src.helpers import PerceiverResampler
 from torch.distributed.fsdp.wrap import (
     enable_wrap,
     wrap,
@@ -15,7 +15,7 @@ from torch.distributed.fsdp import (
     FullyShardedDataParallel as FSDP,
 )
 
-from .utils import apply_with_stopping_condition
+from helm.clients.vision_language.open_flamingo.src.utils import apply_with_stopping_condition
 
 
 class Flamingo(nn.Module):

helm/clients/vision_language/open_flamingo/src/flamingo_lm.py CHANGED

@@ -3,8 +3,8 @@ Source: https://github.com/mlfoundations/open_flamingo
 """
 
 import torch.nn as nn
-from .helpers import GatedCrossAttentionBlock
-from .utils import getattr_recursive, setattr_recursive
+from helm.clients.vision_language.open_flamingo.src.helpers import GatedCrossAttentionBlock
+from helm.clients.vision_language.open_flamingo.src.utils import getattr_recursive, setattr_recursive
 
 
 class FlamingoLayer(nn.Module):