crfm-helm 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff shows the content changes between publicly released versions of the package as they appear in their respective public registries. The information is provided for informational purposes only.
Potentially problematic release.
This version of crfm-helm has been flagged as potentially problematic; consult the registry's advisory page for details.
- crfm_helm-0.5.6.dist-info/METADATA +427 -0
- crfm_helm-0.5.6.dist-info/RECORD +941 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +13 -1
- helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
- helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +4 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/common_adapter_specs.py +69 -4
- helm/benchmark/adaptation/prompt.py +1 -1
- helm/benchmark/annotation/aci_bench_annotator.py +95 -0
- helm/benchmark/annotation/air_bench_annotator.py +21 -6
- helm/benchmark/annotation/annotator.py +5 -0
- helm/benchmark/annotation/annotator_factory.py +3 -20
- helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
- helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
- helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
- helm/benchmark/annotation/bird_sql_annotator.py +58 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +93 -0
- helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
- helm/benchmark/annotation/dischargeme_annotator.py +107 -0
- helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
- helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/med_dialog_annotator.py +99 -0
- helm/benchmark/annotation/medalign_annotator.py +100 -0
- helm/benchmark/annotation/medi_qa_annotator.py +98 -0
- helm/benchmark/annotation/medication_qa_annotator.py +87 -63
- helm/benchmark/annotation/mental_health_annotator.py +98 -0
- helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
- helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +214 -6
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
- helm/benchmark/annotation/omni_math_annotator.py +131 -0
- helm/benchmark/annotation/spider_annotator.py +18 -0
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
- helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
- helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
- helm/benchmark/annotation/wildbench_annotator.py +119 -0
- helm/benchmark/annotation_executor.py +35 -15
- helm/benchmark/augmentations/cleva_perturbation.py +9 -8
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
- helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
- helm/benchmark/augmentations/dialect_perturbation.py +4 -5
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +2 -2
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +4 -5
- helm/benchmark/augmentations/perturbation.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +2 -2
- helm/benchmark/augmentations/synonym_perturbation.py +4 -3
- helm/benchmark/augmentations/test_perturbation.py +16 -13
- helm/benchmark/augmentations/translate_perturbation.py +2 -2
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/data_preprocessor.py +2 -2
- helm/benchmark/executor.py +11 -12
- helm/benchmark/huggingface_registration.py +2 -7
- helm/benchmark/metrics/aci_bench_metrics.py +14 -0
- helm/benchmark/metrics/basic_metrics.py +6 -6
- helm/benchmark/metrics/bbq_metrics.py +2 -2
- helm/benchmark/metrics/bias_metrics.py +12 -3
- helm/benchmark/metrics/bias_word_lists.py +1 -1
- helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
- helm/benchmark/metrics/bird_sql_metrics.py +28 -0
- helm/benchmark/metrics/chw_care_plan_metrics.py +14 -0
- helm/benchmark/metrics/classification_metrics.py +76 -12
- helm/benchmark/metrics/cleva_harms_metrics.py +10 -9
- helm/benchmark/metrics/code_metrics.py +5 -5
- helm/benchmark/metrics/comet_metric.py +125 -0
- helm/benchmark/metrics/common_metric_specs.py +9 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
- helm/benchmark/metrics/copyright_metrics.py +4 -4
- helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +14 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -4
- helm/benchmark/metrics/dry_run_metrics.py +5 -5
- helm/benchmark/metrics/efficiency_metrics.py +6 -6
- helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
- helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
- helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
- helm/benchmark/metrics/ifeval/__init__.py +0 -0
- helm/benchmark/metrics/ifeval/instructions.py +1574 -0
- helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
- helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
- helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
- helm/benchmark/metrics/ifeval_metrics.py +55 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
- helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
- helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
- helm/benchmark/metrics/language_modeling_metrics.py +4 -4
- helm/benchmark/metrics/llm_jury_metrics.py +46 -0
- helm/benchmark/metrics/machine_translation_metrics.py +2 -2
- helm/benchmark/metrics/med_dialog_metrics.py +14 -0
- helm/benchmark/metrics/medalign_metrics.py +14 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
- helm/benchmark/metrics/medec_metrics.py +101 -0
- helm/benchmark/metrics/medi_qa_metrics.py +14 -0
- helm/benchmark/metrics/medication_qa_metrics.py +10 -19
- helm/benchmark/metrics/melt_bias_metric.py +234 -0
- helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
- helm/benchmark/metrics/melt_metric_specs.py +43 -0
- helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
- helm/benchmark/metrics/mental_health_metrics.py +14 -0
- helm/benchmark/metrics/metric.py +3 -3
- helm/benchmark/metrics/metric_service.py +11 -11
- helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
- helm/benchmark/metrics/mimic_rrs_metrics.py +14 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +14 -0
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +14 -0
- helm/benchmark/metrics/nltk_helper.py +32 -0
- helm/benchmark/metrics/numeracy_metrics.py +4 -4
- helm/benchmark/metrics/omni_math_metrics.py +32 -0
- helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
- helm/benchmark/metrics/output_processing_metric.py +60 -0
- helm/benchmark/metrics/output_processors.py +15 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
- helm/benchmark/metrics/ranking_metrics.py +3 -3
- helm/benchmark/metrics/reference_metric.py +3 -3
- helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
- helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
- helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
- helm/benchmark/metrics/spider_metrics.py +7 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +14 -0
- helm/benchmark/metrics/statistic.py +1 -1
- helm/benchmark/metrics/summac/model_summac.py +2 -3
- helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
- helm/benchmark/metrics/summarization_metrics.py +20 -9
- helm/benchmark/metrics/test_bias_metrics.py +5 -1
- helm/benchmark/metrics/test_classification_metrics.py +140 -68
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
- helm/benchmark/metrics/test_metric.py +1 -1
- helm/benchmark/metrics/test_statistic.py +2 -2
- helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
- helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +3 -3
- helm/benchmark/metrics/toxicity_metrics.py +6 -6
- helm/benchmark/metrics/unitxt_metrics.py +7 -5
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
- helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
- helm/benchmark/metrics/vision_language/image_utils.py +2 -2
- helm/benchmark/metrics/wildbench_metrics.py +34 -0
- helm/benchmark/model_deployment_registry.py +6 -8
- helm/benchmark/model_metadata_registry.py +16 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +33 -12
- helm/benchmark/presentation/run_display.py +13 -0
- helm/benchmark/presentation/schema.py +2 -1
- helm/benchmark/presentation/summarize.py +97 -67
- helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
- helm/benchmark/reeval_run.py +202 -0
- helm/benchmark/reeval_runner.py +355 -0
- helm/benchmark/run.py +86 -90
- helm/benchmark/run_expander.py +90 -9
- helm/benchmark/run_spec_factory.py +13 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
- helm/benchmark/run_specs/audio_run_specs.py +657 -0
- helm/benchmark/run_specs/call_center_run_specs.py +49 -0
- helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
- helm/benchmark/run_specs/classic_run_specs.py +1 -69
- helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +280 -0
- helm/benchmark/run_specs/experimental_run_specs.py +142 -3
- helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
- helm/benchmark/run_specs/lite_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +141 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +1260 -0
- helm/benchmark/run_specs/melt_run_specs.py +783 -0
- helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
- helm/benchmark/run_specs/oab_exams_specs.py +32 -0
- helm/benchmark/run_specs/safety_run_specs.py +37 -0
- helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +169 -0
- helm/benchmark/run_specs/sql_run_specs.py +54 -0
- helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
- helm/benchmark/run_specs/vlm_run_specs.py +103 -2
- helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
- helm/benchmark/runner.py +5 -5
- helm/benchmark/scenarios/aci_bench_scenario.py +126 -0
- helm/benchmark/scenarios/air_bench_scenario.py +6 -1
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/__init__.py +0 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +130 -0
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
- helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
- helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
- helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
- helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
- helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
- helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
- helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
- helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
- helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +103 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +110 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +78 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +109 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +105 -0
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
- helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
- helm/benchmark/scenarios/banking77_scenario.py +6 -1
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/big_bench_scenario.py +11 -1
- helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
- helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
- helm/benchmark/scenarios/blimp_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +1 -1
- helm/benchmark/scenarios/boolq_scenario.py +1 -1
- helm/benchmark/scenarios/casehold_scenario.py +79 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +106 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
- helm/benchmark/scenarios/clear_scenario.py +157 -0
- helm/benchmark/scenarios/cleva_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +17 -4
- helm/benchmark/scenarios/commonsense_scenario.py +1 -1
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
- helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
- helm/benchmark/scenarios/dischargeme_scenario.py +172 -0
- helm/benchmark/scenarios/disinformation_scenario.py +10 -1
- helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
- helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
- helm/benchmark/scenarios/ehr_sql_scenario.py +137 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +1519 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
- helm/benchmark/scenarios/gpqa_scenario.py +80 -0
- helm/benchmark/scenarios/grammar.py +2 -2
- helm/benchmark/scenarios/grammar_scenario.py +2 -2
- helm/benchmark/scenarios/gsm_scenario.py +10 -1
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
- helm/benchmark/scenarios/headqa_scenario.py +136 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
- helm/benchmark/scenarios/ice_scenario.py +8 -4
- helm/benchmark/scenarios/ifeval_scenario.py +53 -0
- helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
- helm/benchmark/scenarios/imdb_scenario.py +11 -2
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +79 -0
- helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
- helm/benchmark/scenarios/koala_scenario.py +1 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
- helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
- helm/benchmark/scenarios/legal_support_scenario.py +11 -1
- helm/benchmark/scenarios/legalbench_scenario.py +22 -3
- helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
- helm/benchmark/scenarios/lextreme_scenario.py +11 -1
- helm/benchmark/scenarios/live_qa_scenario.py +1 -1
- helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
- helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
- helm/benchmark/scenarios/math_scenario.py +9 -1
- helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
- helm/benchmark/scenarios/med_dialog_scenario.py +25 -22
- helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
- helm/benchmark/scenarios/med_qa_scenario.py +10 -1
- helm/benchmark/scenarios/medalign_scenario.py +94 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +432 -0
- helm/benchmark/scenarios/medbullets_scenario.py +145 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +127 -0
- helm/benchmark/scenarios/medec_scenario.py +125 -0
- helm/benchmark/scenarios/medhallu_scenario.py +72 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +111 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +8 -2
- helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
- helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
- helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
- helm/benchmark/scenarios/melt_scenarios.py +793 -0
- helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
- helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
- helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
- helm/benchmark/scenarios/mental_health_scenario.py +123 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +103 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +98 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +77 -0
- helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
- helm/benchmark/scenarios/mmlu_scenario.py +11 -1
- helm/benchmark/scenarios/msmarco_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +144 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +142 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +277 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
- helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
- helm/benchmark/scenarios/newsqa_scenario.py +1 -1
- helm/benchmark/scenarios/numeracy_scenario.py +12 -2
- helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
- helm/benchmark/scenarios/omni_math_scenario.py +53 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
- helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
- helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
- helm/benchmark/scenarios/pubmed_qa_scenario.py +59 -43
- helm/benchmark/scenarios/quac_scenario.py +10 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +152 -0
- helm/benchmark/scenarios/raft_scenario.py +17 -2
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
- helm/benchmark/scenarios/scenario.py +9 -1
- helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
- helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +75 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +75 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +77 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +74 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +81 -0
- helm/benchmark/scenarios/shc_sei_scenario.py +94 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +77 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
- helm/benchmark/scenarios/spider_scenario.py +91 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +97 -0
- helm/benchmark/scenarios/summarization_scenario.py +11 -1
- helm/benchmark/scenarios/sumosum_scenario.py +157 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
- helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
- helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
- helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
- helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
- helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
- helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
- helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
- helm/benchmark/scenarios/test_math_scenario.py +1 -0
- helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
- helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
- helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
- helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
- helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
- helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
- helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +12 -2
- helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
- helm/benchmark/scenarios/unitxt_scenario.py +8 -2
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
- helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
- helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
- helm/benchmark/scenarios/wikifact_scenario.py +11 -1
- helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
- helm/benchmark/scenarios/wildbench_scenario.py +83 -0
- helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
- helm/benchmark/scenarios/xstest_scenario.py +1 -1
- helm/benchmark/server.py +13 -1
- helm/benchmark/slurm_runner.py +1 -1
- helm/benchmark/static/schema_audio.yaml +763 -0
- helm/benchmark/static/schema_autobencher.yaml +150 -0
- helm/benchmark/static/schema_call_center.yaml +97 -60
- helm/benchmark/static/{schema_medical.yaml → schema_capabilities.yaml} +100 -101
- helm/benchmark/static/schema_czech_bank.yaml +148 -0
- helm/benchmark/static/schema_enem_challenge.yaml +146 -0
- helm/benchmark/static/schema_enterprise.yaml +319 -0
- helm/benchmark/static/schema_finance.yaml +14 -12
- helm/benchmark/static/schema_heim.yaml +1389 -0
- helm/benchmark/static/schema_long_context.yaml +283 -0
- helm/benchmark/static/schema_medhelm.yaml +1140 -0
- helm/benchmark/static/schema_melt.yaml +1257 -0
- helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
- helm/benchmark/static/schema_safety.yaml +18 -1
- helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
- helm/benchmark/static/schema_slphelm.yaml +162 -0
- helm/benchmark/static/schema_social_audio.yaml +224 -0
- helm/benchmark/static/schema_sql.yaml +171 -0
- helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
- helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
- helm/benchmark/static/schema_vhelm.yaml +129 -56
- helm/benchmark/static/schema_video.yaml +219 -0
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
- helm/benchmark/static_build/assets/index-94295e78.js +10 -0
- helm/benchmark/static_build/assets/index-b9779128.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
- helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png +0 -0
- helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
- helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
- helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-38a10867.js} +2 -2
- helm/benchmark/static_build/config.js +1 -1
- helm/benchmark/static_build/index.html +6 -6
- helm/benchmark/window_services/default_window_service.py +1 -1
- helm/benchmark/window_services/encoder_decoder_window_service.py +4 -4
- helm/benchmark/window_services/ice_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
- helm/benchmark/window_services/local_window_service.py +2 -2
- helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
- helm/benchmark/window_services/test_bloom_window_service.py +3 -3
- helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
- helm/benchmark/window_services/test_gptj_window_service.py +8 -3
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
- helm/benchmark/window_services/test_openai_window_service.py +8 -3
- helm/benchmark/window_services/test_opt_window_service.py +3 -3
- helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
- helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
- helm/benchmark/window_services/test_t511b_window_service.py +3 -3
- helm/benchmark/window_services/test_ul2_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +4 -5
- helm/benchmark/window_services/test_yalm_window_service.py +3 -3
- helm/benchmark/window_services/tokenizer_service.py +7 -8
- helm/benchmark/window_services/yalm_window_service.py +1 -1
- helm/clients/ai21_client.py +3 -3
- helm/clients/aleph_alpha_client.py +1 -1
- helm/clients/anthropic_client.py +69 -29
- helm/clients/audio_language/__init__.py +0 -0
- helm/clients/audio_language/diva_llama_client.py +120 -0
- helm/clients/audio_language/llama_omni_client.py +198 -0
- helm/clients/audio_language/qwen2_5_omni_client.py +197 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +190 -0
- helm/clients/audio_language/qwen_audiolm_client.py +152 -0
- helm/clients/audio_language/test.py +62 -0
- helm/clients/auto_client.py +4 -2
- helm/clients/azure_openai_client.py +55 -0
- helm/clients/bedrock_client.py +203 -7
- helm/clients/bedrock_utils.py +33 -0
- helm/clients/client.py +7 -7
- helm/clients/clip_scorers/clip_scorer.py +1 -1
- helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
- helm/clients/cohere_client.py +3 -3
- helm/clients/google_client.py +1 -1
- helm/clients/grok_client.py +36 -0
- helm/clients/http_model_client.py +1 -1
- helm/clients/huggingface_client.py +52 -21
- helm/clients/huggingface_pipeline_client.py +138 -0
- helm/clients/ibm_client.py +267 -0
- helm/clients/image_generation/adobe_vision_client.py +1 -1
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
- helm/clients/image_generation/cogview2_client.py +1 -1
- helm/clients/image_generation/dalle2_client.py +1 -1
- helm/clients/image_generation/dalle3_client.py +2 -2
- helm/clients/image_generation/dalle_mini/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/data.py +1 -1
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
- helm/clients/image_generation/dalle_mini/model/configuration.py +2 -2
- helm/clients/image_generation/dalle_mini/model/modeling.py +3 -3
- helm/clients/image_generation/dalle_mini/model/processor.py +5 -5
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
- helm/clients/image_generation/dalle_mini_client.py +1 -1
- helm/clients/image_generation/deep_floyd_client.py +1 -1
- helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
- helm/clients/image_generation/lexica_client.py +1 -1
- helm/clients/image_generation/mindalle/models/__init__.py +6 -6
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
- helm/clients/image_generation/mindalle_client.py +1 -1
- helm/clients/image_generation/together_image_generation_client.py +1 -1
- helm/clients/lit_gpt_client.py +2 -2
- helm/clients/mistral_client.py +62 -18
- helm/clients/nvidia_nim_client.py +0 -3
- helm/clients/openai_client.py +308 -43
- helm/clients/openai_responses_client.py +174 -0
- helm/clients/palmyra_client.py +3 -9
- helm/clients/reka_client.py +3 -3
- helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
- helm/clients/stanfordhealthcare_claude_client.py +31 -0
- helm/clients/stanfordhealthcare_google_client.py +43 -0
- helm/clients/stanfordhealthcare_http_model_client.py +93 -0
- helm/clients/stanfordhealthcare_openai_client.py +62 -0
- helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
- helm/clients/test_client.py +1 -1
- helm/clients/test_together_client.py +6 -1
- helm/clients/together_client.py +76 -9
- helm/clients/upstage_client.py +23 -0
- helm/clients/vertexai_client.py +45 -13
- helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
- helm/clients/vision_language/huggingface_vlm_client.py +2 -2
- helm/clients/vision_language/idefics_client.py +6 -2
- helm/clients/vision_language/open_flamingo/__init__.py +2 -2
- helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
- helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
- helm/clients/vision_language/paligemma_client.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +188 -0
- helm/clients/vision_language/qwen_vlm_client.py +7 -5
- helm/clients/vllm_client.py +4 -6
- helm/clients/writer_client.py +102 -0
- helm/clients/yi_client.py +0 -3
- helm/common/audio_utils.py +111 -0
- helm/common/context.py +80 -0
- helm/common/credentials_utils.py +5 -5
- helm/common/file_caches/local_file_cache.py +1 -1
- helm/common/file_caches/test_local_file_cache.py +1 -1
- helm/common/general.py +9 -2
- helm/common/hierarchical_logger.py +46 -3
- helm/common/images_utils.py +2 -2
- helm/common/local_context.py +140 -0
- helm/common/media_object.py +2 -2
- helm/common/multimodal_request_utils.py +26 -0
- helm/common/reeval_parameters.py +12 -0
- helm/common/remote_context.py +61 -0
- helm/common/request.py +14 -2
- helm/common/response_format.py +18 -0
- helm/common/test_media_object.py +1 -1
- helm/config/model_deployments.yaml +1792 -28
- helm/config/model_metadata.yaml +1606 -51
- helm/config/tokenizer_configs.yaml +521 -4
- helm/proxy/cli.py +5 -3
- helm/proxy/critique/mechanical_turk_utils.py +1 -1
- helm/proxy/example_queries.py +1 -1
- helm/proxy/server.py +11 -4
- helm/proxy/services/remote_service.py +1 -1
- helm/proxy/services/server_service.py +22 -86
- helm/proxy/services/test_remote_service.py +2 -2
- helm/proxy/services/test_service.py +1 -1
- helm/proxy/static/general.js +122 -0
- helm/proxy/static/help.html +99 -0
- helm/proxy/static/index.css +57 -0
- helm/proxy/static/index.html +40 -0
- helm/proxy/static/index.js +456 -0
- helm/proxy/static/info-icon.png +0 -0
- helm/proxy/test_retry.py +1 -1
- helm/proxy/token_counters/auto_token_counter.py +1 -1
- helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
- helm/tokenizers/caching_tokenizer.py +2 -30
- helm/tokenizers/grok_tokenizer.py +53 -0
- helm/tokenizers/http_model_tokenizer.py +1 -1
- helm/tokenizers/huggingface_tokenizer.py +3 -3
- helm/tokenizers/lit_gpt_tokenizer.py +1 -1
- helm/tokenizers/test_anthropic_tokenizer.py +6 -2
- helm/tokenizers/test_grok_tokenizer.py +33 -0
- helm/tokenizers/test_huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_yalm_tokenizer.py +1 -1
- helm/tokenizers/tiktoken_tokenizer.py +1 -1
- helm/tokenizers/tokenizer.py +3 -1
- helm/tokenizers/yalm_tokenizer.py +3 -3
- helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
- crfm_helm-0.5.4.dist-info/METADATA +0 -350
- crfm_helm-0.5.4.dist-info/RECORD +0 -697
- helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
- helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/tokenizers/anthropic_tokenizer.py +0 -52
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info/licenses}/LICENSE +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from threading import Lock
|
|
2
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
|
+
|
|
4
|
+
import transformers
|
|
5
|
+
|
|
6
|
+
from helm.clients.client import CachingClient
|
|
7
|
+
from helm.common.cache import CacheConfig
|
|
8
|
+
from helm.common.hierarchical_logger import htrack_block, hwarn
|
|
9
|
+
from helm.common.request import GeneratedOutput, Request, RequestResult, wrap_request_time
|
|
10
|
+
from helm.proxy.retry import NonRetriableException
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
_pipelines: Dict[str, transformers.Pipeline] = {}
|
|
14
|
+
_pipelines_lock: Lock = Lock()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _get_pipeline(
|
|
18
|
+
helm_model_name: str,
|
|
19
|
+
pipeline_kwargs: Dict[str, Any],
|
|
20
|
+
) -> Any:
|
|
21
|
+
"""
|
|
22
|
+
Checks if the desired HuggingFaceModel is cached. Creates the HuggingFaceModel if it's not cached.
|
|
23
|
+
Returns the HuggingFaceModel.
|
|
24
|
+
"""
|
|
25
|
+
global _pipelines
|
|
26
|
+
global _pipelines_lock
|
|
27
|
+
with _pipelines_lock:
|
|
28
|
+
if helm_model_name not in _pipelines:
|
|
29
|
+
huggingface_model_name = pipeline_kwargs["model"]
|
|
30
|
+
with htrack_block(
|
|
31
|
+
f"Loading HuggingFace model {huggingface_model_name} (kwargs={pipeline_kwargs}) "
|
|
32
|
+
f"for HELM model {helm_model_name} with transformers.pipeline"
|
|
33
|
+
):
|
|
34
|
+
_pipelines[helm_model_name] = transformers.pipeline(**pipeline_kwargs)
|
|
35
|
+
|
|
36
|
+
return _pipelines[helm_model_name]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class HuggingFacePipelineClient(CachingClient):
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
cache_config: CacheConfig,
|
|
43
|
+
model_name: str,
|
|
44
|
+
pretrained_model_name_or_path: Optional[str] = None,
|
|
45
|
+
apply_chat_template: Optional[bool] = None,
|
|
46
|
+
**kwargs,
|
|
47
|
+
):
|
|
48
|
+
# Include `pretrained_model_name_or_path` parameter so that model deployments can use
|
|
49
|
+
# the `pretrained_model_name_or_path` arg to override `model_name`
|
|
50
|
+
super().__init__(cache_config=cache_config)
|
|
51
|
+
self._helm_model_name = model_name
|
|
52
|
+
self._pipeline_kwargs = {
|
|
53
|
+
"model": pretrained_model_name_or_path or self._helm_model_name,
|
|
54
|
+
"task": "text-generation",
|
|
55
|
+
**kwargs,
|
|
56
|
+
}
|
|
57
|
+
self._pipeline = _get_pipeline(self._helm_model_name, self._pipeline_kwargs)
|
|
58
|
+
if apply_chat_template is not None:
|
|
59
|
+
self._apply_chat_template = apply_chat_template
|
|
60
|
+
else:
|
|
61
|
+
# If the user did not explicitly configure whether the model is a chat model with `apply_chat_template` arg,
|
|
62
|
+
# auto-infer if the model is a chat model based on whether the tokenizer has a chat template.
|
|
63
|
+
# Note: Auto-inference is incorrect for some non-chat models that still have chat templates
|
|
64
|
+
# e.g. Qwen2, Qwen 2.5.
|
|
65
|
+
# For these models, the `apply_chat_template` arg should be explicitly set to false.
|
|
66
|
+
self._apply_chat_template = bool(self._pipeline.tokenizer.chat_template)
|
|
67
|
+
hwarn(
|
|
68
|
+
f"Automatically set `apply_chat_template` to {self._apply_chat_template} based on "
|
|
69
|
+
"whether the tokenizer has a chat template. "
|
|
70
|
+
"If this is incorrect, please explicitly set `apply_chat_template`."
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
def make_text_inputs(self, request: Request) -> Union[str, List[Dict[str, str]]]:
|
|
74
|
+
if request.prompt and request.messages:
|
|
75
|
+
raise NonRetriableException(f"More than one of `prompt` and `messages` was set in request: {request}")
|
|
76
|
+
# Chat model expects a list of messages as input
|
|
77
|
+
if self._apply_chat_template:
|
|
78
|
+
if request.messages:
|
|
79
|
+
return request.messages
|
|
80
|
+
else:
|
|
81
|
+
return [{"role": "user", "content": request.prompt}]
|
|
82
|
+
# Base non-chat model expects a string as input
|
|
83
|
+
else:
|
|
84
|
+
if request.messages:
|
|
85
|
+
raise NonRetriableException("Chat mesages not supported by non-chat model")
|
|
86
|
+
else:
|
|
87
|
+
return request.prompt
|
|
88
|
+
|
|
89
|
+
def make_request(self, request: Request) -> RequestResult:
|
|
90
|
+
"""Make a request"""
|
|
91
|
+
if request.model != self._helm_model_name:
|
|
92
|
+
raise NonRetriableException(
|
|
93
|
+
f"This instance of HuggingFacePipelineClient has loaded model {self._helm_model_name} but the request was for model {request.model}" # noqa: E501
|
|
94
|
+
)
|
|
95
|
+
completions: List[GeneratedOutput] = []
|
|
96
|
+
do_sample = request.temperature > 0.0
|
|
97
|
+
raw_request = {
|
|
98
|
+
"text_inputs": self.make_text_inputs(request),
|
|
99
|
+
"return_full_text": request.echo_prompt,
|
|
100
|
+
"temperature": request.temperature if do_sample else None,
|
|
101
|
+
"num_return_sequences": request.num_completions,
|
|
102
|
+
"max_new_tokens": request.max_tokens,
|
|
103
|
+
"top_p": request.top_p,
|
|
104
|
+
"top_k": request.top_k_per_token if do_sample else None,
|
|
105
|
+
"do_sample": do_sample,
|
|
106
|
+
"return_dict_in_generate": True,
|
|
107
|
+
}
|
|
108
|
+
if request.stop_sequences:
|
|
109
|
+
stop_sequence_ids = self._pipeline.tokenizer(
|
|
110
|
+
request.stop_sequences, return_token_type_ids=False, add_special_tokens=False
|
|
111
|
+
)
|
|
112
|
+
if len(stop_sequence_ids.input_ids) == 1 and len(stop_sequence_ids.input_ids[0]) == 1:
|
|
113
|
+
raw_request["eos_token_id"] = stop_sequence_ids.input_ids[0][0]
|
|
114
|
+
else:
|
|
115
|
+
raise NonRetriableException(
|
|
116
|
+
"Multiple stop sequences and stop sequences of multiple tokens, are not yet supported by HuggingFacePipelineClient" # noqa: E501
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
def do_it() -> Dict[str, Any]:
|
|
120
|
+
pipeline_outputs = self._pipeline(**raw_request)
|
|
121
|
+
return {"outputs": pipeline_outputs}
|
|
122
|
+
|
|
123
|
+
cache_key = CachingClient.make_cache_key(
|
|
124
|
+
{"pipeline_kwargs": self._pipeline_kwargs, **raw_request},
|
|
125
|
+
request,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
|
|
129
|
+
for raw_output in response["outputs"]:
|
|
130
|
+
completions.append(GeneratedOutput(text=raw_output["generated_text"], logprob=0, tokens=[]))
|
|
131
|
+
return RequestResult(
|
|
132
|
+
success=True,
|
|
133
|
+
cached=cached,
|
|
134
|
+
request_time=response["request_time"],
|
|
135
|
+
request_datetime=response["request_datetime"],
|
|
136
|
+
completions=completions,
|
|
137
|
+
embedding=[],
|
|
138
|
+
)
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
|
|
4
|
+
from helm.common.hierarchical_logger import hlog
|
|
5
|
+
from helm.common.cache import CacheConfig
|
|
6
|
+
from helm.common.request import (
|
|
7
|
+
Request,
|
|
8
|
+
RequestResult,
|
|
9
|
+
Token,
|
|
10
|
+
wrap_request_time,
|
|
11
|
+
EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
|
|
12
|
+
GeneratedOutput,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from helm.clients.client import CachingClient
|
|
16
|
+
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
17
|
+
from typing import TypeVar, Generic
|
|
18
|
+
from typing import Any, Dict, List
|
|
19
|
+
from threading import Semaphore, Lock
|
|
20
|
+
import threading
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from ibm_watsonx_ai import Credentials
|
|
24
|
+
from ibm_watsonx_ai.foundation_models import ModelInference
|
|
25
|
+
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
|
|
26
|
+
from ibm_watsonx_ai.foundation_models.schema import (
|
|
27
|
+
TextChatParameters,
|
|
28
|
+
TextGenParameters,
|
|
29
|
+
ReturnOptionProperties,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
except ModuleNotFoundError as e:
|
|
33
|
+
handle_module_not_found_error(e, ["ibm"])
|
|
34
|
+
|
|
35
|
+
# Define the maximum number of parallel executions is limited by IBM API
|
|
36
|
+
MAX_CONCURRENT_REQUESTS = 8
|
|
37
|
+
__semaphores: Dict[str, Semaphore] = dict()
|
|
38
|
+
__semaphores_lock = Lock()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_semaphore(model: str) -> Semaphore:
|
|
42
|
+
with __semaphores_lock:
|
|
43
|
+
if model not in __semaphores:
|
|
44
|
+
__semaphores[model] = threading.Semaphore(MAX_CONCURRENT_REQUESTS)
|
|
45
|
+
|
|
46
|
+
return __semaphores[model]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
T = TypeVar("T", TextGenParameters, TextChatParameters)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ModelInferenceHandler(ABC, Generic[T]):
|
|
53
|
+
@abstractmethod
|
|
54
|
+
def __init__(self, inference_engine: ModelInference):
|
|
55
|
+
"""
|
|
56
|
+
:type inference_engine: object
|
|
57
|
+
"""
|
|
58
|
+
self.inference_engine = inference_engine
|
|
59
|
+
|
|
60
|
+
@abstractmethod
|
|
61
|
+
def serve_request(self, prompt: str, params: T) -> Dict:
|
|
62
|
+
pass
|
|
63
|
+
|
|
64
|
+
@abstractmethod
|
|
65
|
+
def parse_response(self, response: dict) -> List[GeneratedOutput]:
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
@abstractmethod
|
|
69
|
+
def create_params(self, request: Request) -> T:
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GenerateInferenceHandler(ModelInferenceHandler[TextGenParameters]):
|
|
74
|
+
|
|
75
|
+
def __init__(self, inference_engine: ModelInference):
|
|
76
|
+
self.inference_engine = inference_engine
|
|
77
|
+
|
|
78
|
+
def create_params(self, request: Request) -> TextGenParameters:
|
|
79
|
+
def set_temperature_requirements():
|
|
80
|
+
# Default temperature 0.05 required by ibm/granite-13b-instruct-v2
|
|
81
|
+
if self.inference_engine.model_id == "ibm/granite-13b-instruct-v2":
|
|
82
|
+
return 0.05
|
|
83
|
+
return 1e-7 if request.temperature == 0 else request.temperature
|
|
84
|
+
|
|
85
|
+
return TextGenParameters(
|
|
86
|
+
temperature=set_temperature_requirements(),
|
|
87
|
+
top_p=request.top_p,
|
|
88
|
+
max_new_tokens=request.max_tokens,
|
|
89
|
+
return_options=ReturnOptionProperties(
|
|
90
|
+
input_text=True,
|
|
91
|
+
generated_tokens=True,
|
|
92
|
+
input_tokens=False,
|
|
93
|
+
token_logprobs=True,
|
|
94
|
+
token_ranks=False,
|
|
95
|
+
),
|
|
96
|
+
include_stop_sequence=False,
|
|
97
|
+
prompt_variables=None,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
def serve_request(self, prompt: str, params: TextGenParameters) -> Dict:
|
|
101
|
+
semaphore = _get_semaphore(self.inference_engine.model_id)
|
|
102
|
+
|
|
103
|
+
with semaphore:
|
|
104
|
+
response = self.inference_engine.generate(
|
|
105
|
+
prompt=prompt,
|
|
106
|
+
params=params,
|
|
107
|
+
)
|
|
108
|
+
return response
|
|
109
|
+
|
|
110
|
+
def parse_response(self, response: dict) -> List[GeneratedOutput]:
|
|
111
|
+
completions = []
|
|
112
|
+
try:
|
|
113
|
+
for r in response["results"]:
|
|
114
|
+
sequence_logprob: float = 0
|
|
115
|
+
tokens: List[Token] = []
|
|
116
|
+
generated_text = r["generated_text"]
|
|
117
|
+
for token_and_logprob in r["generated_tokens"]:
|
|
118
|
+
logprob = token_and_logprob.get("logprob", 0)
|
|
119
|
+
text = token_and_logprob["text"]
|
|
120
|
+
tokens.append(Token(text=text, logprob=logprob))
|
|
121
|
+
sequence_logprob += logprob
|
|
122
|
+
|
|
123
|
+
completion = GeneratedOutput(text=generated_text, logprob=sequence_logprob, tokens=tokens)
|
|
124
|
+
completions.append(completion)
|
|
125
|
+
except Exception as e:
|
|
126
|
+
hlog(f"GenerateInferenceHandler failed with exception {e} during parse_response {response}")
|
|
127
|
+
return completions
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class ChatModelInferenceHandler(ModelInferenceHandler[TextChatParameters]):
|
|
131
|
+
def __init__(self, inference_engine: ModelInference):
|
|
132
|
+
self.inference_engine = inference_engine
|
|
133
|
+
|
|
134
|
+
def create_params(self, request: Request) -> TextChatParameters:
|
|
135
|
+
return TextChatParameters(
|
|
136
|
+
logprobs=True,
|
|
137
|
+
presence_penalty=0,
|
|
138
|
+
frequency_penalty=0,
|
|
139
|
+
temperature=request.temperature,
|
|
140
|
+
max_tokens=request.max_tokens,
|
|
141
|
+
top_p=request.top_p,
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def parse_response(self, response: dict) -> List[GeneratedOutput]:
|
|
145
|
+
completions = []
|
|
146
|
+
try:
|
|
147
|
+
for raw_completion in response["choices"]:
|
|
148
|
+
sequence_logprob: float = 0
|
|
149
|
+
tokens: List[Token] = []
|
|
150
|
+
generated_text = raw_completion["message"]["content"]
|
|
151
|
+
|
|
152
|
+
for token_and_logprob in raw_completion["logprobs"]["content"]:
|
|
153
|
+
logprob = token_and_logprob["logprob"]
|
|
154
|
+
text = token_and_logprob["token"]
|
|
155
|
+
tokens.append(Token(text=text, logprob=logprob))
|
|
156
|
+
sequence_logprob += logprob
|
|
157
|
+
|
|
158
|
+
completion = GeneratedOutput(text=generated_text, logprob=sequence_logprob, tokens=tokens)
|
|
159
|
+
completions.append(completion)
|
|
160
|
+
except Exception as e:
|
|
161
|
+
hlog(f"ChatModelInferenceHandler failed with exception {e} during parse_response {response}")
|
|
162
|
+
return completions
|
|
163
|
+
|
|
164
|
+
def serve_request(self, prompt: str, params: TextChatParameters) -> Dict:
|
|
165
|
+
semaphore = _get_semaphore(self.inference_engine.model_id)
|
|
166
|
+
|
|
167
|
+
with semaphore:
|
|
168
|
+
response = self.inference_engine.chat(
|
|
169
|
+
messages=[{"role": "user", "content": prompt}],
|
|
170
|
+
params=params,
|
|
171
|
+
)
|
|
172
|
+
return response
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class IbmClient(CachingClient, ABC):
|
|
176
|
+
def __init__(
|
|
177
|
+
self,
|
|
178
|
+
cache_config: CacheConfig,
|
|
179
|
+
api_key: str,
|
|
180
|
+
region: str,
|
|
181
|
+
location: dict,
|
|
182
|
+
watsonx_model_name: str,
|
|
183
|
+
**kwargs,
|
|
184
|
+
):
|
|
185
|
+
super().__init__(cache_config=cache_config)
|
|
186
|
+
self.project_id = None
|
|
187
|
+
self.url = None
|
|
188
|
+
self.watsonx_model_name = watsonx_model_name
|
|
189
|
+
self.api_key = api_key
|
|
190
|
+
self.region = region
|
|
191
|
+
self.kwargs = kwargs
|
|
192
|
+
for entry in location:
|
|
193
|
+
if entry["region"].lower() == self.region.lower():
|
|
194
|
+
self.project_id = entry["project_id"]
|
|
195
|
+
self.url = entry["url"]
|
|
196
|
+
|
|
197
|
+
assert self.project_id is not None, (
|
|
198
|
+
"Missed project_id for specified region configuration in credentials.conf, should be in list "
|
|
199
|
+
"of JSON objects with 'region', 'url', 'project_id' per region"
|
|
200
|
+
)
|
|
201
|
+
assert self.url is not None, (
|
|
202
|
+
"Missed url for specified region configuration in credentials.conf, should be in list "
|
|
203
|
+
"of JSON objects with 'region', 'url', 'project_id' per region"
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
self.inference_engine = ModelInference(
|
|
207
|
+
model_id=self.watsonx_model_name,
|
|
208
|
+
params={GenParams.MAX_NEW_TOKENS: 2000},
|
|
209
|
+
credentials=Credentials(api_key=api_key, url=self.url),
|
|
210
|
+
project_id=self.project_id,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
hlog("Started IBM Client")
|
|
214
|
+
|
|
215
|
+
@abstractmethod
|
|
216
|
+
def make_request(self, request: Request) -> RequestResult:
|
|
217
|
+
pass
|
|
218
|
+
|
|
219
|
+
def do_call(self, inference_handler: ModelInferenceHandler, request: Request) -> RequestResult:
|
|
220
|
+
params = inference_handler.create_params(request=request)
|
|
221
|
+
|
|
222
|
+
def do_it() -> Dict[str, Any]:
|
|
223
|
+
return inference_handler.serve_request(prompt=request.prompt, params=params)
|
|
224
|
+
|
|
225
|
+
raw_request = {"prompt": request.prompt, "params": params.to_dict(), "model": request.model}
|
|
226
|
+
|
|
227
|
+
cache_key = CachingClient.make_cache_key(raw_request, request)
|
|
228
|
+
response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
|
|
229
|
+
completions = inference_handler.parse_response(response)
|
|
230
|
+
return RequestResult(
|
|
231
|
+
success=True,
|
|
232
|
+
cached=cached,
|
|
233
|
+
request_time=response["request_time"],
|
|
234
|
+
request_datetime=response.get("request_datetime"),
|
|
235
|
+
completions=completions,
|
|
236
|
+
embedding=[],
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class IbmChatClient(IbmClient):
|
|
241
|
+
|
|
242
|
+
def make_request(self, request: Request) -> RequestResult:
|
|
243
|
+
# Embedding not supported for this model
|
|
244
|
+
if request.embedding:
|
|
245
|
+
return EMBEDDING_UNAVAILABLE_REQUEST_RESULT
|
|
246
|
+
try:
|
|
247
|
+
return self.do_call(
|
|
248
|
+
inference_handler=ChatModelInferenceHandler(inference_engine=self.inference_engine), request=request
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
except Exception as e:
|
|
252
|
+
error: str = f"IBM Chat client Model error: {e}"
|
|
253
|
+
return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class IbmTextClient(IbmClient):
|
|
257
|
+
def make_request(self, request: Request) -> RequestResult:
|
|
258
|
+
# Embedding not supported for this model
|
|
259
|
+
if request.embedding:
|
|
260
|
+
return EMBEDDING_UNAVAILABLE_REQUEST_RESULT
|
|
261
|
+
try:
|
|
262
|
+
return self.do_call(
|
|
263
|
+
inference_handler=GenerateInferenceHandler(inference_engine=self.inference_engine), request=request
|
|
264
|
+
)
|
|
265
|
+
except Exception as e:
|
|
266
|
+
error: str = f"IBM Text client Model error: {e}"
|
|
267
|
+
return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
|
|
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
|
|
|
9
9
|
DecodeRequestResult,
|
|
10
10
|
)
|
|
11
11
|
from helm.clients.client import Client, CachingClient
|
|
12
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
12
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class AdobeVisionClient(Client):
|
|
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
|
|
|
9
9
|
DecodeRequestResult,
|
|
10
10
|
)
|
|
11
11
|
from helm.clients.client import Client, CachingClient
|
|
12
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
12
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class AlephAlphaImageGenerationClient(Client):
|
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
@Contact : dm18@mails.tsinghua.edu.cn
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from .direct_sr import DirectSuperResolution
|
|
10
|
-
from .iterative_sr import IterativeSuperResolution
|
|
11
|
-
from .sr_group import SRGroup
|
|
9
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.direct_sr import DirectSuperResolution
|
|
10
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.iterative_sr import IterativeSuperResolution
|
|
11
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.sr_group import SRGroup
|
|
12
12
|
|
|
13
13
|
DirectSuperResolution
|
|
14
14
|
IterativeSuperResolution
|
|
@@ -10,8 +10,11 @@
|
|
|
10
10
|
import torch
|
|
11
11
|
from icetk import icetk as tokenizer
|
|
12
12
|
|
|
13
|
-
from .dsr_sampling import
|
|
14
|
-
|
|
13
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.dsr_sampling import (
|
|
14
|
+
filling_sequence_dsr,
|
|
15
|
+
IterativeEntfilterStrategy,
|
|
16
|
+
)
|
|
17
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.dsr_model import DsrModel
|
|
15
18
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
16
19
|
|
|
17
20
|
|
|
@@ -8,8 +8,11 @@
|
|
|
8
8
|
import torch
|
|
9
9
|
from icetk import icetk as tokenizer
|
|
10
10
|
|
|
11
|
-
from .itersr_sampling import
|
|
12
|
-
|
|
11
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.itersr_sampling import (
|
|
12
|
+
filling_sequence_itersr,
|
|
13
|
+
IterativeEntfilterStrategy,
|
|
14
|
+
)
|
|
15
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.itersr_model import ItersrModel
|
|
13
16
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
14
17
|
|
|
15
18
|
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
# here put the import lib
|
|
10
|
-
from .direct_sr import DirectSuperResolution
|
|
11
|
-
from .iterative_sr import IterativeSuperResolution
|
|
10
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.direct_sr import DirectSuperResolution
|
|
11
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.iterative_sr import IterativeSuperResolution
|
|
12
12
|
|
|
13
13
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
14
14
|
|
|
@@ -20,7 +20,7 @@ from helm.common.tokenization_request import (
|
|
|
20
20
|
)
|
|
21
21
|
from helm.clients.client import Client, CachingClient
|
|
22
22
|
from helm.clients.image_generation.cogview2.coglm_strategy import CoglmStrategy
|
|
23
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
23
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class CogView2Client(Client):
|
|
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
|
|
|
15
15
|
)
|
|
16
16
|
from helm.clients.moderation_api_client import ModerationAPIClient
|
|
17
17
|
from helm.clients.client import Client, CachingClient
|
|
18
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
18
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
19
19
|
|
|
20
20
|
try:
|
|
21
21
|
import openai
|
|
@@ -7,8 +7,8 @@ from helm.common.optional_dependencies import handle_module_not_found_error
|
|
|
7
7
|
from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
|
|
8
8
|
from helm.clients.moderation_api_client import ModerationAPIClient
|
|
9
9
|
from helm.clients.client import CachingClient
|
|
10
|
-
from .dalle2_client import DALLE2Client
|
|
11
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
10
|
+
from helm.clients.image_generation.dalle2_client import DALLE2Client
|
|
11
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
import openai
|
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
from datasets import Dataset, load_dataset
|
|
8
8
|
|
|
9
|
-
from .model.text import TextNormalizer
|
|
9
|
+
from helm.clients.image_generation.dalle_mini.model.text import TextNormalizer
|
|
10
10
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
11
11
|
|
|
12
12
|
try:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from .configuration import DalleBartConfig
|
|
2
|
-
from .modeling import DalleBart
|
|
3
|
-
from .partitions import set_partitions
|
|
4
|
-
from .processor import DalleBartProcessor
|
|
5
|
-
from .tokenizer import DalleBartTokenizer
|
|
1
|
+
from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
|
|
2
|
+
from helm.clients.image_generation.dalle_mini.model.modeling import DalleBart
|
|
3
|
+
from helm.clients.image_generation.dalle_mini.model.partitions import set_partitions
|
|
4
|
+
from helm.clients.image_generation.dalle_mini.model.processor import DalleBartProcessor
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.model.tokenizer import DalleBartTokenizer
|
|
@@ -12,13 +12,13 @@
|
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
|
-
"""
|
|
15
|
+
"""DalleBart model configuration"""
|
|
16
16
|
import warnings
|
|
17
17
|
|
|
18
18
|
from transformers.configuration_utils import PretrainedConfig
|
|
19
19
|
from transformers.utils import logging
|
|
20
20
|
|
|
21
|
-
from .utils import PretrainedFromWandbMixin
|
|
21
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
22
22
|
|
|
23
23
|
logger = logging.get_logger(__name__)
|
|
24
24
|
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
|
-
"""
|
|
15
|
+
"""DalleBart model."""
|
|
16
16
|
|
|
17
17
|
import math
|
|
18
18
|
from functools import partial
|
|
@@ -35,8 +35,8 @@ from transformers.utils import ModelOutput, logging
|
|
|
35
35
|
from transformers.generation.configuration_utils import GenerationConfig
|
|
36
36
|
|
|
37
37
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
38
|
-
from .configuration import DalleBartConfig
|
|
39
|
-
from .utils import PretrainedFromWandbMixin
|
|
38
|
+
from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
|
|
39
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
40
40
|
|
|
41
41
|
try:
|
|
42
42
|
import flax
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""DalleBart processor"""
|
|
2
2
|
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
-
from .configuration import DalleBartConfig
|
|
6
|
-
from .text import TextNormalizer
|
|
7
|
-
from .tokenizer import DalleBartTokenizer
|
|
8
|
-
from .utils import PretrainedFromWandbMixin
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
|
|
6
|
+
from helm.clients.image_generation.dalle_mini.model.text import TextNormalizer
|
|
7
|
+
from helm.clients.image_generation.dalle_mini.model.tokenizer import DalleBartTokenizer
|
|
8
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
9
9
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
10
10
|
|
|
11
11
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""DalleBart tokenizer"""
|
|
2
2
|
|
|
3
3
|
from transformers import BartTokenizerFast
|
|
4
4
|
|
|
5
|
-
from .utils import PretrainedFromWandbMixin
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class DalleBartTokenizer(PretrainedFromWandbMixin, BartTokenizerFast):
|
|
@@ -1 +1 @@
|
|
|
1
|
-
from . import *
|
|
1
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax import *
|
|
@@ -2,8 +2,8 @@ import re
|
|
|
2
2
|
|
|
3
3
|
import torch
|
|
4
4
|
|
|
5
|
-
from .modeling_flax_vqgan import VQModel
|
|
6
|
-
from .configuration_vqgan import VQGANConfig
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax.modeling_flax_vqgan import VQModel
|
|
6
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax.configuration_vqgan import VQGANConfig
|
|
7
7
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
8
8
|
|
|
9
9
|
try:
|
|
@@ -6,7 +6,7 @@ import math
|
|
|
6
6
|
|
|
7
7
|
from transformers.modeling_flax_utils import FlaxPreTrainedModel
|
|
8
8
|
|
|
9
|
-
from .configuration_vqgan import VQGANConfig
|
|
9
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax.configuration_vqgan import VQGANConfig
|
|
10
10
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
11
11
|
|
|
12
12
|
try:
|
|
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
|
|
|
15
15
|
TokenizationRequestResult,
|
|
16
16
|
)
|
|
17
17
|
from helm.clients.client import Client, CachingClient
|
|
18
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
18
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class DALLEMiniClient(Client):
|
|
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
|
|
|
9
9
|
DecodeRequestResult,
|
|
10
10
|
)
|
|
11
11
|
from helm.clients.client import Client, CachingClient
|
|
12
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
12
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class DeepFloydClient(Client):
|
|
@@ -17,7 +17,7 @@ from helm.common.tokenization_request import (
|
|
|
17
17
|
TokenizationRequestResult,
|
|
18
18
|
)
|
|
19
19
|
from helm.clients.client import Client, CachingClient
|
|
20
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
20
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
_models_lock: Lock = Lock()
|