crfm-helm 0.5.6__py3-none-any.whl → 0.5.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/METADATA +72 -130
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/RECORD +372 -305
- helm/benchmark/adaptation/adapter_spec.py +10 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
- helm/benchmark/annotation/aci_bench_annotator.py +11 -22
- helm/benchmark/annotation/air_bench_annotator.py +1 -1
- helm/benchmark/annotation/alrage_annotator.py +90 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
- helm/benchmark/annotation/dischargeme_annotator.py +11 -22
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/med_dialog_annotator.py +11 -22
- helm/benchmark/annotation/medalign_annotator.py +11 -22
- helm/benchmark/annotation/medi_qa_annotator.py +11 -22
- helm/benchmark/annotation/medication_qa_annotator.py +11 -22
- helm/benchmark/annotation/mental_health_annotator.py +11 -22
- helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
- helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
- helm/benchmark/annotation/model_as_judge.py +23 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
- helm/benchmark/metrics/air_bench_metrics.py +3157 -1
- helm/benchmark/metrics/alrage_metric.py +35 -0
- helm/benchmark/metrics/basic_metrics.py +267 -2
- helm/benchmark/metrics/bbq_metrics.py +12 -0
- helm/benchmark/metrics/classification_metrics.py +19 -1
- helm/benchmark/metrics/codeinsights_code_efficiency_metrics.py +186 -0
- helm/benchmark/metrics/codeinsights_code_evaluation_metrics.py +477 -0
- helm/benchmark/metrics/codeinsights_correct_code_metrics.py +366 -0
- helm/benchmark/metrics/codeinsights_edge_case_metrics.py +92 -0
- helm/benchmark/metrics/codeinsights_metric_specs.py +51 -0
- helm/benchmark/metrics/comet_metric.py +1 -1
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
- helm/benchmark/metrics/copyright_metrics.py +1 -1
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +1 -1
- helm/benchmark/metrics/dry_run_metrics.py +30 -1
- helm/benchmark/metrics/efficiency_metrics.py +74 -0
- helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
- helm/benchmark/metrics/evaluate_reference_metrics.py +312 -1
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
- helm/benchmark/metrics/ifeval_metrics.py +13 -1
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +13 -2
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
- helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
- helm/benchmark/metrics/language_modeling_metrics.py +13 -1
- helm/benchmark/metrics/live_qa_metrics.py +13 -1
- helm/benchmark/metrics/llm_jury_metrics.py +13 -1
- helm/benchmark/metrics/lmkt_metric_specs.py +12 -0
- helm/benchmark/metrics/lmkt_metrics.py +47 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
- helm/benchmark/metrics/medec_metrics.py +25 -2
- helm/benchmark/metrics/melt_toxicity_metric.py +1 -1
- helm/benchmark/metrics/metric.py +25 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
- helm/benchmark/metrics/omni_math_metrics.py +13 -1
- helm/benchmark/metrics/safety_metrics.py +13 -1
- helm/benchmark/metrics/seahelm_metrics.py +14 -1
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/summarization_metrics.py +129 -1
- helm/benchmark/metrics/toxicity_metrics.py +31 -1
- helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
- helm/benchmark/metrics/wildbench_metrics.py +21 -1
- helm/benchmark/model_deployment_registry.py +11 -19
- helm/benchmark/presentation/create_plots.py +11 -2
- helm/benchmark/presentation/run_display.py +13 -3
- helm/benchmark/presentation/run_entry.py +2 -2
- helm/benchmark/presentation/schema.py +10 -22
- helm/benchmark/presentation/summarize.py +189 -14
- helm/benchmark/presentation/taxonomy_info.py +20 -0
- helm/benchmark/presentation/test_create_plots.py +4 -1
- helm/benchmark/run.py +15 -4
- helm/benchmark/run_expander.py +4 -0
- helm/benchmark/run_specs/arabic_run_specs.py +197 -0
- helm/benchmark/run_specs/bluex_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +2 -55
- helm/benchmark/run_specs/codeinsights_run_specs.py +192 -0
- helm/benchmark/run_specs/healthqa_br_run_specs.py +40 -0
- helm/benchmark/run_specs/heim_run_specs.py +3 -1
- helm/benchmark/run_specs/lmkt_run_specs.py +144 -0
- helm/benchmark/run_specs/long_context_run_specs.py +48 -1
- helm/benchmark/run_specs/medhelm/__init__.py +0 -0
- helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +363 -53
- helm/benchmark/run_specs/multilingual_run_specs.py +50 -0
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +11 -13
- helm/benchmark/runner.py +7 -0
- helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
- helm/benchmark/scenarios/air_bench_scenario.py +21 -0
- helm/benchmark/scenarios/alghafa_scenario.py +126 -0
- helm/benchmark/scenarios/alrage_scenario.py +54 -0
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
- helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
- helm/benchmark/scenarios/arabic_mmlu_scenario.py +82 -0
- helm/benchmark/scenarios/aratrust_scenario.py +95 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +74 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +70 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -53
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -21
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -52
- helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
- helm/benchmark/scenarios/banking77_scenario.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +15 -0
- helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
- helm/benchmark/scenarios/bluex_scenario.py +70 -0
- helm/benchmark/scenarios/bold_scenario.py +15 -0
- helm/benchmark/scenarios/boolq_scenario.py +20 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
- helm/benchmark/scenarios/clear_scenario.py +23 -0
- helm/benchmark/scenarios/cleva_scenario.py +480 -1
- helm/benchmark/scenarios/code_scenario.py +28 -0
- helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py +197 -0
- helm/benchmark/scenarios/codeinsights_correct_code_scenario.py +78 -0
- helm/benchmark/scenarios/codeinsights_edge_case_scenario.py +192 -0
- helm/benchmark/scenarios/codeinsights_student_coding_scenario.py +162 -0
- helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py +188 -0
- helm/benchmark/scenarios/commonsense_scenario.py +32 -0
- helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
- helm/benchmark/scenarios/copyright_scenario.py +35 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
- helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
- helm/benchmark/scenarios/disinformation_scenario.py +22 -0
- helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
- helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
- helm/benchmark/scenarios/exams_multilingual_scenario.py +115 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
- helm/benchmark/scenarios/financebench_scenario.py +21 -0
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
- helm/benchmark/scenarios/gpqa_scenario.py +18 -0
- helm/benchmark/scenarios/grammar_scenario.py +20 -1
- helm/benchmark/scenarios/gsm_scenario.py +21 -0
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
- helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
- helm/benchmark/scenarios/headqa_scenario.py +22 -0
- helm/benchmark/scenarios/healthqa_br_scenario.py +80 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
- helm/benchmark/scenarios/ice_scenario.py +21 -1
- helm/benchmark/scenarios/ifeval_scenario.py +18 -0
- helm/benchmark/scenarios/imdb_scenario.py +15 -0
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +111 -0
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +1 -1
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
- helm/benchmark/scenarios/koala_scenario.py +21 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
- helm/benchmark/scenarios/legal_support_scenario.py +13 -0
- helm/benchmark/scenarios/legalbench_scenario.py +19 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
- helm/benchmark/scenarios/lextreme_scenario.py +11 -0
- helm/benchmark/scenarios/lmkt_scenarios.py +288 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
- helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
- helm/benchmark/scenarios/math_scenario.py +54 -20
- helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
- helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
- helm/benchmark/scenarios/med_qa_scenario.py +20 -0
- helm/benchmark/scenarios/medalign_scenario.py +23 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +19 -125
- helm/benchmark/scenarios/medbullets_scenario.py +22 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
- helm/benchmark/scenarios/medec_scenario.py +23 -0
- helm/benchmark/scenarios/medhallu_scenario.py +23 -0
- helm/benchmark/scenarios/medhelm/__init__.py +0 -0
- helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
- helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
- helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
- helm/benchmark/scenarios/melt_scenarios.py +2 -2
- helm/benchmark/scenarios/mental_health_scenario.py +23 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +25 -1
- helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
- helm/benchmark/scenarios/mmlu_scenario.py +21 -0
- helm/benchmark/scenarios/mmmlu_scenario.py +85 -0
- helm/benchmark/scenarios/msmarco_scenario.py +30 -0
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
- helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
- helm/benchmark/scenarios/omni_math_scenario.py +18 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
- helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
- helm/benchmark/scenarios/quac_scenario.py +14 -0
- helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
- helm/benchmark/scenarios/raft_scenario.py +15 -0
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
- helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
- helm/benchmark/scenarios/scenario.py +31 -0
- helm/benchmark/scenarios/seahelm_scenario.py +350 -2
- helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +23 -1
- helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
- helm/benchmark/scenarios/situation_prompts.yaml +49 -0
- helm/benchmark/scenarios/spider_scenario.py +18 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
- helm/benchmark/scenarios/summarization_scenario.py +37 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
- helm/benchmark/scenarios/test_alghafa_scenario.py +29 -0
- helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
- helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
- helm/benchmark/scenarios/test_aratrust_scenario.py +21 -0
- helm/benchmark/scenarios/test_bluex_scenario.py +59 -0
- helm/benchmark/scenarios/test_exams_multilingual_scenario.py +29 -0
- helm/benchmark/scenarios/test_healtha_br_scenario.py +57 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
- helm/benchmark/scenarios/the_pile_scenario.py +13 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
- helm/benchmark/scenarios/vicuna_scenario.py +21 -1
- helm/benchmark/scenarios/wikifact_scenario.py +20 -0
- helm/benchmark/scenarios/wildbench_scenario.py +18 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
- helm/benchmark/slurm_jobs.py +1 -2
- helm/benchmark/slurm_runner.py +8 -1
- helm/benchmark/static/schema_arabic.yaml +271 -0
- helm/benchmark/static/schema_classic.yaml +0 -17
- helm/benchmark/static/schema_long_context.yaml +17 -18
- helm/benchmark/static/schema_medhelm.yaml +36 -0
- helm/benchmark/static/schema_slp.yaml +219 -0
- helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
- helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
- helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
- helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
- helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
- helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
- helm/benchmark/static_build/index.html +5 -6
- helm/benchmark/window_services/image_generation/clip_window_service.py +1 -3
- helm/clients/ai21_client.py +2 -0
- helm/clients/aleph_alpha_client.py +2 -0
- helm/clients/anthropic_client.py +7 -1
- helm/clients/audio_language/diva_llama_client.py +2 -0
- helm/clients/audio_language/llama_omni/arguments.py +61 -0
- helm/clients/audio_language/llama_omni/constants.py +9 -0
- helm/clients/audio_language/llama_omni/conversation.py +213 -0
- helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
- helm/clients/audio_language/llama_omni/model/builder.py +88 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
- helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
- helm/clients/audio_language/llama_omni/preprocess.py +295 -0
- helm/clients/audio_language/llama_omni/utils.py +202 -0
- helm/clients/audio_language/llama_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_5_omni_client.py +21 -8
- helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
- helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
- helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
- helm/clients/bedrock_client.py +63 -6
- helm/clients/cohere_client.py +3 -0
- helm/clients/dspy_client.py +135 -0
- helm/clients/google_client.py +2 -0
- helm/clients/http_model_client.py +2 -0
- helm/clients/huggingface_client.py +4 -3
- helm/clients/ibm_client.py +3 -1
- helm/clients/image_generation/adobe_vision_client.py +2 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
- helm/clients/image_generation/cogview2_client.py +2 -1
- helm/clients/image_generation/dalle2_client.py +2 -0
- helm/clients/image_generation/dalle_mini_client.py +2 -1
- helm/clients/image_generation/deep_floyd_client.py +2 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
- helm/clients/image_generation/lexica_client.py +2 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
- helm/clients/image_generation/mindalle_client.py +2 -1
- helm/clients/image_generation/together_image_generation_client.py +2 -0
- helm/clients/megatron_client.py +2 -0
- helm/clients/mistral_client.py +2 -0
- helm/clients/moderation_api_client.py +2 -0
- helm/clients/openai_client.py +38 -21
- helm/clients/openai_responses_client.py +34 -8
- helm/clients/openrouter_client.py +31 -0
- helm/clients/palmyra_client.py +2 -1
- helm/clients/reka_client.py +2 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
- helm/clients/stanfordhealthcare_http_model_client.py +2 -0
- helm/clients/test_huggingface_client.py +3 -3
- helm/clients/test_openrouter_client.py +69 -0
- helm/clients/together_client.py +52 -13
- helm/clients/vertexai_client.py +23 -11
- helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
- helm/clients/vision_language/huggingface_vlm_client.py +2 -0
- helm/clients/vision_language/idefics_client.py +2 -1
- helm/clients/vision_language/open_flamingo_client.py +2 -1
- helm/clients/vision_language/paligemma_client.py +2 -1
- helm/clients/vision_language/palmyra_vision_client.py +2 -0
- helm/clients/vision_language/qwen2_vlm_client.py +2 -1
- helm/clients/vision_language/qwen_vlm_client.py +2 -1
- helm/clients/vllm_client.py +43 -7
- helm/clients/vllm_granite_thinking_client.py +56 -0
- helm/clients/writer_client.py +5 -2
- helm/common/critique_request.py +0 -1
- helm/common/hierarchical_logger.py +103 -34
- helm/common/object_spec.py +23 -8
- helm/common/optional_dependencies.py +1 -1
- helm/common/test_general.py +4 -0
- helm/common/test_logging.py +94 -0
- helm/config/model_deployments.yaml +1001 -187
- helm/config/model_metadata.yaml +602 -18
- helm/config/tokenizer_configs.yaml +202 -5
- helm/proxy/cli.py +1 -1
- helm/proxy/example_queries.py +8 -8
- helm/proxy/retry.py +5 -0
- helm/proxy/server.py +2 -1
- helm/proxy/static/index.css +4 -0
- helm/proxy/static/index.js +7 -1
- helm/tokenizers/auto_tokenizer.py +2 -2
- helm/tokenizers/grok_tokenizer.py +2 -0
- helm/benchmark/metrics/aci_bench_metrics.py +0 -14
- helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
- helm/benchmark/metrics/dischargeme_metrics.py +0 -14
- helm/benchmark/metrics/med_dialog_metrics.py +0 -14
- helm/benchmark/metrics/medalign_metrics.py +0 -14
- helm/benchmark/metrics/medi_qa_metrics.py +0 -14
- helm/benchmark/metrics/medication_qa_metrics.py +0 -14
- helm/benchmark/metrics/mental_health_metrics.py +0 -14
- helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
- helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
- helm/benchmark/metrics/numeracy_metrics.py +0 -72
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
- helm/benchmark/metrics/test_numeracy_metrics.py +0 -95
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +0 -103
- helm/benchmark/scenarios/numeracy_scenario.py +0 -794
- helm/benchmark/static_build/assets/index-94295e78.js +0 -10
- helm/benchmark/static_build/assets/index-b9779128.css +0 -1
- helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
- helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
- helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
- /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
- /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
- /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
- /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
- /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
|
@@ -9,7 +9,7 @@ from helm.clients.audio_language.qwen_omni.qwen2_5_omni_utils.v2_5 import proces
|
|
|
9
9
|
|
|
10
10
|
from helm.common.cache import CacheConfig
|
|
11
11
|
from helm.common.gpu_utils import get_torch_device_name
|
|
12
|
-
from helm.common.hierarchical_logger import hlog, htrack_block
|
|
12
|
+
from helm.common.hierarchical_logger import hexception, hlog, htrack_block
|
|
13
13
|
from helm.common.media_object import TEXT_TYPE
|
|
14
14
|
from helm.common.request import Request, RequestResult, GeneratedOutput, Token
|
|
15
15
|
from helm.common.request import wrap_request_time
|
|
@@ -27,6 +27,7 @@ class LoadedQwen2_5OmniModelProcessor:
|
|
|
27
27
|
_models_lock: Lock = Lock()
|
|
28
28
|
_models: Dict[str, Optional[LoadedQwen2_5OmniModelProcessor]] = {
|
|
29
29
|
"Qwen/Qwen2.5-Omni-7B": None,
|
|
30
|
+
"Qwen/Qwen2.5-Omni-3B": None,
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
|
|
@@ -39,7 +40,7 @@ class Qwen2_5OmniAudioLMClient(CachingClient):
|
|
|
39
40
|
Paper: https://arxiv.org/abs/2503.20215
|
|
40
41
|
"""
|
|
41
42
|
|
|
42
|
-
END_OF_TEXT_TOKEN: str = "<|endoftext
|
|
43
|
+
END_OF_TEXT_TOKEN: str = "<|endoftext|>"
|
|
43
44
|
|
|
44
45
|
def __init__(self, cache_config: CacheConfig):
|
|
45
46
|
super().__init__(cache_config=cache_config)
|
|
@@ -52,6 +53,8 @@ class Qwen2_5OmniAudioLMClient(CachingClient):
|
|
|
52
53
|
model_name: str
|
|
53
54
|
if helm_model_name == "qwen2.5-omni-7b":
|
|
54
55
|
model_name = "Qwen/Qwen2.5-Omni-7B"
|
|
56
|
+
elif helm_model_name == "qwen2.5-omni-3b":
|
|
57
|
+
model_name = "Qwen/Qwen2.5-Omni-3B"
|
|
55
58
|
else:
|
|
56
59
|
raise ValueError(f"Unhandled model name: {helm_model_name}")
|
|
57
60
|
|
|
@@ -60,12 +63,21 @@ class Qwen2_5OmniAudioLMClient(CachingClient):
|
|
|
60
63
|
loaded_model_processor = _models[model_name]
|
|
61
64
|
if loaded_model_processor is None:
|
|
62
65
|
hlog(f"Loading model {model_name} and caching in memory...")
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
66
|
+
if torch.cuda.get_device_capability()[0] >= 8:
|
|
67
|
+
# Use flash attention 2 for A100 and H100 GPUs
|
|
68
|
+
model = Qwen2_5OmniModel.from_pretrained(
|
|
69
|
+
model_name,
|
|
70
|
+
attn_implementation="flash_attention_2",
|
|
71
|
+
torch_dtype=torch.bfloat16,
|
|
72
|
+
device_map=self._device,
|
|
73
|
+
).eval()
|
|
74
|
+
else:
|
|
75
|
+
# Use default attention for other GPUs
|
|
76
|
+
model = Qwen2_5OmniModel.from_pretrained(
|
|
77
|
+
model_name,
|
|
78
|
+
torch_dtype=torch.float16,
|
|
79
|
+
device_map=self._device,
|
|
80
|
+
).eval()
|
|
69
81
|
tokenizer = Qwen2_5OmniProcessor.from_pretrained(
|
|
70
82
|
model_name,
|
|
71
83
|
)
|
|
@@ -168,6 +180,7 @@ class Qwen2_5OmniAudioLMClient(CachingClient):
|
|
|
168
180
|
)
|
|
169
181
|
result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
|
|
170
182
|
except RuntimeError as model_error:
|
|
183
|
+
hexception(model_error)
|
|
171
184
|
return RequestResult(
|
|
172
185
|
success=False, cached=False, error=str(model_error), completions=[], embedding=[]
|
|
173
186
|
)
|
|
@@ -7,7 +7,7 @@ from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor
|
|
|
7
7
|
|
|
8
8
|
from helm.common.cache import CacheConfig
|
|
9
9
|
from helm.common.gpu_utils import get_torch_device_name
|
|
10
|
-
from helm.common.hierarchical_logger import hlog, htrack_block
|
|
10
|
+
from helm.common.hierarchical_logger import hexception, hlog, htrack_block
|
|
11
11
|
from helm.common.media_object import TEXT_TYPE
|
|
12
12
|
from helm.common.request import Request, RequestResult, GeneratedOutput, Token
|
|
13
13
|
from helm.common.request import wrap_request_time
|
|
@@ -161,6 +161,7 @@ class Qwen2AudioLMClient(CachingClient):
|
|
|
161
161
|
)
|
|
162
162
|
result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
|
|
163
163
|
except RuntimeError as model_error:
|
|
164
|
+
hexception(model_error)
|
|
164
165
|
return RequestResult(
|
|
165
166
|
success=False, cached=False, error=str(model_error), completions=[], embedding=[]
|
|
166
167
|
)
|
|
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
6
6
|
|
|
7
7
|
from helm.common.cache import CacheConfig
|
|
8
8
|
from helm.common.gpu_utils import get_torch_device_name
|
|
9
|
-
from helm.common.hierarchical_logger import hlog, htrack_block
|
|
9
|
+
from helm.common.hierarchical_logger import hexception, hlog, htrack_block
|
|
10
10
|
from helm.common.media_object import TEXT_TYPE
|
|
11
11
|
from helm.common.request import Request, RequestResult, GeneratedOutput, Token
|
|
12
12
|
from helm.common.request import wrap_request_time
|
|
@@ -124,6 +124,7 @@ class QwenAudioLMClient(CachingClient):
|
|
|
124
124
|
)
|
|
125
125
|
result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
|
|
126
126
|
except RuntimeError as model_error:
|
|
127
|
+
hexception(model_error)
|
|
127
128
|
return RequestResult(
|
|
128
129
|
success=False, cached=False, error=str(model_error), completions=[], embedding=[]
|
|
129
130
|
)
|
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
2
|
+
# This file was automatically generated from src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py.
|
|
3
|
+
# Do NOT edit this file manually as any edits will be overwritten by the generation of
|
|
4
|
+
# the file from the modular. If any change should be done, please apply the change to the
|
|
5
|
+
# modular_qwen2_5_omni.py file directly. One of our CI enforces this.
|
|
6
|
+
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
7
|
+
# coding=utf-8
|
|
8
|
+
# Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
|
|
9
|
+
#
|
|
10
|
+
#
|
|
11
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
12
|
+
# you may not use this file except in compliance with the License.
|
|
13
|
+
# You may obtain a copy of the License at
|
|
14
|
+
#
|
|
15
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
16
|
+
#
|
|
17
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
18
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
19
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
20
|
+
# See the License for the specific language governing permissions and
|
|
21
|
+
# limitations under the License.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
from transformers.configuration_utils import PretrainedConfig
|
|
25
|
+
from transformers.utils import logging
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
logger = logging.get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Qwen2_5OmniVisionEncoderConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`Qwen2_5OmniThinkerVision`].
    It is used to instantiate a Qwen2.5-VL vision encoder according to the specified arguments,
    defining the model architecture. Instantiating a configuration with the defaults will yield a
    similar configuration to that of the vision encoder of the Qwen2.5-VL architecture.

    e.g. [Qwen/Qwen2.5-Omni-7B](https://huggingface.co/Qwen/Qwen2.5-Omni-7B)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the
    model outputs. Read the documentation from [`PretrainedConfig`] for more information.

    Args:
        depth (`int`, *optional*, defaults to 32):
            Number of layers (depth) in the model.
        hidden_size (`int`, *optional*, defaults to 3584):
            The size of the hidden layers.
        hidden_act (`str`, *optional*, defaults to `"silu"`):
            The non-linear activation function used in the model.
        intermediate_size (`int`, *optional*, defaults to 3420):
            Stored as `intermediate_size`; presumably the width of the MLP hidden layer
            (confirm against the modeling code).
        num_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer.
        in_channels (`int`, *optional*, defaults to 3):
            Number of input channels.
        patch_size (`int`, *optional*, defaults to 14):
            The size of the patches extracted from the input.
        spatial_merge_size (`int`, *optional*, defaults to 2):
            The size used for merging spatial dimensions.
        temporal_patch_size (`int`, *optional*, defaults to 2):
            The size used for patches along the temporal dimension.
        window_size (`int`, *optional*, defaults to 112):
            Stored as `window_size`; presumably the attention window size
            (confirm against the modeling code).
        out_hidden_size (`int`, *optional*, defaults to 3584):
            Stored as `out_hidden_size`; presumably the size of the encoder output
            (confirm against the modeling code).
        fullatt_block_indexes (`list[int]`, *optional*, defaults to `[7, 15, 23, 31]`):
            Stored as `fullatt_block_indexes`; by its name, the indexes of the blocks that use
            full attention. When `None`, a fresh `[7, 15, 23, 31]` list is created for this
            instance.
        kwargs:
            Forwarded unchanged to [`PretrainedConfig`].

    Example:
        >>> # xdoctest: +SKIP
        >>> from transformers import Qwen2_5OmniVisionEncoderConfig, Qwen2_5OmniVisionEncoder
        >>> # Initializing a Qwen2_5OmniVisionEncoderConfig
        >>> configuration = Qwen2_5OmniVisionEncoderConfig()
        >>> # Initializing a Qwen2_5OmniVisionEncoder (with random weights)
        >>> model = Qwen2_5OmniVisionEncoder(configuration)
        >>> # Accessing the model configuration
        >>> configuration = model.config
    """

    model_type = "qwen2_5_omni_vision_encoder"
    base_config_key = "vision_config"

    def __init__(
        self,
        depth=32,
        hidden_size=3584,
        hidden_act="silu",
        intermediate_size=3420,
        num_heads=16,
        in_channels=3,
        patch_size=14,
        spatial_merge_size=2,
        temporal_patch_size=2,
        window_size=112,
        out_hidden_size=3584,
        fullatt_block_indexes=None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # A list literal as the default would be one object shared by every instance;
        # build a new list per instance so mutating one config cannot leak into others.
        if fullatt_block_indexes is None:
            fullatt_block_indexes = [7, 15, 23, 31]

        self.depth = depth
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.num_heads = num_heads
        self.in_channels = in_channels
        self.patch_size = patch_size
        self.spatial_merge_size = spatial_merge_size
        self.temporal_patch_size = temporal_patch_size
        self.window_size = window_size
        self.fullatt_block_indexes = fullatt_block_indexes
        self.out_hidden_size = out_hidden_size
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class Qwen2_5OmniAudioEncoderConfig(PretrainedConfig):
    """
    Configuration holder for the Qwen2.5-Omni audio encoder.

    Every constructor argument is stored verbatim as an attribute of the same name.
    In addition, `encoder_layers` is mirrored under the alias `num_hidden_layers`.
    Any extra keyword arguments are handed to [`PretrainedConfig`] before the
    attributes below are set.
    """

    model_type = "qwen2_5_omni_audio_encoder"

    def __init__(
        self,
        num_mel_bins=128,
        encoder_layers=32,
        encoder_attention_heads=20,
        encoder_ffn_dim=5120,
        d_model=1280,
        dropout=0,
        attention_dropout=0,
        activation_function="gelu",
        activation_dropout=0,
        scale_embedding=False,
        initializer_range=0.02,
        max_source_positions=1500,
        n_window=100,
        output_dim=3584,
        **kwargs,
    ):
        # The base class consumes the generic options first.
        super().__init__(**kwargs)

        # Input features and encoder geometry.
        self.num_mel_bins = num_mel_bins
        self.d_model = d_model
        self.encoder_layers = encoder_layers
        self.encoder_attention_heads = encoder_attention_heads
        self.encoder_ffn_dim = encoder_ffn_dim

        # Regularisation and activation.
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.activation_function = activation_function
        self.activation_dropout = activation_dropout

        # Alias of `encoder_layers` under the generic attribute name.
        self.num_hidden_layers = encoder_layers
        self.initializer_range = initializer_range
        # When True, the scale factor will be sqrt(d_model).
        self.scale_embedding = scale_embedding
        self.max_source_positions = max_source_positions
        self.n_window = n_window
        self.output_dim = output_dim
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class Qwen2_5OmniTextConfig(PretrainedConfig):
    """
    Configuration holder for the Qwen2.5-Omni text model.

    Arguments are stored as attributes of the same name, with two fallbacks:

    * `num_key_value_heads=None` falls back to `num_attention_heads`.
    * `rope_scaling=None` falls back to
      `{"mrope_section": [16, 24, 24], "rope_type": "default", "type": "default"}`.

    Remaining keyword arguments are forwarded to [`PretrainedConfig`] after all
    attributes have been set.
    """

    model_type = "qwen2_5_omni_text"
    is_composition = False

    def __init__(
        self,
        vocab_size=152064,
        hidden_size=3584,
        intermediate_size=18944,
        num_hidden_layers=28,
        num_attention_heads=28,
        num_key_value_heads=4,
        hidden_act="silu",
        max_position_embeddings=32768,
        rms_norm_eps=1e-06,
        use_cache=True,
        rope_theta=1000000.0,
        use_sliding_window=False,
        sliding_window=32768,
        max_window_layers=28,
        attention_dropout=0.0,
        rope_scaling=None,
        initializer_range=0.02,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.use_sliding_window = use_sliding_window
        self.sliding_window = sliding_window
        self.max_window_layers = max_window_layers

        # Backward compatibility: an unspecified KV-head count means one per attention head.
        self.num_key_value_heads = (
            num_attention_heads if num_key_value_heads is None else num_key_value_heads
        )
        self.hidden_act = hidden_act
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.attention_dropout = attention_dropout

        # Substitute the default rope settings when none were supplied.
        if rope_scaling is None:
            rope_scaling = {"mrope_section": [16, 24, 24], "rope_type": "default", "type": "default"}
        self.rope_scaling = rope_scaling
        self.initializer_range = initializer_range

        super().__init__(**kwargs)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class Qwen2_5OmniThinkerConfig(PretrainedConfig):
    """
    Composite configuration for the Qwen2.5-Omni "thinker".

    It bundles an audio-encoder, a vision-encoder and a text sub-configuration.
    Each of `audio_config`, `vision_config` and `text_config` may be:

    * `None` - the matching sub-config class is built with its defaults;
    * a `dict` - expanded as keyword arguments into the matching sub-config class;
    * anything else (e.g. an already-built config object) - stored as given.

    The scalar arguments are stored as attributes of the same name.
    `tie_word_embeddings` and any extra keyword arguments are forwarded to
    [`PretrainedConfig`].
    """

    model_type = "qwen2_5_omni_thinker"
    sub_configs = {
        "audio_config": Qwen2_5OmniAudioEncoderConfig,
        "vision_config": Qwen2_5OmniVisionEncoderConfig,
        "text_config": Qwen2_5OmniTextConfig,
    }
    is_composition = True

    def __init__(
        self,
        audio_config=None,
        vision_config=None,
        text_config=None,
        audio_token_index=151646,
        image_token_index=151655,
        video_token_index=151656,
        tie_word_embeddings=False,
        position_id_per_seconds=25,
        seconds_per_chunk=2,
        audio_start_token_id=151647,
        audio_end_token_id=151648,
        user_token_id=872,
        initializer_range=0.02,
        **kwargs,
    ):
        # Placeholder token ids for the three modalities.
        self.audio_token_index = audio_token_index
        self.image_token_index = image_token_index
        self.video_token_index = video_token_index

        # Fields added on 2025.02.20.
        self.user_token_id = user_token_id
        self.position_id_per_seconds = position_id_per_seconds
        self.seconds_per_chunk = seconds_per_chunk
        self.audio_start_token_id = audio_start_token_id
        self.audio_end_token_id = audio_end_token_id
        self.initializer_range = initializer_range

        # Normalise each sub-config: defaults for None, keyword expansion for dicts.
        if vision_config is None:
            vision_config = Qwen2_5OmniVisionEncoderConfig()
        elif isinstance(vision_config, dict):
            vision_config = Qwen2_5OmniVisionEncoderConfig(**vision_config)
        self.vision_config = vision_config

        if audio_config is None:
            audio_config = Qwen2_5OmniAudioEncoderConfig()
        elif isinstance(audio_config, dict):
            audio_config = Qwen2_5OmniAudioEncoderConfig(**audio_config)
        self.audio_config = audio_config

        if text_config is None:
            text_config = Qwen2_5OmniTextConfig()
        elif isinstance(text_config, dict):
            text_config = Qwen2_5OmniTextConfig(**text_config)
        self.text_config = text_config

        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
class Qwen2_5OmniTalkerConfig(PretrainedConfig):
    """
    Configuration holder for the Qwen2.5-Omni "talker".

    Arguments are stored as attributes of the same name. The only fallback is
    `num_key_value_heads=None`, which resolves to `num_attention_heads`.
    `rope_scaling` is stored as given, including `None`.
    `tie_word_embeddings` and any extra keyword arguments are forwarded to
    [`PretrainedConfig`] after all attributes have been set.
    """

    model_type = "qwen2_5_omni_talker"
    is_composition = False

    def __init__(
        self,
        audio_token_index=151646,
        image_token_index=151655,
        video_token_index=151656,
        vocab_size=8448,
        tts_text_start_token_id=151860,
        tts_text_end_token_id=151861,
        tts_text_pad_token_id=151859,
        tts_codec_start_token_id=8293,
        tts_codec_end_token_id=8294,
        tts_codec_pad_token_id=8292,
        tts_codec_mask_token_id=8296,
        vision_start_token_id=151652,
        vision_end_token_id=151653,
        embedding_size=3584,
        hidden_size=3584,
        intermediate_size=18944,
        num_hidden_layers=28,
        num_attention_heads=28,
        num_key_value_heads=4,
        hidden_act="silu",
        max_position_embeddings=32768,
        rms_norm_eps=1e-06,
        head_dim=128,
        use_cache=True,
        tie_word_embeddings=False,
        rope_theta=1000000.0,
        use_sliding_window=False,
        sliding_window=32768,
        max_window_layers=28,
        attention_dropout=0.0,
        rope_scaling=None,
        position_id_per_seconds=25,
        seconds_per_chunk=2,
        audio_start_token_id=151647,
        audio_end_token_id=151648,
        initializer_range=0.02,
        spatial_merge_size=2,
        **kwargs,
    ):
        # Modality placeholder token ids.
        self.audio_token_index = audio_token_index
        self.image_token_index = image_token_index
        self.video_token_index = video_token_index

        # TTS text-side special tokens.
        self.tts_text_start_token_id = tts_text_start_token_id
        self.tts_text_end_token_id = tts_text_end_token_id
        self.tts_text_pad_token_id = tts_text_pad_token_id

        # TTS codec-side special tokens.
        self.tts_codec_start_token_id = tts_codec_start_token_id
        self.tts_codec_end_token_id = tts_codec_end_token_id
        self.tts_codec_pad_token_id = tts_codec_pad_token_id
        self.tts_codec_mask_token_id = tts_codec_mask_token_id

        # Vision span delimiters.
        self.vision_start_token_id = vision_start_token_id
        self.vision_end_token_id = vision_end_token_id

        # Model geometry.
        self.vocab_size = vocab_size
        self.head_dim = head_dim
        self.embedding_size = embedding_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.use_sliding_window = use_sliding_window
        self.sliding_window = sliding_window
        self.max_window_layers = max_window_layers

        # Backward compatibility: an unspecified KV-head count means one per attention head.
        self.num_key_value_heads = (
            num_attention_heads if num_key_value_heads is None else num_key_value_heads
        )
        self.hidden_act = hidden_act
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.attention_dropout = attention_dropout
        self.rope_scaling = rope_scaling

        # Audio timing and span delimiters.
        self.position_id_per_seconds = position_id_per_seconds
        self.seconds_per_chunk = seconds_per_chunk
        self.audio_start_token_id = audio_start_token_id
        self.audio_end_token_id = audio_end_token_id

        self.initializer_range = initializer_range
        self.spatial_merge_size = spatial_merge_size

        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class Qwen2_5OmniDiTConfig(PretrainedConfig):
    """
    Configuration holder for the Qwen2.5-Omni DiT component of token2wav.

    Every argument is stored as an attribute of the same name. The list-valued
    arguments default to `None` and are replaced by a freshly created list, so
    instances never share (and cannot accidentally mutate) a common default:

    * `look_ahead_layers` -> `[10]`
    * `look_backward_layers` -> `[0, 20]`
    * `enc_channels` -> `[256, 256, 256, 256, 768]`
    * `enc_kernel_sizes` -> `[5, 3, 3, 3, 1]`
    * `enc_dilations` -> `[1, 2, 3, 4, 1]`

    Extra keyword arguments are forwarded to [`PretrainedConfig`] after all
    attributes have been set.
    """

    model_type = "qwen2_5_omni_dit"

    def __init__(
        self,
        hidden_size=1024,
        num_hidden_layers=22,
        num_attention_heads=16,
        ff_mult=2,
        emb_dim=512,
        head_dim=64,
        rope_theta=10000.0,
        max_position_embeddings=32768,
        block_size=24,
        look_ahead_layers=None,
        look_backward_layers=None,
        repeats=2,
        num_embeds=8193,
        mel_dim=80,
        dropout=0.1,
        enc_emb_dim=192,
        enc_dim=128,
        enc_channels=None,
        enc_kernel_sizes=None,
        enc_dilations=None,
        enc_attention_channels=64,
        enc_res2net_scale=2,
        enc_se_channels=64,
        **kwargs,
    ):
        # List literals in the signature would be single objects shared by all
        # instances; materialise a new list per instance instead.
        if look_ahead_layers is None:
            look_ahead_layers = [10]
        if look_backward_layers is None:
            look_backward_layers = [0, 20]
        if enc_channels is None:
            enc_channels = [256, 256, 256, 256, 768]
        if enc_kernel_sizes is None:
            enc_kernel_sizes = [5, 3, 3, 3, 1]
        if enc_dilations is None:
            enc_dilations = [1, 2, 3, 4, 1]

        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.ff_mult = ff_mult
        self.emb_dim = emb_dim
        self.head_dim = head_dim
        self.rope_theta = rope_theta
        self.max_position_embeddings = max_position_embeddings
        self.block_size = block_size
        self.look_ahead_layers = look_ahead_layers
        self.look_backward_layers = look_backward_layers
        self.repeats = repeats
        self.num_embeds = num_embeds
        self.mel_dim = mel_dim
        self.dropout = dropout
        self.enc_emb_dim = enc_emb_dim
        self.enc_dim = enc_dim
        self.enc_channels = enc_channels
        self.enc_kernel_sizes = enc_kernel_sizes
        self.enc_dilations = enc_dilations
        self.enc_attention_channels = enc_attention_channels
        self.enc_res2net_scale = enc_res2net_scale
        self.enc_se_channels = enc_se_channels
        super().__init__(**kwargs)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class Qwen2_5OmniBigVGANConfig(PretrainedConfig):
    """
    Configuration holder for the Qwen2.5-Omni BigVGAN component of token2wav.

    Every argument is stored as an attribute of the same name. The list-valued
    arguments default to `None` and are replaced by a freshly created list, so
    instances never share (and cannot accidentally mutate) a common default:

    * `resblock_kernel_sizes` -> `[3, 7, 11]`
    * `resblock_dilation_sizes` -> `[[1, 3, 5], [1, 3, 5], [1, 3, 5]]`
    * `upsample_rates` -> `[5, 3, 2, 2, 2, 2]`
    * `upsample_kernel_sizes` -> `[11, 7, 4, 4, 4, 4]`

    Extra keyword arguments are forwarded to [`PretrainedConfig`] after all
    attributes have been set.
    """

    model_type = "qwen2_5_omni_bigvgan"

    def __init__(
        self,
        mel_dim=80,
        upsample_initial_channel=1536,
        resblock_kernel_sizes=None,
        resblock_dilation_sizes=None,
        upsample_rates=None,
        upsample_kernel_sizes=None,
        **kwargs,
    ):
        # List literals in the signature would be single objects shared by all
        # instances; materialise new (and, where nested, independent) lists instead.
        if resblock_kernel_sizes is None:
            resblock_kernel_sizes = [3, 7, 11]
        if resblock_dilation_sizes is None:
            resblock_dilation_sizes = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        if upsample_rates is None:
            upsample_rates = [5, 3, 2, 2, 2, 2]
        if upsample_kernel_sizes is None:
            upsample_kernel_sizes = [11, 7, 4, 4, 4, 4]

        self.mel_dim = mel_dim
        self.upsample_initial_channel = upsample_initial_channel
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_kernel_sizes = upsample_kernel_sizes
        super().__init__(**kwargs)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
class Qwen2_5OmniToken2WavConfig(PretrainedConfig):
    """
    Composite configuration for the Qwen2.5-Omni token2wav stage.

    It bundles a [`Qwen2_5OmniDiTConfig`] and a [`Qwen2_5OmniBigVGANConfig`].

    Args:
        dit_config (`dict` or `Qwen2_5OmniDiTConfig`, *optional*):
            `None` builds the sub-config with its defaults; a mapping is expanded as
            keyword arguments; an existing `Qwen2_5OmniDiTConfig` is stored as given.
        bigvgan_config (`dict` or `Qwen2_5OmniBigVGANConfig`, *optional*):
            Same handling as `dit_config`, for `Qwen2_5OmniBigVGANConfig`.
        kwargs:
            Forwarded unchanged to [`PretrainedConfig`].
    """

    model_type = "qwen2_5_omni_token2wav"
    sub_configs = {
        "dit_config": Qwen2_5OmniDiTConfig,
        "bigvgan_config": Qwen2_5OmniBigVGANConfig,
    }
    is_composition = True

    def __init__(self, dit_config=None, bigvgan_config=None, **kwargs):
        if dit_config is None:
            dit_config = {}
        if bigvgan_config is None:
            bigvgan_config = {}

        # Accept already-built sub-configs too: `**` unpacking a config object would
        # raise TypeError, whereas the thinker config accepts instances directly.
        if not isinstance(dit_config, Qwen2_5OmniDiTConfig):
            dit_config = Qwen2_5OmniDiTConfig(**dit_config)
        if not isinstance(bigvgan_config, Qwen2_5OmniBigVGANConfig):
            bigvgan_config = Qwen2_5OmniBigVGANConfig(**bigvgan_config)

        self.dit_config = dit_config
        self.bigvgan_config = bigvgan_config
        super().__init__(**kwargs)
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
class Qwen2_5OmniConfig(PretrainedConfig):
    """
    Top-level Qwen2.5-Omni configuration.

    It bundles the thinker, talker and token2wav sub-configurations.

    Args:
        thinker_config (`dict` or `Qwen2_5OmniThinkerConfig`, *optional*):
            `None` builds the sub-config with its defaults (and logs an info message);
            a mapping is expanded as keyword arguments; an existing
            `Qwen2_5OmniThinkerConfig` is stored as given.
        talker_config (`dict` or `Qwen2_5OmniTalkerConfig`, *optional*):
            Same handling, for `Qwen2_5OmniTalkerConfig`.
        token2wav_config (`dict` or `Qwen2_5OmniToken2WavConfig`, *optional*):
            Same handling, for `Qwen2_5OmniToken2WavConfig`.
        enable_audio_output (`bool`, *optional*, defaults to `True`):
            Stored as `enable_audio_output`.
        kwargs:
            Forwarded unchanged to [`PretrainedConfig`].
    """

    model_type = "qwen2_5_omni"
    sub_configs = {
        "thinker_config": Qwen2_5OmniThinkerConfig,
        "talker_config": Qwen2_5OmniTalkerConfig,
        "token2wav_config": Qwen2_5OmniToken2WavConfig,
    }
    is_composition = True

    def __init__(
        self,
        thinker_config=None,
        talker_config=None,
        token2wav_config=None,
        enable_audio_output: bool = True,
        **kwargs,
    ):
        if thinker_config is None:
            thinker_config = {}
            logger.info("thinker_config is None. Initializing thinker model with default values")

        if talker_config is None:
            talker_config = {}
            logger.info("talker_config is None. Initializing talker model with default values")

        if token2wav_config is None:
            token2wav_config = {}
            logger.info("token2wav_config is None. Initializing token2wav model with default values")

        # Accept already-built sub-configs too: `**` unpacking a config object would
        # raise TypeError, whereas the thinker config accepts instances directly.
        if not isinstance(thinker_config, Qwen2_5OmniThinkerConfig):
            thinker_config = Qwen2_5OmniThinkerConfig(**thinker_config)
        if not isinstance(talker_config, Qwen2_5OmniTalkerConfig):
            talker_config = Qwen2_5OmniTalkerConfig(**talker_config)
        if not isinstance(token2wav_config, Qwen2_5OmniToken2WavConfig):
            token2wav_config = Qwen2_5OmniToken2WavConfig(**token2wav_config)

        self.thinker_config = thinker_config
        self.talker_config = talker_config
        self.token2wav_config = token2wav_config
        self.enable_audio_output = enable_audio_output

        super().__init__(**kwargs)

    @classmethod
    def from_sub_model_configs(
        cls,
        thinker_config: Qwen2_5OmniThinkerConfig,
        talker_config: Qwen2_5OmniTalkerConfig,
        token2wav_config: Qwen2_5OmniToken2WavConfig,
        enable_audio_output: bool = True,
        **kwargs,
    ):
        r"""
        Instantiate a [`Qwen2_5OmniConfig`] (or a derived class) from sub-models configuration.

        Each sub-config is converted with `to_dict()` and passed to the constructor, so the
        resulting config holds newly built sub-config objects rather than the ones given.

        Returns:
            [`Qwen2_5OmniConfig`]: An instance of a configuration object
        """
        return cls(
            thinker_config=thinker_config.to_dict(),
            talker_config=talker_config.to_dict(),
            token2wav_config=token2wav_config.to_dict(),
            enable_audio_output=enable_audio_output,
            **kwargs,
        )
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# Public names of this module: only the four composite/top-level configs are listed;
# the vision, audio, text, DiT and BigVGAN sub-config classes are not exported here.
__all__ = ["Qwen2_5OmniConfig", "Qwen2_5OmniThinkerConfig", "Qwen2_5OmniTalkerConfig", "Qwen2_5OmniToken2WavConfig"]
|