crfm-helm 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- crfm_helm-0.5.5.dist-info/METADATA +413 -0
- crfm_helm-0.5.5.dist-info/RECORD +894 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +13 -1
- helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
- helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/common_adapter_specs.py +69 -4
- helm/benchmark/adaptation/prompt.py +1 -1
- helm/benchmark/annotation/aci_bench_annotator.py +95 -0
- helm/benchmark/annotation/air_bench_annotator.py +20 -5
- helm/benchmark/annotation/annotator.py +5 -0
- helm/benchmark/annotation/annotator_factory.py +3 -20
- helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
- helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
- helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
- helm/benchmark/annotation/bird_sql_annotator.py +58 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
- helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
- helm/benchmark/annotation/dischargeme_annotator.py +107 -0
- helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
- helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/med_dialog_annotator.py +99 -0
- helm/benchmark/annotation/medalign_annotator.py +100 -0
- helm/benchmark/annotation/medi_qa_annotator.py +98 -0
- helm/benchmark/annotation/medication_qa_annotator.py +87 -63
- helm/benchmark/annotation/mental_health_annotator.py +98 -0
- helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +218 -6
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
- helm/benchmark/annotation/omni_math_annotator.py +132 -0
- helm/benchmark/annotation/spider_annotator.py +18 -0
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
- helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
- helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
- helm/benchmark/annotation/wildbench_annotator.py +119 -0
- helm/benchmark/annotation_executor.py +35 -15
- helm/benchmark/augmentations/cleva_perturbation.py +9 -8
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
- helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
- helm/benchmark/augmentations/dialect_perturbation.py +4 -5
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +2 -2
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +4 -5
- helm/benchmark/augmentations/perturbation.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +2 -2
- helm/benchmark/augmentations/synonym_perturbation.py +4 -3
- helm/benchmark/augmentations/test_perturbation.py +16 -13
- helm/benchmark/augmentations/translate_perturbation.py +2 -2
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/data_preprocessor.py +2 -2
- helm/benchmark/huggingface_registration.py +2 -7
- helm/benchmark/metrics/aci_bench_metrics.py +34 -0
- helm/benchmark/metrics/basic_metrics.py +6 -6
- helm/benchmark/metrics/bbq_metrics.py +2 -2
- helm/benchmark/metrics/bias_metrics.py +12 -3
- helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
- helm/benchmark/metrics/bird_sql_metrics.py +28 -0
- helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
- helm/benchmark/metrics/classification_metrics.py +76 -12
- helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
- helm/benchmark/metrics/code_metrics.py +5 -5
- helm/benchmark/metrics/comet_metric.py +125 -0
- helm/benchmark/metrics/common_metric_specs.py +9 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
- helm/benchmark/metrics/copyright_metrics.py +4 -4
- helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +34 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -4
- helm/benchmark/metrics/dry_run_metrics.py +5 -5
- helm/benchmark/metrics/efficiency_metrics.py +3 -3
- helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
- helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
- helm/benchmark/metrics/ifeval/__init__.py +0 -0
- helm/benchmark/metrics/ifeval/instructions.py +1574 -0
- helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
- helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
- helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
- helm/benchmark/metrics/ifeval_metrics.py +55 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
- helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
- helm/benchmark/metrics/language_modeling_metrics.py +4 -4
- helm/benchmark/metrics/machine_translation_metrics.py +2 -2
- helm/benchmark/metrics/med_dialog_metrics.py +34 -0
- helm/benchmark/metrics/medalign_metrics.py +34 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
- helm/benchmark/metrics/medec_metrics.py +101 -0
- helm/benchmark/metrics/medi_qa_metrics.py +34 -0
- helm/benchmark/metrics/medication_qa_metrics.py +15 -4
- helm/benchmark/metrics/mental_health_metrics.py +34 -0
- helm/benchmark/metrics/metric.py +3 -3
- helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
- helm/benchmark/metrics/nltk_helper.py +32 -0
- helm/benchmark/metrics/numeracy_metrics.py +4 -4
- helm/benchmark/metrics/omni_math_metrics.py +32 -0
- helm/benchmark/metrics/output_processing_metric.py +60 -0
- helm/benchmark/metrics/output_processors.py +15 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
- helm/benchmark/metrics/ranking_metrics.py +3 -3
- helm/benchmark/metrics/reference_metric.py +3 -3
- helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
- helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
- helm/benchmark/metrics/spider_metrics.py +7 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
- helm/benchmark/metrics/statistic.py +1 -1
- helm/benchmark/metrics/summac/model_summac.py +1 -1
- helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
- helm/benchmark/metrics/summarization_metrics.py +19 -9
- helm/benchmark/metrics/test_bias_metrics.py +5 -1
- helm/benchmark/metrics/test_classification_metrics.py +140 -68
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
- helm/benchmark/metrics/test_metric.py +1 -1
- helm/benchmark/metrics/test_statistic.py +2 -2
- helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
- helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/toxicity_metrics.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +4 -1
- helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
- helm/benchmark/metrics/wildbench_metrics.py +34 -0
- helm/benchmark/model_metadata_registry.py +16 -0
- helm/benchmark/presentation/summarize.py +23 -10
- helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
- helm/benchmark/reeval_run.py +203 -0
- helm/benchmark/reeval_runner.py +355 -0
- helm/benchmark/run.py +8 -17
- helm/benchmark/run_expander.py +78 -8
- helm/benchmark/run_spec_factory.py +12 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
- helm/benchmark/run_specs/audio_run_specs.py +613 -0
- helm/benchmark/run_specs/call_center_run_specs.py +49 -0
- helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
- helm/benchmark/run_specs/classic_run_specs.py +1 -69
- helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
- helm/benchmark/run_specs/experimental_run_specs.py +112 -3
- helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
- helm/benchmark/run_specs/lite_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +89 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
- helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
- helm/benchmark/run_specs/oab_exams_specs.py +32 -0
- helm/benchmark/run_specs/safety_run_specs.py +37 -0
- helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
- helm/benchmark/run_specs/sql_run_specs.py +54 -0
- helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
- helm/benchmark/run_specs/vlm_run_specs.py +75 -2
- helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
- helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
- helm/benchmark/scenarios/air_bench_scenario.py +6 -1
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/__init__.py +0 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
- helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
- helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
- helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
- helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
- helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
- helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
- helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
- helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
- helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
- helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
- helm/benchmark/scenarios/banking77_scenario.py +6 -1
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/big_bench_scenario.py +11 -1
- helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
- helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
- helm/benchmark/scenarios/blimp_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +1 -1
- helm/benchmark/scenarios/boolq_scenario.py +1 -1
- helm/benchmark/scenarios/casehold_scenario.py +79 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
- helm/benchmark/scenarios/clear_scenario.py +153 -0
- helm/benchmark/scenarios/cleva_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +17 -4
- helm/benchmark/scenarios/commonsense_scenario.py +1 -1
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
- helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
- helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
- helm/benchmark/scenarios/disinformation_scenario.py +10 -1
- helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
- helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
- helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
- helm/benchmark/scenarios/gpqa_scenario.py +80 -0
- helm/benchmark/scenarios/grammar_scenario.py +2 -2
- helm/benchmark/scenarios/gsm_scenario.py +10 -1
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
- helm/benchmark/scenarios/headqa_scenario.py +131 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
- helm/benchmark/scenarios/ice_scenario.py +8 -4
- helm/benchmark/scenarios/ifeval_scenario.py +53 -0
- helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
- helm/benchmark/scenarios/imdb_scenario.py +11 -2
- helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
- helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
- helm/benchmark/scenarios/koala_scenario.py +1 -1
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
- helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
- helm/benchmark/scenarios/legal_support_scenario.py +11 -1
- helm/benchmark/scenarios/legalbench_scenario.py +22 -3
- helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
- helm/benchmark/scenarios/lextreme_scenario.py +11 -1
- helm/benchmark/scenarios/live_qa_scenario.py +1 -1
- helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
- helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
- helm/benchmark/scenarios/math_scenario.py +9 -1
- helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
- helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
- helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
- helm/benchmark/scenarios/med_qa_scenario.py +10 -1
- helm/benchmark/scenarios/medalign_scenario.py +88 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
- helm/benchmark/scenarios/medbullets_scenario.py +140 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
- helm/benchmark/scenarios/medec_scenario.py +120 -0
- helm/benchmark/scenarios/medhallu_scenario.py +66 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
- helm/benchmark/scenarios/mental_health_scenario.py +112 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
- helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
- helm/benchmark/scenarios/mmlu_scenario.py +11 -1
- helm/benchmark/scenarios/msmarco_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
- helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
- helm/benchmark/scenarios/newsqa_scenario.py +1 -1
- helm/benchmark/scenarios/numeracy_scenario.py +10 -1
- helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
- helm/benchmark/scenarios/omni_math_scenario.py +53 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
- helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
- helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
- helm/benchmark/scenarios/quac_scenario.py +10 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
- helm/benchmark/scenarios/raft_scenario.py +17 -2
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
- helm/benchmark/scenarios/scenario.py +9 -1
- helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
- helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
- helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
- helm/benchmark/scenarios/spider_scenario.py +91 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
- helm/benchmark/scenarios/summarization_scenario.py +11 -1
- helm/benchmark/scenarios/sumosum_scenario.py +157 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
- helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
- helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
- helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
- helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
- helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
- helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
- helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
- helm/benchmark/scenarios/test_math_scenario.py +1 -0
- helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
- helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
- helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
- helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
- helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
- helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
- helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
- helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
- helm/benchmark/scenarios/unitxt_scenario.py +8 -2
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
- helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
- helm/benchmark/scenarios/wikifact_scenario.py +11 -1
- helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
- helm/benchmark/scenarios/wildbench_scenario.py +83 -0
- helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
- helm/benchmark/scenarios/xstest_scenario.py +1 -1
- helm/benchmark/server.py +11 -0
- helm/benchmark/slurm_runner.py +1 -1
- helm/benchmark/static/schema_audio.yaml +752 -0
- helm/benchmark/static/schema_autobencher.yaml +150 -0
- helm/benchmark/static/schema_call_center.yaml +97 -60
- helm/benchmark/static/schema_capabilities.yaml +254 -0
- helm/benchmark/static/schema_czech_bank.yaml +148 -0
- helm/benchmark/static/schema_enem_challenge.yaml +146 -0
- helm/benchmark/static/schema_enterprise.yaml +298 -0
- helm/benchmark/static/schema_finance.yaml +14 -12
- helm/benchmark/static/schema_heim.yaml +1389 -0
- helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
- helm/benchmark/static/schema_medhelm.yaml +1081 -0
- helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
- helm/benchmark/static/schema_safety.yaml +18 -1
- helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
- helm/benchmark/static/schema_social_audio.yaml +224 -0
- helm/benchmark/static/schema_sql.yaml +171 -0
- helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
- helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
- helm/benchmark/static/schema_vhelm.yaml +109 -36
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
- helm/benchmark/static_build/assets/index-262903c1.js +10 -0
- helm/benchmark/static_build/assets/index-42060d71.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
- helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
- helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
- helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
- helm/benchmark/static_build/config.js +1 -1
- helm/benchmark/static_build/index.html +5 -5
- helm/benchmark/window_services/default_window_service.py +1 -1
- helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
- helm/benchmark/window_services/ice_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
- helm/benchmark/window_services/local_window_service.py +2 -2
- helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
- helm/benchmark/window_services/test_bloom_window_service.py +3 -3
- helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
- helm/benchmark/window_services/test_gptj_window_service.py +8 -3
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
- helm/benchmark/window_services/test_openai_window_service.py +8 -3
- helm/benchmark/window_services/test_opt_window_service.py +3 -3
- helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
- helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
- helm/benchmark/window_services/test_t511b_window_service.py +3 -3
- helm/benchmark/window_services/test_ul2_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +1 -1
- helm/benchmark/window_services/test_yalm_window_service.py +3 -3
- helm/benchmark/window_services/yalm_window_service.py +1 -1
- helm/clients/ai21_client.py +3 -3
- helm/clients/aleph_alpha_client.py +1 -1
- helm/clients/audio_language/__init__.py +0 -0
- helm/clients/audio_language/diva_llama_client.py +118 -0
- helm/clients/audio_language/llama_omni_client.py +198 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
- helm/clients/audio_language/qwen_audiolm_client.py +150 -0
- helm/clients/auto_client.py +4 -2
- helm/clients/azure_openai_client.py +55 -0
- helm/clients/bedrock_client.py +201 -7
- helm/clients/bedrock_utils.py +33 -0
- helm/clients/clip_scorers/clip_scorer.py +1 -1
- helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
- helm/clients/cohere_client.py +3 -3
- helm/clients/google_client.py +1 -1
- helm/clients/http_model_client.py +1 -1
- helm/clients/huggingface_client.py +10 -18
- helm/clients/ibm_client.py +267 -0
- helm/clients/image_generation/adobe_vision_client.py +1 -1
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
- helm/clients/image_generation/cogview2_client.py +1 -1
- helm/clients/image_generation/dalle2_client.py +1 -1
- helm/clients/image_generation/dalle3_client.py +2 -2
- helm/clients/image_generation/dalle_mini/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/data.py +1 -1
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
- helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
- helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
- helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
- helm/clients/image_generation/dalle_mini_client.py +1 -1
- helm/clients/image_generation/deep_floyd_client.py +1 -1
- helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
- helm/clients/image_generation/lexica_client.py +1 -1
- helm/clients/image_generation/mindalle/models/__init__.py +6 -6
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
- helm/clients/image_generation/mindalle_client.py +1 -1
- helm/clients/image_generation/together_image_generation_client.py +1 -1
- helm/clients/lit_gpt_client.py +2 -2
- helm/clients/mistral_client.py +62 -18
- helm/clients/nvidia_nim_client.py +0 -3
- helm/clients/openai_client.py +241 -22
- helm/clients/palmyra_client.py +1 -4
- helm/clients/reka_client.py +1 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
- helm/clients/stanfordhealthcare_claude_client.py +31 -0
- helm/clients/stanfordhealthcare_google_client.py +43 -0
- helm/clients/stanfordhealthcare_http_model_client.py +93 -0
- helm/clients/stanfordhealthcare_openai_client.py +62 -0
- helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
- helm/clients/test_client.py +1 -1
- helm/clients/test_together_client.py +6 -1
- helm/clients/together_client.py +47 -7
- helm/clients/upstage_client.py +23 -0
- helm/clients/vertexai_client.py +39 -13
- helm/clients/vision_language/open_flamingo/__init__.py +2 -2
- helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
- helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +175 -0
- helm/clients/vllm_client.py +4 -6
- helm/clients/yi_client.py +0 -3
- helm/common/audio_utils.py +111 -0
- helm/common/file_caches/local_file_cache.py +1 -1
- helm/common/file_caches/test_local_file_cache.py +1 -1
- helm/common/images_utils.py +2 -2
- helm/common/media_object.py +2 -2
- helm/common/multimodal_request_utils.py +26 -0
- helm/common/reeval_parameters.py +12 -0
- helm/common/request.py +6 -2
- helm/common/response_format.py +18 -0
- helm/common/test_media_object.py +1 -1
- helm/config/model_deployments.yaml +1112 -19
- helm/config/model_metadata.yaml +985 -44
- helm/config/tokenizer_configs.yaml +379 -3
- helm/proxy/cli.py +2 -2
- helm/proxy/example_queries.py +1 -1
- helm/proxy/server.py +11 -4
- helm/proxy/services/remote_service.py +1 -1
- helm/proxy/services/server_service.py +1 -1
- helm/proxy/services/test_remote_service.py +2 -2
- helm/proxy/services/test_service.py +1 -1
- helm/proxy/static/general.js +122 -0
- helm/proxy/static/help.html +99 -0
- helm/proxy/static/index.css +57 -0
- helm/proxy/static/index.html +40 -0
- helm/proxy/static/index.js +456 -0
- helm/proxy/static/info-icon.png +0 -0
- helm/proxy/test_retry.py +1 -1
- helm/proxy/token_counters/auto_token_counter.py +1 -1
- helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
- helm/tokenizers/caching_tokenizer.py +2 -30
- helm/tokenizers/http_model_tokenizer.py +1 -1
- helm/tokenizers/huggingface_tokenizer.py +2 -2
- helm/tokenizers/lit_gpt_tokenizer.py +1 -1
- helm/tokenizers/test_anthropic_tokenizer.py +6 -2
- helm/tokenizers/test_huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_yalm_tokenizer.py +1 -1
- helm/tokenizers/tiktoken_tokenizer.py +1 -1
- helm/tokenizers/tokenizer.py +3 -1
- helm/tokenizers/yalm_tokenizer.py +3 -3
- helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
- crfm_helm-0.5.4.dist-info/METADATA +0 -350
- crfm_helm-0.5.4.dist-info/RECORD +0 -697
- helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
- helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/tokenizers/anthropic_tokenizer.py +0 -52
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
|
|
4
|
+
from helm.common.hierarchical_logger import hlog
|
|
5
|
+
from helm.common.cache import CacheConfig
|
|
6
|
+
from helm.common.request import (
|
|
7
|
+
Request,
|
|
8
|
+
RequestResult,
|
|
9
|
+
Token,
|
|
10
|
+
wrap_request_time,
|
|
11
|
+
EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
|
|
12
|
+
GeneratedOutput,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from helm.clients.client import CachingClient
|
|
16
|
+
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
17
|
+
from typing import TypeVar, Generic
|
|
18
|
+
from typing import Any, Dict, List
|
|
19
|
+
from threading import Semaphore, Lock
|
|
20
|
+
import threading
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from ibm_watsonx_ai import Credentials
|
|
24
|
+
from ibm_watsonx_ai.foundation_models import ModelInference
|
|
25
|
+
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
|
|
26
|
+
from ibm_watsonx_ai.foundation_models.schema import (
|
|
27
|
+
TextChatParameters,
|
|
28
|
+
TextGenParameters,
|
|
29
|
+
ReturnOptionProperties,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
except ModuleNotFoundError as e:
|
|
33
|
+
handle_module_not_found_error(e, ["ibm"])
|
|
34
|
+
|
|
35
|
+
# Maximum number of concurrent requests allowed per model; parallel executions are limited by the IBM API
|
|
36
|
+
MAX_CONCURRENT_REQUESTS = 8
|
|
37
|
+
__semaphores: Dict[str, Semaphore] = dict()
|
|
38
|
+
__semaphores_lock = Lock()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_semaphore(model: str) -> Semaphore:
    """Return the semaphore registered for ``model``, creating it on first use.

    The registry is consulted and updated while holding the module-level
    registry lock. A newly created semaphore starts with
    ``MAX_CONCURRENT_REQUESTS`` permits.

    :param model: key under which the semaphore is stored
    :return: the semaphore shared by every caller passing the same ``model``
    """
    with __semaphores_lock:
        model_semaphore = __semaphores.get(model)
        if model_semaphore is None:
            model_semaphore = threading.Semaphore(MAX_CONCURRENT_REQUESTS)
            __semaphores[model] = model_semaphore
        return model_semaphore
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
T = TypeVar("T", TextGenParameters, TextChatParameters)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ModelInferenceHandler(ABC, Generic[T]):
    """Abstract strategy for talking to a ``ModelInference`` engine.

    ``T`` is the parameter object type a concrete handler builds and sends;
    it is constrained to ``TextGenParameters`` or ``TextChatParameters``.
    """

    @abstractmethod
    def __init__(self, inference_engine: ModelInference):
        """Keep a reference to the engine the handler sends requests through.

        :param inference_engine: engine stored on ``self.inference_engine``
        """
        self.inference_engine = inference_engine

    @abstractmethod
    def serve_request(self, prompt: str, params: T) -> Dict:
        """Send ``prompt`` with ``params`` to the engine and return the raw response."""

    @abstractmethod
    def parse_response(self, response: dict) -> List[GeneratedOutput]:
        """Convert a raw response dictionary into a list of ``GeneratedOutput``."""

    @abstractmethod
    def create_params(self, request: Request) -> T:
        """Build the parameter object of type ``T`` for ``request``."""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GenerateInferenceHandler(ModelInferenceHandler[TextGenParameters]):
    """Handler that serves requests through ``ModelInference.generate``."""

    def __init__(self, inference_engine: ModelInference):
        """Store the engine used by ``serve_request`` and ``create_params``."""
        self.inference_engine = inference_engine

    def create_params(self, request: Request) -> TextGenParameters:
        """Build the ``TextGenParameters`` for ``request``.

        ``top_p`` and ``max_tokens`` are copied from the request. The
        temperature is chosen by the rules commented below.
        """
        if self.inference_engine.model_id == "ibm/granite-13b-instruct-v2":
            # Default temperature 0.05 required by ibm/granite-13b-instruct-v2
            temperature = 0.05
        elif request.temperature == 0:
            # A temperature of exactly 0 is replaced by a tiny positive value.
            temperature = 1e-7
        else:
            temperature = request.temperature

        return_options = ReturnOptionProperties(
            input_text=True,
            generated_tokens=True,
            input_tokens=False,
            token_logprobs=True,
            token_ranks=False,
        )
        return TextGenParameters(
            temperature=temperature,
            top_p=request.top_p,
            max_new_tokens=request.max_tokens,
            return_options=return_options,
            include_stop_sequence=False,
            prompt_variables=None,
        )

    def serve_request(self, prompt: str, params: TextGenParameters) -> Dict:
        """Call ``generate`` while holding the semaphore of the engine's model id."""
        with _get_semaphore(self.inference_engine.model_id):
            return self.inference_engine.generate(prompt=prompt, params=params)

    def parse_response(self, response: dict) -> List[GeneratedOutput]:
        """Turn ``response["results"]`` into ``GeneratedOutput`` objects.

        Each result contributes its ``generated_text`` plus one ``Token`` per
        entry of ``generated_tokens``; a token without a ``logprob`` key counts
        as 0. Any exception raised while parsing is logged, and the completions
        built up to that point are returned.
        """
        outputs = []
        try:
            for result in response["results"]:
                total_logprob: float = 0
                result_tokens: List[Token] = []
                text = result["generated_text"]
                for entry in result["generated_tokens"]:
                    token_logprob = entry.get("logprob", 0)
                    result_tokens.append(Token(text=entry["text"], logprob=token_logprob))
                    total_logprob += token_logprob

                outputs.append(GeneratedOutput(text=text, logprob=total_logprob, tokens=result_tokens))
        except Exception as e:
            hlog(f"GenerateInferenceHandler failed with exception {e} during parse_response {response}")
        return outputs
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class ChatModelInferenceHandler(ModelInferenceHandler[TextChatParameters]):
    """Handler that serves requests through ``ModelInference.chat``."""

    def __init__(self, inference_engine: ModelInference):
        """Store the engine used by ``serve_request``."""
        self.inference_engine = inference_engine

    def create_params(self, request: Request) -> TextChatParameters:
        """Build the ``TextChatParameters`` for ``request``.

        Temperature, ``max_tokens`` and ``top_p`` are copied from the request;
        logprobs are requested and both penalties are set to 0.
        """
        chat_params = TextChatParameters(
            logprobs=True,
            presence_penalty=0,
            frequency_penalty=0,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            top_p=request.top_p,
        )
        return chat_params

    def parse_response(self, response: dict) -> List[GeneratedOutput]:
        """Turn ``response["choices"]`` into ``GeneratedOutput`` objects.

        Each choice contributes its message content plus one ``Token`` per
        entry of ``logprobs.content``. Any exception raised while parsing is
        logged, and the completions built up to that point are returned.
        """
        outputs = []
        try:
            for choice in response["choices"]:
                total_logprob: float = 0
                choice_tokens: List[Token] = []
                text = choice["message"]["content"]

                for entry in choice["logprobs"]["content"]:
                    token_logprob = entry["logprob"]
                    choice_tokens.append(Token(text=entry["token"], logprob=token_logprob))
                    total_logprob += token_logprob

                outputs.append(GeneratedOutput(text=text, logprob=total_logprob, tokens=choice_tokens))
        except Exception as e:
            hlog(f"ChatModelInferenceHandler failed with exception {e} during parse_response {response}")
        return outputs

    def serve_request(self, prompt: str, params: TextChatParameters) -> Dict:
        """Send ``prompt`` as a single user message while holding the model's semaphore."""
        user_messages = [{"role": "user", "content": prompt}]
        with _get_semaphore(self.inference_engine.model_id):
            return self.inference_engine.chat(messages=user_messages, params=params)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class IbmClient(CachingClient, ABC):
    """Base caching client for IBM watsonx models.

    The constructor resolves the project id and service URL for the requested
    region, then builds the ``ModelInference`` engine shared by all requests.
    Subclasses implement ``make_request`` and typically delegate to ``do_call``
    with the handler that matches their endpoint.
    """

    def __init__(
        self,
        cache_config: CacheConfig,
        api_key: str,
        region: str,
        location: dict,
        watsonx_model_name: str,
        **kwargs,
    ):
        """Resolve the region configuration and create the inference engine.

        :param cache_config: cache configuration forwarded to ``CachingClient``
        :param api_key: API key used to build the watsonx ``Credentials``
        :param region: region name, matched case-insensitively against ``location``
        :param location: despite the ``dict`` annotation, this is iterated as a
            sequence of per-region mappings, each providing ``region``, ``url``
            and ``project_id``
        :param watsonx_model_name: model id passed to ``ModelInference``
        :param kwargs: extra arguments, stored on ``self.kwargs``
        :raises AssertionError: if no entry for ``region`` yields a project id and url
        """
        super().__init__(cache_config=cache_config)
        self.project_id = None
        self.url = None
        self.watsonx_model_name = watsonx_model_name
        self.api_key = api_key
        self.region = region
        self.kwargs = kwargs
        # No early exit: if several entries match the region, the last one wins.
        for entry in location:
            if entry["region"].lower() == self.region.lower():
                self.project_id = entry["project_id"]
                self.url = entry["url"]

        # Raised explicitly instead of via `assert`, so the configuration check
        # still runs under `python -O`. The exception type and messages are the
        # same as the assertions they replace.
        if self.project_id is None:
            raise AssertionError(
                "Missed project_id for specified region configuration in credentials.conf, should be in list "
                "of JSON objects with 'region', 'url', 'project_id' per region"
            )
        if self.url is None:
            raise AssertionError(
                "Missed url for specified region configuration in credentials.conf, should be in list "
                "of JSON objects with 'region', 'url', 'project_id' per region"
            )

        self.inference_engine = ModelInference(
            model_id=self.watsonx_model_name,
            params={GenParams.MAX_NEW_TOKENS: 2000},
            credentials=Credentials(api_key=api_key, url=self.url),
            project_id=self.project_id,
        )

        hlog("Started IBM Client")

    @abstractmethod
    def make_request(self, request: Request) -> RequestResult:
        """Serve ``request`` and return its ``RequestResult``."""
        pass

    def do_call(self, inference_handler: ModelInferenceHandler, request: Request) -> RequestResult:
        """Serve ``request`` through ``inference_handler``, using the client cache.

        The cache key is derived from the prompt, the handler's parameters and
        the request model. On a cache miss the handler's ``serve_request`` is
        invoked, wrapped by ``wrap_request_time``. Exceptions are not caught
        here.

        :param inference_handler: handler that builds parameters, sends the
            request and parses the response
        :param request: request to serve
        :return: a successful ``RequestResult`` with the parsed completions
        """
        params = inference_handler.create_params(request=request)

        def do_it() -> Dict[str, Any]:
            return inference_handler.serve_request(prompt=request.prompt, params=params)

        raw_request = {"prompt": request.prompt, "params": params.to_dict(), "model": request.model}

        cache_key = CachingClient.make_cache_key(raw_request, request)
        response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
        completions = inference_handler.parse_response(response)
        return RequestResult(
            success=True,
            cached=cached,
            request_time=response["request_time"],
            request_datetime=response.get("request_datetime"),
            completions=completions,
            embedding=[],
        )
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class IbmChatClient(IbmClient):
    """IBM client that serves requests with ``ChatModelInferenceHandler``."""

    def make_request(self, request: Request) -> RequestResult:
        """Serve ``request`` through the chat handler.

        Embedding requests get ``EMBEDDING_UNAVAILABLE_REQUEST_RESULT``. Any
        exception raised while serving is converted into an unsuccessful
        ``RequestResult`` carrying the error message.
        """
        # Embedding not supported for this model
        if request.embedding:
            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT

        try:
            handler = ChatModelInferenceHandler(inference_engine=self.inference_engine)
            return self.do_call(inference_handler=handler, request=request)
        except Exception as e:
            return RequestResult(
                success=False,
                cached=False,
                error=f"IBM Chat client Model error: {e}",
                completions=[],
                embedding=[],
            )
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class IbmTextClient(IbmClient):
    """IBM client that serves requests with ``GenerateInferenceHandler``."""

    def make_request(self, request: Request) -> RequestResult:
        """Serve ``request`` through the text-generation handler.

        Embedding requests get ``EMBEDDING_UNAVAILABLE_REQUEST_RESULT``. Any
        exception raised while serving is converted into an unsuccessful
        ``RequestResult`` carrying the error message.
        """
        # Embedding not supported for this model
        if request.embedding:
            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT

        try:
            handler = GenerateInferenceHandler(inference_engine=self.inference_engine)
            return self.do_call(inference_handler=handler, request=request)
        except Exception as e:
            return RequestResult(
                success=False,
                cached=False,
                error=f"IBM Text client Model error: {e}",
                completions=[],
                embedding=[],
            )
|
|
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
|
|
|
9
9
|
DecodeRequestResult,
|
|
10
10
|
)
|
|
11
11
|
from helm.clients.client import Client, CachingClient
|
|
12
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
12
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class AdobeVisionClient(Client):
|
|
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
|
|
|
9
9
|
DecodeRequestResult,
|
|
10
10
|
)
|
|
11
11
|
from helm.clients.client import Client, CachingClient
|
|
12
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
12
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class AlephAlphaImageGenerationClient(Client):
|
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
@Contact : dm18@mails.tsinghua.edu.cn
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from .direct_sr import DirectSuperResolution
|
|
10
|
-
from .iterative_sr import IterativeSuperResolution
|
|
11
|
-
from .sr_group import SRGroup
|
|
9
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.direct_sr import DirectSuperResolution
|
|
10
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.iterative_sr import IterativeSuperResolution
|
|
11
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.sr_group import SRGroup
|
|
12
12
|
|
|
13
13
|
DirectSuperResolution
|
|
14
14
|
IterativeSuperResolution
|
|
@@ -10,8 +10,11 @@
|
|
|
10
10
|
import torch
|
|
11
11
|
from icetk import icetk as tokenizer
|
|
12
12
|
|
|
13
|
-
from .dsr_sampling import
|
|
14
|
-
|
|
13
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.dsr_sampling import (
|
|
14
|
+
filling_sequence_dsr,
|
|
15
|
+
IterativeEntfilterStrategy,
|
|
16
|
+
)
|
|
17
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.dsr_model import DsrModel
|
|
15
18
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
16
19
|
|
|
17
20
|
|
|
@@ -8,8 +8,11 @@
|
|
|
8
8
|
import torch
|
|
9
9
|
from icetk import icetk as tokenizer
|
|
10
10
|
|
|
11
|
-
from .itersr_sampling import
|
|
12
|
-
|
|
11
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.itersr_sampling import (
|
|
12
|
+
filling_sequence_itersr,
|
|
13
|
+
IterativeEntfilterStrategy,
|
|
14
|
+
)
|
|
15
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.itersr_model import ItersrModel
|
|
13
16
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
14
17
|
|
|
15
18
|
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
# here put the import lib
|
|
10
|
-
from .direct_sr import DirectSuperResolution
|
|
11
|
-
from .iterative_sr import IterativeSuperResolution
|
|
10
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.direct_sr import DirectSuperResolution
|
|
11
|
+
from helm.clients.image_generation.cogview2.sr_pipeline.iterative_sr import IterativeSuperResolution
|
|
12
12
|
|
|
13
13
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
14
14
|
|
|
@@ -20,7 +20,7 @@ from helm.common.tokenization_request import (
|
|
|
20
20
|
)
|
|
21
21
|
from helm.clients.client import Client, CachingClient
|
|
22
22
|
from helm.clients.image_generation.cogview2.coglm_strategy import CoglmStrategy
|
|
23
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
23
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class CogView2Client(Client):
|
|
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
|
|
|
15
15
|
)
|
|
16
16
|
from helm.clients.moderation_api_client import ModerationAPIClient
|
|
17
17
|
from helm.clients.client import Client, CachingClient
|
|
18
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
18
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
19
19
|
|
|
20
20
|
try:
|
|
21
21
|
import openai
|
|
@@ -7,8 +7,8 @@ from helm.common.optional_dependencies import handle_module_not_found_error
|
|
|
7
7
|
from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
|
|
8
8
|
from helm.clients.moderation_api_client import ModerationAPIClient
|
|
9
9
|
from helm.clients.client import CachingClient
|
|
10
|
-
from .dalle2_client import DALLE2Client
|
|
11
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
10
|
+
from helm.clients.image_generation.dalle2_client import DALLE2Client
|
|
11
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
import openai
|
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
from datasets import Dataset, load_dataset
|
|
8
8
|
|
|
9
|
-
from .model.text import TextNormalizer
|
|
9
|
+
from helm.clients.image_generation.dalle_mini.model.text import TextNormalizer
|
|
10
10
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
11
11
|
|
|
12
12
|
try:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from .configuration import DalleBartConfig
|
|
2
|
-
from .modeling import DalleBart
|
|
3
|
-
from .partitions import set_partitions
|
|
4
|
-
from .processor import DalleBartProcessor
|
|
5
|
-
from .tokenizer import DalleBartTokenizer
|
|
1
|
+
from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
|
|
2
|
+
from helm.clients.image_generation.dalle_mini.model.modeling import DalleBart
|
|
3
|
+
from helm.clients.image_generation.dalle_mini.model.partitions import set_partitions
|
|
4
|
+
from helm.clients.image_generation.dalle_mini.model.processor import DalleBartProcessor
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.model.tokenizer import DalleBartTokenizer
|
|
@@ -18,7 +18,7 @@ import warnings
|
|
|
18
18
|
from transformers.configuration_utils import PretrainedConfig
|
|
19
19
|
from transformers.utils import logging
|
|
20
20
|
|
|
21
|
-
from .utils import PretrainedFromWandbMixin
|
|
21
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
22
22
|
|
|
23
23
|
logger = logging.get_logger(__name__)
|
|
24
24
|
|
|
@@ -35,8 +35,8 @@ from transformers.utils import ModelOutput, logging
|
|
|
35
35
|
from transformers.generation.configuration_utils import GenerationConfig
|
|
36
36
|
|
|
37
37
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
38
|
-
from .configuration import DalleBartConfig
|
|
39
|
-
from .utils import PretrainedFromWandbMixin
|
|
38
|
+
from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
|
|
39
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
40
40
|
|
|
41
41
|
try:
|
|
42
42
|
import flax
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
-
from .configuration import DalleBartConfig
|
|
6
|
-
from .text import TextNormalizer
|
|
7
|
-
from .tokenizer import DalleBartTokenizer
|
|
8
|
-
from .utils import PretrainedFromWandbMixin
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
|
|
6
|
+
from helm.clients.image_generation.dalle_mini.model.text import TextNormalizer
|
|
7
|
+
from helm.clients.image_generation.dalle_mini.model.tokenizer import DalleBartTokenizer
|
|
8
|
+
from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
|
|
9
9
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
10
10
|
|
|
11
11
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
from . import *
|
|
1
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax import *
|
|
@@ -2,8 +2,8 @@ import re
|
|
|
2
2
|
|
|
3
3
|
import torch
|
|
4
4
|
|
|
5
|
-
from .modeling_flax_vqgan import VQModel
|
|
6
|
-
from .configuration_vqgan import VQGANConfig
|
|
5
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax.modeling_flax_vqgan import VQModel
|
|
6
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax.configuration_vqgan import VQGANConfig
|
|
7
7
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
8
8
|
|
|
9
9
|
try:
|
|
@@ -6,7 +6,7 @@ import math
|
|
|
6
6
|
|
|
7
7
|
from transformers.modeling_flax_utils import FlaxPreTrainedModel
|
|
8
8
|
|
|
9
|
-
from .configuration_vqgan import VQGANConfig
|
|
9
|
+
from helm.clients.image_generation.dalle_mini.vqgan_jax.configuration_vqgan import VQGANConfig
|
|
10
10
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
11
11
|
|
|
12
12
|
try:
|
|
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
|
|
|
15
15
|
TokenizationRequestResult,
|
|
16
16
|
)
|
|
17
17
|
from helm.clients.client import Client, CachingClient
|
|
18
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
18
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class DALLEMiniClient(Client):
|
|
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
|
|
|
9
9
|
DecodeRequestResult,
|
|
10
10
|
)
|
|
11
11
|
from helm.clients.client import Client, CachingClient
|
|
12
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
12
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class DeepFloydClient(Client):
|
|
@@ -17,7 +17,7 @@ from helm.common.tokenization_request import (
|
|
|
17
17
|
TokenizationRequestResult,
|
|
18
18
|
)
|
|
19
19
|
from helm.clients.client import Client, CachingClient
|
|
20
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
20
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
_models_lock: Lock = Lock()
|
|
@@ -14,7 +14,7 @@ from helm.common.tokenization_request import (
|
|
|
14
14
|
DecodeRequestResult,
|
|
15
15
|
)
|
|
16
16
|
from helm.clients.client import Client, CachingClient
|
|
17
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
17
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class LexicaClient(Client):
|
|
@@ -11,12 +11,12 @@ from typing import Optional, Tuple
|
|
|
11
11
|
from torch.cuda.amp import autocast
|
|
12
12
|
from torch.optim.lr_scheduler import CosineAnnealingLR
|
|
13
13
|
from torch.nn import functional as F
|
|
14
|
-
from .stage1.vqgan import VQGAN
|
|
15
|
-
from .stage2.transformer import Transformer1d, iGPT
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from .tokenizer import build_tokenizer
|
|
14
|
+
from helm.clients.image_generation.mindalle.models.stage1.vqgan import VQGAN
|
|
15
|
+
from helm.clients.image_generation.mindalle.models.stage2.transformer import Transformer1d, iGPT
|
|
16
|
+
from helm.clients.image_generation.mindalle import utils
|
|
17
|
+
from helm.clients.image_generation.mindalle.utils.config import get_base_config
|
|
18
|
+
from helm.clients.image_generation.mindalle.utils.sampling import sampling, sampling_igpt
|
|
19
|
+
from helm.clients.image_generation.mindalle.models.tokenizer import build_tokenizer
|
|
20
20
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
21
21
|
|
|
22
22
|
|
|
@@ -7,7 +7,7 @@ import torch
|
|
|
7
7
|
import torch.nn as nn
|
|
8
8
|
from typing import List, Tuple, Optional
|
|
9
9
|
|
|
10
|
-
from .layers import Encoder, Decoder
|
|
10
|
+
from helm.clients.image_generation.mindalle.models.stage1.layers import Encoder, Decoder
|
|
11
11
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
12
12
|
|
|
13
13
|
|
|
@@ -11,7 +11,7 @@ import torch
|
|
|
11
11
|
import torch.nn as nn
|
|
12
12
|
from typing import Optional, Tuple, List
|
|
13
13
|
from torch.cuda.amp import autocast
|
|
14
|
-
from .layers import Block
|
|
14
|
+
from helm.clients.image_generation.mindalle.models.stage2.layers import Block
|
|
15
15
|
|
|
16
16
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
17
17
|
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
from .utils import *
|
|
2
|
-
from .config import *
|
|
3
|
-
from .sampling import *
|
|
1
|
+
from helm.clients.image_generation.mindalle.utils.utils import *
|
|
2
|
+
from helm.clients.image_generation.mindalle.utils.config import *
|
|
3
|
+
from helm.clients.image_generation.mindalle.utils.sampling import *
|
|
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
|
|
|
15
15
|
TokenizationRequestResult,
|
|
16
16
|
)
|
|
17
17
|
from helm.clients.client import Client, CachingClient
|
|
18
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
18
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
19
19
|
|
|
20
20
|
try:
|
|
21
21
|
from PIL import Image
|
|
@@ -13,7 +13,7 @@ from helm.common.tokenization_request import (
|
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
from helm.clients.client import CachingClient, Client
|
|
16
|
-
from .image_generation_client_utils import get_single_image_multimedia_object
|
|
16
|
+
from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class TogetherImageGenerationClient(Client):
|
helm/clients/lit_gpt_client.py
CHANGED
|
@@ -12,8 +12,8 @@ from helm.common.optional_dependencies import OptionalDependencyNotInstalled
|
|
|
12
12
|
from helm.common.request import Request, RequestResult, GeneratedOutput, Token
|
|
13
13
|
from helm.tokenizers.tokenizer import Tokenizer
|
|
14
14
|
|
|
15
|
-
from .client import CachingClient
|
|
16
|
-
from .lit_gpt_generate import generate # type: ignore
|
|
15
|
+
from helm.clients.client import CachingClient
|
|
16
|
+
from helm.clients.lit_gpt_generate import generate # type: ignore
|
|
17
17
|
|
|
18
18
|
try:
|
|
19
19
|
import lightning as L
|