crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic.
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
- crfm_helm-0.5.1.dist-info/RECORD +654 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +31 -3
- helm/benchmark/adaptation/adapters/adapter.py +2 -2
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/gender_perturbation.py +1 -1
- helm/benchmark/augmentations/perturbation.py +25 -3
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/test_perturbation.py +41 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/config_registry.py +7 -1
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +20 -7
- helm/benchmark/metrics/basic_metrics.py +169 -664
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -110
- helm/benchmark/metrics/dry_run_metrics.py +2 -2
- helm/benchmark/metrics/efficiency_metrics.py +213 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +89 -0
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +2 -2
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -2
- helm/benchmark/metrics/test_classification_metrics.py +8 -5
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +74 -0
- helm/benchmark/model_metadata_registry.py +41 -1
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +26 -10
- helm/benchmark/presentation/schema.py +15 -40
- helm/benchmark/presentation/summarize.py +119 -79
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +1 -2
- helm/benchmark/presentation/test_summarize.py +3 -3
- helm/benchmark/run.py +54 -26
- helm/benchmark/run_expander.py +205 -35
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +163 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/run_specs/classic_run_specs.py +1510 -0
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +757 -0
- helm/benchmark/runner.py +51 -57
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +1 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +6 -2
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +1 -1
- helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
- helm/benchmark/scenarios/scenario.py +4 -0
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +6 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
- helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
- helm/benchmark/server.py +24 -1
- helm/benchmark/slurm_runner.py +70 -49
- helm/benchmark/static/benchmarking.js +1 -1
- helm/benchmark/static/schema_classic.yaml +258 -1066
- helm/benchmark/static/schema_image2structure.yaml +304 -0
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +2 -227
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
- helm/benchmark/static/schema_vlm.yaml +823 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
- helm/benchmark/static_build/assets/index-878a1094.css +1 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -44
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +4 -1
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +3 -9
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +233 -18
- helm/{proxy/clients → clients}/auto_client.py +59 -31
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +65 -7
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +4 -11
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +5 -5
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +5 -7
- helm/{proxy/clients → clients}/huggingface_client.py +43 -64
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
- helm/{proxy/clients → clients}/megatron_client.py +5 -5
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +301 -0
- helm/{proxy/clients → clients}/palmyra_client.py +6 -8
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +13 -15
- helm/clients/test_client.py +100 -0
- helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +20 -8
- helm/{proxy/clients → clients}/together_client.py +104 -73
- helm/clients/vertexai_client.py +400 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
- helm/clients/vision_language/huggingface_vlm_client.py +111 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +16 -4
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +1 -1
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +33 -3
- helm/common/key_value_store.py +35 -4
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/request.py +15 -17
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +1 -1
- helm/config/model_deployments.yaml +1159 -538
- helm/config/model_metadata.yaml +868 -41
- helm/config/tokenizer_configs.yaml +149 -43
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +8 -6
- helm/proxy/example_queries.py +29 -17
- helm/proxy/server.py +70 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +96 -16
- helm/proxy/services/service.py +30 -0
- helm/proxy/services/test_remote_service.py +4 -3
- helm/proxy/services/test_service.py +0 -12
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.4.0.dist-info/RECORD +0 -397
- helm/benchmark/run_specs.py +0 -2762
- helm/benchmark/test_model_deployment_definition.py +0 -92
- helm/benchmark/test_model_properties.py +0 -1570
- helm/benchmark/vlm_run_specs.py +0 -97
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/huggingface_window_service.py +0 -60
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/common/cache_utils.py +0 -14
- helm/proxy/clients/aleph_alpha_client.py +0 -95
- helm/proxy/clients/goose_ai_client.py +0 -99
- helm/proxy/clients/microsoft_client.py +0 -180
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/simple_client.py +0 -60
- helm/proxy/clients/test_client.py +0 -49
- helm/proxy/clients/vertexai_client.py +0 -115
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
- helm/proxy/token_counters/test_openai_token_counter.py +0 -81
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/clients/vision_language/qwen_vlm_client.py
ADDED
@@ -0,0 +1,171 @@
+from threading import Lock
+from typing import Any, Dict, List, Optional
+
+from dataclasses import dataclass
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import GenerationConfig
+
+from helm.common.cache import CacheConfig
+from helm.common.gpu_utils import get_torch_device_name
+from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.media_object import TEXT_TYPE
+from helm.common.request import Request, RequestResult, GeneratedOutput, Token
+from helm.common.request import wrap_request_time
+from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt
+
+
+@dataclass(frozen=True)
+class LoadedQwenModelProcessor:
+    """Loaded model and processor for Qwen."""
+
+    model: AutoModelForCausalLM
+    tokenizer: AutoTokenizer
+
+
+_models_lock: Lock = Lock()
+_models: Dict[str, Optional[LoadedQwenModelProcessor]] = {
+    "Qwen/Qwen-VL": None,
+    "Qwen/Qwen-VL-Chat": None,
+}
+
+
+class QwenVLMClient(CachingClient):
+    """
+    From https://huggingface.co/Qwen/Qwen-VL,
+    Qwen-VL (Qwen Large Vision Language Model) is the visual multimodal version of the large model series,
+    Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen-VL accepts image, text, and bounding box
+    as inputs, outputs text and bounding box.
+    Alibaba released Qwen-VL and Qwen-VL-Chat, which is a chatbot model based on Qwen-VL.
+
+    Paper: https://arxiv.org/abs/2308.12966
+    """
+
+    END_OF_TEXT_TOKEN: str = "<|endoftext|>"
+
+    def __init__(self, cache_config: CacheConfig):
+        super().__init__(cache_config=cache_config)
+        self._device: str = get_torch_device_name()
+
+    def _get_model(self, helm_model_name: str) -> LoadedQwenModelProcessor:
+        global _models_lock
+        global _models
+
+        model_name: str
+        if helm_model_name == "qwen-vl-chat":
+            model_name = "Qwen/Qwen-VL-Chat"
+        elif helm_model_name == "qwen-vl":
+            model_name = "Qwen/Qwen-VL"
+        else:
+            raise ValueError(f"Unhandled model name: {helm_model_name}")
+
+        # Ensure that only one thread is loading the model at a time
+        with _models_lock:
+            loaded_model_processor = _models[model_name]
+            if loaded_model_processor is None:
+                hlog(f"Loading model {model_name} and caching in memory...")
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_name, device_map=self._device, trust_remote_code=True, bf16=True
+                ).eval()
+                if model_name == "Qwen/Qwen-VL-Chat":
+                    model.generation_config = GenerationConfig.from_pretrained(model_name, trust_remote_code=True)
+                tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+                _models[model_name] = LoadedQwenModelProcessor(model, tokenizer)
+                loaded_model_processor = _models[model_name]
+
+        assert loaded_model_processor is not None
+        return loaded_model_processor
+
+    def make_request(self, request: Request) -> RequestResult:
+        assert request.multimodal_prompt is not None, "Multimodal prompt is required"
+
+        loaded_model_processor: LoadedQwenModelProcessor = self._get_model(request.model_engine)
+        model = loaded_model_processor.model
+        tokenizer = loaded_model_processor.tokenizer
+
+        generation_args = {
+            "max_length": request.max_tokens,
+        }
+
+        query: List[Dict[str, str]] = []
+        prompt_text: str = ""
+
+        image_index: int = 1
+        for media_object in request.multimodal_prompt.media_objects:
+            if media_object.is_type("image") and media_object.location:
+                query.append({"image": media_object.location})
+                prompt_text += f"Picture {image_index}: <img>{media_object.location}</img>\n"
+                image_index += 1
+            elif media_object.is_type(TEXT_TYPE):
+                if media_object.text is None:
+                    raise ValueError("MediaObject of text type has missing text field value")
+
+                query.append({"text": media_object.text})
+                prompt_text += media_object.text
+            else:
+                raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+
+        completions: List[GeneratedOutput] = []
+        request_time: float = 0
+        request_datetime: Optional[int] = None
+        all_cached: bool = True
+
+        with htrack_block(f"Generating for prompt: {prompt_text}"):
+            for completion_index in range(request.num_completions):
+                try:
+
+                    def do_it() -> Dict[str, Any]:
+                        if request.model_engine == "qwen-vl-chat":
+                            completion, _ = model.chat(tokenizer, query=tokenizer.from_list_format(query), history=None)
+                        else:
+                            inputs = tokenizer(tokenizer.from_list_format(query), return_tensors="pt")
+                            inputs = inputs.to(self._device)
+                            pred = model.generate(**inputs, **generation_args)
+                            completion = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)
+
+                        tokens: List[str] = tokenizer.tokenize(completion)
+                        return {"output": (completion, tokens)}
+
+                    # Include the prompt and model name in the cache key
+                    cache_key = CachingClient.make_cache_key(
+                        raw_request={
+                            "completion_index": completion_index,
+                            "model": request.model,
+                            "prompt": generate_uid_for_multimodal_prompt(request.multimodal_prompt),
+                            **generation_args,
+                        },
+                        request=request,
+                    )
+                    result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+                except RuntimeError as model_error:
+                    return RequestResult(
+                        success=False, cached=False, error=str(model_error), completions=[], embedding=[]
+                    )
+
+                text, tokens = result["output"]
+
+                # Truncate the output text as the original Qwen includes the prompt in the output sequence
+                if request.model_engine == "qwen-vl":
+                    text = text[len(prompt_text) :]
+                    text = text.replace(self.END_OF_TEXT_TOKEN, "")
+                    hlog(f"Truncated: {text}")
+
+                # Tokenize truncated text to get the list of tokens
+                completions.append(
+                    GeneratedOutput(
+                        text=text, logprob=0, tokens=[Token(text=str(token), logprob=0) for token in tokens]
+                    )
+                )
+
+                request_time += result["request_time"]
+                # Use the datetime from the first completion because that's when the request was fired
+                request_datetime = request_datetime or result.get("request_datetime")
+                all_cached = all_cached and cached
+
+        return RequestResult(
+            success=True,
+            cached=all_cached,
+            request_time=request_time,
+            request_datetime=request_datetime,
+            completions=completions,
+            embedding=[],
+        )

helm/clients/vllm_client.py
ADDED
@@ -0,0 +1,46 @@
+from typing import Any, Dict, Optional
+
+from helm.common.cache import CacheConfig
+from helm.common.request import Request
+from helm.clients.openai_client import OpenAIClient
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class VLLMClient(OpenAIClient):
+    """Sends request to a vLLM server using the OpenAI-compatible API.
+
+    See: https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server"""
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        base_url: Optional[str] = None,
+    ):
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key="EMPTY",
+            org_id=None,
+            base_url=base_url,
+        )
+        self.tokenizer = tokenizer
+        self.tokenizer_name = tokenizer_name
+
+    def _is_chat_model_engine(self, model_engine: str) -> bool:
+        # Only support vLLM completion models for now.
+        return False
+
+    def _get_model_for_request(self, request: Request) -> str:
+        # The `model` parameter for vLLM should be the whole model name including the creator organization,
+        # unlike OpenAI which only uses the model engine.
+        return request.model
+
+    def _to_raw_completion_request(self, request: Request) -> Dict[str, Any]:
+        raw_request = super()._to_raw_completion_request(request)
+        # This avoids the error: best_of must be 1 when using greedy sampling
+        if "best_of" in raw_request and raw_request["best_of"] > 1:
+            raw_request["best_of"] = 1
+        return raw_request

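The `VLLMClient` above reuses the OpenAI request path against a vLLM server's OpenAI-compatible endpoint. As a rough, hypothetical sketch of what that endpoint accepts (not code from this package; the local URL and model name are assumptions), a plain completion call might look like:

```python
# Hypothetical sketch: query a locally running vLLM OpenAI-compatible server directly.
# Assumes something like `vllm serve mistralai/Mistral-7B-v0.1` is listening on port 8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # vLLM ignores the key
response = client.completions.create(
    model="mistralai/Mistral-7B-v0.1",  # full model name, including the organization
    prompt="The capital of France is",
    max_tokens=8,
    temperature=0.0,
)
print(response.choices[0].text)
```
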
helm/common/cache.py
CHANGED
@@ -1,13 +1,13 @@
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Dict, Callable, Generator, Optional, Tuple
+from typing import Dict, Callable, Generator, Mapping, Optional, Tuple
 import json
 import threading
 
 import sqlite3
 
 from helm.common.general import hlog, htrack
-from helm.common.key_value_store import KeyValueStore, SqliteKeyValueStore
+from helm.common.key_value_store import BlackHoleKeyValueStore, KeyValueStore, SqliteKeyValueStore
 from helm.proxy.retry import get_retry_decorator
 
 try:
@@ -51,6 +51,16 @@ class SqliteCacheConfig(KeyValueStoreCacheConfig):
         return self.path
 
 
+@dataclass(frozen=True)
+class BlackHoleCacheConfig(KeyValueStoreCacheConfig):
+    """Configuration for a cache that does not save any data."""
+
+    @property
+    def cache_stats_key(self) -> str:
+        """The string key used by CacheStats to identify this cache."""
+        return "disabled_cache"
+
+
 @dataclass(frozen=True)
 class MongoCacheConfig(KeyValueStoreCacheConfig):
     """Configuration for a cache backed by a MongoDB collection."""
@@ -113,12 +123,14 @@ def create_key_value_store(config: KeyValueStoreCacheConfig) -> KeyValueStore:
         return MongoKeyValueStore(config.uri, config.collection_name)
     elif isinstance(config, SqliteCacheConfig):
         return SqliteKeyValueStore(config.path)
+    elif isinstance(config, BlackHoleCacheConfig):
+        return BlackHoleKeyValueStore()
     else:
         raise ValueError(f"KeyValueStoreCacheConfig with unknown type: {config}")
 
 
 @retry
-def write_to_key_value_store(key_value_store: KeyValueStore, key: Dict, response: Dict) -> bool:
+def write_to_key_value_store(key_value_store: KeyValueStore, key: Mapping, response: Dict) -> bool:
     """
     Write to the key value store with retry. Returns boolean indicating whether the write was successful or not.
     """
@@ -188,7 +200,7 @@ class Cache(object):
         else:
             raise ValueError(f"CacheConfig with unknown type: {config}")
 
-    def get(self, request: Dict, compute: Callable[[], Dict]) -> Tuple[Dict, bool]:
+    def get(self, request: Mapping, compute: Callable[[], Dict]) -> Tuple[Dict, bool]:
         """Get the result of `request` (by calling `compute` as needed)."""
         cache_stats.increment_query(self.config.cache_stats_key)
 

helm/common/cache_backend_config.py
ADDED
@@ -0,0 +1,47 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+import os
+
+from helm.common.cache import CacheConfig, MongoCacheConfig, BlackHoleCacheConfig, SqliteCacheConfig
+
+
+class CacheBackendConfig(ABC):
+    """Config for a cache backend."""
+
+    @abstractmethod
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        """Get a CacheConfig for the given shard."""
+        pass
+
+
+@dataclass(frozen=True)
+class MongoCacheBackendConfig(CacheBackendConfig):
+    """Config for a MongoDB cache backend."""
+
+    uri: str
+    """URL for the MongoDB database that contains the collection.
+
+    Example format: mongodb://[username:password@]host1[:port1]/[dbname]
+    For full format, see: https://www.mongodb.com/docs/manual/reference/connection-string/"""
+
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        return MongoCacheConfig(uri=self.uri, collection_name=shard_name)
+
+
+@dataclass(frozen=True)
+class BlackHoleCacheBackendConfig(CacheBackendConfig):
+    """Config for a cache backend that does not save any data."""
+
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        return BlackHoleCacheConfig()
+
+
+@dataclass(frozen=True)
+class SqliteCacheBackendConfig(CacheBackendConfig):
+    """Config for a Sqlite cache backend."""
+
+    path: str
+    """Path for the directory that will contain Sqlite files for caches."""
+
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        return SqliteCacheConfig(path=os.path.join(self.path, f"{shard_name}.sqlite"))

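A minimal usage sketch of the new backend configs, based only on the code shown above (the `prod_env/cache` path and the `openai` shard name are made-up examples):

```python
# Hypothetical sketch of how a backend config maps a shard name to a per-shard CacheConfig.
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig, SqliteCacheBackendConfig

sqlite_backend = SqliteCacheBackendConfig(path="prod_env/cache")
print(sqlite_backend.get_cache_config("openai"))
# e.g. SqliteCacheConfig(path='prod_env/cache/openai.sqlite')

disabled_backend = BlackHoleCacheBackendConfig()
print(disabled_backend.get_cache_config("openai"))
# e.g. BlackHoleCacheConfig(), whose key-value store discards every write
```
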
helm/common/clip_score_request.py
ADDED
@@ -0,0 +1,41 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+DEFAULT_CLIP_SCORE_MODEL = "openai/clip-vit-large-patch14"
+
+
+@dataclass(frozen=True)
+class CLIPScoreRequest:
+    """
+    Computes a CLIPScore for a given caption and image.
+    """
+
+    # Caption to compute CLIPScore for
+    caption: str
+
+    # Location of the image
+    image_location: str
+
+    # Which CLIP model to use
+    model: str = DEFAULT_CLIP_SCORE_MODEL
+
+    # Compute multilingual CLIPScore
+    multilingual: bool = False
+
+
+@dataclass(frozen=True)
+class CLIPScoreResult:
+    """Result after sending a `CLIPScoreRequest`."""
+
+    # Whether the request was successful
+    success: bool
+
+    # Whether the request was cached
+    cached: bool
+
+    # The CLIPScore
+    score: float = 0.0
+
+    # If `success` is false, what was the error?
+    error: Optional[str] = None

helm/common/file_caches/__init__.py
File without changes

helm/common/file_caches/file_cache.py
ADDED
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import Callable
+
+
+class FileCache(ABC):
+    """
+    Cache to store files.
+    """
+
+    @abstractmethod
+    def store(self, compute: Callable[[], bytes]) -> str:
+        """
+        Stores the output of `compute` as a file at a unique location.
+        Returns the location of the file.
+        """
+        pass

helm/common/file_caches/local_file_cache.py
ADDED
@@ -0,0 +1,61 @@
+import os
+from typing import Callable
+
+from helm.common.general import ensure_directory_exists, generate_unique_id
+from .file_cache import FileCache
+
+from helm.common.optional_dependencies import handle_module_not_found_error
+
+try:
+    from PIL import Image
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["images"])
+
+
+class LocalFileCache(FileCache):
+    def __init__(self, base_path: str, file_extension: str):
+        ensure_directory_exists(base_path)
+        self._location: str = base_path
+        self._file_extension: str = file_extension
+
+    def store(self, compute: Callable[[], bytes]) -> str:
+        """
+        Stores the output of `compute` as a file at a unique path.
+        Returns the file path.
+        """
+        file_path: str = self.generate_unique_new_file_path()
+        with open(file_path, "wb") as f:
+            f.write(compute())
+
+        return file_path
+
+    def generate_unique_new_file_path(self) -> str:
+        """Generate an unique file name at `base_path`"""
+
+        def generate_one() -> str:
+            file_name: str = f"{generate_unique_id()}.{self._file_extension}"
+            return os.path.join(self._location, file_name)
+
+        file_path: str
+        while True:
+            file_path = generate_one()
+            if not os.path.exists(file_path):
+                break
+        return file_path
+
+
+class LocalPILFileCache(LocalFileCache):
+    def __init__(self, base_path: str):
+        super().__init__(base_path, "png")
+
+    def store_image(self, compute: Callable[[], Image.Image]) -> str:
+        """
+        Stores the output of `compute` as a file at a unique path.
+        Returns the file path.
+        """
+        file_path: str = self.generate_unique_new_file_path()
+        compute().save(file_path)
+        return file_path
+
+    def load_image(self, file_path: str) -> Image.Image:
+        return Image.open(file_path).convert("RGB")

helm/common/file_caches/test_local_file_cache.py
ADDED
@@ -0,0 +1,25 @@
+import os
+import shutil
+import tempfile
+import unittest
+
+from .local_file_cache import LocalFileCache
+
+
+class TestLocalFileCache(unittest.TestCase):
+    def setup_method(self, _):
+        self.path: str = tempfile.mkdtemp()
+
+    def teardown_method(self, _):
+        shutil.rmtree(self.path)
+
+    def test_get(self):
+        cache = LocalFileCache(self.path, file_extension="txt")
+        file_path1: str = cache.store(lambda: "hello.".encode())
+
+        # Verify the contents of the file
+        with open(file_path1, "r") as f:
+            assert f.read() == "hello."
+
+        cache.store(lambda: "bye.".encode())
+        assert len(os.listdir(self.path)) == 2

helm/common/file_upload_request.py
ADDED
@@ -0,0 +1,27 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class FileUploadRequest:
+    """Uploads a file at `path`."""
+
+    # Path of the file to upload
+    path: str
+
+
+@dataclass(frozen=True)
+class FileUploadResult:
+    """Result after sending a `FileUploadRequest`."""
+
+    # Whether the request was successful
+    success: bool
+
+    # Whether the request was cached
+    cached: bool
+
+    # URL of the uploaded file
+    url: str
+
+    # If `success` is false, what was the error?
+    error: Optional[str] = None

helm/common/general.py
CHANGED

helm/common/image_generation_parameters.py
ADDED
@@ -0,0 +1,25 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class ImageGenerationParameters:
+    """
+    Parameters for image generation.
+    """
+
+    output_image_width: Optional[int] = None
+    """Width of the generated image. The model will generate images with the model's
+    default dimensions when unspecified."""
+
+    output_image_height: Optional[int] = None
+    """Height of the generated image. The model will generate images with the model's
+    default dimensions when unspecified."""
+
+    guidance_scale: Optional[float] = None
+    """A non-negative number determining how much importance is given to the prompt
+    when generating images. Higher values will generate images that follow more
+    closely to the prompt. Currently only for diffusion models."""
+
+    diffusion_denoising_steps: Optional[int] = None
+    """The number of denoising steps for diffusion models."""

helm/common/images_utils.py
CHANGED
@@ -1,8 +1,12 @@
 import base64
 import io
+
 import requests
 import shutil
-from typing import Optional
+from typing import List, Optional, Tuple
+from urllib.request import urlopen
+
+import numpy as np
 
 from .general import is_url
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -25,6 +29,12 @@ def open_image(image_location: str) -> Image.Image:
     return image.convert("RGB")
 
 
+def get_dimensions(image_location: str) -> Tuple[int, int]:
+    """Returns the dimensions of the image."""
+    image: Image.Image = open_image(image_location)
+    return image.size
+
+
 def encode_base64(image_location: str, format="JPEG") -> str:
     """Returns the base64 representation of an image file."""
     image_file = io.BytesIO()
@@ -33,7 +43,7 @@ def encode_base64(image_location: str, format="JPEG") -> str:
     return base64.b64encode(image_file.getvalue()).decode("ascii")
 
 
-def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
+def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None) -> None:
     """
     Copies the image file from `src` path to `dest` path. If dimensions `width` and `height`
     are specified, resizes the image before copying. `src` can be a URL.
@@ -41,7 +51,27 @@ def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
     if (width is not None and height is not None) or is_url(src):
         image = open_image(src)
         if width is not None and height is not None:
-            image = image.resize((width, height), Image.LANCZOS)
+            image = image.resize((width, height), Image.Resampling.LANCZOS)
         image.save(dest)
     else:
         shutil.copy(src, dest)
+
+
+def is_blacked_out_image(image_location: str) -> bool:
+    """Returns True if the image is all black. False otherwise."""
+    try:
+        import cv2
+    except ModuleNotFoundError as e:
+        handle_module_not_found_error(e, ["heim"])
+
+    if is_url(image_location):
+        arr = np.asarray(bytearray(urlopen(image_location).read()), dtype=np.uint8)
+        image = cv2.imdecode(arr, -1)
+    else:
+        image = cv2.imread(image_location, 0)
+    return cv2.countNonZero(image) == 0
+
+
+def filter_blacked_out_images(image_locations: List[str]) -> List[str]:
+    """Returns a list of image locations that are not blacked out."""
+    return [image_location for image_location in image_locations if not is_blacked_out_image(image_location)]

helm/common/key_value_store.py
CHANGED
@@ -1,12 +1,12 @@
 from abc import abstractmethod
 import contextlib
 import json
-from typing import Dict, Generator, Iterable, Optional, Tuple
+from typing import Dict, Generator, Iterable, Mapping, Optional, Tuple
 
 from sqlitedict import SqliteDict
 
 
-def request_to_key(request: Dict) -> str:
+def request_to_key(request: Mapping) -> str:
     """Normalize a `request` into a `key` so that we can hash using it."""
     return json.dumps(request, sort_keys=True)
 
@@ -27,7 +27,7 @@ class KeyValueStore(contextlib.AbstractContextManager):
         pass
 
     @abstractmethod
-    def put(self, key: Dict, value: Dict) -> None:
+    def put(self, key: Mapping, value: Dict) -> None:
         pass
 
     @abstractmethod
@@ -68,7 +68,7 @@ class SqliteKeyValueStore(KeyValueStore):
         for key, value in self._sqlite_dict.items():
             yield (key, value)
 
-    def put(self, key: Dict, value: Dict) -> None:
+    def put(self, key: Mapping, value: Dict) -> None:
         key_string = request_to_key(key)
         self._sqlite_dict[key_string] = value
         self._sqlite_dict.commit()
@@ -80,3 +80,34 @@ class SqliteKeyValueStore(KeyValueStore):
     def remove(self, key: Dict) -> None:
         del self._sqlite_dict[key]
         self._sqlite_dict.commit()
+
+
+class BlackHoleKeyValueStore(KeyValueStore):
+    """Key value store that discards all data."""
+
+    def __enter__(self) -> "BlackHoleKeyValueStore":
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        pass
+
+    def contains(self, key: Dict) -> bool:
+        return False
+
+    def get(self, key: Dict) -> Optional[Dict]:
+        return None
+
+    def get_all(self) -> Generator[Tuple[Dict, Dict], None, None]:
+        # Return an empty generator.
+        # See: https://stackoverflow.com/a/13243870
+        return
+        yield
+
+    def put(self, key: Mapping, value: Dict) -> None:
+        return None
+
+    def multi_put(self, pairs: Iterable[Tuple[Dict, Dict]]) -> None:
+        return None
+
+    def remove(self, key: Dict) -> None:
+        return None
