crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +134 -31
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +31 -3
- helm/benchmark/adaptation/adapters/adapter.py +2 -2
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/gender_perturbation.py +1 -1
- helm/benchmark/augmentations/perturbation.py +8 -2
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/config_registry.py +7 -1
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +20 -7
- helm/benchmark/metrics/basic_metrics.py +169 -664
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -110
- helm/benchmark/metrics/dry_run_metrics.py +2 -2
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +89 -0
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +2 -2
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -2
- helm/benchmark/metrics/test_classification_metrics.py +8 -5
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +74 -0
- helm/benchmark/model_metadata_registry.py +36 -0
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +26 -10
- helm/benchmark/presentation/schema.py +15 -40
- helm/benchmark/presentation/summarize.py +119 -79
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +1 -2
- helm/benchmark/presentation/test_summarize.py +3 -3
- helm/benchmark/run.py +54 -26
- helm/benchmark/run_expander.py +214 -16
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/run_specs/classic_run_specs.py +1510 -0
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +51 -57
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +1 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +1 -1
- helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
- helm/benchmark/scenarios/scenario.py +4 -0
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +6 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -2
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
- helm/benchmark/server.py +24 -1
- helm/benchmark/slurm_runner.py +70 -49
- helm/benchmark/static/benchmarking.js +1 -1
- helm/benchmark/static/schema_classic.yaml +258 -1066
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +2 -227
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +14 -16
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -44
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +4 -1
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +3 -9
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +203 -18
- helm/{proxy/clients → clients}/auto_client.py +59 -31
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +65 -7
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +4 -11
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +5 -5
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +5 -7
- helm/{proxy/clients → clients}/huggingface_client.py +43 -64
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
- helm/{proxy/clients → clients}/megatron_client.py +5 -5
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +6 -8
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +13 -15
- helm/clients/test_client.py +100 -0
- helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +20 -8
- helm/{proxy/clients → clients}/together_client.py +12 -72
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +53 -48
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +16 -4
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +1 -1
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +35 -4
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/request.py +15 -17
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +1 -1
- helm/config/model_deployments.yaml +1069 -546
- helm/config/model_metadata.yaml +753 -31
- helm/config/tokenizer_configs.yaml +142 -43
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +8 -6
- helm/proxy/example_queries.py +29 -17
- helm/proxy/server.py +70 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +96 -16
- helm/proxy/services/service.py +30 -0
- helm/proxy/services/test_remote_service.py +4 -3
- helm/proxy/services/test_service.py +0 -12
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.4.0.dist-info/RECORD +0 -397
- helm/benchmark/run_specs.py +0 -2762
- helm/benchmark/test_model_properties.py +0 -1570
- helm/benchmark/vlm_run_specs.py +0 -97
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/huggingface_window_service.py +0 -60
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/common/cache_utils.py +0 -14
- helm/proxy/clients/aleph_alpha_client.py +0 -95
- helm/proxy/clients/goose_ai_client.py +0 -99
- helm/proxy/clients/microsoft_client.py +0 -180
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/simple_client.py +0 -60
- helm/proxy/clients/test_client.py +0 -49
- helm/proxy/clients/vertexai_client.py +0 -115
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
- helm/proxy/token_counters/test_openai_token_counter.py +0 -81
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from datasets import load_dataset
|
|
4
|
+
|
|
5
|
+
from helm.benchmark.scenarios.scenario import Scenario, Instance, Input, TEST_SPLIT
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class I2PScenario(Scenario):
    """
    Scenario built from the Inappropriate Image Prompts (I2P) benchmark.

    I2P collects real user prompts for text-to-image generation that are likely
    to yield inappropriate images. It holds 4703 unique prompts, each assigned
    to one of seven categories (full name => short name where they differ):

        1. hate
        2. harassment
        3. violence
        4. self-harm
        5. sexual content => sexual
        6. shocking images => shocking
        7. illegal activity => illegal

    Pass "all" as `category` to get the prompts of every category.

    The benchmark authors generated 10 images per prompt and compared the
    probability of producing inappropriate images. This scenario keeps only the
    "hard" prompts that PerspectiveAPI does not consider toxic.

    Paper: https://arxiv.org/abs/2211.05105
    Website: https://huggingface.co/datasets/AIML-TUDA/i2p
    """

    # Sentinel `category` value that disables category filtering.
    ALL_CATEGORY: str = "all"

    name = "i2p"
    description = (
        "Inappropriate Image Prompts (I2P) benchmark contains real user prompts for generative "
        "text-to-image prompts that are likely to produce inappropriate images "
        "([paper](https://arxiv.org/abs/2211.05105))."
    )
    tags = ["text-to-image", "harms", "toxicity"]

    def __init__(self, category: str):
        """Remember which category to keep (or `ALL_CATEGORY` for every category)."""
        super().__init__()
        self.category: str = category

    def get_instances(self, _) -> List[Instance]:
        """
        Load the I2P dataset and return one test instance per selected prompt.

        The single positional argument is ignored. A row is kept only when its
        prompt is non-empty, it is flagged as "hard", its prompt toxicity is
        below 0.5, and it matches the requested category.
        """
        selected: List[Instance] = []

        # The dataset does not have separate train/test splits
        for row in load_dataset("AIML-TUDA/i2p", split="train"):
            prompt = row["prompt"]
            if not prompt:
                continue

            # Use the "hard" prompts that are not considered toxic by PerspectiveAPI.
            # The "hard" prompts are more likely to generate toxic images.
            if row["hard"] != 1:
                continue
            if not row["prompt_toxicity"] < 0.5:
                continue

            # Membership is tested with `in` against the row's "categories" field
            # NOTE(review): presumably a string of category names, making this a
            # substring match -- confirm against the dataset schema.
            if not (self.category in row["categories"] or self.category == self.ALL_CATEGORY):
                continue

            # There are sometimes random spaces at the beginning and end of the prompts
            selected.append(Instance(Input(text=prompt.strip()), references=[], split=TEST_SPLIT))

        return selected
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from helm.benchmark.scenarios.scenario import Scenario, Instance, Input, TEST_SPLIT
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LandingPageScenario(Scenario):
    """
    Prompts to generate landing pages for mobile or web applications.

    One prompt is produced for every combination of an application type and a platform,
    in the following format: "a landing page of a <app_type> <platform> application".
    """

    APPLICATION_TYPES: List[str] = [
        "business",
        "design",
        "developer tools",
        "education",
        "entertainment",
        "finance",
        "games",
        "health and fitness",
        "lifestyle",
        "medical",
        "music",
        "news",
        "photo and video",
        "productivity",
        "social networking",
        "sports",
        "travel",
        "weather",
    ]
    PLATFORMS: List[str] = ["mobile", "web"]

    name = "landing_page"
    description = "Prompts to generate landing pages for mobile or web applications."
    tags = ["text-to-image", "originality"]

    def get_instances(self, _) -> List[Instance]:
        """Return a test instance per (application type, platform) pair (the argument is unused)."""
        instances: List[Instance] = []

        # Application types vary slowest, platforms fastest
        for app_type in self.APPLICATION_TYPES:
            for platform in self.PLATFORMS:
                prompt: str = f"a landing page of a {app_type} {platform} application"
                # There are no reference images
                instances.append(Instance(Input(text=prompt), references=[], split=TEST_SPLIT))

        return instances
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from helm.benchmark.scenarios.scenario import Scenario, Instance, Input, TEST_SPLIT
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LogosScenario(Scenario):
    """
    Prompts to generate logos for brands and companies.

    Every prompt has the form "a logo of <company description>", where the descriptions were
    inspired by the Wikipedia descriptions of the Fortune 100 companies for 2022.
    """

    # One description per company; the comment above each entry names the company that inspired it
    COMPANY_DESCRIPTIONS: List[str] = [
        # 1. Walmart
        "a company that operates a chain of hypermarkets, discount department stores and grocery stores",
        # 2. Amazon
        "a technology company that focuses on e-commerce",
        # 3. Apple
        "a technology company that makes smartphones and personal computers",
        # 4. CVS Health
        "a retail corporation with a chain of drugstores and pharmacies",
        # 5. UnitedHealth Group
        "a healthcare and insurance company",
        # 6. ExxonMobil
        "an oil and gas corporation",
        # 7. Berkshire Hathaway
        "an insurance and manufacturing company",
        # 8. Alphabet
        "a technology company that focuses on search engine technology, online advertising and cloud computing",
        # 9. McKesson
        "a company distributing pharmaceuticals and providing health information technology",
        # 10. AmerisourceBergen
        "a drug wholesale company",
        # 11. Costco Wholesale
        "a corporation that operates big-box retail stores or warehouse clubs",
        # 12. Cigna
        "a managed healthcare and insurance company",
        # 13. AT&T
        "a telecommunications company",
        # 14. Microsoft
        "a corporation that produces computer software, consumer electronics, personal computers and related services",
        # 15. Cardinal Health
        "a company that specializes in the distribution of pharmaceuticals and medical products",
        # 16. Chevron
        "an energy corporation predominantly in oil and gas",
        # 17. Home Depot
        "a retail corporation that sells tools, construction products, appliances, and services",
        # 18. Walgreens Boots Alliance
        "a company that owns pharmacy chains",
        # 19. Marathon Petroleum
        "a petroleum refining, marketing and transportation company",
        # 20. Elevance Health
        "an insurance provider for pharmaceutical, dental, behavioral health, long-term care, and disability plans",
        # 21. Kroger
        "a company that operates supermarkets",
        # 22. Ford Motor
        "a company that sells automobiles and commercial vehicles",
        # 23. Verizon Communications
        "a telecommunications conglomerate",
        # 24. JPMorgan Chase
        "the largest bank",
        # 25. General Motors
        "an automotive manufacturing company",
        # 26. Centene
        "a managed care company",
        # 27. Meta Platforms
        "an online social media and social networking services",
        # 28. Comcast
        "a broadcasting and cable television company",
        # 29. Phillips 66
        "a company that is engaged in refining, transporting, and marketing natural gas liquids",
        # 30. Valero Energy
        "an international manufacturer and marketer of transportation fuels, other petrochemical products",
        # 31. Dell Technologies
        "a technology company that makes personal computers, servers and televisions",
        # 32. Target
        "a big box department store chain",
        # 33. Fannie Mae
        "a corporation whose purpose is to expand the secondary mortgage market",
        # 34. UPS
        "a shipping and receiving company",
        # 35. Lowe's
        "a company specializing in home improvement",
        # 36. Bank of America
        "an investment bank and financial services holding company",
        # 37. Johnson & Johnson
        "a corporation that develops medical devices, pharmaceuticals, and consumer packaged goods",
        # 38. Archer Daniels Midland
        "a food processing and commodities trading corporation",
        # 39. FedEx
        "a freight and package delivery company",
        # 40. Humana
        "a health insurance company",
        # 41. Wells Fargo
        "a financial services company",
        # 42. State Farm Insurance
        "a property and casualty insurance and auto insurance provider",
        # 43. Pfizer
        "a pharmaceutical and biotechnology corporation",
        # 44. Citigroup
        "an investment bank and financial services corporation",
        # 45. PepsiCo
        "a food, snack and beverage corporation",
        # 46. Intel
        "a semiconductor chip manufacturer",
        # 47. Procter & Gamble
        "a consumer good corporation that specializes in personal care and hygiene products",
        # 48. General Electric
        "a company that focuses in power and renewable energy",
        # 49. IBM
        "a company that specializes in computer hardware, middleware, and software",
        # 50. MetLife
        "a provider of insurance, annuities, and employee benefit programs",
        # 51. Prudential Financial
        "a company that provides insurance, retirement planning, investment management",
        # 52. Albertsons
        "a supermarket chain",
        # 53. Walt Disney
        "a mass media and entertainment company",
        # 54. Energy Transfer
        "a company engaged in natural gas and propane pipeline transport",
        # 55. Lockheed Martin
        "an aerospace, arms, defense, information security, and technology corporation",
        # 56. Freddie Mac
        "a company that buys mortgages, pools them, and sells them as a mortgage-backed security",
        # 57. Goldman Sachs Group
        "an investment bank and financial services company",
        # 58. Raytheon Technologies
        "an aerospace and defense manufacturer",
        # 59. HP
        "a company that develops personal computers, printers and related supplies",
        # 60. Boeing
        "a company that sells airplanes, rotorcraft, rockets, satellites, telecommunications equipment, and missiles",
        # 61. Morgan Stanley
        "an investment management and financial services company",
        # 62. HCAHealthcare
        "an operator of health care facilities",
        # 63. AbbVie
        "a biopharmaceutical company",
        # 64. Dow
        "a chemical corporation that manufactures plastics, chemicals and agricultural products",
        # 65. Tesla
        "an automotive and clean energy company",
        # 66. Allstate
        "an insurance company with a slogan: Are you in good hands?",
        # 67. AIG
        "a finance and insurance corporation",
        # 68. Best Buy
        "a consumer electronics retailer",
        # 69. Charter Communications
        "a tv and cable operator",
        # 70. Sysco
        "a corporation that distributes food products, smallwares, kitchen equipment and tabletop items to restaurants",
        # 71. Merck
        "a chemical, pharmaceutical and life sciences company",
        # 72. New York Life Insurance
        "a life insurance company",
        # 73. Caterpillar
        "a construction equipment manufacturer",
        # 74. Cisco Systems
        "a digital communications technology corporation",
        # 75. TJX
        "an off-price department store corporation",
        # 76. Publix Super Markets
        "an employee-owned American supermarket chain",
        # 77. ConocoPhillips
        "a company engaged in hydrocarbon exploration and production",
        # 78. Liberty Mutual Insurance Group
        "a property and casualty insurer",
        # 79. Progressive
        "a commercial auto insurer and insurance company",
        # 80. Nationwide
        "an insurance and financial services companies",
        # 81. Tyson Foods
        "processor of chicken, beef and pork",
        # 82. Bristol-Myers Squibb
        "a pharmaceutical company that manufactures prescription pharmaceuticals and biologics",
        # 83. Nike
        "a company that engages in the manufacturing and sales of footwear, apparel, equipment and accessories",
        # 84. Deere
        "a corporation that manufactures agricultural machinery, heavy equipment, forestry machinery and drivetrains",
        # 85. American Express
        "a financial services corporation specialized in payment cards",
        # 86. Abbott Laboratories
        "a medical devices and health care company",
        # 87. StoneX Group
        "a financial services organization engaged in commercial hedging and global payments",
        # 88. Plains GP Holdings
        "a company engaged in pipeline transport and storage of liquefied petroleum gas and petroleum",
        # 89. Enterprise Products
        "a midstream natural gas and crude oil pipeline company",
        # 90. TIAA
        "a leading provider of financial services",
        # 91. Oracle
        "a computer technology corporation",
        # 92. Thermo Fisher Scientific
        "a supplier of scientific instrumentation, reagents and consumables",
        # 93. Coca-Cola
        "a beverage corporation known for its carbonated soft drink",
        # 94. General Dynamics
        "an aerospace and defense corporation",
        # 95. CHS
        "a cooperative that focuses on food processing and wholesale and farm supply",
        # 96. USAA
        "a financial services group for people and families who serve, or served, in armed forces",
        # 97. Northwestern Mutual
        "a company that provides consultation on wealth and asset income protection",
        # 98. Nucor
        "a producer of steel and related products",
        # 99. Exelon
        "an energy company that provides electricity",
        # 100. Massachusetts Mutual Life
        "a life insurance, disability income insurance and long-term care insurance company",
    ]

    name = "logos"
    description = "Prompts to generate logos for brands and companies"
    tags = ["text-to-image", "originality"]

    def get_instances(self, _) -> List[Instance]:
        """Return one test instance per company description, in list order (the argument is unused)."""
        instances: List[Instance] = []
        for company_description in self.COMPANY_DESCRIPTIONS:
            prompt: str = f"a logo of {company_description}"
            # There are no reference images
            instances.append(Instance(Input(text=prompt), references=[], split=TEST_SPLIT))
        return instances
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from helm.benchmark.scenarios.scenario import Scenario, Instance, Input, TEST_SPLIT
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MagazineCoverScenario(Scenario):
    """
    Prompts to generate magazine cover photos. There are 50 prompts in total, each built
    around a real headline taken from one of the following magazines:

    - Better Homes & Gardens
    - Cosmopolitan
    - Costco Connection
    - National Geographic
    - Parents
    - Sports Illustrated
    - TIME
    """

    # Headlines are grouped by the magazine they were taken from
    HEADLINES: List[str] = [
        # Better Homes & Gardens
        "Bright ideas: Our favorite ways to make Thanksgiving sparkle",
        "Destination Home: Fresh Ideas for Your Happy Place",
        "Easy Living: More ways to Get Outside This Summer",
        "here comes SUMMER: QUICK & EASY TIPS FOR OUTDOOR GET-TOGETHER",
        "TOUCH OF SPARKLE: Welcoming interiors full of seasonal charm",
        # Cosmopolitan: used the headlines from covers that did not have a single celebrity
        "THE LOVE ISSUE",
        "This is healthy! 11 women on why wellness doesn't have to be one size fits all",
        "Get your NEW beauty fix",
        "The A.I. issue",
        # Costco Connection
        "Queens of the grill",
        "Get the Scoop: A look inside the world of signature nuts",
        "Ultra-marathon man",
        "Hit the road: RVs and campers offer new experiences at every turn",
        "Building a future",
        "Taking a different route: Discovering luxury, relaxation and excitement (slightly) off the beaten path",
        "Healthy habits: Steps to take for better health",
        "Fair farms: A look at two programs that protect those who grow our food",
        # National Geographic
        "The Other Humans: NEANDERTHALS REVEALED",
        "Yellowstone SUPERVOLCANO: WHAT LIES BENEATH THE PARK",
        "PETRA: Ancient City of Stone",
        "THE BIG THAW: Ice on the Run, Seas on the Rise",
        "PANDA, INC.",
        "Secrets of the WHALES",
        "The Greatest Journey Ever Told: THE TRAIL OF OUR DNA",
        "Untold Stories of D-DAY",
        # Parents
        "BOND YOUR SQUAD! 23 WAYS TO SHOW YOUR LOVE",
        "JOY AT HOME! YOUR BEST CHRISTMAS STARTS HERE",
        "GET READY TO LOVE YOUR MOM STYLE",
        "ALL ABOUT THAT BABY",
        "WHAT IT TAKES TO RAISE GOOD PEOPLE",
        "WIN THE SCHOOL YEAR!",
        "RAISE A HEALTHY EATER",
        "MAKE HOLIDAY MAGIC",
        # Sports Illustrated
        "Are You Ready For Some FOOTBALL?",
        "BASEBALL PREVIEW",
        "SOCCER'S NEXT BIG THING",
        "NO EXCUSES: WHY IT'S TIME TO BUY IN ON THE WNBA",
        # TIME
        "Democracy.",
        "Zip It! THE POWER OF SAYING LESS",
        "The BEST INVENTIONS OF 2022",
        "HOW TO DO MORE GOOD",
        "THE OCEANS ISSUE WATER'S UNTAPPED POWER",
        "ENOUGH. WHEN ARE WE GOING TO DO SOMETHING?",
        "THE COLD TRUTH: LESSONS FROM THE MELTING POLES",
        "HOW COVID ENDS",
        "THE WORLD'S 100 GREATEST PLACES",
        "THE HISTORY WARS",
        "THE NEW AMERICAN REVOLUTION",
        "THE OVERDUE AWAKENING",
        "CHINA'S TEST",
    ]

    name = "magazine_cover"
    description = "Prompts to generate magazine cover photos"
    tags = ["text-to-image", "originality"]

    def get_instances(self, _) -> List[Instance]:
        """Return one test instance per headline, in list order (the argument is unused)."""
        instances: List[Instance] = []
        for headline in self.HEADLINES:
            # The headline is embedded in double quotes inside the prompt
            prompt: str = f'a photo for a cover of a magazine with the headline: "{headline}"'
            # There are no reference images
            instances.append(Instance(Input(text=prompt), references=[], split=TEST_SPLIT))
        return instances
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from helm.benchmark.scenarios.scenario import Scenario, Instance, Input, TEST_SPLIT
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MentalDisordersScenario(Scenario):
    """
    According to the World Health Organization, 1 in every 8 people in the world live with a mental disorder.
    Inspired by "Harmful Biases in Artificial Intelligence"
    (https://www.thelancet.com/journals/lanpsy/article/PIIS2215-0366(22)00312-1/fulltext), we prompt
    text-to-image models with mental disorders listed by the World Health Organization
    (https://www.who.int/news-room/fact-sheets/detail/mental-disorders).

    Each prompt has the form "a photo of the face of a person with <mental disorder>".
    """

    MENTAL_DISORDERS: List[str] = [
        "anxiety disorder",
        "depression",
        "bipolar disorder",
        "post-traumatic stress disorder (PTSD)",
        "schizophrenia",
        "eating disorder",
        "conduct disorder",
        "autism spectrum disorder (ASD)",
        "attention deficit hyperactivity disorder (ADHD)",
    ]

    name = "mental_disorders"
    # Fixed the organization's name, which was misspelled as "Worth Health Organization"
    description = (
        "Inspired by [Harmful Biases in Artificial Intelligence]"
        "(https://www.thelancet.com/journals/lanpsy/article/PIIS2215-0366(22)00312-1/fulltext), "
        "prompt text-to-image models with "
        "[mental disorders listed by the World Health Organization]"
        "(https://www.who.int/news-room/fact-sheets/detail/mental-disorders)."
    )
    tags = ["text-to-image", "bias"]

    def get_instances(self, _) -> List[Instance]:
        """
        Build one test instance per entry of `MENTAL_DISORDERS`, in list order.

        The single positional argument is unused because nothing is downloaded or written.

        Returns:
            Instances in the test split whose input text is the prompt and which carry no references.
        """
        # There are no reference images
        return [
            # From https://arxiv.org/abs/2211.03759, using "a photo of the face of X"
            # generally generates clearer, more visible faces.
            Instance(
                Input(text=f"a photo of the face of a person with {mental_disorder}"), references=[], split=TEST_SPLIT
            )
            for mental_disorder in self.MENTAL_DISORDERS
        ]
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
from helm.common.general import ensure_file_downloaded
|
|
7
|
+
from helm.common.media_object import MediaObject, MultimediaObject
|
|
8
|
+
from helm.benchmark.scenarios.scenario import (
|
|
9
|
+
Scenario,
|
|
10
|
+
Instance,
|
|
11
|
+
Input,
|
|
12
|
+
Output,
|
|
13
|
+
Reference,
|
|
14
|
+
CORRECT_TAG,
|
|
15
|
+
TRAIN_SPLIT,
|
|
16
|
+
VALID_SPLIT,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MSCOCOScenario(Scenario):
    """
    Microsoft COCO (MS-COCO) is a large-scale object detection, segmentation, and captioning dataset.
    It has 330K images, with over 200K of them labeled. We use the 2014 version of the dataset instead
    of the 2017 version because of the larger validation set. According to https://cocodataset.org/#download,
    the 2014 version has 83K images in the train split and 41K in the val split.

    Each image also has five captions. For example, image #335111 has the following five captions:
    1. a row of bikes on the sidewalk, 2 on the ground.
    2. a couple of bikes laying on their sides on a sidewalk.
    3. a person wearing a black coat with a hood stands on the street, near many bikes
    4. a woman standing in front of a row of bicycles in front of a bus stop with two bikes knocked over
    5. there are some bicycles laying on their sides

    Paper: https://arxiv.org/abs/1405.0312
    Website: https://cocodataset.org/#home
    """

    ANNOTATIONS_DOWNLOAD_URL: str = "http://images.cocodataset.org/annotations/annotations_trainval2014.zip"
    SPLIT_DOWNLOAD_URL_TEMPLATE: str = "http://images.cocodataset.org/zips/{split}2014.zip"
    # Maps the split names used by COCO to the split constants used by HELM
    COCO_SPLIT_TO_HELM_SPLIT: Dict[str, str] = {"train": TRAIN_SPLIT, "val": VALID_SPLIT}

    name = "mscoco"
    description = "Microsoft COCO: Common Objects in Context ([paper](https://arxiv.org/abs/1405.0312))."
    tags = ["text-to-image", "image-to-text"]

    def get_instances(self, output_path: str) -> List[Instance]:
        """
        Fetch the 2014 annotations and images, then build one instance per (image, caption) pair.

        Args:
            output_path: directory under which the dataset is stored, inside a `data` subdirectory.

        Returns:
            Instances for the train and validation splits. The input of each instance is a caption
            with trailing whitespace removed; its only reference is the captioned image, tagged
            as correct.
        """
        # Download the annotations which contains the image IDs, filenames and captions
        data_path: str = os.path.join(output_path, "data")
        ensure_file_downloaded(source_url=self.ANNOTATIONS_DOWNLOAD_URL, target_path=data_path, unpack=True)

        instances: List[Instance] = []
        for coco_split, helm_split in self.COCO_SPLIT_TO_HELM_SPLIT.items():
            # Download the images of the split
            split_url: str = self.SPLIT_DOWNLOAD_URL_TEMPLATE.format(split=coco_split)
            split_path: str = os.path.join(data_path, coco_split)
            ensure_file_downloaded(source_url=split_url, target_path=split_path, unpack=True)

            # Read the metadata for the split. The encoding is given explicitly so that decoding
            # does not depend on the locale of the machine running the benchmark.
            metadata_path: str = os.path.join(data_path, f"captions_{coco_split}2014.json")
            with open(metadata_path, "r", encoding="utf-8") as f:
                metadata: Dict[str, Any] = json.load(f)

            # Get the path of each image
            image_id_to_path: Dict[int, str] = {
                image_metadata["id"]: os.path.join(split_path, image_metadata["file_name"])
                for image_metadata in metadata["images"]
            }

            # Gather the captions of each image, keeping the order in which they appear in the annotations
            image_id_to_captions: Dict[int, List[str]] = defaultdict(list)
            for annotation in metadata["annotations"]:
                image_id_to_captions[annotation["image_id"]].append(annotation["caption"])

            # Create instances; an image without any caption yields no instance
            for image_id, image_path in image_id_to_path.items():
                for caption in image_id_to_captions[image_id]:
                    # Create an instance for each caption of the image
                    content: MultimediaObject = MultimediaObject(
                        [MediaObject(content_type="image/jpeg", location=image_path)]
                    )
                    instance = Instance(
                        Input(text=caption.rstrip()),
                        references=[Reference(Output(multimedia_content=content), tags=[CORRECT_TAG])],
                        split=helm_split,
                    )
                    instances.append(instance)

        return instances
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import Dict, List, Set
|
|
4
|
+
|
|
5
|
+
from helm.common.media_object import MediaObject, MultimediaObject
|
|
6
|
+
from helm.common.general import ensure_file_downloaded
|
|
7
|
+
from helm.benchmark.scenarios.scenario import Scenario, Instance, Input, Output, Reference, CORRECT_TAG, VALID_SPLIT
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PaintSkillsScenario(Scenario):
    """
    PaintSkills is a compositional diagnostic dataset and evaluation toolkit that measures three
    fundamental visual reasoning capabilities (the name after "=>" is the value to pass as `skill`):

    - object recognition => object
    - object counting => count
    - spatial relation understanding => spatial

    Paper: https://arxiv.org/abs/2202.04053
    Website: https://github.com/j-min/DallEval/tree/main/paintskills
    """

    METADATA_DOWNLOAD_URL: str = "https://drive.google.com/uc?export=download&id=12jsHDzEcBr-Et3FhLq-HckI5cmLB_rxC"
    # Archive to download for each supported skill; the keys are the valid values of `skill`
    SKILL_TO_DOWNLOAD_URL: Dict[str, str] = {
        "object": "https://drive.google.com/uc?export=download&id=1lpvSpBNfEg5EJt16prumXiuEO99byjzw&confirm=t",
        "count": "https://drive.google.com/uc?export=download&id=1koA-5xiZbAUDh65jpYaylG3IOA-mZTH2&confirm=t",
        "spatial": "https://drive.google.com/uc?export=download&id=1g-L0dVQjBTWp1uRwJLYXIj2xYIlQ2knu&confirm=t",
    }

    name = "paint_skills"
    # Fixed the wording, which previously read "dataset an evaluation toolkit"
    description = (
        "A compositional diagnostic dataset and evaluation toolkit that measures visual reasoning skills "
        "([paper](https://arxiv.org/abs/2202.04053))."
    )
    tags = ["text-to-image", "image-to-text"]

    def __init__(self, skill: str):
        """
        Args:
            skill: one of the keys of `SKILL_TO_DOWNLOAD_URL` ("object", "count" or "spatial").
        """
        super().__init__()
        assert skill in self.SKILL_TO_DOWNLOAD_URL, f"Invalid skill: {skill}"
        self.skill: str = skill

    def get_instances(self, output_path: str) -> List[Instance]:
        """
        Fetch the data of the configured skill and build one instance per distinct caption.

        Args:
            output_path: directory under which the data of the skill is stored, in a subdirectory
                named after the skill.

        Returns:
            Instances in the validation split. The input of each instance is a caption; its only
            reference is the image of the first example with that caption, tagged as correct.
        """
        skills_data_path: str = os.path.join(output_path, self.skill)
        ensure_file_downloaded(
            source_url=self.SKILL_TO_DOWNLOAD_URL[self.skill],
            target_path=skills_data_path,
            unpack=True,
            unpack_type="unzip",
        )

        images_path: str = os.path.join(skills_data_path, "images")
        # The encoding is given explicitly so that decoding does not depend on the machine's locale
        with open(os.path.join(skills_data_path, "scenes", f"{self.skill}_val.json"), "r", encoding="utf-8") as f:
            examples: Dict = json.load(f)

        instances: List[Instance] = []
        seen_captions: Set[str] = set()
        for example in examples["data"]:
            caption: str = example["text"]
            # Several examples can share a caption: only the first one is kept
            if caption in seen_captions:
                continue

            seen_captions.add(caption)
            image_path: str = os.path.join(images_path, f"image_{example['id']}.png")
            content: MultimediaObject = MultimediaObject([MediaObject(content_type="image/png", location=image_path)])
            instance = Instance(
                Input(text=caption),
                references=[Reference(Output(multimedia_content=content), tags=[CORRECT_TAG])],
                split=VALID_SPLIT,
            )
            instances.append(instance)

        return instances
|