crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/benchmark/window_services/gpt2_window_service.py
@@ -1,32 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class GPT2WindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length of this tokenizer."""
-        return 1024
-
-    @property
-    def max_request_length(self) -> int:
-        """Return the max request length of GPT-2."""
-        return self.max_sequence_length + 1
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "<|endoftext|>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "huggingface/gpt2"
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token for models that uses the GPT-2 tokenizer is the end of text token."""
-        return self.end_of_text_token
helm/benchmark/window_services/gptj_window_service.py
@@ -1,38 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class GPTJWindowService(LocalWindowService):
-    """
-    The same tokenizer as GPT-2, but with an additional 143 tokens
-    (source: https://huggingface.co/docs/transformers/model_doc/gptj).
-    """
-
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length."""
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        """Return the max request length."""
-        return self.max_sequence_length + 1
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        # Not a typo: Named "gpt-j-6B" instead of "gpt-j-6b" in Hugging Face
-        return "EleutherAI/gpt-j-6B"
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "<|endoftext|>"
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token for models is the same as the end of text token."""
-        return self.end_of_text_token
helm/benchmark/window_services/gptneox_window_service.py
@@ -1,41 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class GPTNeoXWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length."""
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        """Return the max request length."""
-        return self.max_sequence_length + 1
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "<|endoftext|>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "EleutherAI/gpt-neox-20b"
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token is the same as the end of text token."""
-        return self.end_of_text_token
-
-
-class StableLMAlphaWindowService(GPTNeoXWindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length."""
-        # The context length for these models is 4096 tokens.
-        # See: https://github.com/Stability-AI/StableLM#stablelm-alpha
-        return 4096
helm/benchmark/window_services/http_model_window_service.py
@@ -1,28 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-# TODO: Remove Once we have configurable model names since this hardcodes the tokenizer name
-class HTTPModelWindowServce(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        return "<|endoftext|>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        return "neurips/local"
-
-    @property
-    def prefix_token(self) -> str:
-        return self.end_of_text_token
helm/benchmark/window_services/huggingface_window_service.py
@@ -1,59 +0,0 @@
-from typing import Optional
-from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class HuggingFaceWindowService(LocalWindowService):
-    def __init__(
-        self,
-        service: TokenizerService,
-        tokenizer_name: str,
-        pretrained_model_name_or_path: Optional[str] = None,
-        revision: Optional[str] = None,
-        max_sequence_length: Optional[int] = None,
-        max_reqeust_length: Optional[int] = None,
-    ):
-        super().__init__(service)
-        self._tokenizer_name = tokenizer_name
-        tokenizer = HuggingFaceTokenizer.get_tokenizer(
-            helm_tokenizer_name=tokenizer_name,
-            pretrained_model_name_or_path=pretrained_model_name_or_path or tokenizer_name,
-            revision=revision,
-        )
-        self._prefix_token = tokenizer.bos_token
-        self._end_of_text_token = tokenizer.eos_token
-        # Override max_sequence_length if provided as an argument.
-        # Otherwise, auto-infer max_sequence_length from the Hugging Face tokenizer.
-        # Note that many Hugging Face tokenizers have incorrect sequence lengths,
-        # so it is recommended to set this manually.
-        if max_sequence_length:
-            self._max_sequence_length = max_sequence_length
-        else:
-            self._max_sequence_length = tokenizer.model_max_length
-        self._max_request_length = max_reqeust_length
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length of this tokenizer."""
-        return self._max_sequence_length
-
-    @property
-    def max_request_length(self) -> int:
-        """Return the max request length of this tokenizer."""
-        return self._max_request_length or self._max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return self._end_of_text_token
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return self._tokenizer_name
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token."""
-        return self._prefix_token
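Note the `max_reqeust_length` parameter typo above, shipped as-is in 0.3.0. In 0.5.0, this kind of manual `max_sequence_length` override is declared per deployment in `helm/config/model_deployments.yaml` rather than inferred from the Hugging Face tokenizer at construction time. A minimal sketch of the lookup, assuming a `get_model_deployment` function and a `max_sequence_length` field based on `helm/benchmark/model_deployment_registry.py` in the file list (the deployment name is hypothetical):

```python
# Hedged sketch: deployment properties are read from the registry that
# helm/config/model_deployments.yaml populates.
from helm.benchmark.config_registry import register_builtin_configs_from_helm_package
from helm.benchmark.model_deployment_registry import get_model_deployment

register_builtin_configs_from_helm_package()  # loads helm/config/*.yaml
deployment = get_model_deployment("huggingface/gpt2")  # hypothetical deployment name
print(deployment.max_sequence_length)  # declared in YAML, not tokenizer.model_max_length
```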
helm/benchmark/window_services/lit_gpt_window_service.py
@@ -1,27 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class LitGPTWindowServce(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        return "<|endoftext|>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        return "lightningai/lit-gpt"
-
-    @property
-    def prefix_token(self) -> str:
-        return self.end_of_text_token
helm/benchmark/window_services/llama_window_service.py
@@ -1,28 +0,0 @@
-from helm.benchmark.window_services.huggingface_window_service import HuggingFaceWindowService
-from helm.benchmark.window_services.tokenizer_service import TokenizerService
-
-
-class LlamaWindowService(HuggingFaceWindowService):
-    def __init__(self, service: TokenizerService):
-        # Tokenizer name hf-internal-testing/llama-tokenizer is taken from:
-        # https://huggingface.co/docs/transformers/main/en/model_doc/llama#transformers.LlamaTokenizerFast.example
-        super().__init__(service, tokenizer_name="hf-internal-testing/llama-tokenizer")
-
-
-class Llama2WindowService(HuggingFaceWindowService):
-    # To use the Llama-2 tokenizer:
-    #
-    # 1. Accept the license agreement: https://ai.meta.com/resources/models-and-libraries/llama-downloads/
-    # 2. Request to access the Hugging Face repository: https://huggingface.co/meta-llama/Llama-2-7b
-    # 3. Run `huggingface-cli login`
-    #
-    # If you encounter the following error, complete the above steps and try again:
-    #
-    # meta-llama/Llama-2-70b-hf is not a local folder and is not a valid model identifier listed on
-    # 'https://huggingface.co/models'
-    def __init__(self, service: TokenizerService):
-        super().__init__(service, "meta-llama/Llama-2-7b-hf")
-
-    @property
-    def max_sequence_length(self) -> int:
-        return 4096
helm/benchmark/window_services/luminous_window_service.py
@@ -1,67 +0,0 @@
-from abc import abstractmethod
-
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class LuminousWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    @abstractmethod
-    def tokenizer_name(self) -> str:
-        """Each Luminous model has its own tokenizer."""
-        pass
-
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        From https://docs.aleph-alpha.com/api/complete, "the summed number of tokens of prompt
-        and maximum_tokens..may not exceed 2048 tokens." Confirmed it's 2048 for the Luminous
-        models currently available.
-        """
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        """
-        The end of text token.
-        TODO: Setting to empty string for now as echo is not supported.
-        """
-        return ""
-
-    @property
-    def prefix_token(self) -> str:
-        """
-        The prefix token.
-        """
-        return self.end_of_text_token
-
-
-class LuminousBaseWindowService(LuminousWindowService):
-    @property
-    def tokenizer_name(self) -> str:
-        return "AlephAlpha/luminous-base"
-
-
-class LuminousExtendedWindowService(LuminousWindowService):
-    @property
-    def tokenizer_name(self) -> str:
-        return "AlephAlpha/luminous-extended"
-
-
-class LuminousSupremeWindowService(LuminousWindowService):
-    @property
-    def tokenizer_name(self) -> str:
-        return "AlephAlpha/luminous-supreme"
-
-
-class LuminousWorldWindowService(LuminousWindowService):
-    @property
-    def tokenizer_name(self) -> str:
-        return "AlephAlpha/luminous-world"
helm/benchmark/window_services/megatron_window_service.py
@@ -1,10 +0,0 @@
-from .gpt2_window_service import GPT2WindowService
-
-
-# NOTE: The only difference between this and GPT2WindowService is that
-# the request length is constrained to the sequence length.
-class MegatronWindowService(GPT2WindowService):
-    @property
-    def max_request_length(self) -> int:
-        """Return the max request length of GPT-2."""
-        return self.max_sequence_length
helm/benchmark/window_services/mt_nlg_window_service.py
@@ -1,27 +0,0 @@
-from .gpt2_window_service import GPT2WindowService
-from .tokenizer_service import TokenizerService
-
-
-class MTNLGWindowService(GPT2WindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        The max length of the model input. MT-NLG does not predict the logprob of the first
-        input token so `max_sequence_length` is one token shorter than `max_request_length`.
-        """
-        return self.max_request_length - 1
-
-    @property
-    def max_request_length(self) -> int:
-        """
-        The max request length for the MT-NLG models is 2048.
-        Source: https://github.com/microsoft/turing-academic-TNLG
-        """
-        return 2048
-
-    @property
-    def prefix_token(self) -> str:
-        return "<<"
helm/benchmark/window_services/openai_window_service.py
@@ -1,13 +0,0 @@
-from .gpt2_window_service import GPT2WindowService
-from .tokenizer_service import TokenizerService
-
-
-class OpenAIWindowService(GPT2WindowService):
-    def __init__(self, service: TokenizerService):
-        # OpenAI uses the same tokenizer for GPT-2 and GPT-3.
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length of the OpenAI models (max length of model input)."""
-        return 2048
helm/benchmark/window_services/opt_window_service.py
@@ -1,35 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class OPTWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        The max length of the model input. The max sequence length for the OPT models is 2048.
-        Source: https://arxiv.org/pdf/2205.01068.pdf
-        """
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        """Return the max request length."""
-        return self.max_sequence_length + 1
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "</s>"
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token is the same as the end of text token."""
-        return self.end_of_text_token
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "facebook/opt-66b"
helm/benchmark/window_services/palmyra_window_service.py
@@ -1,45 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class PalmyraWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def tokenizer_name(self) -> str:
-        """All Palmyra models use the same tokenizer."""
-        return "huggingface/gpt2"
-
-    @property
-    def max_sequence_length(self) -> int:
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def max_sequence_and_generated_tokens_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        """
-        The end of text token.
-        TODO: Setting to empty string for now as echo is not supported.
-        """
-        return ""
-
-    @property
-    def prefix_token(self) -> str:
-        """
-        The prefix token.
-        """
-        return self.end_of_text_token
-
-
-class LongerPalmyraWindowService(PalmyraWindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        return 8192
helm/benchmark/window_services/remote_window_service.py
@@ -1,48 +0,0 @@
-from typing import Dict, Type
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class RemoteWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService, model_name: str):
-        super().__init__(service)
-        self.model_name = model_name
-        info = self.service.get_info(model_name)
-        self._tokenizer_name = info.tokenizer_name
-        self._max_sequence_length = info.max_sequence_length
-        self._max_request_length = info.max_request_length
-        self._end_of_text_token = info.end_of_text_token
-        self._prefix_token = info.prefix_token
-
-    @property
-    def max_sequence_length(self) -> int:
-        return self._max_sequence_length
-
-    @property
-    def max_request_length(self) -> int:
-        return self._max_request_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        return self._end_of_text_token
-
-    @property
-    def prefix_token(self) -> str:
-        return self._prefix_token
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return self._tokenizer_name
-
-
-# If the windowing logic is different from the base LocalWindowService,
-# please add the specific implementation for the model and add it to the following dict.
-remote_window_services: Dict[str, Type[RemoteWindowService]] = {}
-
-
-def get_remote_window_service(service: TokenizerService, model_name: str):
-    if model_name in remote_window_services:
-        return remote_window_services[model_name](service, model_name)
-    else:
-        return RemoteWindowService(service, model_name)
helm/benchmark/window_services/santacoder_window_service.py
@@ -1,27 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class SantaCoderWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        return 2048
-
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        return "<|endoftext|>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        return "bigcode/santacoder"
-
-    @property
-    def prefix_token(self) -> str:
-        return self.end_of_text_token
helm/benchmark/window_services/starcoder_window_service.py
@@ -1,27 +0,0 @@
-from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
-
-
-class StarCoderWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        return 8192
-
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length
-
-    @property
-    def end_of_text_token(self) -> str:
-        return "<|endoftext|>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        return "bigcode/starcoder"
-
-    @property
-    def prefix_token(self) -> str:
-        return self.end_of_text_token
helm/benchmark/window_services/t0pp_window_service.py
@@ -1,35 +0,0 @@
-from .encoder_decoder_window_service import EncoderDecoderWindowService
-from .tokenizer_service import TokenizerService
-
-
-class T0ppWindowService(EncoderDecoderWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length."""
-        # From https://arxiv.org/pdf/2110.08207.pdf, "we truncate input and target sequences to 1024 and 256 tokens,
-        # respectively. Following Raffel et al. (2020), we use packing to combine multiple training examples into
-        # a single sequence to reach the maximum sequence length."
-        return 1024
-
-    @property
-    def max_output_length(self) -> int:
-        return 256
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "</s>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "bigscience/T0pp"
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token is the same as the end of text token."""
-        # echo=True is not supported
-        return ""
helm/benchmark/window_services/t511b_window_service.py
@@ -1,30 +0,0 @@
-from .encoder_decoder_window_service import EncoderDecoderWindowService
-from .tokenizer_service import TokenizerService
-
-
-class T511bWindowService(EncoderDecoderWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length."""
-        # From https://arxiv.org/pdf/1910.10683.pdf, "we use a maximum sequence length of 512".
-        # We subtract 1 to account for <extra_id_0> that gets appended to prompts.
-        return 512 - 1
-
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "</s>"
-
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "google/t5-11b"
-
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token is the same as the end of text token."""
-        # echo=True is not supported
-        return ""