crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -2,6 +2,7 @@ import shutil
 import tempfile
 from typing import List
 
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
 from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -120,7 +121,7 @@ class TestAnthropicWindowService:
|
|
|
120
121
|
|
|
121
122
|
def setup_method(self):
|
|
122
123
|
self.path: str = tempfile.mkdtemp()
|
|
123
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
124
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
124
125
|
self.window_service = WindowServiceFactory.get_window_service("anthropic/claude-v1.3", service)
|
|
125
126
|
|
|
126
127
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -64,7 +65,7 @@ class TestBloomWindowService:
|
|
|
64
65
|
|
|
65
66
|
def setup_method(self):
|
|
66
67
|
self.path: str = tempfile.mkdtemp()
|
|
67
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
68
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
68
69
|
self.window_service = WindowServiceFactory.get_window_service("together/bloom", service)
|
|
69
70
|
|
|
70
71
|
def teardown_method(self, method):
|
|
@@ -6,6 +6,7 @@ from typing import List
|
|
|
6
6
|
|
|
7
7
|
from sqlitedict import SqliteDict
|
|
8
8
|
|
|
9
|
+
from helm.common.cache_backend_config import SqliteCacheBackendConfig
|
|
9
10
|
from helm.common.general import ensure_directory_exists
|
|
10
11
|
from .test_cohere_window_service_utils import REQUESTS_TO_RESPONSES, TEST_PROMPT, TOKENIZED_PROMPT
|
|
11
12
|
from .tokenizer_service import TokenizerService
|
|
@@ -30,7 +31,7 @@ class TestCohereWindowService:
|
|
|
30
31
|
with open(os.path.join(cls.path, "credentials.conf"), "w") as f:
|
|
31
32
|
f.write("cohereApiKey: secret")
|
|
32
33
|
|
|
33
|
-
service: TokenizerService = get_tokenizer_service(cls.path)
|
|
34
|
+
service: TokenizerService = get_tokenizer_service(cls.path, SqliteCacheBackendConfig(cache_path))
|
|
34
35
|
cls.window_service = WindowServiceFactory.get_window_service("cohere/xlarge-20220609", service)
|
|
35
36
|
cls.prompt: str = TEST_PROMPT
|
|
36
37
|
cls.tokenized_prompt: List[str] = TOKENIZED_PROMPT
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import tempfile
|
|
2
2
|
|
|
3
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
3
4
|
from helm.benchmark.window_services.test_t511b_window_service import TestT511bWindowService
|
|
4
5
|
from helm.benchmark.window_services.window_service_factory import TokenizerService, WindowServiceFactory
|
|
5
6
|
from helm.benchmark.window_services.test_utils import get_tokenizer_service
|
|
@@ -8,5 +9,5 @@ from helm.benchmark.window_services.test_utils import get_tokenizer_service
|
|
|
8
9
|
class TestFlanT5WindowService(TestT511bWindowService):
|
|
9
10
|
def setup_method(self):
|
|
10
11
|
self.path: str = tempfile.mkdtemp()
|
|
11
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
12
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
12
13
|
self.window_service = WindowServiceFactory.get_window_service("together/flan-t5-xxl", service)
|
|
@@ -2,7 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
4
|
from helm.benchmark.window_services.tokenizer_service import TokenizerService
|
|
5
|
-
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
6
6
|
from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS
|
|
7
7
|
from .window_service_factory import WindowServiceFactory
|
|
8
8
|
|
|
@@ -10,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
|
|
|
10
10
|
class TestGPT2WindowService:
|
|
11
11
|
def setup_method(self):
|
|
12
12
|
self.path: str = tempfile.mkdtemp()
|
|
13
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
14
14
|
self.window_service = WindowServiceFactory.get_window_service("huggingface/gpt2", service)
|
|
15
15
|
|
|
16
16
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT4_TEST_TOKEN_IDS, GPT4_TEST_TOKENS
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestOpenAIWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
13
14
|
self.window_service = WindowServiceFactory.get_window_service("openai/gpt-3.5-turbo-0301", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .tokenizer_service import TokenizerService
|
|
5
6
|
from .window_service_factory import WindowServiceFactory
|
|
6
7
|
from .test_utils import get_tokenizer_service, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS, TEST_PROMPT
|
|
@@ -9,8 +10,8 @@ from .test_utils import get_tokenizer_service, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN
|
|
|
9
10
|
class TestGPTJWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
-
self.window_service = WindowServiceFactory.get_window_service("
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
14
|
+
self.window_service = WindowServiceFactory.get_window_service("huggingface/gpt-j-6b", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
16
17
|
shutil.rmtree(self.path)
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -65,8 +66,8 @@ class TestGPTNeoXWindowService:
|
|
|
65
66
|
|
|
66
67
|
def setup_method(self):
|
|
67
68
|
self.path: str = tempfile.mkdtemp()
|
|
68
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
69
|
-
self.window_service = WindowServiceFactory.get_window_service("
|
|
69
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
70
|
+
self.window_service = WindowServiceFactory.get_window_service("huggingface/gpt-neox-20b", service)
|
|
70
71
|
|
|
71
72
|
def teardown_method(self, method):
|
|
72
73
|
shutil.rmtree(self.path)
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -64,7 +65,7 @@ class TestICEWindowService:
|
|
|
64
65
|
|
|
65
66
|
def setup_method(self):
|
|
66
67
|
self.path: str = tempfile.mkdtemp()
|
|
67
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
68
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
68
69
|
self.window_service = WindowServiceFactory.get_window_service("together/glm", service)
|
|
69
70
|
|
|
70
71
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestOpenAIWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
13
14
|
self.window_service = WindowServiceFactory.get_window_service("openai/davinci", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,8 +10,8 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestOPTWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
-
self.window_service = WindowServiceFactory.get_window_service("
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
14
|
+
self.window_service = WindowServiceFactory.get_window_service("huggingface/opt-175b", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
16
17
|
shutil.rmtree(self.path)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from tempfile import TemporaryDirectory
|
|
2
2
|
from typing import List
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .tokenizer_service import TokenizerService
|
|
5
6
|
from .window_service_factory import WindowServiceFactory
|
|
6
7
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -117,7 +118,7 @@ class TestPalmyraWindowService:
|
|
|
117
118
|
|
|
118
119
|
def setup_method(self):
|
|
119
120
|
self.temporary_directory = TemporaryDirectory()
|
|
120
|
-
service: TokenizerService = get_tokenizer_service(self.temporary_directory.name)
|
|
121
|
+
service: TokenizerService = get_tokenizer_service(self.temporary_directory.name, BlackHoleCacheBackendConfig())
|
|
121
122
|
self.window_service = WindowServiceFactory.get_window_service("writer/palmyra-large", service)
|
|
122
123
|
|
|
123
124
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -70,7 +71,7 @@ class TestT0ppWindowService:
|
|
|
70
71
|
|
|
71
72
|
def setup_method(self):
|
|
72
73
|
self.path: str = tempfile.mkdtemp()
|
|
73
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
74
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
74
75
|
self.window_service = WindowServiceFactory.get_window_service("together/t0pp", service)
|
|
75
76
|
|
|
76
77
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -70,7 +71,7 @@ class TestT511bWindowService:
|
|
|
70
71
|
|
|
71
72
|
def setup_method(self):
|
|
72
73
|
self.path: str = tempfile.mkdtemp()
|
|
73
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
74
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
74
75
|
self.window_service = WindowServiceFactory.get_window_service("together/t5-11b", service)
|
|
75
76
|
|
|
76
77
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -70,7 +71,7 @@ class TestUL2WindowService:
|
|
|
70
71
|
|
|
71
72
|
def setup_method(self):
|
|
72
73
|
self.path: str = tempfile.mkdtemp()
|
|
73
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
74
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
74
75
|
self.window_service = WindowServiceFactory.get_window_service("together/ul2", service)
|
|
75
76
|
|
|
76
77
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
3
|
from helm.common.authentication import Authentication
|
|
4
|
+
from helm.common.cache_backend_config import CacheBackendConfig
|
|
4
5
|
from helm.proxy.services.server_service import ServerService
|
|
5
6
|
from helm.benchmark.metrics.metric_service import MetricService
|
|
6
7
|
from .tokenizer_service import TokenizerService
|
|
@@ -227,6 +228,6 @@ GPT4_TEST_TOKENS: List[str] = [
|
|
|
227
228
|
]
|
|
228
229
|
|
|
229
230
|
|
|
230
|
-
def get_tokenizer_service(local_path: str) -> TokenizerService:
|
|
231
|
-
service = ServerService(base_path=local_path, root_mode=True)
|
|
231
|
+
def get_tokenizer_service(local_path: str, cache_backend_config: CacheBackendConfig) -> TokenizerService:
    """Create the tokenizer service used by the window-service tests.

    Args:
        local_path: Directory handed to ``ServerService`` as its ``base_path``.
        cache_backend_config: Cache backend configuration forwarded unchanged
            to ``ServerService``.

    Returns:
        A ``MetricService`` wrapping a root-mode ``ServerService`` and
        authenticated with the fixed ``"test"`` credential.
    """
    # NOTE(review): MetricService is returned where TokenizerService is
    # annotated; presumably it implements that interface -- confirm.
    return MetricService(
        ServerService(
            base_path=local_path,
            root_mode=True,
            cache_backend_config=cache_backend_config,
        ),
        Authentication("test"),
    )
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestYaLMWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
13
14
|
self.window_service = WindowServiceFactory.get_window_service("together/yalm", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
@@ -110,3 +110,45 @@ class WindowService(ABC):
|
|
|
110
110
|
minus the expected completion length (defaults to 0).
|
|
111
111
|
"""
|
|
112
112
|
pass
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ConfigurableWindowService(WindowService, ABC):
    """Window service whose limits and special tokens are supplied at construction.

    The constructor stores the given values and the properties below expose
    them read-only. Optional arguments that are omitted (or otherwise falsy)
    are replaced by the defaults described in ``__init__``.
    """

    def __init__(
        self,
        tokenizer_name: str,
        max_sequence_length: int,
        max_request_length: Optional[int] = None,
        max_sequence_and_generated_tokens_length: Optional[int] = None,
        end_of_text_token: Optional[str] = None,
        prefix_token: Optional[str] = None,
    ):
        """Store the configuration, filling in defaults for falsy optionals.

        Args:
            tokenizer_name: Name returned by the ``tokenizer_name`` property.
            max_sequence_length: Value returned by ``max_sequence_length``.
            max_request_length: Falls back to ``max_sequence_length`` when falsy.
            max_sequence_and_generated_tokens_length: Falls back to ``INT_MAX``
                when falsy.
            end_of_text_token: Falls back to the empty string when falsy.
            prefix_token: Falls back to the empty string when falsy.
        """
        self._tokenizer_name = tokenizer_name
        self._max_sequence_length = max_sequence_length

        # Truthiness (not an ``is None`` test) decides the fallbacks, so an
        # explicit 0 or "" is treated the same as an omitted argument.
        self._max_request_length = max_request_length if max_request_length else max_sequence_length
        self._max_sequence_and_generated_tokens_length = (
            max_sequence_and_generated_tokens_length if max_sequence_and_generated_tokens_length else INT_MAX
        )
        self._end_of_text_token = end_of_text_token if end_of_text_token else ""
        self._prefix_token = prefix_token if prefix_token else ""

    @property
    def tokenizer_name(self) -> str:
        """Tokenizer name given at construction."""
        return self._tokenizer_name

    @property
    def max_sequence_length(self) -> int:
        """Maximum sequence length given at construction."""
        return self._max_sequence_length

    @property
    def max_request_length(self) -> int:
        """Maximum request length, or ``max_sequence_length`` if none was given."""
        return self._max_request_length

    @property
    def max_sequence_and_generated_tokens_length(self) -> int:
        """Configured combined length limit, or ``INT_MAX`` if none was given."""
        return self._max_sequence_and_generated_tokens_length

    @property
    def end_of_text_token(self) -> str:
        """End-of-text token, or the empty string if none was given."""
        return self._end_of_text_token

    @property
    def prefix_token(self) -> str:
        """Prefix token, or the empty string if none was given."""
        return self._prefix_token
|