crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
- crfm_helm-0.5.1.dist-info/RECORD +654 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +31 -3
- helm/benchmark/adaptation/adapters/adapter.py +2 -2
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/gender_perturbation.py +1 -1
- helm/benchmark/augmentations/perturbation.py +25 -3
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/test_perturbation.py +41 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/config_registry.py +7 -1
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +20 -7
- helm/benchmark/metrics/basic_metrics.py +169 -664
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -110
- helm/benchmark/metrics/dry_run_metrics.py +2 -2
- helm/benchmark/metrics/efficiency_metrics.py +213 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +89 -0
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +2 -2
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -2
- helm/benchmark/metrics/test_classification_metrics.py +8 -5
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +74 -0
- helm/benchmark/model_metadata_registry.py +41 -1
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +26 -10
- helm/benchmark/presentation/schema.py +15 -40
- helm/benchmark/presentation/summarize.py +119 -79
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +1 -2
- helm/benchmark/presentation/test_summarize.py +3 -3
- helm/benchmark/run.py +54 -26
- helm/benchmark/run_expander.py +205 -35
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +163 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/run_specs/classic_run_specs.py +1510 -0
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +757 -0
- helm/benchmark/runner.py +51 -57
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +1 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +6 -2
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +1 -1
- helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
- helm/benchmark/scenarios/scenario.py +4 -0
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +6 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
- helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
- helm/benchmark/server.py +24 -1
- helm/benchmark/slurm_runner.py +70 -49
- helm/benchmark/static/benchmarking.js +1 -1
- helm/benchmark/static/schema_classic.yaml +258 -1066
- helm/benchmark/static/schema_image2structure.yaml +304 -0
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +2 -227
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
- helm/benchmark/static/schema_vlm.yaml +823 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
- helm/benchmark/static_build/assets/index-878a1094.css +1 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -44
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +4 -1
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +3 -9
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +233 -18
- helm/{proxy/clients → clients}/auto_client.py +59 -31
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +65 -7
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +4 -11
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +5 -5
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +5 -7
- helm/{proxy/clients → clients}/huggingface_client.py +43 -64
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
- helm/{proxy/clients → clients}/megatron_client.py +5 -5
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +301 -0
- helm/{proxy/clients → clients}/palmyra_client.py +6 -8
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +13 -15
- helm/clients/test_client.py +100 -0
- helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +20 -8
- helm/{proxy/clients → clients}/together_client.py +104 -73
- helm/clients/vertexai_client.py +400 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
- helm/clients/vision_language/huggingface_vlm_client.py +111 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +16 -4
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +1 -1
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +33 -3
- helm/common/key_value_store.py +35 -4
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/request.py +15 -17
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +1 -1
- helm/config/model_deployments.yaml +1159 -538
- helm/config/model_metadata.yaml +868 -41
- helm/config/tokenizer_configs.yaml +149 -43
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +8 -6
- helm/proxy/example_queries.py +29 -17
- helm/proxy/server.py +70 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +96 -16
- helm/proxy/services/service.py +30 -0
- helm/proxy/services/test_remote_service.py +4 -3
- helm/proxy/services/test_service.py +0 -12
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.4.0.dist-info/RECORD +0 -397
- helm/benchmark/run_specs.py +0 -2762
- helm/benchmark/test_model_deployment_definition.py +0 -92
- helm/benchmark/test_model_properties.py +0 -1570
- helm/benchmark/vlm_run_specs.py +0 -97
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/huggingface_window_service.py +0 -60
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/common/cache_utils.py +0 -14
- helm/proxy/clients/aleph_alpha_client.py +0 -95
- helm/proxy/clients/goose_ai_client.py +0 -99
- helm/proxy/clients/microsoft_client.py +0 -180
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/simple_client.py +0 -60
- helm/proxy/clients/test_client.py +0 -49
- helm/proxy/clients/vertexai_client.py +0 -115
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
- helm/proxy/token_counters/test_openai_token_counter.py +0 -81
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestOpenAIWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
13
14
|
self.window_service = WindowServiceFactory.get_window_service("openai/davinci", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,8 +10,8 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestOPTWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
-
self.window_service = WindowServiceFactory.get_window_service("
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
14
|
+
self.window_service = WindowServiceFactory.get_window_service("huggingface/opt-175b", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
16
17
|
shutil.rmtree(self.path)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from tempfile import TemporaryDirectory
|
|
2
2
|
from typing import List
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .tokenizer_service import TokenizerService
|
|
5
6
|
from .window_service_factory import WindowServiceFactory
|
|
6
7
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -117,7 +118,7 @@ class TestPalmyraWindowService:
|
|
|
117
118
|
|
|
118
119
|
def setup_method(self):
|
|
119
120
|
self.temporary_directory = TemporaryDirectory()
|
|
120
|
-
service: TokenizerService = get_tokenizer_service(self.temporary_directory.name)
|
|
121
|
+
service: TokenizerService = get_tokenizer_service(self.temporary_directory.name, BlackHoleCacheBackendConfig())
|
|
121
122
|
self.window_service = WindowServiceFactory.get_window_service("writer/palmyra-large", service)
|
|
122
123
|
|
|
123
124
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -70,7 +71,7 @@ class TestT0ppWindowService:
|
|
|
70
71
|
|
|
71
72
|
def setup_method(self):
|
|
72
73
|
self.path: str = tempfile.mkdtemp()
|
|
73
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
74
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
74
75
|
self.window_service = WindowServiceFactory.get_window_service("together/t0pp", service)
|
|
75
76
|
|
|
76
77
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -70,7 +71,7 @@ class TestT511bWindowService:
|
|
|
70
71
|
|
|
71
72
|
def setup_method(self):
|
|
72
73
|
self.path: str = tempfile.mkdtemp()
|
|
73
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
74
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
74
75
|
self.window_service = WindowServiceFactory.get_window_service("together/t5-11b", service)
|
|
75
76
|
|
|
76
77
|
def teardown_method(self, method):
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
7
8
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
@@ -70,7 +71,7 @@ class TestUL2WindowService:
|
|
|
70
71
|
|
|
71
72
|
def setup_method(self):
|
|
72
73
|
self.path: str = tempfile.mkdtemp()
|
|
73
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
74
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
74
75
|
self.window_service = WindowServiceFactory.get_window_service("together/ul2", service)
|
|
75
76
|
|
|
76
77
|
def teardown_method(self, method):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
3
|
from helm.common.authentication import Authentication
|
|
4
|
+
from helm.common.cache_backend_config import CacheBackendConfig
|
|
4
5
|
from helm.proxy.services.server_service import ServerService
|
|
5
6
|
from helm.benchmark.metrics.metric_service import MetricService
|
|
6
7
|
from .tokenizer_service import TokenizerService
|
|
@@ -227,6 +228,6 @@ GPT4_TEST_TOKENS: List[str] = [
|
|
|
227
228
|
]
|
|
228
229
|
|
|
229
230
|
|
|
230
|
-
def get_tokenizer_service(local_path: str) -> TokenizerService:
|
|
231
|
-
service = ServerService(base_path=local_path, root_mode=True)
|
|
231
|
+
def get_tokenizer_service(local_path: str, cache_backend_config: CacheBackendConfig) -> TokenizerService:
|
|
232
|
+
service = ServerService(base_path=local_path, root_mode=True, cache_backend_config=cache_backend_config)
|
|
232
233
|
return MetricService(service, Authentication("test"))
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
from .test_utils import get_tokenizer_service, TEST_PROMPT
|
|
5
6
|
from .tokenizer_service import TokenizerService
|
|
6
7
|
from .window_service_factory import WindowServiceFactory
|
|
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
|
|
|
9
10
|
class TestYaLMWindowService:
|
|
10
11
|
def setup_method(self):
|
|
11
12
|
self.path: str = tempfile.mkdtemp()
|
|
12
|
-
service: TokenizerService = get_tokenizer_service(self.path)
|
|
13
|
+
service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
|
|
13
14
|
self.window_service = WindowServiceFactory.get_window_service("together/yalm", service)
|
|
14
15
|
|
|
15
16
|
def teardown_method(self, method):
|
|
@@ -110,3 +110,45 @@ class WindowService(ABC):
|
|
|
110
110
|
minus the expected completion length (defaults to 0).
|
|
111
111
|
"""
|
|
112
112
|
pass
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ConfigurableWindowService(WindowService, ABC):
|
|
116
|
+
def __init__(
|
|
117
|
+
self,
|
|
118
|
+
tokenizer_name: str,
|
|
119
|
+
max_sequence_length: int,
|
|
120
|
+
max_request_length: Optional[int] = None,
|
|
121
|
+
max_sequence_and_generated_tokens_length: Optional[int] = None,
|
|
122
|
+
end_of_text_token: Optional[str] = None,
|
|
123
|
+
prefix_token: Optional[str] = None,
|
|
124
|
+
):
|
|
125
|
+
self._tokenizer_name = tokenizer_name
|
|
126
|
+
self._max_sequence_length = max_sequence_length
|
|
127
|
+
self._max_request_length = max_request_length or max_sequence_length
|
|
128
|
+
self._max_sequence_and_generated_tokens_length = max_sequence_and_generated_tokens_length or INT_MAX
|
|
129
|
+
self._end_of_text_token = end_of_text_token or ""
|
|
130
|
+
self._prefix_token = prefix_token or ""
|
|
131
|
+
|
|
132
|
+
@property
|
|
133
|
+
def tokenizer_name(self) -> str:
|
|
134
|
+
return self._tokenizer_name
|
|
135
|
+
|
|
136
|
+
@property
|
|
137
|
+
def max_sequence_length(self) -> int:
|
|
138
|
+
return self._max_sequence_length
|
|
139
|
+
|
|
140
|
+
@property
|
|
141
|
+
def max_request_length(self) -> int:
|
|
142
|
+
return self._max_request_length
|
|
143
|
+
|
|
144
|
+
@property
|
|
145
|
+
def max_sequence_and_generated_tokens_length(self) -> int:
|
|
146
|
+
return self._max_sequence_and_generated_tokens_length
|
|
147
|
+
|
|
148
|
+
@property
|
|
149
|
+
def end_of_text_token(self) -> str:
|
|
150
|
+
return self._end_of_text_token
|
|
151
|
+
|
|
152
|
+
@property
|
|
153
|
+
def prefix_token(self) -> str:
|
|
154
|
+
return self._prefix_token
|
|
@@ -43,7 +43,7 @@ class WindowServiceFactory:
|
|
|
43
43
|
# in the users configuration file. Instead, they have to be constructed dynamically at runtime.
|
|
44
44
|
window_service_spec = inject_object_spec_args(
|
|
45
45
|
window_service_spec,
|
|
46
|
-
{
|
|
46
|
+
constant_bindings={
|
|
47
47
|
"service": service,
|
|
48
48
|
"tokenizer_name": model_deployment.tokenizer_name,
|
|
49
49
|
"max_sequence_length": model_deployment.max_sequence_length,
|
|
@@ -52,6 +52,9 @@ class WindowServiceFactory:
|
|
|
52
52
|
"end_of_text_token": end_of_text_token,
|
|
53
53
|
"prefix_token": prefix_token,
|
|
54
54
|
},
|
|
55
|
+
provider_bindings={
|
|
56
|
+
"gpt2_window_service": lambda: WindowServiceFactory.get_window_service("huggingface/gpt2", service)
|
|
57
|
+
},
|
|
55
58
|
)
|
|
56
59
|
return create_object(window_service_spec)
|
|
57
60
|
|
|
@@ -1,34 +1,7 @@
|
|
|
1
|
-
from helm.proxy.tokenizers.yalm_tokenizer_data.yalm_tokenizer import YaLMTokenizer
|
|
2
1
|
from .local_window_service import LocalWindowService
|
|
3
|
-
from .tokenizer_service import TokenizerService
|
|
4
2
|
|
|
5
3
|
|
|
6
4
|
class YaLMWindowService(LocalWindowService):
|
|
7
|
-
def __init__(self, service: TokenizerService):
|
|
8
|
-
super().__init__(service)
|
|
9
|
-
|
|
10
|
-
@property
|
|
11
|
-
def tokenizer_name(self) -> str:
|
|
12
|
-
return "Yandex/yalm"
|
|
13
|
-
|
|
14
|
-
@property
|
|
15
|
-
def max_sequence_length(self) -> int:
|
|
16
|
-
return YaLMTokenizer.MAX_SEQUENCE_LENGTH
|
|
17
|
-
|
|
18
|
-
@property
|
|
19
|
-
def max_request_length(self) -> int:
|
|
20
|
-
return self.max_sequence_length + 1
|
|
21
|
-
|
|
22
|
-
@property
|
|
23
|
-
def end_of_text_token(self) -> str:
|
|
24
|
-
"""The end of text token."""
|
|
25
|
-
return YaLMTokenizer.EOS_TOKEN
|
|
26
|
-
|
|
27
|
-
@property
|
|
28
|
-
def prefix_token(self) -> str:
|
|
29
|
-
"""The prefix token"""
|
|
30
|
-
return self.end_of_text_token
|
|
31
|
-
|
|
32
5
|
def truncate_from_right(self, text: str, expected_completion_token_length: int = 0) -> str:
|
|
33
6
|
"""
|
|
34
7
|
Truncates text from the right to fit within the context window given by `max_request_length`
|
helm/clients/__init__.py
ADDED
|
File without changes
|
|
@@ -7,7 +7,7 @@ from helm.common.request import (
|
|
|
7
7
|
EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
|
|
8
8
|
Request,
|
|
9
9
|
RequestResult,
|
|
10
|
-
|
|
10
|
+
GeneratedOutput,
|
|
11
11
|
Token,
|
|
12
12
|
)
|
|
13
13
|
from .client import CachingClient, truncate_sequence, cleanup_str
|
|
@@ -97,25 +97,19 @@ class AI21Client(CachingClient):
|
|
|
97
97
|
# Compute the actual length of the token text
|
|
98
98
|
# e.g. "▁burying"(0,8) -> 8 - 0 = 8; "▁burying"(0,7) -> 7 - 0 = 7
|
|
99
99
|
text_length: int = raw["textRange"]["end"] - raw["textRange"]["start"]
|
|
100
|
-
# "topTokens" can be None when sending a request with topKReturn=0
|
|
101
|
-
# AI21 sends unscaled logprobs as `raw_logprob` so use this instead of `logprob`.
|
|
102
|
-
top_logprobs: Dict[str, float] = dict(
|
|
103
|
-
(fix_text(x["token"], first), x["raw_logprob"]) for x in raw["topTokens"] or []
|
|
104
|
-
)
|
|
105
100
|
|
|
106
101
|
return Token(
|
|
107
102
|
# Text should not be longer than text_length. Since "▁" is always inserted
|
|
108
103
|
# in the beginning, we truncate the text from the right.
|
|
109
104
|
text=fix_text(raw["generatedToken"]["token"], first)[-text_length:] if text_length else "",
|
|
110
105
|
logprob=raw["generatedToken"]["raw_logprob"],
|
|
111
|
-
top_logprobs=top_logprobs,
|
|
112
106
|
)
|
|
113
107
|
|
|
114
|
-
def parse_sequence(raw: Dict, first: bool, finish_reason: Optional[Dict] = None) ->
|
|
108
|
+
def parse_sequence(raw: Dict, first: bool, finish_reason: Optional[Dict] = None) -> GeneratedOutput:
|
|
115
109
|
text = raw["text"]
|
|
116
110
|
tokens = [parse_token(token, first and i == 0) for i, token in enumerate(raw["tokens"])]
|
|
117
111
|
logprob = sum(token.logprob for token in tokens)
|
|
118
|
-
return
|
|
112
|
+
return GeneratedOutput(text=text, logprob=logprob, tokens=tokens, finish_reason=finish_reason)
|
|
119
113
|
|
|
120
114
|
prompt = parse_sequence(response["prompt"], True)
|
|
121
115
|
completions = []
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from helm.common.cache import CacheConfig
|
|
4
|
+
from helm.common.media_object import TEXT_TYPE
|
|
5
|
+
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
6
|
+
from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
|
|
7
|
+
from .client import CachingClient, truncate_sequence, generate_uid_for_multimodal_prompt
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from aleph_alpha_client import Client, CompletionRequest, CompletionResponse, Image, Prompt
|
|
11
|
+
except ModuleNotFoundError as e:
|
|
12
|
+
handle_module_not_found_error(e, ["aleph-alpha"])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AlephAlphaClient(CachingClient):
|
|
16
|
+
def __init__(self, api_key: str, cache_config: CacheConfig):
|
|
17
|
+
super().__init__(cache_config=cache_config)
|
|
18
|
+
self._api_key: str = api_key
|
|
19
|
+
self._aleph_alpha_client = Client(token=self._api_key) if self._api_key else None
|
|
20
|
+
|
|
21
|
+
def make_request(self, request: Request) -> RequestResult:
|
|
22
|
+
"""Make a request following https://docs.aleph-alpha.com/api/complete."""
|
|
23
|
+
assert self._aleph_alpha_client is not None
|
|
24
|
+
|
|
25
|
+
model: str = request.model_engine
|
|
26
|
+
prompt: Prompt
|
|
27
|
+
|
|
28
|
+
# The prompt key is a unique identifier for the prompt
|
|
29
|
+
prompt_key: str = request.prompt
|
|
30
|
+
|
|
31
|
+
# Contents can either be text or a list of multimodal content made up of text, images or other content
|
|
32
|
+
if request.multimodal_prompt is not None:
|
|
33
|
+
from helm.common.images_utils import encode_base64
|
|
34
|
+
|
|
35
|
+
items = []
|
|
36
|
+
for media_object in request.multimodal_prompt.media_objects:
|
|
37
|
+
if media_object.is_type("image") and media_object.location:
|
|
38
|
+
items.append(Image(base_64=encode_base64(media_object.location), cropping=None, controls=[]))
|
|
39
|
+
elif media_object.is_type(TEXT_TYPE):
|
|
40
|
+
if media_object.text is None:
|
|
41
|
+
raise ValueError("MediaObject of text type has missing text field value")
|
|
42
|
+
items.append(media_object.text)
|
|
43
|
+
else:
|
|
44
|
+
raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
|
|
45
|
+
|
|
46
|
+
prompt = Prompt(items=items)
|
|
47
|
+
prompt_key = generate_uid_for_multimodal_prompt(request.multimodal_prompt)
|
|
48
|
+
else:
|
|
49
|
+
prompt = Prompt.from_text(request.prompt)
|
|
50
|
+
|
|
51
|
+
parameters = {
|
|
52
|
+
"maximum_tokens": request.max_tokens,
|
|
53
|
+
"temperature": request.temperature,
|
|
54
|
+
"top_k": request.top_k_per_token,
|
|
55
|
+
"top_p": request.top_p,
|
|
56
|
+
"presence_penalty": request.presence_penalty,
|
|
57
|
+
"frequency_penalty": request.frequency_penalty,
|
|
58
|
+
"n": request.num_completions,
|
|
59
|
+
"stop_sequences": request.stop_sequences,
|
|
60
|
+
"log_probs": request.top_k_per_token,
|
|
61
|
+
"echo": request.echo_prompt,
|
|
62
|
+
"tokens": True, # Setting to True returns individual tokens of the completion
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
|
|
67
|
+
def do_it():
|
|
68
|
+
assert self._aleph_alpha_client is not None
|
|
69
|
+
completion_response: CompletionResponse = self._aleph_alpha_client.complete(
|
|
70
|
+
request=CompletionRequest(prompt=prompt, **parameters), model=model
|
|
71
|
+
)
|
|
72
|
+
result = dict(completion_response.to_json())
|
|
73
|
+
assert "completions" in result, f"Invalid response: {result}"
|
|
74
|
+
return result
|
|
75
|
+
|
|
76
|
+
cache_key = CachingClient.make_cache_key({"model": model, "prompt": prompt_key, **parameters}, request)
|
|
77
|
+
response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
|
|
78
|
+
except Exception as e:
|
|
79
|
+
error: str = f"AlephAlphaClient error: {e}"
|
|
80
|
+
return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
|
|
81
|
+
|
|
82
|
+
completions: List[GeneratedOutput] = []
|
|
83
|
+
for completion in response["completions"]:
|
|
84
|
+
sequence_logprob: float = 0
|
|
85
|
+
tokens: List[Token] = []
|
|
86
|
+
|
|
87
|
+
# `completion_tokens` is the list of selected tokens.
|
|
88
|
+
for i, token in enumerate(completion.get("completion_tokens", [])):
|
|
89
|
+
# Use the selected token value to get the logprob
|
|
90
|
+
logprob: float = completion["log_probs"][i][token]
|
|
91
|
+
sequence_logprob += logprob
|
|
92
|
+
tokens.append(
|
|
93
|
+
Token(
|
|
94
|
+
text=token,
|
|
95
|
+
logprob=logprob,
|
|
96
|
+
)
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
sequence: GeneratedOutput = GeneratedOutput(
|
|
100
|
+
text=completion["completion"], logprob=sequence_logprob, tokens=tokens
|
|
101
|
+
)
|
|
102
|
+
sequence = truncate_sequence(sequence, request)
|
|
103
|
+
completions.append(sequence)
|
|
104
|
+
|
|
105
|
+
return RequestResult(
|
|
106
|
+
success=True,
|
|
107
|
+
cached=cached,
|
|
108
|
+
request_time=response["request_time"],
|
|
109
|
+
request_datetime=response["request_datetime"],
|
|
110
|
+
completions=completions,
|
|
111
|
+
embedding=[],
|
|
112
|
+
)
|