crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +134 -31
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +31 -3
- helm/benchmark/adaptation/adapters/adapter.py +2 -2
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/gender_perturbation.py +1 -1
- helm/benchmark/augmentations/perturbation.py +8 -2
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/config_registry.py +7 -1
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +20 -7
- helm/benchmark/metrics/basic_metrics.py +169 -664
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -110
- helm/benchmark/metrics/dry_run_metrics.py +2 -2
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +89 -0
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +2 -2
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -2
- helm/benchmark/metrics/test_classification_metrics.py +8 -5
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +74 -0
- helm/benchmark/model_metadata_registry.py +36 -0
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +26 -10
- helm/benchmark/presentation/schema.py +15 -40
- helm/benchmark/presentation/summarize.py +119 -79
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +1 -2
- helm/benchmark/presentation/test_summarize.py +3 -3
- helm/benchmark/run.py +54 -26
- helm/benchmark/run_expander.py +214 -16
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/run_specs/classic_run_specs.py +1510 -0
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +51 -57
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +1 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +1 -1
- helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
- helm/benchmark/scenarios/scenario.py +4 -0
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +6 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -2
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
- helm/benchmark/server.py +24 -1
- helm/benchmark/slurm_runner.py +70 -49
- helm/benchmark/static/benchmarking.js +1 -1
- helm/benchmark/static/schema_classic.yaml +258 -1066
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +2 -227
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +14 -16
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -44
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +4 -1
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +3 -9
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +203 -18
- helm/{proxy/clients → clients}/auto_client.py +59 -31
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +65 -7
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +4 -11
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +5 -5
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +5 -7
- helm/{proxy/clients → clients}/huggingface_client.py +43 -64
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
- helm/{proxy/clients → clients}/megatron_client.py +5 -5
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +6 -8
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +13 -15
- helm/clients/test_client.py +100 -0
- helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +20 -8
- helm/{proxy/clients → clients}/together_client.py +12 -72
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +53 -48
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +16 -4
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +1 -1
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +35 -4
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/request.py +15 -17
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +1 -1
- helm/config/model_deployments.yaml +1069 -546
- helm/config/model_metadata.yaml +753 -31
- helm/config/tokenizer_configs.yaml +142 -43
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +8 -6
- helm/proxy/example_queries.py +29 -17
- helm/proxy/server.py +70 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +96 -16
- helm/proxy/services/service.py +30 -0
- helm/proxy/services/test_remote_service.py +4 -3
- helm/proxy/services/test_service.py +0 -12
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.4.0.dist-info/RECORD +0 -397
- helm/benchmark/run_specs.py +0 -2762
- helm/benchmark/test_model_properties.py +0 -1570
- helm/benchmark/vlm_run_specs.py +0 -97
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/huggingface_window_service.py +0 -60
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/common/cache_utils.py +0 -14
- helm/proxy/clients/aleph_alpha_client.py +0 -95
- helm/proxy/clients/goose_ai_client.py +0 -99
- helm/proxy/clients/microsoft_client.py +0 -180
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/simple_client.py +0 -60
- helm/proxy/clients/test_client.py +0 -49
- helm/proxy/clients/vertexai_client.py +0 -115
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
- helm/proxy/token_counters/test_openai_token_counter.py +0 -81
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
The detailed diffs below cover the token counter and tokenizer modules. Removed lines that the source diff viewer truncated are marked with `…`.

```diff
--- a/helm/proxy/token_counters/auto_token_counter.py
+++ b/helm/proxy/token_counters/auto_token_counter.py
@@ -1,42 +1,42 @@
-from typing import …
-…
-…
-from helm.…
-from .…
-from .…
-from .free_token_counter import FreeTokenCounter
-from .gooseai_token_counter import GooseAITokenCounter
-from .openai_token_counter import OpenAITokenCounter
+from typing import List
+from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
+
+from helm.common.request import Request, GeneratedOutput
+from helm.tokenizers.auto_tokenizer import AutoTokenizer
+from helm.common.tokenization_request import TokenizationRequest, TokenizationRequestResult
 from .token_counter import TokenCounter
 
 
 class AutoTokenCounter(TokenCounter):
-    """Automatically count tokens based on the …
-…
-    def __init__(self, …
-        self.…
-…
+    """Automatically count tokens based on the model_deployment."""
+
+    def __init__(self, auto_tokenizer: AutoTokenizer):
+        self.auto_tokenizer: AutoTokenizer = auto_tokenizer
+
+    def count_tokens(self, request: Request, completions: List[GeneratedOutput]) -> int:
+        """Counts tokens based on the model deployment.
+
+        This counts the number of tokens in the request and completions.
+        Both input and output tokens are counted. For some model providers,
+        this method will return a larger number of tokens than the actual
+        token count used for billing. For example, GooseAI only charges for
+        (output_tokens - 25) rather than (input_tokens + output_tokens)."""
+        model_deployment: ModelDeployment = get_model_deployment(request.model_deployment)
+        assert model_deployment.tokenizer_name
+        tokenizer_name = model_deployment.tokenizer_name
+
+        num_completion_tokens = 0
+        for completion in completions:
+            if completion.tokens:
+                num_completion_tokens += len(completion.tokens)
             else:
-…
-        return …
+                tokenized_completion: TokenizationRequestResult = self.auto_tokenizer.tokenize(
+                    TokenizationRequest(request.prompt, tokenizer=tokenizer_name)
+                )
+                num_completion_tokens += len(tokenized_completion.tokens)
+
+        tokenized_prompt: TokenizationRequestResult = self.auto_tokenizer.tokenize(
+            TokenizationRequest(request.prompt, tokenizer=tokenizer_name)
+        )
+        num_prompt_tokens = len(tokenized_prompt.tokens)
+        return num_prompt_tokens + num_completion_tokens
```
```diff
--- /dev/null
+++ b/helm/proxy/token_counters/test_auto_token_counter.py
@@ -0,0 +1,164 @@
+from typing import List
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
+
+from helm.common.request import Request, GeneratedOutput, Token
+from helm.tokenizers.auto_tokenizer import AutoTokenizer
+from helm.proxy.token_counters.auto_token_counter import AutoTokenCounter
+
+
+class TestAutoTokenCounter:
+    def test_count_tokens_openai(self):
+        token_counter = AutoTokenCounter(
+            AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
+        )
+        # The following prompt has 51 tokens according to the GPT-2 tokenizer
+        request = Request(
+            model="openai/text-davinci-002",
+            model_deployment="openai/text-davinci-002",
+            prompt="The Center for Research on Foundation Models (CRFM) is "
+            "an interdisciplinary initiative born out of the Stanford "
+            "Institute for Human-Centered Artificial Intelligence (HAI) "
+            "that aims to make fundamental advances in the study, development, "
+            "and deployment of foundation models.",
+        )
+        completions: List[GeneratedOutput] = [
+            GeneratedOutput(
+                text=" The CRFM is dedicated to advancing our knowledge of the foundations of artificial intelligence "
+                "(AI) and related fields. It focuses on foundational questions in AI, which are",
+                logprob=-49.00783279519999,
+                tokens=[
+                    Token(text=" The", logprob=-1.8096403),
+                    Token(text=" CR", logprob=-1.2861944),
+                    Token(text="FM", logprob=-0.0032369632),
+                    Token(text=" is", logprob=-1.4355252),
+                    Token(text=" dedicated", logprob=-3.814422),
+                    Token(text=" to", logprob=-0.009623392),
+                    Token(text=" advancing", logprob=-2.6732886),
+                    Token(text=" our", logprob=-3.123714),
+                    Token(text=" knowledge", logprob=-3.030337),
+                    Token(text=" of", logprob=-0.46280858),
+                    Token(text=" the", logprob=-1.4058315),
+                    Token(text=" foundations", logprob=-2.0638132),
+                    Token(text=" of", logprob=-0.2607486),
+                    Token(text=" artificial", logprob=-1.1653417),
+                    Token(text=" intelligence", logprob=-0.03756146),
+                    Token(text=" (", logprob=-2.019812),
+                    Token(text="AI", logprob=-0.03869382),
+                    Token(text=")", logprob=-0.49895737),
+                    Token(text=" and", logprob=-0.81909865),
+                    Token(text=" related", logprob=-2.611718),
+                    Token(text=" fields", logprob=-0.7640527),
+                    Token(text=".", logprob=-1.8066244),
+                    Token(text=" It", logprob=-2.2856107),
+                    Token(text=" focuses", logprob=-3.3735154),
+                    Token(text=" on", logprob=-0.13244776),
+                    Token(text=" foundational", logprob=-1.2640914),
+                    Token(text=" questions", logprob=-2.010647),
+                    Token(text=" in", logprob=-1.980726),
+                    Token(text=" AI", logprob=-0.5709368),
+                    Token(text=",", logprob=-1.036094),
+                    Token(text=" which", logprob=-3.826836),
+                    Token(text=" are", logprob=-1.3858839),
+                ],
+            )
+        ]
+
+        # Verified against https://beta.openai.com/tokenizer. Prompt + completions = 51 + 32.
+        assert token_counter.count_tokens(request, completions) == 51 + 32
+
+    def test_count_tokens_anthropic(self):
+        token_counter = AutoTokenCounter(
+            AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
+        )
+        request = Request(
+            model="anthropic/claude-instant-v1",
+            model_deployment="anthropic/claude-instant-v1",
+            prompt="\n\nHuman:The Center for Research on Foundation Models (CRFM) is "
+            "an interdisciplinary initiative born out of the Stanford "
+            "Institute for Human-Centered Artificial Intelligence (HAI) "
+            "that aims to make fundamental advances in the study, development, "
+            "and deployment of foundation models.\n\nAssistant:",
+        )
+        completions: List[GeneratedOutput] = [
+            GeneratedOutput(
+                text="Thank you for the background information. The Center for Research "
+                "on Foundation Models sounds like an interesting initiative focused on "
+                "advancing research and responsible development of large AI models. I "
+                "don't have any personal thoughts on it, but I'm happy to discuss or "
+                "provide more information if helpful. As an AI assistant, I don't have "
+                "subjective opinions.",
+                logprob=0,
+                tokens=[
+                    Token(text="Thank", logprob=0),
+                    Token(text=" you", logprob=0),
+                    Token(text=" for", logprob=0),
+                    Token(text=" the", logprob=0),
+                    Token(text=" background", logprob=0),
+                    Token(text=" information", logprob=0),
+                    Token(text=".", logprob=0),
+                    Token(text=" The", logprob=0),
+                    Token(text=" Center", logprob=0),
+                    Token(text=" for", logprob=0),
+                    Token(text=" Research", logprob=0),
+                    Token(text=" on", logprob=0),
+                    Token(text=" Foundation", logprob=0),
+                    Token(text=" Models", logprob=0),
+                    Token(text=" sounds", logprob=0),
+                    Token(text=" like", logprob=0),
+                    Token(text=" an", logprob=0),
+                    Token(text=" interesting", logprob=0),
+                    Token(text=" initiative", logprob=0),
+                    Token(text=" focused", logprob=0),
+                    Token(text=" on", logprob=0),
+                    Token(text=" advancing", logprob=0),
+                    Token(text=" research", logprob=0),
+                    Token(text=" and", logprob=0),
+                    Token(text=" responsible", logprob=0),
+                    Token(text=" development", logprob=0),
+                    Token(text=" of", logprob=0),
+                    Token(text=" large", logprob=0),
+                    Token(text=" AI", logprob=0),
+                    Token(text=" models", logprob=0),
+                    Token(text=".", logprob=0),
+                    Token(text=" I", logprob=0),
+                    Token(text=" don", logprob=0),
+                    Token(text="'t", logprob=0),
+                    Token(text=" have", logprob=0),
+                    Token(text=" any", logprob=0),
+                    Token(text=" personal", logprob=0),
+                    Token(text=" thoughts", logprob=0),
+                    Token(text=" on", logprob=0),
+                    Token(text=" it", logprob=0),
+                    Token(text=",", logprob=0),
+                    Token(text=" but", logprob=0),
+                    Token(text=" I", logprob=0),
+                    Token(text="'m", logprob=0),
+                    Token(text=" happy", logprob=0),
+                    Token(text=" to", logprob=0),
+                    Token(text=" discuss", logprob=0),
+                    Token(text=" or", logprob=0),
+                    Token(text=" provide", logprob=0),
+                    Token(text=" more", logprob=0),
+                    Token(text=" information", logprob=0),
+                    Token(text=" if", logprob=0),
+                    Token(text=" helpful", logprob=0),
+                    Token(text=".", logprob=0),
+                    Token(text=" As", logprob=0),
+                    Token(text=" an", logprob=0),
+                    Token(text=" AI", logprob=0),
+                    Token(text=" assistant", logprob=0),
+                    Token(text=",", logprob=0),
+                    Token(text=" I", logprob=0),
+                    Token(text=" don", logprob=0),
+                    Token(text="'t", logprob=0),
+                    Token(text=" have", logprob=0),
+                    Token(text=" subjective", logprob=0),
+                    Token(text=" opinions", logprob=0),
+                    Token(text=".", logprob=0),
+                ],
+                finish_reason=None,
+                multimodal_content=None,
+            )
+        ]
+
+        assert token_counter.count_tokens(request, completions) == 126
```
```diff
--- a/helm/proxy/token_counters/token_counter.py
+++ b/helm/proxy/token_counters/token_counter.py
@@ -1,15 +1,13 @@
 from abc import ABC, abstractmethod
 from typing import List
 
-from helm.common.request import …
+from helm.common.request import GeneratedOutput, Request
 
 
 class TokenCounter(ABC):
     """Counts the number of tokens used given `Request` and completions."""
 
     @abstractmethod
-    def count_tokens(self, request: Request, completions: List[…
-        """
-        Counts the total number of tokens given a request and completions.
-        """
+    def count_tokens(self, request: Request, completions: List[GeneratedOutput]) -> int:
+        """Counts the total number of tokens given a request and completions."""
         pass
```
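The slimmed-down abstract class makes custom counters easy to write. A minimal sketch of a subclass (a hypothetical example, not part of the package) that counts only completion tokens, assuming each `GeneratedOutput` carries its token list as in the tests above:

```python
from typing import List

from helm.common.request import GeneratedOutput, Request
from helm.proxy.token_counters.token_counter import TokenCounter


class CompletionOnlyTokenCounter(TokenCounter):
    """Hypothetical example: count only the tokens in the completions."""

    def count_tokens(self, request: Request, completions: List[GeneratedOutput]) -> int:
        # Assumes every completion carries its tokens; AutoTokenCounter above
        # falls back to re-tokenizing when it does not.
        return sum(len(completion.tokens) for completion in completions)
```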
helm/tokenizers/__init__.py: file without changes.
```diff
--- a/helm/proxy/tokenizers/ai21_tokenizer.py
+++ b/helm/tokenizers/ai21_tokenizer.py
@@ -1,4 +1,4 @@
-from typing import Dict, List
+from typing import Any, Dict, List
 import requests
 
 from dacite import from_dict
@@ -12,7 +12,7 @@ from helm.common.tokenization_request import (
     DecodeRequest,
     DecodeRequestResult,
 )
-from helm.…
+from helm.clients.ai21_utils import AI21RequestError, handle_failed_request
 from .tokenizer import Tokenizer
 
 
@@ -28,7 +28,7 @@ class AI21Tokenizer(Tokenizer):
         # TODO: Does not support encoding
         raw_request: Dict[str, str] = {"text": request.text}
 
-        def do_it():
+        def do_it() -> Dict[str, Any]:
             response = requests.post(
                 "https://api.ai21.com/studio/v1/tokenize",
                 headers={"Authorization": f"Bearer {self.api_key}"},
```
```diff
--- a/helm/proxy/tokenizers/anthropic_tokenizer.py
+++ b/helm/tokenizers/anthropic_tokenizer.py
@@ -23,7 +23,7 @@ class AnthropicTokenizer(CachingTokenizer):
         super().__init__(cache_config)
         with AnthropicTokenizer.LOCK:
             self._tokenizer: PreTrainedTokenizerBase = PreTrainedTokenizerFast(
-                tokenizer_object=anthropic.get_tokenizer()
+                tokenizer_object=anthropic.Anthropic().get_tokenizer()
             )
 
     def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
```
```diff
--- a/helm/proxy/tokenizers/auto_tokenizer.py
+++ b/helm/tokenizers/auto_tokenizer.py
@@ -4,9 +4,8 @@ from typing import Any, Dict, Mapping, Optional
 from retrying import Attempt, RetryError
 
 from helm.benchmark.tokenizer_config_registry import get_tokenizer_config
-from helm.common.cache_utils import build_cache_config
 from helm.common.credentials_utils import provide_api_key
-from helm.common.…
+from helm.common.cache_backend_config import CacheBackendConfig, CacheConfig
 from helm.common.hierarchical_logger import hlog
 from helm.common.object_spec import create_object, inject_object_spec_args
 from helm.proxy.retry import retry_tokenizer_request
@@ -16,19 +15,17 @@ from helm.common.tokenization_request import (
     TokenizationRequest,
     TokenizationRequestResult,
 )
-from helm.…
+from helm.tokenizers.tokenizer import Tokenizer
 
 
 class AutoTokenizer(Tokenizer):
     """Automatically dispatch to the proper `Tokenizer` based on the tokenizer name."""
 
-    def __init__(self, credentials: Mapping[str, Any], …
+    def __init__(self, credentials: Mapping[str, Any], cache_backend_config: CacheBackendConfig):
         self.credentials = credentials
-        self.…
-        self.mongo_uri = mongo_uri
+        self.cache_backend_config = cache_backend_config
         self.tokenizers: Dict[str, Tokenizer] = {}
-        hlog(f"AutoTokenizer: …
-        hlog(f"AutoTokenizer: mongo_uri = {mongo_uri}")
+        hlog(f"AutoTokenizer: cache_backend_config = {cache_backend_config}")
 
     def _get_tokenizer(self, tokenizer_name: str) -> Tokenizer:
         # First try to find the tokenizer in the cache
@@ -38,7 +35,7 @@ class AutoTokenizer(Tokenizer):
 
         # Otherwise, create the tokenizer
         organization: str = tokenizer_name.split("/")[0]
-        cache_config: CacheConfig = …
+        cache_config: CacheConfig = self.cache_backend_config.get_cache_config(organization)
 
         tokenizer_config = get_tokenizer_config(tokenizer_name)
         if tokenizer_config:
```
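For reference, the new constructor signature is exercised by `test_auto_token_counter.py` earlier in this diff; a minimal usage sketch (the `BlackHoleCacheBackendConfig`, taken from that test, disables persistent caching):

```python
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
from helm.common.tokenization_request import TokenizationRequest
from helm.tokenizers.auto_tokenizer import AutoTokenizer

# The mongo_uri argument is gone; a single CacheBackendConfig is passed instead.
auto_tokenizer = AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
result = auto_tokenizer.tokenize(TokenizationRequest("Hello world", tokenizer="huggingface/gpt2"))
```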
```diff
--- a/helm/proxy/tokenizers/cohere_tokenizer.py
+++ b/helm/tokenizers/cohere_tokenizer.py
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
     DecodeRequestResult,
     TokenizationToken,
 )
-from helm.…
+from helm.clients.cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
 from .caching_tokenizer import CachingTokenizer
 
 
```
```diff
--- a/helm/proxy/tokenizers/http_model_tokenizer.py
+++ b/helm/tokenizers/http_model_tokenizer.py
@@ -1,6 +1,6 @@
 import os
 from dataclasses import asdict
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from helm.common.cache import Cache, CacheConfig
 from helm.common.request import wrap_request_time
@@ -42,7 +42,7 @@ class HTTPModelTokenizer(Tokenizer):
 
         try:
 
-            def do_it():
+            def do_it() -> Dict[str, Any]:
                 url = f"{self.base_url}/tokenize"
                 response = requests.post(url, json=raw_request)
                 response.raise_for_status()
@@ -70,7 +70,7 @@ class HTTPModelTokenizer(Tokenizer):
 
         try:
 
-            def do_it():
+            def do_it() -> Dict[str, Any]:
                 url = f"{self.base_url}/decode"
                 response = requests.post(url, json={"tokens": request.tokens})
                 response.raise_for_status()
```
```diff
--- a/helm/proxy/tokenizers/huggingface_tokenizer.py
+++ b/helm/tokenizers/huggingface_tokenizer.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, cast
 from threading import Lock
 from helm.common.cache import CacheConfig
 from helm.common.concurrency import ThreadSafeWrapper
@@ -11,26 +11,6 @@ from .caching_tokenizer import CachingTokenizer
 from .tokenizer import cleanup_tokens
 
 
-# TODO: Delete this.
-_MODEL_NAME_ALIASES: Dict[str, str] = {
-    "google/t5-11b": "t5-11b",
-    "huggingface/gpt2": "gpt2",
-    "huggingface/santacoder": "bigcode/santacoder",
-    "huggingface/starcoder": "bigcode/starcoder",
-    "writer/gpt2": "gpt2",  # Palmyra models do not support echo
-    # So they have a different TokenizerConfig called "writer/gpt2"
-    # when in reality they use the same tokenizer as "huggingface/gpt2"
-    "microsoft/gpt2": "gpt2",  # Same as above
-}
-"""Mapping of some HELM model names to Hugging Face pretrained model name."""
-
-
-# TODO: Delete this.
-def resolve_alias(model_name: str) -> str:
-    """Resolve some HELM model names to Hugging Face pretrained model name."""
-    return _MODEL_NAME_ALIASES.get(model_name, model_name)
-
-
 WrappedPreTrainedTokenizer = ThreadSafeWrapper[PreTrainedTokenizerBase]
 """Thread safe wrapper around Hugging Face PreTrainedTokenizerBase.
 
@@ -106,11 +86,9 @@ class HuggingFaceTokenizer(CachingTokenizer):
 
     def _get_tokenizer_for_request(self, request: Dict[str, Any]) -> WrappedPreTrainedTokenizer:
         """Method used in both _tokenize_do_it and _decode_do_it to get the tokenizer."""
-        pretrained_model_name_or_path…
-…
-…
-        else:
-            pretrained_model_name_or_path = resolve_alias(request["tokenizer"])
+        pretrained_model_name_or_path = (
+            self._pretrained_model_name_or_path if self._pretrained_model_name_or_path else request["tokenizer"]
+        )
         return HuggingFaceTokenizer.get_tokenizer(
             helm_tokenizer_name=request["tokenizer"],
             pretrained_model_name_or_path=pretrained_model_name_or_path,
@@ -155,6 +133,9 @@ class HuggingFaceTokenizer(CachingTokenizer):
         # Just like tokenize("Hello", encode=False) would return ["Hello"].
         with self._get_tokenizer_for_request(request) as tokenizer:
             tokens = tokenizer.tokenize(request["text"])
+        # Some tokenizers (e.g. Qwen/Qwen-7B) return the tokens as bytes, so we have to decode them to strings.
+        if tokens and type(tokens[0]) == bytes:
+            tokens = [cast(bytes, token).decode(errors="ignore") for token in tokens]
         tokens = cleanup_tokens(tokens, request["tokenizer"])
         return {"tokens": tokens}
 
```
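The bytes-handling addition in `_tokenize_do_it` can be seen in isolation; a self-contained sketch of the same decoding step:

```python
from typing import List, Union, cast

# Mirrors the lines added above: tokenizers such as Qwen/Qwen-7B may yield bytes tokens.
tokens: List[Union[str, bytes]] = [b"otter", b" swims"]
if tokens and type(tokens[0]) == bytes:
    tokens = [cast(bytes, token).decode(errors="ignore") for token in tokens]
assert tokens == ["otter", " swims"]
```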
```diff
--- /dev/null
+++ b/helm/tokenizers/simple_tokenizer.py
@@ -0,0 +1,33 @@
+from helm.common.tokenization_request import (
+    DecodeRequest,
+    DecodeRequestResult,
+    TokenizationRequest,
+    TokenizationRequestResult,
+    TokenizationToken,
+)
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class SimpleTokenizer(Tokenizer):
+    """Simple tokenizer for tutorials and for debugging."""
+
+    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
+        if request.encode:
+            return TokenizationRequestResult(
+                success=True,
+                cached=False,
+                tokens=[TokenizationToken(ord(character)) for character in request.text],
+                text=request.text,
+            )
+        else:
+            return TokenizationRequestResult(
+                success=True,
+                cached=False,
+                tokens=[TokenizationToken(character) for character in request.text],
+                text=request.text,
+            )
+
+    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
+        return DecodeRequestResult(
+            success=True, cached=False, text="".join([chr(code_point) for code_point in request.tokens])
+        )
```
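`SimpleTokenizer` maps each character to its Unicode code point when `encode=True`, so encode and decode are exact inverses. A quick round-trip sketch; the test file later in this diff checks the same behavior, and the `value` attribute on `TokenizationToken` is an assumption of this sketch:

```python
from helm.common.tokenization_request import DecodeRequest, TokenizationRequest
from helm.tokenizers.simple_tokenizer import SimpleTokenizer

tokenizer = SimpleTokenizer()
encoded = tokenizer.tokenize(TokenizationRequest(tokenizer="simple/tokenizer1", text="otter 🦦", encode=True))
code_points = [token.value for token in encoded.tokens]  # assumes TokenizationToken stores its payload in `value`
decoded = tokenizer.decode(DecodeRequest(tokens=code_points, tokenizer="simple/tokenizer1"))
assert decoded.text == "otter 🦦"
```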
```diff
--- a/helm/proxy/tokenizers/test_anthropic_tokenizer.py
+++ b/helm/tokenizers/test_anthropic_tokenizer.py
@@ -10,7 +10,7 @@ from helm.common.tokenization_request import (
     TokenizationRequest,
     TokenizationRequestResult,
 )
-from helm.…
+from helm.tokenizers.anthropic_tokenizer import AnthropicTokenizer
 
 
 class TestAnthropicTokenizer:
```
```diff
--- a/helm/proxy/tokenizers/test_huggingface_tokenizer.py
+++ b/helm/tokenizers/test_huggingface_tokenizer.py
@@ -124,6 +124,9 @@ class TestHuggingFaceTokenizer:
     def test_get_santacoder(self):
         TestHuggingFaceTokenizer.verify_get_tokenizer("bigcode/santacoder", 62)
 
+    def test_get_clip_tokenizer(self):
+        TestHuggingFaceTokenizer.verify_get_tokenizer("openai/clip-vit-large-patch14", 50)
+
     def test_gpt2_tokenize_eos(self):
         eos_token: str = "<|endoftext|>"
         wrapped_tokenizer = HuggingFaceTokenizer.get_tokenizer("huggingface/gpt2", pretrained_model_name_or_path="gpt2")
```
```diff
--- /dev/null
+++ b/helm/tokenizers/test_simple_tokenizer.py
@@ -0,0 +1,33 @@
+from helm.common.tokenization_request import (
+    DecodeRequest,
+    TokenizationRequest,
+    TokenizationToken,
+)
+from helm.tokenizers.simple_tokenizer import SimpleTokenizer
+
+
+def test_simple_tokenizer_tokenize():
+    tokenizer = SimpleTokenizer()
+    request = TokenizationRequest(tokenizer="simple/tokenizer1", text="otter 🦦")
+    result = tokenizer.tokenize(request)
+    assert result.success
+    assert not result.cached
+    assert result.tokens == [TokenizationToken(token) for token in ["o", "t", "t", "e", "r", " ", "🦦"]]
+
+
+def test_simple_tokenizer_encode():
+    tokenizer = SimpleTokenizer()
+    request = TokenizationRequest(tokenizer="simple/tokenizer1", text="otter 🦦", encode=True)
+    result = tokenizer.tokenize(request)
+    assert result.success
+    assert not result.cached
+    assert result.tokens == [TokenizationToken(token) for token in [111, 116, 116, 101, 114, 32, 129446]]
+
+
+def test_simple_tokenizer_decode():
+    tokenizer = SimpleTokenizer()
+    request = DecodeRequest(tokenizer="simple/tokenizer1", tokens=[111, 116, 116, 101, 114, 32, 129446])
+    result = tokenizer.decode(request)
+    assert result.success
+    assert not result.cached
+    assert result.text == "otter 🦦"
```
```diff
--- a/helm/proxy/tokenizers/vertexai_tokenizer.py
+++ b/helm/tokenizers/vertexai_tokenizer.py
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
     TokenizationRequest,
     TokenizationToken,
 )
-from helm.…
+from helm.tokenizers.caching_tokenizer import CachingTokenizer
 from helm.proxy.retry import NonRetriableException
 
 try:
```
```diff
--- a/helm/proxy/tokenizers/yalm_tokenizer.py
+++ b/helm/tokenizers/yalm_tokenizer.py
@@ -21,9 +21,11 @@ class YaLMTokenizer(CachingTokenizer):
         # This is a problem because then tokenize(" Hello", encode=False) == tokenize("Hello", encode=False)
         # That is why we manually replace "▁" with a space.
         return {
-            "tokens": …
-…
-…
+            "tokens": (
+                token_ids
+                if request["encode"]
+                else cleanup_tokens(self._tokenizer.convert_ids_to_tokens(token_ids), request["tokenizer"])
+            )
         }
 
     def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
```
helm/tokenizers/yalm_tokenizer_data/__init__.py: file without changes.

helm/tokenizers/yalm_tokenizer_data/voc_100b.sp: binary file.
```diff
--- a/helm/proxy/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py
+++ b/helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py
@@ -16,7 +16,7 @@ adapted from https://github.com/yandex/YaLM-100B/blob/main/megatron_lm/megatron/
 """
 
 
-YALM_TOKENIZER_PACKAGE: str = "helm.…
+YALM_TOKENIZER_PACKAGE: str = "helm.tokenizers.yalm_tokenizer_data"
 YALM_TOKENIZER_VOCAB_FILENAME: str = "voc_100b.sp"
 
 
```