crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/proxy/services/test_service.py

@@ -3,6 +3,7 @@ import pytest
 import shutil
 import tempfile

+from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
 from helm.common.authentication import Authentication
 from helm.common.request import Request
 from helm.proxy.accounts import AuthenticationError, Accounts
@@ -34,7 +35,9 @@ class TestServerService:

     def test_make_request(self):
         num_completions = 2
-        request = Request(…
+        request = Request(
+            prompt="1 2 3", model="simple/model1", model_deployment="simple/model1", num_completions=num_completions
+        )
         result = self.service.make_request(self.auth, request)
         assert len(result.completions) == num_completions

@@ -194,24 +197,12 @@ def helper_prod_test_service(request: Request, expected_text: str):
         # Consistency of log probs
         assert completion.logprob == sum(token.logprob for token in completion.tokens)

-        for token in completion.tokens[1:]:
-            assert len(token.top_logprobs) == request.top_k_per_token
-
-            # If generated token was one of the top, make sure has the right probability
-            if token.text in token.top_logprobs:
-                assert token.logprob == token.top_logprobs[token.text]
-
-            # If temperature = 0, then make sure we're getting the top probability token
-            if request.temperature == 0:
-                assert token.text in token.top_logprobs
-                assert token.logprob == max(token.top_logprobs.values())
-
     # Make sure we get the expected_text in one of the completions
     assert any(completion.text == expected_text for completion in result.completions)


 # Models that we want to test
-prod_models = ["openai/davinci", "ai21/j1-jumbo"]
+prod_model_deployments = ["openai/davinci", "ai21/j1-jumbo"]


 # TODO: put a flag on this so that it's easy to use pytest to still run these slow tests
@@ -220,8 +211,17 @@ prod_models = ["openai/davinci", "ai21/j1-jumbo"]
 def test_prod_continue():
     # Test that we're continuing
     prompt = "Paris is the capital of"
-    for …
-    …
+    for model_deployment_name in prod_model_deployments:
+        model_deployment: ModelDeployment = get_model_deployment(model_deployment_name)
+        model_name: str = model_deployment.model_name or model_deployment.name
+        request = Request(
+            prompt=prompt,
+            model=model_name,
+            model_deployment=model_deployment_name,
+            max_tokens=1,
+            num_completions=1,
+            temperature=0,
+        )
         helper_prod_test_service(request, " France")


@@ -229,6 +229,15 @@ def test_prod_continue():
 def test_prod_echo():
     # If we're echoing the prompt, make sure we're getting the same thing back
     prompt = "I like pickles."
-    for …
-    …
+    for model_deployment_name in prod_model_deployments:
+        model_deployment: ModelDeployment = get_model_deployment(model_deployment_name)
+        model_name: str = model_deployment.model_name or model_deployment.name
+        request = Request(
+            prompt=prompt,
+            model=model_name,
+            model_deployment=model_deployment_name,
+            max_tokens=0,
+            num_completions=1,
+            echo_prompt=True,
+        )
         helper_prod_test_service(request, prompt)
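The hunks above trace the 0.5.0 split of the old single model field into a `model`/`model_deployment` pair, with `get_model_deployment` resolving a deployment name to its registered metadata. A minimal sketch of the new calling convention, assuming the deployment registry has already been populated from helm/config/model_deployments.yaml ("simple/model1" is the offline test deployment used by the tests above):

    from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
    from helm.common.request import Request

    deployment_name = "simple/model1"
    deployment: ModelDeployment = get_model_deployment(deployment_name)
    # Fall back to the deployment name when no separate model name is registered.
    model_name: str = deployment.model_name or deployment.name

    request = Request(
        prompt="1 2 3",
        model=model_name,                  # the underlying model
        model_deployment=deployment_name,  # the hosted deployment that serves it
        num_completions=2,
    )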
helm/proxy/test_accounts.py (new file)

@@ -0,0 +1,32 @@
+import os
+import pytest
+import tempfile
+
+from helm.proxy.accounts import Accounts, Authentication, InsufficientQuotaError, Usage
+
+
+class TestAccounts:
+    def setup_method(self, method):
+        accounts_file = tempfile.NamedTemporaryFile(delete=False)
+        self.accounts_path: str = accounts_file.name
+        self.accounts = Accounts(self.accounts_path)
+        self.root_auth = Authentication(Accounts.DEFAULT_API_KEY)
+
+    def teardown_method(self, method):
+        os.remove(self.accounts_path)
+
+    def test_check_can_use(self):
+        model_group = "anthropic"
+        account = self.accounts.create_account(self.root_auth)
+
+        # Cannot use this account because no quota was added
+        with pytest.raises(InsufficientQuotaError):
+            self.accounts.check_can_use(account.api_key, model_group)
+
+        # Add monthly quota
+        account.usages[model_group] = {}
+        account.usages[model_group]["monthly"] = Usage(quota=1000)
+        self.accounts.update_account(self.root_auth, account)
+
+        # Now this account has quota and can be used
+        self.accounts.check_can_use(account.api_key, model_group)
helm/proxy/token_counters/auto_token_counter.py

@@ -1,42 +1,42 @@
-from typing import …
-
-
-from helm.…
-from .…
-from .…
-from .free_token_counter import FreeTokenCounter
-from .gooseai_token_counter import GooseAITokenCounter
-from .openai_token_counter import OpenAITokenCounter
+from typing import List
+from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
+
+from helm.common.request import Request, GeneratedOutput
+from helm.tokenizers.auto_tokenizer import AutoTokenizer
+from helm.common.tokenization_request import TokenizationRequest, TokenizationRequestResult
 from .token_counter import TokenCounter


 class AutoTokenCounter(TokenCounter):
-    """Automatically count tokens based on the …
-
-    def __init__(self, …
-        self.…
-…
+    """Automatically count tokens based on the model_deployment."""
+
+    def __init__(self, auto_tokenizer: AutoTokenizer):
+        self.auto_tokenizer: AutoTokenizer = auto_tokenizer
+
+    def count_tokens(self, request: Request, completions: List[GeneratedOutput]) -> int:
+        """Counts tokens based on the model deployment.
+
+        This counts the number of tokens in the request and completions.
+        Both input and output tokens are counted. For some model providers,
+        this method will return a larger number of tokens than the actual
+        token count used for billing. For example, GooseAI only charges for
+        (output_tokens - 25) rather than (input_tokens + output_tokens)."""
+        model_deployment: ModelDeployment = get_model_deployment(request.model_deployment)
+        assert model_deployment.tokenizer_name
+        tokenizer_name = model_deployment.tokenizer_name
+
+        num_completion_tokens = 0
+        for completion in completions:
+            if completion.tokens:
+                num_completion_tokens += len(completion.tokens)
             else:
-…
-        return …
+                tokenized_completion: TokenizationRequestResult = self.auto_tokenizer.tokenize(
+                    TokenizationRequest(completion.text, tokenizer=tokenizer_name)
+                )
+                num_completion_tokens += len(tokenized_completion.tokens)
+
+        tokenized_prompt: TokenizationRequestResult = self.auto_tokenizer.tokenize(
+            TokenizationRequest(request.prompt, tokenizer=tokenizer_name)
+        )
+        num_prompt_tokens = len(tokenized_prompt.tokens)
+        return num_prompt_tokens + num_completion_tokens
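The docstring spells out the accounting: prompt tokens plus completion tokens, trusting each completion's own token list when the provider returned one and re-tokenizing the text otherwise. The same logic in isolation, as a rough standalone sketch (the `tokenize_fn` stub stands in for `AutoTokenizer.tokenize` and is not HELM code):

    from typing import Callable, List, Optional

    def count_tokens_sketch(
        prompt: str,
        completions: List[dict],
        tokenize_fn: Callable[[str], List[str]],
    ) -> int:
        num_completion_tokens = 0
        for completion in completions:
            tokens: Optional[List[str]] = completion.get("tokens")
            if tokens:
                # Trust the token list the model provider returned.
                num_completion_tokens += len(tokens)
            else:
                # Otherwise re-tokenize the completion text ourselves.
                num_completion_tokens += len(tokenize_fn(completion["text"]))
        num_prompt_tokens = len(tokenize_fn(prompt))
        return num_prompt_tokens + num_completion_tokens

So a 51-token prompt with a 32-token completion counts as 83 tokens, even though a provider like GooseAI would bill only (32 - 25) output tokens, which is why the docstring warns the count can overestimate the billed amount.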
helm/proxy/token_counters/test_auto_token_counter.py (new file)

@@ -0,0 +1,164 @@
+from typing import List
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
+
+from helm.common.request import Request, GeneratedOutput, Token
+from helm.tokenizers.auto_tokenizer import AutoTokenizer
+from helm.proxy.token_counters.auto_token_counter import AutoTokenCounter
+
+
+class TestAutoTokenCounter:
+    def test_count_tokens_openai(self):
+        token_counter = AutoTokenCounter(
+            AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
+        )
+        # The following prompt has 51 tokens according to the GPT-2 tokenizer
+        request = Request(
+            model="openai/text-davinci-002",
+            model_deployment="openai/text-davinci-002",
+            prompt="The Center for Research on Foundation Models (CRFM) is "
+            "an interdisciplinary initiative born out of the Stanford "
+            "Institute for Human-Centered Artificial Intelligence (HAI) "
+            "that aims to make fundamental advances in the study, development, "
+            "and deployment of foundation models.",
+        )
+        completions: List[GeneratedOutput] = [
+            GeneratedOutput(
+                text=" The CRFM is dedicated to advancing our knowledge of the foundations of artificial intelligence "
+                "(AI) and related fields. It focuses on foundational questions in AI, which are",
+                logprob=-49.00783279519999,
+                tokens=[
+                    Token(text=" The", logprob=-1.8096403),
+                    Token(text=" CR", logprob=-1.2861944),
+                    Token(text="FM", logprob=-0.0032369632),
+                    Token(text=" is", logprob=-1.4355252),
+                    Token(text=" dedicated", logprob=-3.814422),
+                    Token(text=" to", logprob=-0.009623392),
+                    Token(text=" advancing", logprob=-2.6732886),
+                    Token(text=" our", logprob=-3.123714),
+                    Token(text=" knowledge", logprob=-3.030337),
+                    Token(text=" of", logprob=-0.46280858),
+                    Token(text=" the", logprob=-1.4058315),
+                    Token(text=" foundations", logprob=-2.0638132),
+                    Token(text=" of", logprob=-0.2607486),
+                    Token(text=" artificial", logprob=-1.1653417),
+                    Token(text=" intelligence", logprob=-0.03756146),
+                    Token(text=" (", logprob=-2.019812),
+                    Token(text="AI", logprob=-0.03869382),
+                    Token(text=")", logprob=-0.49895737),
+                    Token(text=" and", logprob=-0.81909865),
+                    Token(text=" related", logprob=-2.611718),
+                    Token(text=" fields", logprob=-0.7640527),
+                    Token(text=".", logprob=-1.8066244),
+                    Token(text=" It", logprob=-2.2856107),
+                    Token(text=" focuses", logprob=-3.3735154),
+                    Token(text=" on", logprob=-0.13244776),
+                    Token(text=" foundational", logprob=-1.2640914),
+                    Token(text=" questions", logprob=-2.010647),
+                    Token(text=" in", logprob=-1.980726),
+                    Token(text=" AI", logprob=-0.5709368),
+                    Token(text=",", logprob=-1.036094),
+                    Token(text=" which", logprob=-3.826836),
+                    Token(text=" are", logprob=-1.3858839),
+                ],
+            )
+        ]
+
+        # Verified against https://beta.openai.com/tokenizer. Prompt + completions = 51 + 32.
+        assert token_counter.count_tokens(request, completions) == 51 + 32
+
+    def test_count_tokens_anthropic(self):
+        token_counter = AutoTokenCounter(
+            AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
+        )
+        request = Request(
+            model="anthropic/claude-instant-v1",
+            model_deployment="anthropic/claude-instant-v1",
+            prompt="\n\nHuman:The Center for Research on Foundation Models (CRFM) is "
+            "an interdisciplinary initiative born out of the Stanford "
+            "Institute for Human-Centered Artificial Intelligence (HAI) "
+            "that aims to make fundamental advances in the study, development, "
+            "and deployment of foundation models.\n\nAssistant:",
+        )
+        completions: List[GeneratedOutput] = [
+            GeneratedOutput(
+                text="Thank you for the background information. The Center for Research "
+                "on Foundation Models sounds like an interesting initiative focused on "
+                "advancing research and responsible development of large AI models. I "
+                "don't have any personal thoughts on it, but I'm happy to discuss or "
+                "provide more information if helpful. As an AI assistant, I don't have "
+                "subjective opinions.",
+                logprob=0,
+                tokens=[
+                    Token(text="Thank", logprob=0),
+                    Token(text=" you", logprob=0),
+                    Token(text=" for", logprob=0),
+                    Token(text=" the", logprob=0),
+                    Token(text=" background", logprob=0),
+                    Token(text=" information", logprob=0),
+                    Token(text=".", logprob=0),
+                    Token(text=" The", logprob=0),
+                    Token(text=" Center", logprob=0),
+                    Token(text=" for", logprob=0),
+                    Token(text=" Research", logprob=0),
+                    Token(text=" on", logprob=0),
+                    Token(text=" Foundation", logprob=0),
+                    Token(text=" Models", logprob=0),
+                    Token(text=" sounds", logprob=0),
+                    Token(text=" like", logprob=0),
+                    Token(text=" an", logprob=0),
+                    Token(text=" interesting", logprob=0),
+                    Token(text=" initiative", logprob=0),
+                    Token(text=" focused", logprob=0),
+                    Token(text=" on", logprob=0),
+                    Token(text=" advancing", logprob=0),
+                    Token(text=" research", logprob=0),
+                    Token(text=" and", logprob=0),
+                    Token(text=" responsible", logprob=0),
+                    Token(text=" development", logprob=0),
+                    Token(text=" of", logprob=0),
+                    Token(text=" large", logprob=0),
+                    Token(text=" AI", logprob=0),
+                    Token(text=" models", logprob=0),
+                    Token(text=".", logprob=0),
+                    Token(text=" I", logprob=0),
+                    Token(text=" don", logprob=0),
+                    Token(text="'t", logprob=0),
+                    Token(text=" have", logprob=0),
+                    Token(text=" any", logprob=0),
+                    Token(text=" personal", logprob=0),
+                    Token(text=" thoughts", logprob=0),
+                    Token(text=" on", logprob=0),
+                    Token(text=" it", logprob=0),
+                    Token(text=",", logprob=0),
+                    Token(text=" but", logprob=0),
+                    Token(text=" I", logprob=0),
+                    Token(text="'m", logprob=0),
+                    Token(text=" happy", logprob=0),
+                    Token(text=" to", logprob=0),
+                    Token(text=" discuss", logprob=0),
+                    Token(text=" or", logprob=0),
+                    Token(text=" provide", logprob=0),
+                    Token(text=" more", logprob=0),
+                    Token(text=" information", logprob=0),
+                    Token(text=" if", logprob=0),
+                    Token(text=" helpful", logprob=0),
+                    Token(text=".", logprob=0),
+                    Token(text=" As", logprob=0),
+                    Token(text=" an", logprob=0),
+                    Token(text=" AI", logprob=0),
+                    Token(text=" assistant", logprob=0),
+                    Token(text=",", logprob=0),
+                    Token(text=" I", logprob=0),
+                    Token(text=" don", logprob=0),
+                    Token(text="'t", logprob=0),
+                    Token(text=" have", logprob=0),
+                    Token(text=" subjective", logprob=0),
+                    Token(text=" opinions", logprob=0),
+                    Token(text=".", logprob=0),
+                ],
+                finish_reason=None,
+                multimodal_content=None,
+            )
+        ]
+
+        assert token_counter.count_tokens(request, completions) == 126
helm/proxy/token_counters/token_counter.py

@@ -1,15 +1,13 @@
 from abc import ABC, abstractmethod
 from typing import List

-from helm.common.request import …
+from helm.common.request import GeneratedOutput, Request


 class TokenCounter(ABC):
     """Counts the number of tokens used given `Request` and completions."""

     @abstractmethod
-    def count_tokens(self, request: Request, completions: List[…
-        """
-        Counts the total number of tokens given a request and completions.
-        """
+    def count_tokens(self, request: Request, completions: List[GeneratedOutput]) -> int:
+        """Counts the total number of tokens given a request and completions."""
         pass
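With the tightened signature, a concrete counter only has to implement `count_tokens` over `GeneratedOutput` completions and return an `int`. A minimal hypothetical subclass (not part of the package, in the spirit of the removed `free_token_counter.py`):

    from typing import List

    from helm.common.request import GeneratedOutput, Request
    from helm.proxy.token_counters.token_counter import TokenCounter


    class ZeroTokenCounter(TokenCounter):
        """Hypothetical counter for backends whose usage is not billed."""

        def count_tokens(self, request: Request, completions: List[GeneratedOutput]) -> int:
            return 0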
helm/py.typed (new, empty file)

This is the PEP 561 marker file that tells type checkers the helm package ships with inline type annotations; it is intentionally empty, so there are no content changes to display.
helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py

@@ -1,4 +1,4 @@
-from typing import Dict, List
+from typing import Any, Dict, List
 import requests

 from dacite import from_dict
@@ -12,7 +12,7 @@ from helm.common.tokenization_request import (
     DecodeRequest,
     DecodeRequestResult,
 )
-from helm.…
+from helm.clients.ai21_utils import AI21RequestError, handle_failed_request
 from .tokenizer import Tokenizer


@@ -28,7 +28,7 @@ class AI21Tokenizer(Tokenizer):
         # TODO: Does not support encoding
         raw_request: Dict[str, str] = {"text": request.text}

-        def do_it():
+        def do_it() -> Dict[str, Any]:
            response = requests.post(
                "https://api.ai21.com/studio/v1/tokenize",
                headers={"Authorization": f"Bearer {self.api_key}"},
helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py

@@ -31,7 +31,7 @@ class AlephAlphaTokenizer(CachingTokenizer):
     def __init__(self, api_key: str, cache_config: CacheConfig) -> None:
         super().__init__(cache_config)
         self.api_key: str = api_key
-        self._aleph_alpha_client = AlephAlphaPythonClient(token=api_key)
+        self._aleph_alpha_client = AlephAlphaPythonClient(token=api_key) if api_key else None
         self._tokenizer_name_to_tokenizer: Dict[str, InternalTokenizer] = {}

     def _get_tokenizer(self, tokenizer_name: str) -> InternalTokenizer:
@@ -40,6 +40,8 @@ class AlephAlphaTokenizer(CachingTokenizer):

         # Check if the tokenizer is cached
         if tokenizer_name not in self._tokenizer_name_to_tokenizer:
+            if self._aleph_alpha_client is None:
+                raise ValueError("Aleph Alpha API key not set.")
             self._tokenizer_name_to_tokenizer[tokenizer_name] = self._aleph_alpha_client.tokenizer(tokenizer_name)
             hlog(f"Initialized tokenizer: {tokenizer_name}")
         return self._tokenizer_name_to_tokenizer[tokenizer_name]
helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py

@@ -15,32 +15,38 @@ except ModuleNotFoundError as e:

 class AnthropicTokenizer(CachingTokenizer):
     LOCK: threading.Lock = threading.Lock()
+    """Global lock for the Anthropic tokenizer.
+
+    The Anthropic tokenizer is a wrapper around a single global Hugging Face tokenizer, which is thread-hostile."""

     def __init__(self, cache_config: CacheConfig) -> None:
         super().__init__(cache_config)
         with AnthropicTokenizer.LOCK:
             self._tokenizer: PreTrainedTokenizerBase = PreTrainedTokenizerFast(
-                tokenizer_object=anthropic.get_tokenizer()
+                tokenizer_object=anthropic.Anthropic().get_tokenizer()
             )

     def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
         if request["encode"]:
             if request["truncation"]:
-                …
+                with AnthropicTokenizer.LOCK:
+                    tokens = self._tokenizer.encode(
+                        request["text"],
+                        truncation=request["truncation"],
+                        max_length=request["max_length"],
+                        add_special_tokens=False,
+                    )
             else:
-                …
+                with AnthropicTokenizer.LOCK:
+                    tokens = self._tokenizer.encode(request["text"], add_special_tokens=False)
         else:
             # No encoding, just return the token strings
             tokens = [self._tokenizer.convert_tokens_to_string([i]) for i in self._tokenizer.tokenize(request["text"])]
         return {"tokens": tokens}

     def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        …
+        with AnthropicTokenizer.LOCK:
+            text = self._tokenizer.decode(
+                request["tokens"], clean_up_tokenization_spaces=request["clean_up_tokenization_spaces"]
+            )
         return {"text": text}
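The new docstring explains why the class-level lock now also guards `encode` and `decode`, not just construction: the wrapped Hugging Face tokenizer is a single shared object that is not safe to touch from multiple threads. The pattern in isolation looks roughly like this (`shared_tokenizer` is a stand-in object, not HELM code):

    import threading

    class LockedTokenizer:
        # One lock for one global, thread-hostile resource.
        LOCK: threading.Lock = threading.Lock()

        def __init__(self, shared_tokenizer):
            self._tokenizer = shared_tokenizer

        def encode(self, text: str):
            # Serialize every access so concurrent callers cannot
            # corrupt the shared tokenizer's internal state.
            with LockedTokenizer.LOCK:
                return self._tokenizer.encode(text)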
helm/tokenizers/auto_tokenizer.py (new file)

@@ -0,0 +1,93 @@
+from dataclasses import replace
+from typing import Any, Dict, Mapping, Optional
+
+from retrying import Attempt, RetryError
+
+from helm.benchmark.tokenizer_config_registry import get_tokenizer_config
+from helm.common.credentials_utils import provide_api_key
+from helm.common.cache_backend_config import CacheBackendConfig, CacheConfig
+from helm.common.hierarchical_logger import hlog
+from helm.common.object_spec import create_object, inject_object_spec_args
+from helm.proxy.retry import retry_tokenizer_request
+from helm.common.tokenization_request import (
+    DecodeRequest,
+    DecodeRequestResult,
+    TokenizationRequest,
+    TokenizationRequestResult,
+)
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class AutoTokenizer(Tokenizer):
+    """Automatically dispatch to the proper `Tokenizer` based on the tokenizer name."""
+
+    def __init__(self, credentials: Mapping[str, Any], cache_backend_config: CacheBackendConfig):
+        self.credentials = credentials
+        self.cache_backend_config = cache_backend_config
+        self.tokenizers: Dict[str, Tokenizer] = {}
+        hlog(f"AutoTokenizer: cache_backend_config = {cache_backend_config}")
+
+    def _get_tokenizer(self, tokenizer_name: str) -> Tokenizer:
+        # First try to find the tokenizer in the cache
+        tokenizer: Optional[Tokenizer] = self.tokenizers.get(tokenizer_name)
+        if tokenizer is not None:
+            return tokenizer
+
+        # Otherwise, create the tokenizer
+        organization: str = tokenizer_name.split("/")[0]
+        cache_config: CacheConfig = self.cache_backend_config.get_cache_config(organization)
+
+        tokenizer_config = get_tokenizer_config(tokenizer_name)
+        if tokenizer_config:
+            tokenizer_spec = inject_object_spec_args(
+                tokenizer_config.tokenizer_spec,
+                constant_bindings={"cache_config": cache_config},
+                provider_bindings={
+                    "api_key": lambda: provide_api_key(self.credentials, organization),
+                    "project_id": lambda: self.credentials.get(organization + "ProjectId", None),  # VertexAI
+                    "location": lambda: self.credentials.get(organization + "Location", None),  # VertexAI
+                },
+            )
+            tokenizer = create_object(tokenizer_spec)
+        else:
+            hlog(f"No tokenizer config for {tokenizer_name}")
+
+        # Cache the tokenizer
+        assert isinstance(tokenizer, Tokenizer)  # To make mypy happy
+        self.tokenizers[tokenizer_name] = tokenizer
+
+        return tokenizer
+
+    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
+        """Tokenizes based on the name of the tokenizer (e.g., huggingface/gpt2)."""
+
+        @retry_tokenizer_request
+        def tokenize_with_retry(tokenizer: Tokenizer, request: TokenizationRequest) -> TokenizationRequestResult:
+            return tokenizer.tokenize(request)
+
+        tokenizer: Tokenizer = self._get_tokenizer(request.tokenizer)
+
+        try:
+            return tokenize_with_retry(tokenizer=tokenizer, request=request)
+        except RetryError as e:
+            last_attempt: Attempt = e.last_attempt
+            retry_error: str = f"Failed to tokenize after retrying {last_attempt.attempt_number} times"
+            hlog(retry_error)
+            return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
+
+    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
+        """Decodes based on the the name of the tokenizer (e.g., huggingface/gpt2)."""
+
+        @retry_tokenizer_request
+        def decode_with_retry(tokenizer: Tokenizer, request: DecodeRequest) -> DecodeRequestResult:
+            return tokenizer.decode(request)
+
+        tokenizer: Tokenizer = self._get_tokenizer(request.tokenizer)
+
+        try:
+            return decode_with_retry(tokenizer=tokenizer, request=request)
+        except RetryError as e:
+            last_attempt: Attempt = e.last_attempt
+            retry_error: str = f"Failed to decode after retrying {last_attempt.attempt_number} times"
+            hlog(retry_error)
+            return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
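`AutoTokenizer` resolves a name like `huggingface/gpt2` to its registered `TokenizerConfig`, instantiates the concrete `Tokenizer` from that spec (injecting a per-organization cache config and credentials), and memoizes it by name. A usage sketch mirroring how the new tests construct it; it assumes the tokenizer configs from helm/config/tokenizer_configs.yaml have been registered, and `BlackHoleCacheBackendConfig` simply disables caching:

    from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
    from helm.common.tokenization_request import TokenizationRequest
    from helm.tokenizers.auto_tokenizer import AutoTokenizer

    auto_tokenizer = AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
    result = auto_tokenizer.tokenize(
        TokenizationRequest("The quick brown fox", tokenizer="huggingface/gpt2")
    )
    print(len(result.tokens))  # number of GPT-2 tokens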
helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py

@@ -100,9 +100,15 @@ class CachingTokenizer(Tokenizer):
         # Internal check of the type of the first token
         # This is to make sure that the tokenization is correct
         if request.encode and len(tokens) > 0:
-            assert type(tokens[0].value) == int
+            assert type(tokens[0].value) == int, (
+                f"tokenize() returned strings instead of integers when encode is True: "
+                f"request={request} repsonse={response}"
+            )
         elif not request.encode and len(tokens) > 0:
-            assert type(tokens[0].value) == str
+            assert type(tokens[0].value) == str, (
+                f"tokenize() returned integers instead of strings when encode is False: "
+                f"request={request} repsonse={response}"
+            )

         result = TokenizationRequestResult(
             success=True,
helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py

@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
     DecodeRequestResult,
     TokenizationToken,
 )
-from helm.…
+from helm.clients.cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
 from .caching_tokenizer import CachingTokenizer

helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py

@@ -1,6 +1,6 @@
 import os
 from dataclasses import asdict
-from typing import Optional
+from typing import Any, Dict, Optional

 from helm.common.cache import Cache, CacheConfig
 from helm.common.request import wrap_request_time
@@ -42,7 +42,7 @@ class HTTPModelTokenizer(Tokenizer):

         try:

-            def do_it():
+            def do_it() -> Dict[str, Any]:
                 url = f"{self.base_url}/tokenize"
                 response = requests.post(url, json=raw_request)
                 response.raise_for_status()
@@ -70,7 +70,7 @@ class HTTPModelTokenizer(Tokenizer):

         try:

-            def do_it():
+            def do_it() -> Dict[str, Any]:
                 url = f"{self.base_url}/decode"
                 response = requests.post(url, json={"tokens": request.tokens})
                 response.raise_for_status()
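The typed `do_it` closures also document the HTTP contract this tokenizer expects from a model server: POST {base_url}/tokenize with the raw tokenization request and POST {base_url}/decode with {"tokens": [...]}, each answering JSON. A minimal hypothetical counterpart server, sketched with Flask; the /decode payload shape comes from the hunk above, while the "text" key and the whitespace tokenizer are assumptions for illustration only:

    from flask import Flask, jsonify, request

    app = Flask(__name__)

    @app.route("/tokenize", methods=["POST"])
    def tokenize():
        text = request.get_json()["text"]  # assumed payload key
        # Placeholder tokenization: split on whitespace.
        return jsonify({"tokens": text.split()})

    @app.route("/decode", methods=["POST"])
    def decode():
        tokens = request.get_json()["tokens"]
        return jsonify({"text": " ".join(str(t) for t in tokens)})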