crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/proxy/server.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# mypy: check_untyped_defs = False
|
|
2
|
-
|
|
3
1
|
"""
|
|
4
2
|
Starts a REST server for the frontend to interact with.
|
|
5
3
|
Look at `index.js` to see how the functionality is invoked.
|
|
@@ -16,12 +14,21 @@ import time
|
|
|
16
14
|
from dacite import from_dict
|
|
17
15
|
import bottle
|
|
18
16
|
|
|
17
|
+
from helm.benchmark.config_registry import (
|
|
18
|
+
register_configs_from_directory,
|
|
19
|
+
register_builtin_configs_from_helm_package,
|
|
20
|
+
)
|
|
21
|
+
from helm.benchmark.model_deployment_registry import get_default_model_deployment_for_model
|
|
19
22
|
from helm.common.authentication import Authentication
|
|
23
|
+
from helm.common.cache_backend_config import CacheBackendConfig, MongoCacheBackendConfig, SqliteCacheBackendConfig
|
|
24
|
+
from helm.common.general import ensure_directory_exists
|
|
20
25
|
from helm.common.hierarchical_logger import hlog
|
|
21
26
|
from helm.common.optional_dependencies import handle_module_not_found_error
|
|
22
27
|
from helm.common.request import Request
|
|
23
28
|
from helm.common.perspective_api_request import PerspectiveAPIRequest
|
|
29
|
+
from helm.common.moderations_api_request import ModerationAPIRequest
|
|
24
30
|
from helm.common.tokenization_request import TokenizationRequest, DecodeRequest
|
|
31
|
+
from helm.proxy.services.service import CACHE_DIR
|
|
25
32
|
from .accounts import Account
|
|
26
33
|
from .services.server_service import ServerService
|
|
27
34
|
from .query import Query
|
|
@@ -35,6 +42,7 @@ except ModuleNotFoundError as e:
|
|
|
35
42
|
bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024
|
|
36
43
|
|
|
37
44
|
app = bottle.default_app()
|
|
45
|
+
service: ServerService
|
|
38
46
|
|
|
39
47
|
|
|
40
48
|
def safe_call(func, to_json=True):
|
|
@@ -83,9 +91,16 @@ def handle_static_filename(filename):
|
|
|
83
91
|
return resp
|
|
84
92
|
|
|
85
93
|
|
|
94
|
+
@app.get("/output/<filename:path>")
|
|
95
|
+
def handle_output_filename(filename):
|
|
96
|
+
resp = bottle.static_file(filename, root=app.config["crfm.proxy.outputpath"])
|
|
97
|
+
return resp
|
|
98
|
+
|
|
99
|
+
|
|
86
100
|
@app.get("/api/general_info")
|
|
87
101
|
def handle_get_general_info():
|
|
88
102
|
def perform(args):
|
|
103
|
+
global service
|
|
89
104
|
return dataclasses.asdict(service.get_general_info())
|
|
90
105
|
|
|
91
106
|
return safe_call(perform)
|
|
@@ -94,6 +109,7 @@ def handle_get_general_info():
|
|
|
94
109
|
@app.get("/api/window_service_info")
|
|
95
110
|
def handle_get_window_service_info():
|
|
96
111
|
def perform(args):
|
|
112
|
+
global service
|
|
97
113
|
return dataclasses.asdict(service.get_window_service_info(args["model_name"]))
|
|
98
114
|
|
|
99
115
|
return safe_call(perform)
|
|
@@ -102,6 +118,7 @@ def handle_get_window_service_info():
|
|
|
102
118
|
@app.post("/api/account")
|
|
103
119
|
def handle_create_account():
|
|
104
120
|
def perform(args):
|
|
121
|
+
global service
|
|
105
122
|
auth = Authentication(**json.loads(args["auth"]))
|
|
106
123
|
return dataclasses.asdict(service.create_account(auth))
|
|
107
124
|
|
|
@@ -111,6 +128,7 @@ def handle_create_account():
|
|
|
111
128
|
@app.delete("/api/account")
|
|
112
129
|
def handle_delete_account():
|
|
113
130
|
def perform(args):
|
|
131
|
+
global service
|
|
114
132
|
auth = Authentication(**json.loads(args["auth"]))
|
|
115
133
|
api_key = args["api_key"]
|
|
116
134
|
return dataclasses.asdict(service.delete_account(auth, api_key))
|
|
@@ -121,6 +139,7 @@ def handle_delete_account():
|
|
|
121
139
|
@app.get("/api/account")
|
|
122
140
|
def handle_get_account():
|
|
123
141
|
def perform(args):
|
|
142
|
+
global service
|
|
124
143
|
auth = Authentication(**json.loads(args["auth"]))
|
|
125
144
|
if "all" in args and args["all"].lower() == "true":
|
|
126
145
|
return [dataclasses.asdict(account) for account in service.get_accounts(auth)]
|
|
@@ -133,6 +152,7 @@ def handle_get_account():
|
|
|
133
152
|
@app.put("/api/account")
|
|
134
153
|
def handle_update_account():
|
|
135
154
|
def perform(args):
|
|
155
|
+
global service
|
|
136
156
|
auth = Authentication(**json.loads(args["auth"]))
|
|
137
157
|
account = from_dict(Account, json.loads(args["account"]))
|
|
138
158
|
return dataclasses.asdict(service.update_account(auth, account))
|
|
@@ -143,6 +163,7 @@ def handle_update_account():
|
|
|
143
163
|
@app.put("/api/account/api_key")
|
|
144
164
|
def handle_update_api_key():
|
|
145
165
|
def perform(args):
|
|
166
|
+
global service
|
|
146
167
|
auth = Authentication(**json.loads(args["auth"]))
|
|
147
168
|
account = from_dict(Account, json.loads(args["account"]))
|
|
148
169
|
return dataclasses.asdict(service.rotate_api_key(auth, account))
|
|
@@ -153,6 +174,7 @@ def handle_update_api_key():
|
|
|
153
174
|
@app.get("/api/query")
|
|
154
175
|
def handle_query():
|
|
155
176
|
def perform(args):
|
|
177
|
+
global service
|
|
156
178
|
query = Query(**args)
|
|
157
179
|
return dataclasses.asdict(service.expand_query(query))
|
|
158
180
|
|
|
@@ -162,9 +184,28 @@ def handle_query():
|
|
|
162
184
|
@app.get("/api/request")
|
|
163
185
|
def handle_request():
|
|
164
186
|
def perform(args):
|
|
187
|
+
global service
|
|
165
188
|
auth = Authentication(**json.loads(args["auth"]))
|
|
166
189
|
request = Request(**json.loads(args["request"]))
|
|
167
|
-
|
|
190
|
+
# Hack to maintain backward compatibility with clients with version <= 0.3.0.
|
|
191
|
+
# Clients with version <= 0.3.0 do not set model_deployment, but this is now
|
|
192
|
+
# required by Request.
|
|
193
|
+
if not request.model_deployment:
|
|
194
|
+
model_deployment = get_default_model_deployment_for_model(request.model)
|
|
195
|
+
if model_deployment is None:
|
|
196
|
+
raise ValueError(f"Unknown model '{request.model}'")
|
|
197
|
+
request = dataclasses.replace(request, model_deployment=model_deployment)
|
|
198
|
+
|
|
199
|
+
raw_response = dataclasses.asdict(service.make_request(auth, request))
|
|
200
|
+
|
|
201
|
+
# Hack to maintain backward compatibility with clients with version <= 1.0.0.
|
|
202
|
+
# Clients with version <= 1.0.0 expect each token to contain a `top_logprobs`
|
|
203
|
+
# field of type dict.
|
|
204
|
+
for completion in raw_response["completions"]:
|
|
205
|
+
for token in completion["tokens"]:
|
|
206
|
+
token["top_logprobs"] = {}
|
|
207
|
+
|
|
208
|
+
return raw_response
|
|
168
209
|
|
|
169
210
|
return safe_call(perform)
|
|
170
211
|
|
|
@@ -172,6 +213,7 @@ def handle_request():
|
|
|
172
213
|
@app.get("/api/tokenize")
|
|
173
214
|
def handle_tokenization():
|
|
174
215
|
def perform(args):
|
|
216
|
+
global service
|
|
175
217
|
auth = Authentication(**json.loads(args["auth"]))
|
|
176
218
|
request = TokenizationRequest(**json.loads(args["request"]))
|
|
177
219
|
return dataclasses.asdict(service.tokenize(auth, request))
|
|
@@ -182,6 +224,7 @@ def handle_tokenization():
|
|
|
182
224
|
@app.get("/api/decode")
|
|
183
225
|
def handle_decode():
|
|
184
226
|
def perform(args):
|
|
227
|
+
global service
|
|
185
228
|
auth = Authentication(**json.loads(args["auth"]))
|
|
186
229
|
request = DecodeRequest(**json.loads(args["request"]))
|
|
187
230
|
return dataclasses.asdict(service.decode(auth, request))
|
|
@@ -192,6 +235,7 @@ def handle_decode():
|
|
|
192
235
|
@app.get("/api/toxicity")
|
|
193
236
|
def handle_toxicity_request():
|
|
194
237
|
def perform(args):
|
|
238
|
+
global service
|
|
195
239
|
auth = Authentication(**json.loads(args["auth"]))
|
|
196
240
|
request = PerspectiveAPIRequest(**json.loads(args["request"]))
|
|
197
241
|
return dataclasses.asdict(service.get_toxicity_scores(auth, request))
|
|
@@ -199,9 +243,21 @@ def handle_toxicity_request():
|
|
|
199
243
|
return safe_call(perform)
|
|
200
244
|
|
|
201
245
|
|
|
246
|
+
@app.get("/api/moderation")
|
|
247
|
+
def handle_moderation_request():
|
|
248
|
+
def perform(args):
|
|
249
|
+
global service
|
|
250
|
+
auth = Authentication(**json.loads(args["auth"]))
|
|
251
|
+
request = ModerationAPIRequest(**json.loads(args["request"]))
|
|
252
|
+
return dataclasses.asdict(service.get_moderation_results(auth, request))
|
|
253
|
+
|
|
254
|
+
return safe_call(perform)
|
|
255
|
+
|
|
256
|
+
|
|
202
257
|
@app.get("/api/shutdown")
|
|
203
258
|
def handle_shutdown():
|
|
204
259
|
def perform(args):
|
|
260
|
+
global service
|
|
205
261
|
auth = Authentication(**json.loads(args["auth"]))
|
|
206
262
|
service.shutdown(auth)
|
|
207
263
|
|
|
@@ -214,6 +270,7 @@ def main():
|
|
|
214
270
|
parser.add_argument("-p", "--port", type=int, help="What port to listen on", default=1959)
|
|
215
271
|
parser.add_argument("--ssl-key-file", type=str, help="Path to SSL key file")
|
|
216
272
|
parser.add_argument("--ssl-cert-file", type=str, help="Path to SSL cert file")
|
|
273
|
+
parser.add_argument("--ssl-ca-certs", type=str, help="Path to SSL CA certs")
|
|
217
274
|
parser.add_argument("-b", "--base-path", help="What directory has credentials, etc.", default="prod_env")
|
|
218
275
|
parser.add_argument("-w", "--workers", type=int, help="Number of worker processes to handle requests", default=8)
|
|
219
276
|
parser.add_argument("-t", "--timeout", type=int, help="Request timeout in seconds", default=5 * 60)
|
|
@@ -225,17 +282,32 @@ def main():
|
|
|
225
282
|
)
|
|
226
283
|
args = parser.parse_args()
|
|
227
284
|
|
|
228
|
-
|
|
285
|
+
register_builtin_configs_from_helm_package()
|
|
286
|
+
register_configs_from_directory(args.base_path)
|
|
287
|
+
|
|
288
|
+
cache_backend_config: CacheBackendConfig
|
|
289
|
+
if args.mongo_uri:
|
|
290
|
+
cache_backend_config = MongoCacheBackendConfig(args.mongo_uri)
|
|
291
|
+
else:
|
|
292
|
+
sqlite_cache_path = os.path.join(args.base_path, CACHE_DIR)
|
|
293
|
+
ensure_directory_exists(sqlite_cache_path)
|
|
294
|
+
cache_backend_config = SqliteCacheBackendConfig(sqlite_cache_path)
|
|
295
|
+
|
|
296
|
+
service = ServerService(base_path=args.base_path, cache_backend_config=cache_backend_config)
|
|
229
297
|
|
|
230
298
|
gunicorn_args = {
|
|
231
299
|
"workers": args.workers,
|
|
232
300
|
"timeout": args.timeout,
|
|
233
301
|
"limit_request_line": 0, # Controls the maximum size of HTTP request line in bytes. 0 = unlimited.
|
|
234
302
|
}
|
|
235
|
-
if args.ssl_key_file
|
|
303
|
+
if args.ssl_key_file:
|
|
236
304
|
gunicorn_args["keyfile"] = args.ssl_key_file
|
|
305
|
+
if args.ssl_cert_file:
|
|
237
306
|
gunicorn_args["certfile"] = args.ssl_cert_file
|
|
307
|
+
if args.ssl_ca_certs:
|
|
308
|
+
gunicorn_args["ca_certs"] = args.ssl_ca_certs
|
|
238
309
|
|
|
239
310
|
# Clear arguments before running gunicorn as it also uses argparse
|
|
240
311
|
sys.argv = [sys.argv[0]]
|
|
312
|
+
app.config["crfm.proxy.outputpath"] = os.path.join(os.path.realpath(args.base_path), "cache", "output")
|
|
241
313
|
app.run(host="0.0.0.0", port=args.port, server="gunicorn", **gunicorn_args)
|
|
@@ -5,9 +5,15 @@ import urllib.parse
|
|
|
5
5
|
from dataclasses import asdict
|
|
6
6
|
from typing import Any, List, Optional
|
|
7
7
|
|
|
8
|
+
from helm.common.cache import CacheConfig
|
|
9
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
8
10
|
from helm.common.authentication import Authentication
|
|
11
|
+
from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
|
|
9
12
|
from helm.common.critique_request import CritiqueRequest, CritiqueRequestResult
|
|
13
|
+
from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResult
|
|
14
|
+
from helm.common.file_upload_request import FileUploadRequest, FileUploadResult
|
|
10
15
|
from helm.common.perspective_api_request import PerspectiveAPIRequest, PerspectiveAPIRequestResult
|
|
16
|
+
from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
|
|
11
17
|
from helm.common.tokenization_request import (
|
|
12
18
|
WindowServiceInfo,
|
|
13
19
|
TokenizationRequest,
|
|
@@ -27,6 +33,8 @@ class RemoteServiceError(Exception):
|
|
|
27
33
|
|
|
28
34
|
|
|
29
35
|
class RemoteService(Service):
|
|
36
|
+
NOT_SUPPORTED_ERROR: str = "Not supported through the remote service."
|
|
37
|
+
|
|
30
38
|
def __init__(self, base_url):
|
|
31
39
|
self.base_url: str = base_url
|
|
32
40
|
|
|
@@ -84,6 +92,15 @@ class RemoteService(Service):
|
|
|
84
92
|
RemoteService._check_response(response, request_json)
|
|
85
93
|
return from_dict(DecodeRequestResult, response)
|
|
86
94
|
|
|
95
|
+
def upload(self, auth: Authentication, request: FileUploadRequest) -> FileUploadResult:
|
|
96
|
+
raise NotImplementedError(self.NOT_SUPPORTED_ERROR)
|
|
97
|
+
|
|
98
|
+
def check_nudity(self, auth: Authentication, request: NudityCheckRequest) -> NudityCheckResult:
|
|
99
|
+
raise NotImplementedError(self.NOT_SUPPORTED_ERROR)
|
|
100
|
+
|
|
101
|
+
def compute_clip_score(self, auth: Authentication, request: CLIPScoreRequest) -> CLIPScoreResult:
|
|
102
|
+
raise NotImplementedError(self.NOT_SUPPORTED_ERROR)
|
|
103
|
+
|
|
87
104
|
def get_toxicity_scores(self, auth: Authentication, request: PerspectiveAPIRequest) -> PerspectiveAPIRequestResult:
|
|
88
105
|
request_json: str = json.dumps(asdict(request))
|
|
89
106
|
params = {
|
|
@@ -94,6 +111,16 @@ class RemoteService(Service):
|
|
|
94
111
|
RemoteService._check_response(response, request_json)
|
|
95
112
|
return from_dict(PerspectiveAPIRequestResult, response)
|
|
96
113
|
|
|
114
|
+
def get_moderation_results(self, auth: Authentication, request: ModerationAPIRequest) -> ModerationAPIRequestResult:
|
|
115
|
+
request_json: str = json.dumps(asdict(request))
|
|
116
|
+
params = {
|
|
117
|
+
"auth": json.dumps(asdict(auth)),
|
|
118
|
+
"request": request_json,
|
|
119
|
+
}
|
|
120
|
+
response = requests.get(f"{self.base_url}/api/moderation?{urllib.parse.urlencode(params)}").json()
|
|
121
|
+
RemoteService._check_response(response, request_json)
|
|
122
|
+
return from_dict(ModerationAPIRequestResult, response)
|
|
123
|
+
|
|
97
124
|
def make_critique_request(self, auth: Authentication, request: CritiqueRequest) -> CritiqueRequestResult:
|
|
98
125
|
raise NotImplementedError("make_critique_request is not supported by RemoteServer")
|
|
99
126
|
|
|
@@ -153,6 +180,10 @@ class RemoteService(Service):
|
|
|
153
180
|
# A ConnectionError is expected when shutting down the server.
|
|
154
181
|
pass
|
|
155
182
|
|
|
183
|
+
def get_cache_config(self, shard_name: str) -> CacheConfig:
|
|
184
|
+
"""Returns a CacheConfig"""
|
|
185
|
+
return BlackHoleCacheBackendConfig().get_cache_config(shard_name)
|
|
186
|
+
|
|
156
187
|
|
|
157
188
|
def add_service_args(parser: argparse.ArgumentParser):
|
|
158
189
|
"""Add command-line arguments to enable command-line utilities to specify how to connect to a remote server."""
|
|
@@ -1,12 +1,16 @@
|
|
|
1
|
+
import dataclasses
|
|
1
2
|
import os
|
|
2
3
|
import signal
|
|
3
4
|
from typing import List, Optional
|
|
4
5
|
|
|
5
|
-
from helm.
|
|
6
|
-
from helm.
|
|
7
|
-
from helm.benchmark.tokenizer_config_registry import maybe_register_tokenizer_configs_from_base_path
|
|
6
|
+
from helm.common.cache import CacheConfig
|
|
7
|
+
from helm.common.cache_backend_config import CacheBackendConfig, BlackHoleCacheBackendConfig
|
|
8
8
|
from helm.common.critique_request import CritiqueRequest, CritiqueRequestResult
|
|
9
9
|
from helm.common.authentication import Authentication
|
|
10
|
+
from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
|
|
11
|
+
from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
|
|
12
|
+
from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResult
|
|
13
|
+
from helm.common.file_upload_request import FileUploadRequest, FileUploadResult
|
|
10
14
|
from helm.common.general import ensure_directory_exists, parse_hocon, get_credentials
|
|
11
15
|
from helm.common.perspective_api_request import PerspectiveAPIRequest, PerspectiveAPIRequestResult
|
|
12
16
|
from helm.common.tokenization_request import (
|
|
@@ -19,13 +23,20 @@ from helm.common.tokenization_request import (
|
|
|
19
23
|
from helm.common.request import Request, RequestResult
|
|
20
24
|
from helm.common.hierarchical_logger import hlog
|
|
21
25
|
from helm.proxy.accounts import Accounts, Account
|
|
22
|
-
from helm.
|
|
23
|
-
from helm.
|
|
26
|
+
from helm.clients.auto_client import AutoClient
|
|
27
|
+
from helm.clients.moderation_api_client import ModerationAPIClient
|
|
28
|
+
from helm.clients.perspective_api_client import PerspectiveAPIClient
|
|
29
|
+
from helm.clients.image_generation.nudity_check_client import NudityCheckClient
|
|
30
|
+
from helm.clients.gcs_client import GCSClient
|
|
31
|
+
from helm.clients.clip_score_client import CLIPScoreClient
|
|
32
|
+
from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
|
|
24
33
|
from helm.proxy.example_queries import example_queries
|
|
25
|
-
from helm.
|
|
34
|
+
from helm.benchmark.model_metadata_registry import ALL_MODELS_METADATA
|
|
35
|
+
from helm.benchmark.model_deployment_registry import get_model_deployment_host_organization
|
|
26
36
|
from helm.proxy.query import Query, QueryResult
|
|
27
37
|
from helm.proxy.retry import retry_request
|
|
28
38
|
from helm.proxy.token_counters.auto_token_counter import AutoTokenCounter
|
|
39
|
+
from helm.tokenizers.auto_tokenizer import AutoTokenizer
|
|
29
40
|
from .service import (
|
|
30
41
|
Service,
|
|
31
42
|
CACHE_DIR,
|
|
@@ -42,24 +53,38 @@ class ServerService(Service):
|
|
|
42
53
|
Main class that supports various functionality for the server.
|
|
43
54
|
"""
|
|
44
55
|
|
|
45
|
-
def __init__(
|
|
56
|
+
def __init__(
|
|
57
|
+
self,
|
|
58
|
+
base_path: str = "prod_env",
|
|
59
|
+
root_mode: bool = False,
|
|
60
|
+
cache_backend_config: CacheBackendConfig = BlackHoleCacheBackendConfig(),
|
|
61
|
+
):
|
|
62
|
+
ensure_directory_exists(base_path)
|
|
63
|
+
client_file_storage_path = os.path.join(base_path, CACHE_DIR)
|
|
64
|
+
ensure_directory_exists(client_file_storage_path)
|
|
65
|
+
|
|
46
66
|
credentials = get_credentials(base_path)
|
|
47
|
-
cache_path = os.path.join(base_path, CACHE_DIR)
|
|
48
|
-
ensure_directory_exists(cache_path)
|
|
49
67
|
accounts_path = os.path.join(base_path, ACCOUNTS_FILE)
|
|
50
68
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
self.client = AutoClient(credentials, cache_path, mongo_uri)
|
|
56
|
-
self.token_counter = AutoTokenCounter(self.client.get_huggingface_client())
|
|
69
|
+
self.cache_backend_config = cache_backend_config
|
|
70
|
+
self.client = AutoClient(credentials, client_file_storage_path, cache_backend_config)
|
|
71
|
+
self.tokenizer = AutoTokenizer(credentials, cache_backend_config)
|
|
72
|
+
self.token_counter = AutoTokenCounter(self.tokenizer)
|
|
57
73
|
self.accounts = Accounts(accounts_path, root_mode=root_mode)
|
|
58
|
-
|
|
74
|
+
|
|
75
|
+
# Lazily instantiate the following clients
|
|
76
|
+
self.moderation_api_client: Optional[ModerationAPIClient] = None
|
|
59
77
|
self.toxicity_classifier_client: Optional[ToxicityClassifierClient] = None
|
|
78
|
+
self.perspective_api_client: Optional[PerspectiveAPIClient] = None
|
|
79
|
+
self.nudity_check_client: Optional[NudityCheckClient] = None
|
|
80
|
+
self.clip_score_client: Optional[CLIPScoreClient] = None
|
|
81
|
+
self.gcs_client: Optional[GCSClient] = None
|
|
60
82
|
|
|
61
83
|
def get_general_info(self) -> GeneralInfo:
|
|
62
|
-
|
|
84
|
+
# Can't send release_dates in ModelMetadata because dates cannot be round-tripped to and from JSON easily.
|
|
85
|
+
# TODO(#2158): Either fix this or delete get_general_info.
|
|
86
|
+
all_models = [dataclasses.replace(model_metadata, release_date=None) for model_metadata in ALL_MODELS_METADATA]
|
|
87
|
+
return GeneralInfo(version=VERSION, example_queries=example_queries, all_models=all_models)
|
|
63
88
|
|
|
64
89
|
def get_window_service_info(self, model_name) -> WindowServiceInfo:
|
|
65
90
|
# The import statement is placed here to avoid two problems, please refer to the link for details
|
|
@@ -88,6 +113,21 @@ class ServerService(Service):
|
|
|
88
113
|
requests.append(request)
|
|
89
114
|
return QueryResult(requests=requests)
|
|
90
115
|
|
|
116
|
+
def _get_model_group_for_model_deployment(self, model_deployment: str) -> str:
|
|
117
|
+
if model_deployment.startswith("openai/"):
|
|
118
|
+
if model_deployment.startswith("openai/code-"):
|
|
119
|
+
return "codex"
|
|
120
|
+
elif model_deployment.startswith("openai/dall-e-"):
|
|
121
|
+
return "dall_e"
|
|
122
|
+
elif model_deployment.startswith("openai/gpt-4-"):
|
|
123
|
+
return "gpt4"
|
|
124
|
+
else:
|
|
125
|
+
return "gpt3"
|
|
126
|
+
elif model_deployment.startswith("ai21/"):
|
|
127
|
+
return "jurassic"
|
|
128
|
+
else:
|
|
129
|
+
return get_model_deployment_host_organization(model_deployment)
|
|
130
|
+
|
|
91
131
|
def make_request(self, auth: Authentication, request: Request) -> RequestResult:
|
|
92
132
|
"""Actually make a request to an API."""
|
|
93
133
|
# TODO: try to invoke the API even if we're not authenticated, and if
|
|
@@ -95,7 +135,7 @@ class ServerService(Service):
|
|
|
95
135
|
# https://github.com/stanford-crfm/benchmarking/issues/56
|
|
96
136
|
|
|
97
137
|
self.accounts.authenticate(auth)
|
|
98
|
-
model_group: str =
|
|
138
|
+
model_group: str = self._get_model_group_for_model_deployment(request.model_deployment)
|
|
99
139
|
# Make sure we can use
|
|
100
140
|
self.accounts.check_can_use(auth.api_key, model_group)
|
|
101
141
|
|
|
@@ -113,12 +153,42 @@ class ServerService(Service):
|
|
|
113
153
|
def tokenize(self, auth: Authentication, request: TokenizationRequest) -> TokenizationRequestResult:
|
|
114
154
|
"""Tokenize via an API."""
|
|
115
155
|
self.accounts.authenticate(auth)
|
|
116
|
-
return self.
|
|
156
|
+
return self.tokenizer.tokenize(request)
|
|
117
157
|
|
|
118
158
|
def decode(self, auth: Authentication, request: DecodeRequest) -> DecodeRequestResult:
|
|
119
159
|
"""Decodes to text."""
|
|
120
160
|
self.accounts.authenticate(auth)
|
|
121
|
-
return self.
|
|
161
|
+
return self.tokenizer.decode(request)
|
|
162
|
+
|
|
163
|
+
def upload(self, auth: Authentication, request: FileUploadRequest) -> FileUploadResult:
|
|
164
|
+
"""Uploads a file to external storage."""
|
|
165
|
+
self.accounts.authenticate(auth)
|
|
166
|
+
|
|
167
|
+
if not self.gcs_client:
|
|
168
|
+
self.gcs_client = self.client.get_gcs_client()
|
|
169
|
+
|
|
170
|
+
assert self.gcs_client
|
|
171
|
+
return self.gcs_client.upload(request)
|
|
172
|
+
|
|
173
|
+
def check_nudity(self, auth: Authentication, request: NudityCheckRequest) -> NudityCheckResult:
|
|
174
|
+
"""Check for nudity."""
|
|
175
|
+
self.accounts.authenticate(auth)
|
|
176
|
+
|
|
177
|
+
if not self.nudity_check_client:
|
|
178
|
+
self.nudity_check_client = self.client.get_nudity_check_client()
|
|
179
|
+
|
|
180
|
+
assert self.nudity_check_client
|
|
181
|
+
return self.nudity_check_client.check_nudity(request)
|
|
182
|
+
|
|
183
|
+
def compute_clip_score(self, auth: Authentication, request: CLIPScoreRequest) -> CLIPScoreResult:
|
|
184
|
+
"""Computes CLIPScore for a given caption and image."""
|
|
185
|
+
self.accounts.authenticate(auth)
|
|
186
|
+
|
|
187
|
+
if not self.clip_score_client:
|
|
188
|
+
self.clip_score_client = self.client.get_clip_score_client()
|
|
189
|
+
|
|
190
|
+
assert self.clip_score_client
|
|
191
|
+
return self.clip_score_client.compute_score(request)
|
|
122
192
|
|
|
123
193
|
def get_toxicity_scores(self, auth: Authentication, request: PerspectiveAPIRequest) -> PerspectiveAPIRequestResult:
|
|
124
194
|
@retry_request
|
|
@@ -130,6 +200,16 @@ class ServerService(Service):
|
|
|
130
200
|
self.accounts.authenticate(auth)
|
|
131
201
|
return get_toxicity_scores_with_retry(request)
|
|
132
202
|
|
|
203
|
+
def get_moderation_results(self, auth: Authentication, request: ModerationAPIRequest) -> ModerationAPIRequestResult:
|
|
204
|
+
@retry_request
|
|
205
|
+
def get_moderation_results_with_retry(request: ModerationAPIRequest) -> ModerationAPIRequestResult:
|
|
206
|
+
if not self.moderation_api_client:
|
|
207
|
+
self.moderation_api_client = self.client.get_moderation_api_client()
|
|
208
|
+
return self.moderation_api_client.get_moderation_results(request)
|
|
209
|
+
|
|
210
|
+
self.accounts.authenticate(auth)
|
|
211
|
+
return get_moderation_results_with_retry(request)
|
|
212
|
+
|
|
133
213
|
def make_critique_request(self, auth: Authentication, request: CritiqueRequest) -> CritiqueRequestResult:
|
|
134
214
|
self.accounts.authenticate(auth)
|
|
135
215
|
return self.client.get_critique_client().make_critique_request(request)
|
|
@@ -165,3 +245,6 @@ class ServerService(Service):
|
|
|
165
245
|
hlog(f"Shutting down server by killing its own process {pid}...")
|
|
166
246
|
os.kill(pid, signal.SIGTERM)
|
|
167
247
|
hlog("Done.")
|
|
248
|
+
|
|
249
|
+
def get_cache_config(self, shard_name: str) -> CacheConfig:
|
|
250
|
+
return self.cache_backend_config.get_cache_config(shard_name)
|
helm/proxy/services/service.py
CHANGED
|
@@ -5,7 +5,11 @@ from typing import Dict, List, Tuple, Any
|
|
|
5
5
|
|
|
6
6
|
from helm.common.general import parse_hocon
|
|
7
7
|
from helm.common.critique_request import CritiqueRequest, CritiqueRequestResult
|
|
8
|
+
from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
|
|
9
|
+
from helm.common.file_upload_request import FileUploadResult, FileUploadRequest
|
|
10
|
+
from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResult
|
|
8
11
|
from helm.common.perspective_api_request import PerspectiveAPIRequestResult, PerspectiveAPIRequest
|
|
12
|
+
from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
|
|
9
13
|
from helm.common.tokenization_request import (
|
|
10
14
|
WindowServiceInfo,
|
|
11
15
|
TokenizationRequest,
|
|
@@ -14,9 +18,10 @@ from helm.common.tokenization_request import (
|
|
|
14
18
|
DecodeRequestResult,
|
|
15
19
|
)
|
|
16
20
|
from helm.common.request import Request, RequestResult
|
|
17
|
-
from helm.
|
|
21
|
+
from helm.benchmark.model_metadata_registry import ModelMetadata
|
|
18
22
|
from helm.proxy.query import Query, QueryResult
|
|
19
23
|
from helm.proxy.accounts import Authentication, Account
|
|
24
|
+
from helm.common.cache import CacheConfig
|
|
20
25
|
|
|
21
26
|
VERSION = "1.0"
|
|
22
27
|
ACCOUNTS_FILE = "accounts.sqlite"
|
|
@@ -29,7 +34,7 @@ MAX_EXPANSION = 1000
|
|
|
29
34
|
class GeneralInfo:
|
|
30
35
|
version: str
|
|
31
36
|
example_queries: List[Query]
|
|
32
|
-
all_models: List[
|
|
37
|
+
all_models: List[ModelMetadata]
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
def expand_environments(environments: Dict[str, List[str]]):
|
|
@@ -69,6 +74,8 @@ def synthesize_request(prompt: str, settings: str, environment: Dict[str, str])
|
|
|
69
74
|
request: Dict[str, Any] = {}
|
|
70
75
|
request["prompt"] = substitute_text(prompt, environment)
|
|
71
76
|
request.update(parse_hocon(substitute_text(settings, environment)))
|
|
77
|
+
if "model_deployment" not in request and "model" not in request:
|
|
78
|
+
request["model_deployment"] = "openai/text-davinci-002"
|
|
72
79
|
return Request(**request)
|
|
73
80
|
|
|
74
81
|
|
|
@@ -103,11 +110,31 @@ class Service(ABC):
|
|
|
103
110
|
"""Decodes to text."""
|
|
104
111
|
pass
|
|
105
112
|
|
|
113
|
+
@abstractmethod
|
|
114
|
+
def upload(self, auth: Authentication, request: FileUploadRequest) -> FileUploadResult:
|
|
115
|
+
"""Uploads a file to external storage."""
|
|
116
|
+
pass
|
|
117
|
+
|
|
118
|
+
@abstractmethod
|
|
119
|
+
def check_nudity(self, auth: Authentication, request: NudityCheckRequest) -> NudityCheckResult:
|
|
120
|
+
"""Check for nudity for a batch of images."""
|
|
121
|
+
pass
|
|
122
|
+
|
|
123
|
+
@abstractmethod
|
|
124
|
+
def compute_clip_score(self, auth: Authentication, request: CLIPScoreRequest) -> CLIPScoreResult:
|
|
125
|
+
"""Computes CLIPScore for a given caption and image."""
|
|
126
|
+
pass
|
|
127
|
+
|
|
106
128
|
@abstractmethod
|
|
107
129
|
def get_toxicity_scores(self, auth: Authentication, request: PerspectiveAPIRequest) -> PerspectiveAPIRequestResult:
|
|
108
130
|
"""Get toxicity scores for a batch of text."""
|
|
109
131
|
pass
|
|
110
132
|
|
|
133
|
+
@abstractmethod
|
|
134
|
+
def get_moderation_results(self, auth: Authentication, request: ModerationAPIRequest) -> ModerationAPIRequestResult:
|
|
135
|
+
"""Get OpenAI's moderation results for some text."""
|
|
136
|
+
pass
|
|
137
|
+
|
|
111
138
|
@abstractmethod
|
|
112
139
|
def make_critique_request(self, auth: Authentication, request: CritiqueRequest) -> CritiqueRequestResult:
|
|
113
140
|
"""Get responses to a critique request."""
|
|
@@ -147,3 +174,8 @@ class Service(ABC):
|
|
|
147
174
|
def shutdown(self, auth: Authentication):
|
|
148
175
|
"""Shutdown server."""
|
|
149
176
|
pass
|
|
177
|
+
|
|
178
|
+
@abstractmethod
|
|
179
|
+
def get_cache_config(self, shard_name: str) -> CacheConfig:
|
|
180
|
+
"""Returns a CacheConfig"""
|
|
181
|
+
pass
|
|
@@ -17,7 +17,7 @@ from sqlitedict import SqliteDict
|
|
|
17
17
|
from helm.common.authentication import Authentication
|
|
18
18
|
from helm.common.request import Request, RequestResult
|
|
19
19
|
from helm.common.tokenization_request import TokenizationRequest, TokenizationRequestResult
|
|
20
|
-
from helm.proxy.accounts import Account
|
|
20
|
+
from helm.proxy.accounts import Account, set_default_quotas
|
|
21
21
|
from .remote_service import RemoteService
|
|
22
22
|
from .service import ACCOUNTS_FILE
|
|
23
23
|
|
|
@@ -55,6 +55,7 @@ class TestRemoteServerService:
|
|
|
55
55
|
|
|
56
56
|
with SqliteDict(os.path.join(path, ACCOUNTS_FILE)) as cache:
|
|
57
57
|
account: Account = Account(TestRemoteServerService._ADMIN_API_KEY, is_admin=True)
|
|
58
|
+
set_default_quotas(account)
|
|
58
59
|
cache[TestRemoteServerService._ADMIN_API_KEY] = asdict(account)
|
|
59
60
|
cache.commit()
|
|
60
61
|
return path
|
|
@@ -85,7 +86,7 @@ class TestRemoteServerService:
|
|
|
85
86
|
|
|
86
87
|
@staticmethod
|
|
87
88
|
def query(url: str, auth: Authentication, prompt: str):
|
|
88
|
-
request = Request(prompt=prompt, model="simple/model1")
|
|
89
|
+
request = Request(prompt=prompt, model="simple/model1", model_deployment="simple/model1")
|
|
89
90
|
response: RequestResult = RemoteService(base_url=url).make_request(auth, request)
|
|
90
91
|
response_text: str = response.completions[0].text
|
|
91
92
|
# With the toy model (simple/model1), we should expect the same response as the prompt
|
|
@@ -121,18 +122,18 @@ class TestRemoteServerService:
|
|
|
121
122
|
shutil.rmtree(cls.base_path)
|
|
122
123
|
|
|
123
124
|
def test_make_request(self):
|
|
124
|
-
request = Request(prompt="1 2 3", model="simple/model1")
|
|
125
|
+
request = Request(prompt="1 2 3", model="simple/model1", model_deployment="simple/model1")
|
|
125
126
|
response: RequestResult = self.service.make_request(self.auth, request)
|
|
126
127
|
assert response.success
|
|
127
128
|
|
|
128
129
|
def test_tokenize(self):
|
|
129
|
-
request = TokenizationRequest(text="1 2 3", tokenizer="simple/
|
|
130
|
+
request = TokenizationRequest(text="1 2 3", tokenizer="simple/tokenizer1")
|
|
130
131
|
response: TokenizationRequestResult = self.service.tokenize(self.auth, request)
|
|
131
|
-
assert [token.value for token in response.tokens] == ["1", "2", "3"]
|
|
132
|
+
assert [token.value for token in response.tokens] == ["1", " ", "2", " ", "3"]
|
|
132
133
|
|
|
133
134
|
def test_make_request_plus_sign(self):
|
|
134
135
|
# Ensure + in prompt doesn't get replaced by a blank space
|
|
135
|
-
request = Request(prompt="+", model="simple/model1")
|
|
136
|
+
request = Request(prompt="+", model="simple/model1", model_deployment="simple/model1")
|
|
136
137
|
response: RequestResult = self.service.make_request(self.auth, request)
|
|
137
138
|
assert response.completions[0].text == "+"
|
|
138
139
|
assert response.success
|