crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
|
@@ -1,6 +1,26 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from typing import List, Optional
|
|
3
3
|
|
|
4
|
+
from helm.common.image_generation_parameters import ImageGenerationParameters
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Adaptation methods
|
|
8
|
+
ADAPT_GENERATION: str = "generation"
|
|
9
|
+
ADAPT_LANGUAGE_MODELING: str = "language_modeling"
|
|
10
|
+
ADAPT_MULTIPLE_CHOICE_JOINT: str = "multiple_choice_joint"
|
|
11
|
+
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL: str = "multiple_choice_separate_original"
|
|
12
|
+
ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED: str = "multiple_choice_separate_calibrated"
|
|
13
|
+
ADAPT_RANKING_BINARY: str = "ranking_binary"
|
|
14
|
+
|
|
15
|
+
ADAPT_MULTIPLE_CHOICE_SEPARATE_METHODS: List[str] = [
|
|
16
|
+
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
|
|
17
|
+
ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
# Multimodal adaptation methods
|
|
21
|
+
ADAPT_GENERATION_MULTIMODAL: str = "generation_multimodal"
|
|
22
|
+
ADAPT_MULTIPLE_CHOICE_JOINT_MULTIMODAL: str = "multiple_choice_joint_multimodal"
|
|
23
|
+
|
|
4
24
|
|
|
5
25
|
@dataclass(frozen=True)
|
|
6
26
|
class Substitution:
|
|
@@ -26,6 +46,9 @@ class AdapterSpec:
|
|
|
26
46
|
# For example, it is recommended to prefix all prompts with [NLG] for UL2.
|
|
27
47
|
global_prefix: str = ""
|
|
28
48
|
|
|
49
|
+
# Append all prompts with this string.
|
|
50
|
+
global_suffix: str = ""
|
|
51
|
+
|
|
29
52
|
# Prompt starts with instructions
|
|
30
53
|
instructions: str = ""
|
|
31
54
|
|
|
@@ -68,12 +91,18 @@ class AdapterSpec:
|
|
|
68
91
|
# set of training instances. Used to compute error bars.
|
|
69
92
|
num_train_trials: int = 1
|
|
70
93
|
|
|
94
|
+
# Number of trials, where we query the model with the same requests, but different random seeds
|
|
95
|
+
num_trials: int = 1
|
|
96
|
+
|
|
71
97
|
# If true, randomly sample N training examples; if false, select N consecutive training examples
|
|
72
98
|
sample_train: bool = True
|
|
73
99
|
|
|
74
100
|
# Decoding parameters (inherited by `Request`)
|
|
75
101
|
|
|
76
|
-
# Model to make the request to (need to fill in)
|
|
102
|
+
# Model deployment to make the request to (need to fill in)
|
|
103
|
+
model_deployment: str = ""
|
|
104
|
+
|
|
105
|
+
# Model to make the request to
|
|
77
106
|
model: str = ""
|
|
78
107
|
|
|
79
108
|
# Temperature to use
|
|
@@ -89,5 +118,11 @@ class AdapterSpec:
|
|
|
89
118
|
random: Optional[str] = None
|
|
90
119
|
|
|
91
120
|
# If true, for instances with multiple correct reference, the gold answer should be considered
|
|
92
|
-
# to be all
|
|
121
|
+
# to be all the correct references rather than any of the correct references.
|
|
93
122
|
multi_label: bool = False
|
|
123
|
+
|
|
124
|
+
# Parameters for image generation
|
|
125
|
+
image_generation_parameters: Optional[ImageGenerationParameters] = None
|
|
126
|
+
|
|
127
|
+
# The splits from which evaluation instances will be drawn (set hash=False to make `AdapterSpec` hashable)
|
|
128
|
+
eval_splits: Optional[List[str]] = field(default=None, hash=False)
|
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from typing import List
|
|
3
3
|
|
|
4
|
-
import numpy as np
|
|
5
|
-
|
|
6
4
|
from helm.benchmark.adaptation.adapter_spec import AdapterSpec
|
|
7
|
-
from helm.benchmark.adaptation.
|
|
8
|
-
from helm.benchmark.scenarios.scenario import Instance
|
|
5
|
+
from helm.benchmark.adaptation.request_state import RequestState
|
|
6
|
+
from helm.benchmark.scenarios.scenario import Instance
|
|
9
7
|
from helm.benchmark.window_services.tokenizer_service import TokenizerService
|
|
10
8
|
from helm.benchmark.window_services.window_service import WindowService
|
|
11
9
|
from helm.benchmark.window_services.window_service_factory import WindowServiceFactory
|
|
12
|
-
from helm.common.hierarchical_logger import hlog
|
|
13
10
|
|
|
14
11
|
|
|
15
12
|
class Adapter(ABC):
|
|
@@ -21,48 +18,13 @@ class Adapter(ABC):
|
|
|
21
18
|
def __init__(self, adapter_spec: AdapterSpec, tokenizer_service: TokenizerService):
|
|
22
19
|
self.adapter_spec: AdapterSpec = adapter_spec
|
|
23
20
|
self.window_service: WindowService = WindowServiceFactory.get_window_service(
|
|
24
|
-
adapter_spec.
|
|
21
|
+
adapter_spec.model_deployment, tokenizer_service
|
|
25
22
|
)
|
|
26
23
|
|
|
27
24
|
@abstractmethod
|
|
28
|
-
def adapt(self, instances: List[Instance], parallelism: int) ->
|
|
25
|
+
def adapt(self, instances: List[Instance], parallelism: int) -> List[RequestState]:
|
|
29
26
|
"""
|
|
30
27
|
Takes a a list of `Instance`s and returns a `ScenarioState` with the
|
|
31
28
|
list of corresponding `RequestState`s.
|
|
32
29
|
"""
|
|
33
30
|
pass
|
|
34
|
-
|
|
35
|
-
def get_run_instances(self, instances: List[Instance]) -> List[Instance]:
|
|
36
|
-
"""
|
|
37
|
-
Get the instances necessary for this run:
|
|
38
|
-
Train instances (split=train): keep all (if any) for in-context learning
|
|
39
|
-
Eval instances (split=valid or test): keep at most `max_eval_instances` specified in `AdapterSpec` by sampling
|
|
40
|
-
Return the resulting train and eval instances.
|
|
41
|
-
"""
|
|
42
|
-
all_train_instances: List[Instance] = [instance for instance in instances if instance.split == TRAIN_SPLIT]
|
|
43
|
-
|
|
44
|
-
all_eval_instances: List[Instance] = [instance for instance in instances if instance.split in EVAL_SPLITS]
|
|
45
|
-
if (
|
|
46
|
-
self.adapter_spec.max_eval_instances is not None
|
|
47
|
-
and len(all_eval_instances) > self.adapter_spec.max_eval_instances
|
|
48
|
-
):
|
|
49
|
-
# Pick the first `self.adapter_spec.max_eval_instances`.
|
|
50
|
-
# The random sampling includes instances monotonically.
|
|
51
|
-
np.random.seed(0)
|
|
52
|
-
selected_eval_instances = list(
|
|
53
|
-
np.random.choice(
|
|
54
|
-
all_eval_instances, # type: ignore
|
|
55
|
-
self.adapter_spec.max_eval_instances,
|
|
56
|
-
replace=False,
|
|
57
|
-
)
|
|
58
|
-
)
|
|
59
|
-
else:
|
|
60
|
-
selected_eval_instances = all_eval_instances
|
|
61
|
-
|
|
62
|
-
hlog(
|
|
63
|
-
f"{len(instances)} instances, "
|
|
64
|
-
f"{len(all_train_instances)} train instances, "
|
|
65
|
-
f"{len(selected_eval_instances)}/{len(all_eval_instances)} eval instances"
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
return all_train_instances + selected_eval_instances
|
|
@@ -1,31 +1,26 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
from .language_modeling_adapter import LanguageModelingAdapter
|
|
8
|
-
from .multiple_choice_joint_adapter import MultipleChoiceJointAdapter
|
|
9
|
-
from .multiple_choice_separate_adapter import MultipleChoiceSeparateAdapter
|
|
10
|
-
from .multiple_choice_calibrated_adapter import MultipleChoiceCalibratedAdapter
|
|
11
|
-
from .binary_ranking_adapter import BinaryRankingAdapter
|
|
12
|
-
from .multimodal.generation_multimodal_adapter import GenerationMultimodalAdapter
|
|
13
|
-
|
|
14
|
-
# Adaptation methods
|
|
15
|
-
ADAPT_GENERATION: str = "generation"
|
|
16
|
-
ADAPT_LANGUAGE_MODELING: str = "language_modeling"
|
|
17
|
-
ADAPT_MULTIPLE_CHOICE_JOINT: str = "multiple_choice_joint"
|
|
18
|
-
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL: str = "multiple_choice_separate_original"
|
|
19
|
-
ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED: str = "multiple_choice_separate_calibrated"
|
|
20
|
-
ADAPT_RANKING_BINARY: str = "ranking_binary"
|
|
21
|
-
|
|
22
|
-
ADAPT_MULTIPLE_CHOICE_SEPARATE_METHODS: List[str] = [
|
|
23
|
-
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
|
|
1
|
+
from helm.benchmark.adaptation.adapter_spec import (
|
|
2
|
+
ADAPT_GENERATION,
|
|
3
|
+
ADAPT_GENERATION_MULTIMODAL,
|
|
4
|
+
ADAPT_LANGUAGE_MODELING,
|
|
5
|
+
ADAPT_MULTIPLE_CHOICE_JOINT,
|
|
6
|
+
ADAPT_MULTIPLE_CHOICE_JOINT_MULTIMODAL,
|
|
24
7
|
ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
8
|
+
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
|
|
9
|
+
ADAPT_RANKING_BINARY,
|
|
10
|
+
AdapterSpec,
|
|
11
|
+
)
|
|
12
|
+
from helm.benchmark.adaptation.adapters.adapter import Adapter
|
|
13
|
+
from helm.benchmark.adaptation.adapters.binary_ranking_adapter import BinaryRankingAdapter
|
|
14
|
+
from helm.benchmark.adaptation.adapters.generation_adapter import GenerationAdapter
|
|
15
|
+
from helm.benchmark.adaptation.adapters.language_modeling_adapter import LanguageModelingAdapter
|
|
16
|
+
from helm.benchmark.adaptation.adapters.multimodal.generation_multimodal_adapter import GenerationMultimodalAdapter
|
|
17
|
+
from helm.benchmark.adaptation.adapters.multimodal.multiple_choice_joint_multimodal_adapter import (
|
|
18
|
+
MultipleChoiceJointMultimodalAdapter,
|
|
19
|
+
)
|
|
20
|
+
from helm.benchmark.adaptation.adapters.multiple_choice_calibrated_adapter import MultipleChoiceCalibratedAdapter
|
|
21
|
+
from helm.benchmark.adaptation.adapters.multiple_choice_joint_adapter import MultipleChoiceJointAdapter
|
|
22
|
+
from helm.benchmark.adaptation.adapters.multiple_choice_separate_adapter import MultipleChoiceSeparateAdapter
|
|
23
|
+
from helm.benchmark.window_services.tokenizer_service import TokenizerService
|
|
29
24
|
|
|
30
25
|
|
|
31
26
|
class AdapterFactory:
|
|
@@ -51,6 +46,8 @@ class AdapterFactory:
|
|
|
51
46
|
adapter = BinaryRankingAdapter(adapter_spec, tokenizer_service)
|
|
52
47
|
elif method == ADAPT_GENERATION_MULTIMODAL:
|
|
53
48
|
adapter = GenerationMultimodalAdapter(adapter_spec, tokenizer_service)
|
|
49
|
+
elif method == ADAPT_MULTIPLE_CHOICE_JOINT_MULTIMODAL:
|
|
50
|
+
adapter = MultipleChoiceJointMultimodalAdapter(adapter_spec, tokenizer_service)
|
|
54
51
|
else:
|
|
55
52
|
raise ValueError(f"Invalid adaptation method: {method}")
|
|
56
53
|
|
|
@@ -50,6 +50,7 @@ class BinaryRankingAdapter(InContextLearningAdapter):
|
|
|
50
50
|
)
|
|
51
51
|
request = Request(
|
|
52
52
|
model=self.adapter_spec.model,
|
|
53
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
53
54
|
prompt=prompt.text,
|
|
54
55
|
num_completions=self.adapter_spec.num_outputs,
|
|
55
56
|
temperature=self.adapter_spec.temperature,
|
|
@@ -39,12 +39,14 @@ class GenerationAdapter(InContextLearningAdapter):
|
|
|
39
39
|
)
|
|
40
40
|
request = Request(
|
|
41
41
|
model=self.adapter_spec.model,
|
|
42
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
42
43
|
prompt=prompt.text,
|
|
43
44
|
num_completions=self.adapter_spec.num_outputs,
|
|
44
45
|
temperature=self.adapter_spec.temperature,
|
|
45
46
|
max_tokens=self.adapter_spec.max_tokens,
|
|
46
47
|
stop_sequences=self.adapter_spec.stop_sequences,
|
|
47
48
|
random=self.adapter_spec.random,
|
|
49
|
+
image_generation_parameters=self.adapter_spec.image_generation_parameters,
|
|
48
50
|
)
|
|
49
51
|
request_state = RequestState(
|
|
50
52
|
instance=eval_instance,
|
|
@@ -7,9 +7,9 @@ from typing import List, Dict, Optional
|
|
|
7
7
|
|
|
8
8
|
from helm.benchmark.adaptation.prompt import Prompt
|
|
9
9
|
from helm.benchmark.adaptation.request_state import RequestState
|
|
10
|
-
from helm.benchmark.adaptation.scenario_state import ScenarioState
|
|
11
10
|
from helm.benchmark.scenarios.scenario import Instance, TRAIN_SPLIT, EVAL_SPLITS, Reference
|
|
12
11
|
from helm.common.general import parallel_map
|
|
12
|
+
from helm.common.request import Request
|
|
13
13
|
from helm.common.hierarchical_logger import hlog, htrack, htrack_block
|
|
14
14
|
from .adapter import Adapter
|
|
15
15
|
|
|
@@ -30,7 +30,7 @@ class InContextLearningAdapter(Adapter, ABC):
|
|
|
30
30
|
pass
|
|
31
31
|
|
|
32
32
|
@htrack(None)
|
|
33
|
-
def adapt(self, instances: List[Instance], parallelism: int) ->
|
|
33
|
+
def adapt(self, instances: List[Instance], parallelism: int) -> List[RequestState]:
|
|
34
34
|
"""
|
|
35
35
|
Takes a list of `Instance`s and builds a list of corresponding `RequestState`s.
|
|
36
36
|
The reason we don't do this per eval instance is that we create a common set of
|
|
@@ -64,7 +64,7 @@ class InContextLearningAdapter(Adapter, ABC):
|
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
hlog(f"{len(all_request_states)} requests")
|
|
67
|
-
return
|
|
67
|
+
return all_request_states
|
|
68
68
|
|
|
69
69
|
def _adapt_trial_index(
|
|
70
70
|
self,
|
|
@@ -101,7 +101,23 @@ class InContextLearningAdapter(Adapter, ABC):
|
|
|
101
101
|
hlog(line)
|
|
102
102
|
|
|
103
103
|
# Flatten and return
|
|
104
|
-
|
|
104
|
+
all_request_states: List[RequestState] = [request_state for result in results for request_state in result]
|
|
105
|
+
return self._add_trials(all_request_states)
|
|
106
|
+
|
|
107
|
+
def _add_trials(self, request_states: List[RequestState]) -> List[RequestState]:
|
|
108
|
+
"""Expand the request states by adding trials."""
|
|
109
|
+
if self.adapter_spec.num_trials <= 1:
|
|
110
|
+
return request_states
|
|
111
|
+
|
|
112
|
+
all_request_states: List[RequestState] = request_states.copy()
|
|
113
|
+
for i in range(1, self.adapter_spec.num_trials):
|
|
114
|
+
seed: str = str(i)
|
|
115
|
+
for request_state in request_states:
|
|
116
|
+
request: Request = replace(request_state.request, random=seed)
|
|
117
|
+
all_request_states.append(replace(request_state, request=request))
|
|
118
|
+
|
|
119
|
+
assert len(all_request_states) == len(request_states) * self.adapter_spec.num_trials
|
|
120
|
+
return all_request_states
|
|
105
121
|
|
|
106
122
|
def sample_examples(
|
|
107
123
|
self, all_train_instances: List[Instance], seed: int, sample_train: bool = True
|
|
@@ -214,6 +230,7 @@ class InContextLearningAdapter(Adapter, ABC):
|
|
|
214
230
|
# Prompt
|
|
215
231
|
prompt = Prompt(
|
|
216
232
|
global_prefix=self.adapter_spec.global_prefix,
|
|
233
|
+
global_suffix=self.adapter_spec.global_suffix,
|
|
217
234
|
instructions_block=instructions_block,
|
|
218
235
|
train_instance_blocks=train_instance_blocks,
|
|
219
236
|
eval_instance_block=eval_instance_block,
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from typing import List, Tuple, Optional
|
|
2
2
|
|
|
3
3
|
from helm.benchmark.adaptation.request_state import RequestState
|
|
4
|
-
from helm.benchmark.adaptation.scenario_state import ScenarioState
|
|
5
4
|
from helm.benchmark.scenarios.scenario import Instance, EVAL_SPLITS
|
|
6
5
|
from helm.benchmark.window_services.window_service import EncodeResult
|
|
7
6
|
from helm.common.general import flatten_list, parallel_map
|
|
@@ -26,7 +25,7 @@ class LanguageModelingAdapter(Adapter):
|
|
|
26
25
|
"""
|
|
27
26
|
|
|
28
27
|
@htrack(None)
|
|
29
|
-
def adapt(self, instances: List[Instance], parallelism: int) ->
|
|
28
|
+
def adapt(self, instances: List[Instance], parallelism: int) -> List[RequestState]:
|
|
30
29
|
"""
|
|
31
30
|
Takes a list of `Instance`s and builds a list of corresponding `RequestState`s.
|
|
32
31
|
Only requires eval instances.
|
|
@@ -34,13 +33,19 @@ class LanguageModelingAdapter(Adapter):
|
|
|
34
33
|
# Pick out evaluation instances. This includes both valid and test splits.
|
|
35
34
|
eval_instances: List[Instance] = [instance for instance in instances if instance.split in EVAL_SPLITS]
|
|
36
35
|
hlog(f"{len(eval_instances)} eval instances")
|
|
37
|
-
|
|
36
|
+
# Since at least 2023-01-01, this adapter was using `instances` instead of `eval_instances`
|
|
37
|
+
# https://github.com/stanford-crfm/helm/commit/ac9892f7449418d32ab55843702db312b58003ed#diff-69871182494f0d9f4bc6aeea76e99c13edf0213e2c123432a63cd2024d66ffcaR39
|
|
38
|
+
# This assert is intended to identify run specs (if any) that had been producing incorrect results.
|
|
39
|
+
assert len(eval_instances) == len(instances), (
|
|
40
|
+
"Non-evaluation instances were passed to LanguageModelingAdapter, but LanguageModelingAdapter "
|
|
41
|
+
+ "expects evaluation instances only. Please open a GitHub issue with your RunSpec."
|
|
42
|
+
)
|
|
38
43
|
all_request_states: List[RequestState] = flatten_list(
|
|
39
|
-
parallel_map(self._generate_requests,
|
|
44
|
+
parallel_map(self._generate_requests, eval_instances, parallelism)
|
|
40
45
|
)
|
|
41
46
|
hlog(f"{len(all_request_states)} requests")
|
|
42
47
|
|
|
43
|
-
return
|
|
48
|
+
return all_request_states
|
|
44
49
|
|
|
45
50
|
def _generate_requests(self, eval_instance: Instance) -> List[RequestState]:
|
|
46
51
|
"""
|
|
@@ -114,6 +119,7 @@ class LanguageModelingAdapter(Adapter):
|
|
|
114
119
|
)
|
|
115
120
|
request = Request(
|
|
116
121
|
model=self.adapter_spec.model,
|
|
122
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
117
123
|
prompt=prompt_text,
|
|
118
124
|
num_completions=1,
|
|
119
125
|
temperature=0,
|
|
@@ -162,6 +168,7 @@ class LanguageModelingAdapter(Adapter):
|
|
|
162
168
|
|
|
163
169
|
request = Request(
|
|
164
170
|
model=self.adapter_spec.model,
|
|
171
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
165
172
|
prompt=prompt_text,
|
|
166
173
|
num_completions=1,
|
|
167
174
|
temperature=0,
|
|
@@ -29,6 +29,7 @@ class GenerationMultimodalAdapter(InContextLearningMultimodalAdapter):
|
|
|
29
29
|
|
|
30
30
|
request = Request(
|
|
31
31
|
model=self.adapter_spec.model,
|
|
32
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
32
33
|
multimodal_prompt=prompt.multimedia_object,
|
|
33
34
|
num_completions=self.adapter_spec.num_outputs,
|
|
34
35
|
temperature=self.adapter_spec.temperature,
|
|
@@ -27,6 +27,7 @@ class InContextLearningMultimodalAdapter(InContextLearningAdapter, ABC):
|
|
|
27
27
|
|
|
28
28
|
request = Request(
|
|
29
29
|
model=self.adapter_spec.model,
|
|
30
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
30
31
|
multimodal_prompt=prompt.multimedia_object,
|
|
31
32
|
num_completions=self.adapter_spec.num_outputs,
|
|
32
33
|
temperature=self.adapter_spec.temperature,
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from typing import Dict, List, Optional
|
|
3
|
+
|
|
4
|
+
from helm.benchmark.adaptation.request_state import RequestState
|
|
5
|
+
from helm.benchmark.scenarios.scenario import Instance
|
|
6
|
+
from helm.common.media_object import MediaObject, MultimediaObject
|
|
7
|
+
from helm.common.request import Request
|
|
8
|
+
from helm.benchmark.adaptation.adapters.multimodal.in_context_learning_multimodal_adapter import (
|
|
9
|
+
InContextLearningMultimodalAdapter,
|
|
10
|
+
)
|
|
11
|
+
from .multimodal_prompt import MultimodalPrompt
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MultipleChoiceJointMultimodalAdapter(InContextLearningMultimodalAdapter, ABC):
|
|
15
|
+
"""
|
|
16
|
+
An `Adapter`, guided by the `AdapterSpec`, takes a `Scenario` and produces
|
|
17
|
+
a `ScenarioState`. This `Adapter` has additional logic to support in-context
|
|
18
|
+
learning for multimodal models.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
@staticmethod
|
|
22
|
+
def get_reference_prefix(prefix: str, i: int) -> str:
|
|
23
|
+
"""
|
|
24
|
+
Example: prefix = "\nA. ", i = 2, return "\nC. "
|
|
25
|
+
"""
|
|
26
|
+
return prefix.replace("A", chr(ord("A") + i))
|
|
27
|
+
|
|
28
|
+
def generate_requests(
|
|
29
|
+
self, eval_instance: Instance, train_trial_index: int, training_instances: List[Instance]
|
|
30
|
+
) -> List[RequestState]:
|
|
31
|
+
prompt: MultimodalPrompt = self.construct_prompt(
|
|
32
|
+
training_instances, eval_instance, include_output=False, reference_index=None
|
|
33
|
+
)
|
|
34
|
+
output_mapping: Dict[str, str] = dict(
|
|
35
|
+
(self.get_reference_prefix("A", reference_index), reference.output.text)
|
|
36
|
+
for reference_index, reference in enumerate(eval_instance.references)
|
|
37
|
+
)
|
|
38
|
+
request = Request(
|
|
39
|
+
model=self.adapter_spec.model,
|
|
40
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
41
|
+
multimodal_prompt=prompt.multimedia_object,
|
|
42
|
+
num_completions=self.adapter_spec.num_outputs,
|
|
43
|
+
temperature=self.adapter_spec.temperature,
|
|
44
|
+
max_tokens=self.adapter_spec.max_tokens,
|
|
45
|
+
stop_sequences=[],
|
|
46
|
+
random=self.adapter_spec.random,
|
|
47
|
+
)
|
|
48
|
+
request_state = RequestState(
|
|
49
|
+
instance=eval_instance,
|
|
50
|
+
reference_index=None,
|
|
51
|
+
request_mode=None,
|
|
52
|
+
train_trial_index=train_trial_index,
|
|
53
|
+
output_mapping=output_mapping,
|
|
54
|
+
request=request,
|
|
55
|
+
result=None,
|
|
56
|
+
num_train_instances=prompt.num_train_instances,
|
|
57
|
+
prompt_truncated=False,
|
|
58
|
+
)
|
|
59
|
+
return [request_state]
|
|
60
|
+
|
|
61
|
+
def construct_example_multimodal_prompt(
|
|
62
|
+
self, instance: Instance, include_output: bool, reference_index: Optional[int]
|
|
63
|
+
) -> MultimediaObject:
|
|
64
|
+
"""
|
|
65
|
+
Returns a single example of the prompt. `include_output` controls whether the gold output is included.
|
|
66
|
+
"""
|
|
67
|
+
# Input
|
|
68
|
+
assert instance.input.multimedia_content is not None
|
|
69
|
+
result: MultimediaObject = instance.input.multimedia_content.add_textual_prefix(self.adapter_spec.input_prefix)
|
|
70
|
+
result = result.add_textual_suffix(self.adapter_spec.input_suffix)
|
|
71
|
+
|
|
72
|
+
# Include the references
|
|
73
|
+
delimiter: str = ", "
|
|
74
|
+
no_correct_references: str = "n/a"
|
|
75
|
+
output: str = no_correct_references
|
|
76
|
+
for reference_index, reference in enumerate(instance.references):
|
|
77
|
+
prefix = self.get_reference_prefix(self.adapter_spec.reference_prefix, reference_index)
|
|
78
|
+
|
|
79
|
+
if reference.output.multimedia_content is not None:
|
|
80
|
+
reference_output_content: MultimediaObject = reference.output.multimedia_content
|
|
81
|
+
reference_output_content = reference_output_content.add_textual_prefix(prefix)
|
|
82
|
+
reference_output_content = reference_output_content.add_textual_suffix(
|
|
83
|
+
self.adapter_spec.reference_suffix
|
|
84
|
+
)
|
|
85
|
+
result = result.combine(reference_output_content)
|
|
86
|
+
else:
|
|
87
|
+
result = result.add_textual_suffix(prefix + reference.output.text + self.adapter_spec.reference_suffix)
|
|
88
|
+
|
|
89
|
+
if reference.is_correct:
|
|
90
|
+
if output == no_correct_references:
|
|
91
|
+
output = self.get_reference_prefix("A", reference_index)
|
|
92
|
+
elif self.adapter_spec.multi_label:
|
|
93
|
+
output += delimiter
|
|
94
|
+
output += self.get_reference_prefix("A", reference_index)
|
|
95
|
+
|
|
96
|
+
if include_output:
|
|
97
|
+
output_content: MultimediaObject = MultimediaObject([MediaObject(text=output, content_type="text/plain")])
|
|
98
|
+
output_content = output_content.add_textual_prefix(self.adapter_spec.output_prefix)
|
|
99
|
+
output_content = output_content.add_textual_suffix(self.adapter_spec.output_suffix)
|
|
100
|
+
result = result.combine(output_content)
|
|
101
|
+
else:
|
|
102
|
+
result = result.add_textual_suffix(self.adapter_spec.output_prefix.rstrip())
|
|
103
|
+
|
|
104
|
+
return result
|
helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
import tempfile
|
|
3
3
|
import unittest
|
|
4
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
4
5
|
|
|
5
6
|
from helm.common.media_object import MediaObject, MultimediaObject
|
|
6
7
|
from helm.benchmark.scenarios.scenario import Instance, Reference, Input, Output, TEST_SPLIT, TRAIN_SPLIT, CORRECT_TAG
|
|
@@ -14,7 +15,7 @@ from .multimodal_prompt import MultimodalPrompt
|
|
|
14
15
|
class TestInContextLearningMultimodalAdapter(unittest.TestCase):
|
|
15
16
|
def setup_method(self, _):
|
|
16
17
|
self._path: str = tempfile.mkdtemp()
|
|
17
|
-
self._tokenizer_service = get_tokenizer_service(self._path)
|
|
18
|
+
self._tokenizer_service = get_tokenizer_service(self._path, BlackHoleCacheBackendConfig())
|
|
18
19
|
|
|
19
20
|
def teardown_method(self, _):
|
|
20
21
|
shutil.rmtree(self._path)
|
|
@@ -22,6 +23,7 @@ class TestInContextLearningMultimodalAdapter(unittest.TestCase):
|
|
|
22
23
|
def test_construct_prompt(self):
|
|
23
24
|
adapter_spec: AdapterSpec = AdapterSpec(
|
|
24
25
|
model="simple/model1",
|
|
26
|
+
model_deployment="simple/model1",
|
|
25
27
|
method=ADAPT_GENERATION_MULTIMODAL,
|
|
26
28
|
global_prefix="[START]",
|
|
27
29
|
instructions="Please answer the following question about the images.",
|
|
@@ -91,6 +93,7 @@ class TestInContextLearningMultimodalAdapter(unittest.TestCase):
|
|
|
91
93
|
def test_construct_prompt_multi_label(self):
|
|
92
94
|
adapter_spec: AdapterSpec = AdapterSpec(
|
|
93
95
|
model="simple/model1",
|
|
96
|
+
model_deployment="simple/model1",
|
|
94
97
|
method=ADAPT_GENERATION_MULTIMODAL,
|
|
95
98
|
global_prefix="[START]",
|
|
96
99
|
instructions="Please answer the following question about the images.",
|
|
@@ -171,6 +174,7 @@ class TestInContextLearningMultimodalAdapter(unittest.TestCase):
|
|
|
171
174
|
"""
|
|
172
175
|
adapter_spec: AdapterSpec = AdapterSpec(
|
|
173
176
|
model="simple/model1",
|
|
177
|
+
model_deployment="simple/model1",
|
|
174
178
|
method=ADAPT_GENERATION_MULTIMODAL,
|
|
175
179
|
input_prefix="User: ",
|
|
176
180
|
input_suffix="<end_of_utterance>",
|
|
@@ -55,6 +55,7 @@ class MultipleChoiceJointAdapter(InContextLearningAdapter):
|
|
|
55
55
|
)
|
|
56
56
|
request = Request(
|
|
57
57
|
model=self.adapter_spec.model,
|
|
58
|
+
model_deployment=self.adapter_spec.model_deployment,
|
|
58
59
|
prompt=prompt.text,
|
|
59
60
|
num_completions=1,
|
|
60
61
|
top_k_per_token=self.adapter_spec.num_outputs,
|
|
@@ -2,6 +2,7 @@ import shutil
|
|
|
2
2
|
import tempfile
|
|
3
3
|
|
|
4
4
|
from helm.common.authentication import Authentication
|
|
5
|
+
from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
|
|
5
6
|
from helm.proxy.services.server_service import ServerService
|
|
6
7
|
from helm.benchmark.window_services.tokenizer_service import TokenizerService
|
|
7
8
|
|
|
@@ -13,7 +14,7 @@ class TestAdapter:
|
|
|
13
14
|
|
|
14
15
|
def setup_method(self):
|
|
15
16
|
self.path: str = tempfile.mkdtemp()
|
|
16
|
-
service = ServerService(base_path=self.path, root_mode=True)
|
|
17
|
+
service = ServerService(base_path=self.path, root_mode=True, cache_backend_config=BlackHoleCacheBackendConfig())
|
|
17
18
|
self.tokenizer_service = TokenizerService(service, Authentication("test"))
|
|
18
19
|
|
|
19
20
|
def teardown_method(self, _):
|