crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import os.path
|
|
2
|
+
from typing import List, Optional, Dict, Any
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
|
|
5
|
+
from datasets import load_dataset
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
from helm.benchmark.scenarios.scenario import (
|
|
9
|
+
CORRECT_TAG,
|
|
10
|
+
ASSET_NAME_TAG,
|
|
11
|
+
ASSET_PATH_TAG,
|
|
12
|
+
TEST_SPLIT,
|
|
13
|
+
VALID_SPLIT,
|
|
14
|
+
Instance,
|
|
15
|
+
Input,
|
|
16
|
+
Output,
|
|
17
|
+
Reference,
|
|
18
|
+
Scenario,
|
|
19
|
+
)
|
|
20
|
+
from helm.common.media_object import MediaObject, MultimediaObject
|
|
21
|
+
from helm.common.general import ensure_directory_exists
|
|
22
|
+
from helm.common.hierarchical_logger import hlog
|
|
23
|
+
|
|
24
|
+
# Sentinel stored in row["structure"] by subclasses once the structure has been
# consumed and is no longer available as a path or as raw text.
PROCESSED: str = "processed"


class Image2StructureScenario(Scenario):
    """Base scenario for image-to-structure tasks loaded from a Hugging Face dataset.

    Concrete scenarios provide BASE_PROMPT, HUGGINGFACE_DATASET_NAME and SUBSETS
    and implement `compile_and_save`. The hooks `preprocess_row`, `build_prompt`
    and `finalize` may be overridden to customize how each row is handled.
    """

    BASE_PROMPT: str
    HUGGINGFACE_DATASET_NAME: str
    SUBSETS: List[str]

    name: str
    description: str
    tags = ["vision-language"]

    # Maps HELM split names to the split names used by the Hugging Face dataset
    helm_split_to_huggingface_split = {
        TEST_SPLIT: "test",
        VALID_SPLIT: "validation",
    }

    def __init__(self, subset: str, recompile_prompt: bool = True, split: str = VALID_SPLIT):
        """Store the configuration of the scenario.

        Args:
            subset (str): The subset to load, must be one of SUBSETS
            recompile_prompt (bool): Whether the image is recompiled from the structure
                instead of being taken directly from the dataset
            split (str): The HELM split to load
        """
        super().__init__()
        assert subset in self.SUBSETS, f"Invalid subset: {subset}"
        self._subset: str = subset
        self._recompile_prompt: bool = recompile_prompt
        self._split: str = split
        # Only known once get_instances() has been called
        self._output_path: Optional[str] = None

    def preprocess_row(self, row: Dict[str, Any], assets_path: str) -> Dict[str, Any]:
        """Prepare a dataset row. The default drops "assets" and declares that there are none."""
        del row["assets"]
        row["assets_paths"] = []
        row["assets_names"] = []
        return row

    def build_prompt(self, row: Dict[str, Any]) -> str:
        """Return the prompt for the row. The default is the same BASE_PROMPT for every row."""
        return self.BASE_PROMPT

    @abstractmethod
    def compile_and_save(self, structure: str, assets_path: str, destination_path: str) -> str:
        """Compile the prompt, should save the image and return the text extracted from the image"""
        pass

    def finalize(self, row: Dict[str, Any]) -> None:
        """Perform cleanup operations after the instance has been generated."""
        pass

    def _build_answer_reference(self, row: Dict[str, Any], image_object: MediaObject) -> Reference:
        """Create the correct reference holding the image and, if available, the text/structure."""
        if "structure" in row:
            media: List[MediaObject] = [image_object]
            if os.path.exists(row["structure"]):
                # The structure is a path: also expose the files used to compile it (such as a
                # repository containing the HTML, CSS, and JavaScript files of a webpage)
                media.append(MediaObject(location=row["structure"], content_type="path/path"))
            # Otherwise the structure is either the PROCESSED marker (already consumed, e.g. an
            # archive extracted to a temporary path that has since been deleted) or directly a
            # string such as LaTeX code. In both cases only the image is attached.
            return Reference(
                output=Output(text=row["text"], multimedia_content=MultimediaObject(media)),
                tags=[CORRECT_TAG],
            )

        if "text" in row:
            return Reference(
                output=Output(text=row["text"], multimedia_content=MultimediaObject([image_object])),
                tags=[CORRECT_TAG],
            )

        return Reference(output=Output(multimedia_content=MultimediaObject([image_object])), tags=[CORRECT_TAG])

    def _build_asset_references(self, row: Dict[str, Any]) -> List[Reference]:
        """Create the references listing the paths and the names of the assets (if any)."""
        if not len(row["assets_paths"]) > 0:
            return []

        paths_reference: Reference = Reference(
            output=Output(
                # TODO: This is for debugging purposes (to show in the frontend)
                text=", ".join(row["assets_paths"]),
                multimedia_content=MultimediaObject(
                    [
                        MediaObject(location=asset, content_type=f"image/{asset.split('.')[-1].lower()}")
                        for asset in row["assets_paths"]
                    ]
                ),
            ),
            tags=[ASSET_PATH_TAG],
        )
        names_reference: Reference = Reference(
            output=Output(
                # TODO: This is for debugging purposes (to show in the frontend)
                text=", ".join(row["assets_names"]),
                multimedia_content=MultimediaObject(
                    [MediaObject(text=asset, content_type="text/plain") for asset in row["assets_names"]]
                ),
            ),
            tags=[ASSET_NAME_TAG],
        )
        return [paths_reference, names_reference]

    def get_instances(self, output_path: str) -> List[Instance]:
        """Build the instances of the scenario from the Hugging Face dataset.

        Rows whose category does not match the requested subset are skipped.
        Every remaining row goes through these steps:
            1. Preprocess the row (`preprocess_row`)
            2. Save the image locally, unless a file already exists for this uuid
                - without recompilation, the dataset image is saved as is
                - with recompilation, `compile_and_save` renders the structure
                  and its returned text is stored in row["text"]
            3. Create the prompt (`build_prompt`)
            4. Create the multimedia content (prompt text followed by the image)
            5. Create the references (answer, then asset paths and names if any)
            6. Finalize the row (`finalize`) and create the Instance

        Args:
            output_path (str): The path where the instances will be saved

        Returns:
            List[Instance]: The list of instances

        Raises:
            ValueError: If recompilation is requested for a row without a structure
        """
        self._output_path = output_path
        images_path: str = os.path.join(output_path, "data/images", self._subset)
        assets_path: str = os.path.join(output_path, "data/assets", self._subset)
        for directory in (images_path, assets_path):
            ensure_directory_exists(directory)

        dataset = load_dataset(
            self.HUGGINGFACE_DATASET_NAME,
            self._subset,
            split=self.helm_split_to_huggingface_split[self._split],
            cache_dir=output_path,
        )

        instances: List[Instance] = []
        for row in tqdm(dataset):
            question_uuid: str = str(row["uuid"]).replace('"', "")
            # The category is compared without its first and last characters
            if row["category"][1:-1] != self._subset:
                hlog(
                    f"Skipping instance {question_uuid} as it belong in category"
                    f" {row['category']} and not {self._subset}"
                )
                continue

            # Step 1: Preprocess the row
            row = self.preprocess_row(row, assets_path)

            # Step 2: Save the image locally (an existing file is reused)
            image_path: str = os.path.join(images_path, f"{question_uuid}.png")
            if not os.path.exists(image_path):
                if self._recompile_prompt:
                    if "structure" not in row:
                        raise ValueError("Cannot recompile prompt without structure")
                    row["text"] = self.compile_and_save(row["structure"], assets_path, image_path)
                else:
                    row["image"].save(image_path)

            # Step 3: Create the prompt
            prompt: str = self.build_prompt(row)

            # Step 4: Create the multimedia content
            image_object = MediaObject(location=image_path, content_type="image/png")
            content: List[MediaObject] = [MediaObject(text=prompt, content_type="text/plain"), image_object]

            # Step 5: Create the references
            references: List[Reference] = [self._build_answer_reference(row, image_object)]
            references.extend(self._build_asset_references(row))

            # Step 6: Finalize the Instance
            self.finalize(row)
            instances.append(
                Instance(
                    input=Input(multimedia_content=MultimediaObject(content)),
                    references=references,
                    split=self._split,
                )
            )

        assert len(instances) > 0, f"No instances found for subject {self._subset}"
        return instances
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from helm.benchmark.scenarios.scenario import VALID_SPLIT
|
|
2
|
+
from helm.benchmark.scenarios.vision_language.image2structure.utils_latex import (
|
|
3
|
+
latex_to_image,
|
|
4
|
+
strip_unnecessary_latex_parts,
|
|
5
|
+
)
|
|
6
|
+
from helm.benchmark.scenarios.vision_language.image2structure.image2structure_scenario import Image2StructureScenario
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LatexScenario(Image2StructureScenario):
    """Image-to-LaTeX scenario: the model must produce the LaTeX code that renders the image."""

    BASE_PROMPT = (
        "Please provide the LaTeX code used to generate this image. "
        "Only generate the code relevant to what you see. "
        "Your code will be surrounded by all the imports necessary as well as the begin and end document delimiters."
    )
    HUGGINGFACE_DATASET_NAME = "stanford-crfm/i2s-latex"
    SUBSETS = ["equation", "table", "plot", "algorithm"]

    name = "image2latex"
    description = "Evaluate multimodal models on Latex generation to recreate a provided image"

    def __init__(self, subset: str, recompile_prompt: bool = True, split: str = VALID_SPLIT):
        """Forward the configuration unchanged to Image2StructureScenario."""
        super().__init__(subset, recompile_prompt, split)

    def compile_and_save(self, structure: str, assets_path: str, destination_path: str) -> str:
        """Render the LaTeX structure to an image saved at destination_path.

        Returns the LaTeX code that was compiled, passed through
        strip_unnecessary_latex_parts.
        """
        rendered, infos = latex_to_image(structure, assets_path=assets_path, crop=True)
        rendered.save(destination_path)
        assert "latex_code" in infos
        return strip_unnecessary_latex_parts(infos["latex_code"])
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from helm.benchmark.scenarios.scenario import VALID_SPLIT
|
|
2
|
+
from helm.benchmark.scenarios.vision_language.image2structure.image2structure_scenario import Image2StructureScenario
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class MusicSheetScenario(Image2StructureScenario):
    """Image-to-Lilypond scenario: the model must produce Lilypond code recreating a music sheet."""

    BASE_PROMPT = (
        "Please generate the Lilypond code to generate a music sheet that looks like this image"
        " as much as feasible possible.\n"
        "This music sheet was created by me, and I would like to recreate it using Lilypond."
    )
    HUGGINGFACE_DATASET_NAME = "stanford-crfm/i2s-musicsheet"
    SUBSETS = ["music"]

    name = "image2musicsheet"
    description = "Evaluate multimodal models on Lilypond generation to recreate a provided image"

    def __init__(self, subset: str, recompile_prompt: bool = True, split: str = VALID_SPLIT):
        """Forward the configuration unchanged to Image2StructureScenario."""
        super().__init__(subset, recompile_prompt, split)

    def compile_and_save(self, structure: str, assets_path: str, destination_path: str) -> str:
        """Always raise: this scenario cannot recompile images since there is no ground truth."""
        raise Exception("Music sheets have no ground truth, compilation is not possible")
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
from typing import Optional, Tuple, List, Dict, Any
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from helm.common.optional_dependencies import handle_module_not_found_error, OptionalDependencyNotInstalled
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from latex import build_pdf
|
|
11
|
+
from pdf2image import convert_from_bytes
|
|
12
|
+
from PIL import ImageOps
|
|
13
|
+
from PIL.Image import Image
|
|
14
|
+
except ModuleNotFoundError as e:
|
|
15
|
+
handle_module_not_found_error(e, suggestions=["image2structure"])
|
|
16
|
+
|
|
17
|
+
# LaTeX preamble
# Make sure to install "latex-full".
# Inserted right after \documentclass when the code declares no package itself,
# and again by handle_latex_error when an environment is reported as undefined.
TEX_INCLUDES = r"""
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{graphicx}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{xcolor}
\usepackage{algorithm}
\usepackage{algorithmicx}
\usepackage{algpseudocode}
\usepackage{listings}
\usepackage{stfloats}
\usepackage{epstopdf}
\usepackage{pgfplots}
\usepackage{tikz}
\usepackage{tikz-cd}
\usepackage{tikz-qtree}
\usepackage{tikz-dependency}
\usepackage{tikz-3dplot}
\usepackage{tikz-network}
\usepackage[flushleft]{threeparttable}
\usepackage{adjustbox}
"""

# LaTeX delimiters (each one is defined exactly once)
TEX_BEGIN_FILE = r"""\documentclass{article}"""
TEX_BEGIN_DOCUMENT = r"""\begin{document}"""
TEX_END_DOCUMENT = r"""\end{document}"""

# Number of times to try to fix the LaTeX code
MAX_NUM_TRIES: int = 3

# (numbered environment, unnumbered "starred" equivalent) pairs.
# Applied as plain string replacements, so an already starred name is left untouched.
TEX_REPLACE_NUMBERING: List[Tuple[str, str]] = [
    ("{equation}", "{equation*}"),
    ("{align}", "{align*}"),
    ("{alignat}", "{alignat*}"),
    ("{gather}", "{gather*}"),
    ("{flalign}", "{flalign*}"),
    ("{multline}", "{multline*}"),
    ("{eqnarray}", "{eqnarray*}"),
    ("{subeqnarray}", "{subeqnarray*}"),
    ("{aligneq}", "{aligneq*}"),
]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def latex_to_pdf(latex_code: str, assets_path: str) -> io.BytesIO:
    """Compile LaTeX code and return the resulting PDF as an in-memory byte stream.

    Args:
        latex_code (str): The full LaTeX document to compile.
        assets_path (str): Directory given to the compiler as an input path. It is
            joined to the directory of this module, so a relative path is resolved
            from there while an absolute path is used unchanged.

    Returns:
        io.BytesIO: The bytes of the compiled PDF.
    """
    module_directory: str = os.path.abspath(os.path.dirname(__file__))
    inputs_directory: str = os.path.join(module_directory, assets_path)
    # NOTE(review): the trailing "" presumably keeps the default TeX search path - confirm
    compiled = build_pdf(latex_code, texinputs=[inputs_directory, ""])
    return io.BytesIO(compiled.data)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def pdf_to_image(
    pdf_stream: io.BytesIO,
    crop: bool = False,
    resize_to: Optional[Tuple[int, int]] = None,
) -> Image:
    """Convert the first page of a PDF stream to an image.

    Args:
        pdf_stream (io.BytesIO): The PDF to convert. The stream is read from its current position.
        crop (bool, optional): Whether to drop the bottom 20% of the page and then
            crop to the bounding box of the remaining content. Defaults to False.
        resize_to (Optional[Tuple[int, int]], optional): Size passed to `resize`
            after the optional crop. Defaults to None (no resizing).

    Returns:
        Image: The image of the first page.

    Raises:
        Exception: If the conversion did not produce any page.
    """
    pages = convert_from_bytes(pdf_stream.read(), first_page=1, last_page=1)
    if not pages:
        raise Exception("PDF to Image conversion failed")

    page = pages[0]

    if crop:
        width, height = page.size
        # Remove pagination (bottom fifth of the page)
        page = page.crop((0, 0, width, height - int(height * 0.2)))
        # Remove white border: on the inverted image the white background is black,
        # so its bounding box is the box of the content
        page = page.crop(ImageOps.invert(page).getbbox())

    if resize_to:
        page = page.resize(resize_to)

    return page
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def strip_unnecessary_latex_parts(latex_code: str) -> str:
    """Strip unnecessary parts of the LaTeX code.

    The following is removed or normalized, in this order:
        - comments (from an unescaped % up to the end of the line)
        - the \\documentclass{...} line and the \\usepackage lines
        - the \\begin{document} delimiter, and \\end{document} with everything after it
        - whitespace: repeated spaces, tabs, spaces around lines and empty lines

    Args:
        latex_code (str): The LaTeX code to simplify.

    Returns:
        str: The simplified LaTeX code, without leading or trailing whitespace.
    """

    # Remove comments. An escaped percent sign (\%) is a literal character, not a comment.
    minimal_latex_code = re.sub(r"(?<!\\)%.*?\n", "\n", latex_code)

    # Remove \documentclass and any \usepackage lines
    # (chained on the comment-free code so that the previous step is not discarded)
    minimal_latex_code = re.sub(r"\\documentclass\{.*?\}\n", "", minimal_latex_code)
    minimal_latex_code = re.sub(r"\\usepackage(\[.*?\])?\{.*?\}\n", "", minimal_latex_code)

    # Remove the \begin{document} delimiter (and the newlines following it),
    # then \end{document} and everything after it
    minimal_latex_code = re.sub(r"\\begin\{document\}\n*", "", minimal_latex_code, flags=re.DOTALL)
    minimal_latex_code = re.sub(r"\\end\{document\}.*", "", minimal_latex_code, flags=re.DOTALL)

    # Ensure \begin{...} is followed by a \n
    minimal_latex_code = re.sub(r"(\\begin\{.*?\}(\[.*?\])?)(?!\n)", r"\1\n", minimal_latex_code)
    # Ensure \end{...} is followed by a \n
    minimal_latex_code = re.sub(r"(\\end\{.*?\})(?!\n)", r"\1\n", minimal_latex_code)

    # Normalize space sequences to a single space globally
    minimal_latex_code = re.sub(r" +", " ", minimal_latex_code)
    # Replace tabs with a single space
    minimal_latex_code = re.sub(r"\t", " ", minimal_latex_code)
    # Remove leading and trailing spaces on each line
    minimal_latex_code = re.sub(r"^[ \t]+|[ \t]+$", "", minimal_latex_code, flags=re.MULTILINE)
    # Remove unnecessary whitespace - multiple empty lines and tabulations
    minimal_latex_code = re.sub(r"\n\s*\n", "\n", minimal_latex_code)

    return minimal_latex_code.strip()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def handle_latex_error(
    e: Exception,
    original_latex_code: str,
    assets_path: str,
    crop: bool,
    resize_to: Optional[Tuple[int, int]],
    num_try_remaining: int,
) -> Tuple[Image, Dict[str, Any]]:
    """Handle a failed LaTeX compilation: either give up or retry with patched code.

    Errors recognized as caused by the LaTeX code itself are re-raised right away.
    Otherwise, while tries remain, two kinds of errors are patched and the patched
    code is compiled again through `latex_to_image` with one try less:
        - content that must be in math mode: the document body is wrapped in $$ ... $$
        - an undefined environment: TEX_INCLUDES is added on the first retry, then a
          package named after the missing environment is added

    Args:
        e (Exception): The error raised by the compilation.
        original_latex_code (str): The LaTeX code that failed to compile.
        assets_path (str): The path to the assets, forwarded to `latex_to_image`.
        crop (bool): Whether to crop the image, forwarded to `latex_to_image`.
        resize_to (Optional[Tuple[int, int]]): The size to resize the image to,
            forwarded to `latex_to_image`.
        num_try_remaining (int): The number of tries remaining.

    Returns:
        The (image, infos) tuple returned by `latex_to_image` for the patched code.

    Raises:
        RuntimeError: If the error should not or could not be fixed. The message is
            the message of `e`, which is set as the cause.
    """
    # Check for error that are caused by the original LaTeX code itself
    # and should not be fixed by trying again with a different code
    # TODO #2346: Make this list more exhaustive
    # All the checks are done on a newline-free copy of the message so that a
    # message wrapped over several lines is still recognized.
    str_e: str = str(e).replace("\n", "")
    # Source of the descriptions:
    # - https://www.overleaf.com/learn/latex/Errors
    # - https://tex.stackexchange.com/
    for error_message in [
        # This error occurs when LaTeX encounters an undefined control sequence
        # Example: \blabla
        r"""Undefined control sequence""",
        # This error appears when you have forgotten to include an \item command.
        # It can also appear from trying to use lists inside a table incorrectly.
        # Example:
        # \begin{itemize}
        # First item without the \item command
        # \end{itemize}
        r"""LaTeX Error: Lonely \item--perhaps a missing list environment.""",
        # This error occurs when a { or } is missing.
        # Example: \sum_{i=1 ^n
        r"""Missing } inserted""",
        r"""Missing { inserted""",
        # This error occurs when LaTeX encounters a double subscript.
        # Example: a_b_c
        r"""Double subscript.""",
        # This error occurs when an environment or $ is added around something that cannot be typeset
        # in the given mode.
        # Example:
        # $
        # \begin{table}
        # ...
        # \end{table}
        # $
        r"""LaTeX Error: Not in outer par mode.""",
        # This error occurs when LaTeX is typesetting a table and detects
        # an alignment character ( & ) where it did not expect to find one
        r"""Extra alignment tab has been changed to \cr.""",
        # Missing control sequence other than $ (which is handled elsewhere).
        # Example: \left( without \right)
        "Missing \\",
        # LaTeX Error: \begin{<env>} on input line <line> ended by \end{<diff_env>}
        # This error occurs when LaTeX encounters an environment that is not properly closed.
        # Example:
        # \begin{table}
        # ...
        # \end{document}
        r"""LaTeX Error: \begin{""",
        # This error occurs when LaTeX encounters a \noalign command in the wrong place.
        # Example:
        # \begin{tabular}
        # \noalign{\hrule}
        # ...
        # \end{tabular}
        r"""Misplaced \noalign""",
        # LaTeX Error: Command <command> already defined.
        # This errors occurs when two packages define the same command.
        # We cannot fix this as we would have to try to find the conflicting packages.
        # Example:
        # \usepackage{algorithmic}
        # \usepackage{algorithmicx}
        r""" already defined.""",
    ]:
        if error_message in str_e:
            raise RuntimeError(str(e)) from e

    if num_try_remaining > 0:
        # Check if the error is easily fixable
        fixed_code: str = original_latex_code

        # Equation not in math mode
        # We correct this error as the prompt might not be obvious if the output should be:
        # <EQUATION_CODE> or $<EQUATION_CODE>$.
        # We only handle this case and that is why we add the $ at the beginning and end of the equation.
        # The missing $ might come from elsewhere but then, it is a problem of the generated code,
        # and not some unclear instructions, so we do not handle it.
        # Error format: "Missing $ inserted" or "<command> allowed only in math mode"
        if "Missing $ inserted" in str_e or " allowed only in math mode" in str_e:
            # Only wrap the content after \begin{document} and before \end{document}
            # $$ ... $$ (display math) is used rather than $ ... $ to avoid inline mode
            fixed_code = re.sub(
                r"(?<=\\begin{document})(.*?)(?=\\end{document})",
                r"$$\1$$",
                fixed_code,
                flags=re.DOTALL,
            )

        # Missing include
        # Missing includes are tolerated as the prompt suggests that it is not necessary to include them,
        # and our TEX_INCLUDES might lack some packages.
        # Error format: "LaTeX Error: Environment <env> undefined."
        # The capture is non-greedy so that it stops at the first " undefined" even if
        # the word appears again later in the (newline-free) message.
        undefined_search = re.search(r"LaTeX Error: Environment (.*?) undefined", str_e)
        if undefined_search:
            # If a package is missing and this is our first retry, then simply include TEX_INCLUDES
            if num_try_remaining == MAX_NUM_TRIES:
                fixed_code = fixed_code.replace(TEX_BEGIN_FILE, TEX_BEGIN_FILE + "\n" + TEX_INCLUDES + "\n")
            if num_try_remaining < MAX_NUM_TRIES or fixed_code == original_latex_code:
                # Here we try to manually solve the missing environment.
                # This is either executed on the second retry or the first if no changes
                # were made in the first retry.
                assert TEX_INCLUDES in fixed_code, "TEX_INCLUDES should be present in the code"
                # TEX_INCLUDES is already present, so we add the missing package
                # Since we cannot know the name of the package that contains the missing environment,
                # we simply hope that they are named the same way.
                env_undefined: str = undefined_search.group(1)

                if f"\\usepackage{{{env_undefined}}}" in fixed_code:
                    # We already tried to include the missing package, but it probably
                    # does not exist, so we raise an error
                    raise RuntimeError(str(e)) from e

                fixed_code = fixed_code.replace(TEX_BEGIN_FILE, TEX_BEGIN_FILE + f"\n\\usepackage{{{env_undefined}}}\n")

        # Try again with the fixed code (if the fixed code is different from the original code)
        if fixed_code != original_latex_code:
            return latex_to_image(
                fixed_code,
                assets_path=assets_path,
                crop=crop,
                resize_to=resize_to,
                num_try_remaining=num_try_remaining - 1,
            )

    # TODO #2346: Ideally we should never reach this point
    # All errors should be either detected as:
    # - generation error: should not be fixed and raised
    # - easily fixable: should be fixed and tried again
    # If we reach this point, it means that none of the above cases were detected.
    raise RuntimeError(str(e)) from e
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def latex_to_image(
    original_latex_code: str,
    assets_path: str,
    crop: bool = False,
    resize_to: Optional[Tuple[int, int]] = None,
    num_try_remaining: int = MAX_NUM_TRIES,
) -> Tuple[Image, Dict[str, Any]]:
    """Compile LaTeX code and render the first page of the result as an image.

    Before compiling, the code is normalized with changes that are not expected to
    break code that already compiles. If the compilation fails, the error is passed
    to `handle_latex_error`, which may retry with more intrusive fixes.

    Args:
        original_latex_code (str): The LaTeX code to convert to an image.
        assets_path (str): The path to the assets.
        crop (bool, optional): Whether to crop the image. Defaults to False.
        resize_to (Optional[Tuple[int, int]], optional): The size to resize the image to. Defaults to None.
        num_try_remaining (int, optional): The number of tries remaining. Defaults to MAX_NUM_TRIES.

    Returns:
        image (Image): The image of the LaTeX code.
        infos (Dict[str, Any]): a dictionary containing:
            image_size (Tuple[int, int]): The size of the image.
            latex_code (str): The modified LaTeX code that was successfully compiled.

    Raises:
        OptionalDependencyNotInstalled: If LaTeX is not installed.
        RuntimeError: If the LaTeX code cannot be converted to an image.
    """
    # Step 0: switch to the unnumbered variant of the numbering environments and drop
    # the \label commands (with the newline that follows, if any), since equation
    # numbers might change the bounding box of the image.
    for numbered_env, unnumbered_env in TEX_REPLACE_NUMBERING:
        original_latex_code = original_latex_code.replace(numbered_env, unnumbered_env)
    original_latex_code = re.sub(r"\\label\{.*?\}[\t ]*(\n)?", "", original_latex_code)

    # Step 1: add the document delimiters if they are missing. The opening one is
    # only added when the code contains neither it nor our \documentclass line.
    has_opening: bool = TEX_BEGIN_DOCUMENT in original_latex_code or TEX_BEGIN_FILE in original_latex_code
    if not has_opening:
        original_latex_code = TEX_BEGIN_DOCUMENT + original_latex_code
    if TEX_END_DOCUMENT not in original_latex_code:
        original_latex_code = original_latex_code + TEX_END_DOCUMENT

    # Step 2.1: make sure our own \documentclass is used, either by replacing the
    # declared one or by adding it at the top of the code.
    documentclass_match = re.search(r"\\documentclass\{(.*)\}", original_latex_code)
    if documentclass_match is None:
        original_latex_code = TEX_BEGIN_FILE + "\n\n" + original_latex_code
    else:
        # The pattern is literal around the group, so the whole match is the declaration
        original_latex_code = original_latex_code.replace(documentclass_match.group(0), TEX_BEGIN_FILE)

    # Step 2.2: add our includes, but only if the code declares no package at all.
    # Adding them next to existing packages might define things twice, which could
    # break code that compiled fine. Missing packages are dealt with later on by
    # handle_latex_error if the compilation fails.
    if re.search(r"\\usepackage\{.*\}", original_latex_code) is None:
        original_latex_code = original_latex_code.replace(TEX_BEGIN_FILE, TEX_BEGIN_FILE + "\n" + TEX_INCLUDES + "\n")

    latex_code: str = original_latex_code
    try:
        pdf_stream = latex_to_pdf(latex_code, assets_path=assets_path)
        image = pdf_to_image(pdf_stream, crop=crop, resize_to=resize_to)
        return image, {"image_size": image.size, "latex_code": latex_code}
    except Exception as e:
        # This exact message is treated as "no LaTeX toolchain available": retrying is pointless
        if (
            isinstance(e, RuntimeError)
            and str(e) == "No available builder could be instantiated. Please make sure LaTeX is installed."
        ):
            raise OptionalDependencyNotInstalled(
                "Optional dependency LaTeX is not installed. "
                "Please install LaTeX and make sure it is available in your PATH."
                "You can install LaTeX on Ubuntu with `sudo apt-get install texlive-full`."
            ) from e
        # Any other failure is either re-raised or retried by handle_latex_error
        return handle_latex_error(e, original_latex_code, assets_path, crop, resize_to, num_try_remaining)
|
|
File without changes
|