crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (499)
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
  2. crfm_helm-0.5.1.dist-info/RECORD +654 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  11. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  12. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  13. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  14. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  15. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  16. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  17. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  18. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  19. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  20. helm/benchmark/adaptation/request_state.py +6 -1
  21. helm/benchmark/adaptation/scenario_state.py +6 -2
  22. helm/benchmark/annotation/annotator.py +43 -0
  23. helm/benchmark/annotation/annotator_factory.py +61 -0
  24. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  25. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  26. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  27. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  28. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  29. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  30. helm/benchmark/annotation_executor.py +124 -0
  31. helm/benchmark/augmentations/data_augmenter.py +0 -2
  32. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  33. helm/benchmark/augmentations/perturbation.py +25 -3
  34. helm/benchmark/augmentations/perturbation_description.py +1 -1
  35. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  36. helm/benchmark/augmentations/test_perturbation.py +41 -7
  37. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  38. helm/benchmark/config_registry.py +7 -1
  39. helm/benchmark/executor.py +46 -16
  40. helm/benchmark/huggingface_registration.py +20 -7
  41. helm/benchmark/metrics/basic_metrics.py +169 -664
  42. helm/benchmark/metrics/bbq_metrics.py +3 -4
  43. helm/benchmark/metrics/bias_metrics.py +6 -6
  44. helm/benchmark/metrics/classification_metrics.py +11 -8
  45. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  46. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  47. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  48. helm/benchmark/metrics/common_metric_specs.py +167 -0
  49. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  50. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  51. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  52. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  53. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  54. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  55. helm/benchmark/metrics/efficiency_metrics.py +213 -0
  56. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  57. helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
  58. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  59. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  60. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  61. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  62. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  63. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  64. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  65. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  66. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  68. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  69. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  70. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  71. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  72. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  73. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  74. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  75. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  76. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  77. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  78. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  79. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  80. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  81. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  82. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  83. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  84. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  85. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  86. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  87. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  88. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  89. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  90. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  91. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  92. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  93. helm/benchmark/metrics/metric.py +93 -172
  94. helm/benchmark/metrics/metric_name.py +0 -1
  95. helm/benchmark/metrics/metric_service.py +16 -0
  96. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  97. helm/benchmark/metrics/ranking_metrics.py +2 -2
  98. helm/benchmark/metrics/reference_metric.py +148 -0
  99. helm/benchmark/metrics/summac/model_summac.py +0 -2
  100. helm/benchmark/metrics/summarization_metrics.py +2 -2
  101. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  102. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  103. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  104. helm/benchmark/metrics/test_metric.py +2 -2
  105. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  106. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  107. helm/benchmark/metrics/toxicity_utils.py +23 -0
  108. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  109. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  110. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  111. helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
  112. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  113. helm/benchmark/model_deployment_registry.py +74 -0
  114. helm/benchmark/model_metadata_registry.py +41 -1
  115. helm/benchmark/multi_gpu_runner.py +133 -0
  116. helm/benchmark/presentation/create_plots.py +8 -7
  117. helm/benchmark/presentation/run_display.py +26 -10
  118. helm/benchmark/presentation/schema.py +15 -40
  119. helm/benchmark/presentation/summarize.py +119 -79
  120. helm/benchmark/presentation/table.py +8 -8
  121. helm/benchmark/presentation/test_contamination.py +2 -2
  122. helm/benchmark/presentation/test_run_entry.py +1 -2
  123. helm/benchmark/presentation/test_summarize.py +3 -3
  124. helm/benchmark/run.py +54 -26
  125. helm/benchmark/run_expander.py +205 -35
  126. helm/benchmark/run_spec.py +93 -0
  127. helm/benchmark/run_spec_factory.py +163 -0
  128. helm/benchmark/run_specs/__init__.py +0 -0
  129. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  130. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  131. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  132. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  133. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  134. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  135. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  136. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  137. helm/benchmark/run_specs/vlm_run_specs.py +757 -0
  138. helm/benchmark/runner.py +51 -57
  139. helm/benchmark/runner_config_registry.py +21 -0
  140. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  141. helm/benchmark/scenarios/bold_scenario.py +2 -2
  142. helm/benchmark/scenarios/code_scenario.py +1 -0
  143. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  144. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  145. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  146. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  147. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  148. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  149. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  150. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  151. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  152. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  153. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  154. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  155. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  156. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  157. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  158. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  159. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  160. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  162. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  163. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  164. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  165. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  166. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  167. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  168. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  169. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  170. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  171. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  172. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  173. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  174. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  175. helm/benchmark/scenarios/math_scenario.py +19 -2
  176. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  177. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  178. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  179. helm/benchmark/scenarios/scenario.py +4 -0
  180. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  181. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  182. helm/benchmark/scenarios/test_scenario.py +6 -3
  183. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  184. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  185. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  186. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  187. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  188. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  189. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  190. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  191. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  192. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  193. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
  194. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  198. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  199. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  200. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  201. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  202. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  203. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  204. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  205. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  206. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  207. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  208. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  209. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  210. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  211. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  212. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  213. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  214. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  215. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  216. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  217. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  218. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  219. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
  220. helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
  221. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  222. helm/benchmark/server.py +24 -1
  223. helm/benchmark/slurm_runner.py +70 -49
  224. helm/benchmark/static/benchmarking.js +1 -1
  225. helm/benchmark/static/schema_classic.yaml +258 -1066
  226. helm/benchmark/static/schema_image2structure.yaml +304 -0
  227. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  228. helm/benchmark/static/schema_lite.yaml +2 -227
  229. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  230. helm/benchmark/static/schema_unitxt.yaml +428 -0
  231. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  232. helm/benchmark/static/schema_vlm.yaml +823 -0
  233. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  234. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  235. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  236. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  237. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  238. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  239. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  240. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  241. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  242. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  243. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  244. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  245. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  246. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  247. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  248. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  249. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  250. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  251. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  252. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  253. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  254. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  255. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  256. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  257. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  258. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  259. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  260. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  261. helm/benchmark/static_build/config.js +4 -0
  262. helm/benchmark/static_build/index.html +20 -0
  263. helm/benchmark/test_data_preprocessor.py +3 -3
  264. helm/benchmark/test_run_expander.py +1 -1
  265. helm/benchmark/window_services/ai21_window_service.py +22 -33
  266. helm/benchmark/window_services/cohere_window_service.py +1 -63
  267. helm/benchmark/window_services/default_window_service.py +2 -44
  268. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  269. helm/benchmark/window_services/ice_window_service.py +0 -34
  270. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  271. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  272. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  273. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  274. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  275. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  276. helm/benchmark/window_services/local_window_service.py +21 -4
  277. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  278. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  279. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  280. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  281. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  282. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  283. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  284. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  285. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  286. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  287. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  288. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  289. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  290. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  291. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  292. helm/benchmark/window_services/test_utils.py +3 -2
  293. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  294. helm/benchmark/window_services/window_service.py +42 -0
  295. helm/benchmark/window_services/window_service_factory.py +4 -1
  296. helm/benchmark/window_services/yalm_window_service.py +0 -27
  297. helm/clients/__init__.py +0 -0
  298. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  299. helm/clients/aleph_alpha_client.py +112 -0
  300. helm/{proxy/clients → clients}/anthropic_client.py +233 -18
  301. helm/{proxy/clients → clients}/auto_client.py +59 -31
  302. helm/clients/bedrock_client.py +128 -0
  303. helm/clients/bedrock_utils.py +72 -0
  304. helm/{proxy/clients → clients}/client.py +65 -7
  305. helm/clients/clip_score_client.py +49 -0
  306. helm/clients/clip_scorers/__init__.py +0 -0
  307. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  308. helm/clients/clip_scorers/clip_scorer.py +50 -0
  309. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  310. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  311. helm/clients/gcs_client.py +82 -0
  312. helm/{proxy/clients → clients}/google_client.py +5 -5
  313. helm/clients/google_translate_client.py +35 -0
  314. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  315. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  316. helm/clients/image_generation/__init__.py +0 -0
  317. helm/clients/image_generation/adobe_vision_client.py +78 -0
  318. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  319. helm/clients/image_generation/cogview2/__init__.py +0 -0
  320. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  321. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  322. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  323. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  324. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  325. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  326. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  327. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  328. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  329. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  330. helm/clients/image_generation/cogview2_client.py +191 -0
  331. helm/clients/image_generation/dalle2_client.py +192 -0
  332. helm/clients/image_generation/dalle3_client.py +108 -0
  333. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  334. helm/clients/image_generation/dalle_mini/data.py +442 -0
  335. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  336. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  337. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  338. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  339. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  340. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  341. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  342. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  343. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  344. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  345. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  346. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  347. helm/clients/image_generation/dalle_mini_client.py +190 -0
  348. helm/clients/image_generation/deep_floyd_client.py +78 -0
  349. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  350. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  351. helm/clients/image_generation/lexica_client.py +86 -0
  352. helm/clients/image_generation/mindalle/__init__.py +0 -0
  353. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  354. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  355. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  356. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  357. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  358. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  359. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  360. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  361. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  362. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  363. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  364. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  365. helm/clients/image_generation/mindalle_client.py +115 -0
  366. helm/clients/image_generation/nudity_check_client.py +64 -0
  367. helm/clients/image_generation/together_image_generation_client.py +111 -0
  368. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  369. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  370. helm/clients/mistral_client.py +134 -0
  371. helm/clients/moderation_api_client.py +109 -0
  372. helm/clients/open_lm_client.py +43 -0
  373. helm/clients/openai_client.py +301 -0
  374. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  375. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  376. helm/clients/simple_client.py +64 -0
  377. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  378. helm/clients/test_client.py +100 -0
  379. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  380. helm/clients/test_simple_client.py +19 -0
  381. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  382. helm/{proxy/clients → clients}/together_client.py +104 -73
  383. helm/clients/vertexai_client.py +400 -0
  384. helm/clients/vision_language/__init__.py +0 -0
  385. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  386. helm/clients/vision_language/huggingface_vlm_client.py +111 -0
  387. helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
  388. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  389. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  390. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  391. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  392. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  393. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  394. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  395. helm/clients/vision_language/open_flamingo_client.py +155 -0
  396. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  397. helm/clients/vllm_client.py +46 -0
  398. helm/common/cache.py +16 -4
  399. helm/common/cache_backend_config.py +47 -0
  400. helm/common/clip_score_request.py +41 -0
  401. helm/common/file_caches/__init__.py +0 -0
  402. helm/common/file_caches/file_cache.py +16 -0
  403. helm/common/file_caches/local_file_cache.py +61 -0
  404. helm/common/file_caches/test_local_file_cache.py +25 -0
  405. helm/common/file_upload_request.py +27 -0
  406. helm/common/general.py +1 -1
  407. helm/common/image_generation_parameters.py +25 -0
  408. helm/common/images_utils.py +33 -3
  409. helm/common/key_value_store.py +35 -4
  410. helm/common/media_object.py +13 -0
  411. helm/common/moderations_api_request.py +71 -0
  412. helm/common/mongo_key_value_store.py +3 -3
  413. helm/common/multimodal_request_utils.py +31 -0
  414. helm/common/nudity_check_request.py +29 -0
  415. helm/common/request.py +15 -17
  416. helm/common/test_general.py +6 -0
  417. helm/common/tokenization_request.py +1 -1
  418. helm/config/model_deployments.yaml +1159 -538
  419. helm/config/model_metadata.yaml +868 -41
  420. helm/config/tokenizer_configs.yaml +149 -43
  421. helm/proxy/accounts.py +31 -4
  422. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  423. helm/proxy/critique/model_critique_client.py +8 -6
  424. helm/proxy/example_queries.py +29 -17
  425. helm/proxy/server.py +70 -5
  426. helm/proxy/services/remote_service.py +31 -0
  427. helm/proxy/services/server_service.py +96 -16
  428. helm/proxy/services/service.py +30 -0
  429. helm/proxy/services/test_remote_service.py +4 -3
  430. helm/proxy/services/test_service.py +0 -12
  431. helm/proxy/test_accounts.py +32 -0
  432. helm/proxy/token_counters/auto_token_counter.py +37 -37
  433. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  434. helm/proxy/token_counters/token_counter.py +3 -5
  435. helm/tokenizers/__init__.py +0 -0
  436. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  437. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  438. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  439. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  440. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  441. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  442. helm/tokenizers/simple_tokenizer.py +33 -0
  443. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  444. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  445. helm/tokenizers/test_simple_tokenizer.py +33 -0
  446. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  447. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  448. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  449. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  450. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  451. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  452. helm/benchmark/run_specs.py +0 -2762
  453. helm/benchmark/test_model_deployment_definition.py +0 -92
  454. helm/benchmark/test_model_properties.py +0 -1570
  455. helm/benchmark/vlm_run_specs.py +0 -97
  456. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  457. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  458. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  459. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  460. helm/benchmark/window_services/t511b_window_service.py +0 -30
  461. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  462. helm/benchmark/window_services/ul2_window_service.py +0 -30
  463. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  464. helm/common/cache_utils.py +0 -14
  465. helm/proxy/clients/aleph_alpha_client.py +0 -95
  466. helm/proxy/clients/goose_ai_client.py +0 -99
  467. helm/proxy/clients/microsoft_client.py +0 -180
  468. helm/proxy/clients/openai_client.py +0 -206
  469. helm/proxy/clients/simple_client.py +0 -60
  470. helm/proxy/clients/test_client.py +0 -49
  471. helm/proxy/clients/vertexai_client.py +0 -115
  472. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  473. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  474. helm/proxy/token_counters/free_token_counter.py +0 -12
  475. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  476. helm/proxy/token_counters/openai_token_counter.py +0 -22
  477. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  478. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  479. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  480. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  481. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  482. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
  483. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  484. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  485. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  486. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  487. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  488. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  489. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  490. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  491. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  492. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  493. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  494. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  495. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  496. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  497. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  498. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  499. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -0,0 +1,347 @@
1
+ from typing import Optional, Tuple, List, Dict, Any
2
+
3
+ import io
4
+ import os
5
+ import re
6
+
7
+ from helm.common.optional_dependencies import handle_module_not_found_error, OptionalDependencyNotInstalled
8
+
9
+ try:
10
+ from latex import build_pdf
11
+ from pdf2image import convert_from_bytes
12
+ from PIL import ImageOps
13
+ from PIL.Image import Image
14
+ except ModuleNotFoundError as e:
15
+ handle_module_not_found_error(e, suggestions=["image2structure"])
16
+
17
# LaTeX preamble injected after \documentclass when the input declares no packages.
# A full LaTeX distribution is required for all of these packages to resolve.
# NOTE: graphicx and amsmath appear twice below. The string is kept byte-for-byte
# because it is inserted verbatim into the compiled code and checked for by
# substring elsewhere in this module.
TEX_INCLUDES = r"""
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{graphicx}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{xcolor}
\usepackage{algorithm}
\usepackage{algorithmicx}
\usepackage{algpseudocode}
\usepackage{listings}
\usepackage{stfloats}
\usepackage{epstopdf}
\usepackage{pgfplots}
\usepackage{tikz}
\usepackage{tikz-cd}
\usepackage{tikz-qtree}
\usepackage{tikz-dependency}
\usepackage{tikz-3dplot}
\usepackage{tikz-network}
\usepackage[flushleft]{threeparttable}
\usepackage{adjustbox}
"""

# LaTeX delimiters (each defined exactly once)
TEX_BEGIN_FILE = r"""\documentclass{article}"""
TEX_BEGIN_DOCUMENT = r"""\begin{document}"""
TEX_END_DOCUMENT = r"""\end{document}"""

# Number of times to try to fix the LaTeX code
MAX_NUM_TRIES: int = 3

# (numbered environment, unnumbered equivalent) pairs applied with str.replace.
# Each pair is listed once: a second pass of the same pair would be a no-op,
# since the starred form no longer contains the unstarred pattern.
TEX_REPLACE_NUMBERING: List[Tuple[str, str]] = [
    ("{equation}", "{equation*}"),
    ("{align}", "{align*}"),
    ("{alignat}", "{alignat*}"),
    ("{gather}", "{gather*}"),
    ("{flalign}", "{flalign*}"),
    ("{multline}", "{multline*}"),
    ("{eqnarray}", "{eqnarray*}"),
    ("{subeqnarray}", "{subeqnarray*}"),
    ("{aligneq}", "{aligneq*}"),
]
65
+
66
+
67
def latex_to_pdf(latex_code: str, assets_path: str) -> io.BytesIO:
    """Compile LaTeX source and return the resulting PDF as an in-memory stream.

    Args:
        latex_code (str): The LaTeX source to compile.
        assets_path (str): Path of the assets directory, joined onto the
            directory that contains this module.

    Returns:
        io.BytesIO: A byte stream wrapping the compiled PDF data.
    """
    module_dir = os.path.abspath(os.path.dirname(__file__))
    assets_dir = os.path.join(module_dir, assets_path)
    # The assets directory is passed to the builder as a TeX input path,
    # followed by an empty entry.
    compiled = build_pdf(latex_code, texinputs=[assets_dir, ""])
    return io.BytesIO(compiled.data)
72
+
73
+
74
def pdf_to_image(
    pdf_stream: io.BytesIO,
    crop: bool = False,
    resize_to: Optional[Tuple[int, int]] = None,
) -> Image:
    """Render the first page of a PDF stream as an image.

    Args:
        pdf_stream (io.BytesIO): Stream holding the PDF bytes; it is read from
            its current position.
        crop (bool, optional): If True, drop the bottom 20% of the page (where
            the pagination sits) and then trim the surrounding white border.
        resize_to (Optional[Tuple[int, int]], optional): If given, the image is
            resized to this size after the optional crop.

    Returns:
        Image: The rendered (and optionally cropped / resized) first page.

    Raises:
        Exception: If the conversion yields no page.
    """
    pages = convert_from_bytes(pdf_stream.read(), first_page=1, last_page=1)
    if not pages:
        raise Exception("PDF to Image conversion failed")

    page = pages[0]

    if crop:
        width, height = page.size
        # Remove pagination: keep only the top 80% of the page
        page = page.crop((0, 0, width, height - int(height * 0.2)))
        # Remove white border: the bounding box is computed on the inverted image
        content_box = ImageOps.invert(page).getbbox()
        page = page.crop(content_box)

    if resize_to:
        page = page.resize(resize_to)

    return page
97
+
98
+
99
def strip_unnecessary_latex_parts(latex_code: str) -> str:
    """Strip unnecessary parts of the LaTeX code.

    Comments, the document class line, package includes and the document
    delimiters are removed, then whitespace is normalized.

    Args:
        latex_code (str): The LaTeX source to minimize.

    Returns:
        str: The minimized LaTeX code, without leading or trailing whitespace.
    """

    # Remove comments. An escaped percent sign (\%) is literal text, not a comment,
    # so it is left untouched.
    minimal_latex_code = re.sub(r"(?<!\\)%.*?\n", "\n", latex_code)

    # Remove \documentclass and any \usepackage lines.
    # This must keep working on `minimal_latex_code`; restarting from `latex_code`
    # would silently undo the comment removal above.
    minimal_latex_code = re.sub(r"\\documentclass\{.*?\}\n", "", minimal_latex_code)
    minimal_latex_code = re.sub(r"\\usepackage(\[.*?\])?\{.*?\}\n", "", minimal_latex_code)

    # Remove the \begin{document} marker (with the newlines that follow it),
    # and \end{document} together with everything after it
    minimal_latex_code = re.sub(r"\\begin\{document\}\n*", "", minimal_latex_code, flags=re.DOTALL)
    minimal_latex_code = re.sub(r"\\end\{document\}.*", "", minimal_latex_code, flags=re.DOTALL)

    # Ensure \begin{...} is followed by a \n
    minimal_latex_code = re.sub(r"(\\begin\{.*?\}(\[.*?\])?)(?!\n)", r"\1\n", minimal_latex_code)
    # Ensure \end{...} is followed by a \n
    minimal_latex_code = re.sub(r"(\\end\{.*?\})(?!\n)", r"\1\n", minimal_latex_code)

    # Normalize space sequences to a single space globally
    minimal_latex_code = re.sub(r" +", " ", minimal_latex_code)
    # Replace tabs with a single space
    minimal_latex_code = re.sub(r"\t", " ", minimal_latex_code)
    # Remove leading and trailing spaces on each line
    minimal_latex_code = re.sub(r"^[ \t]+|[ \t]+$", "", minimal_latex_code, flags=re.MULTILINE)
    # Remove unnecessary whitespace - multiple empty lines and tabulations
    minimal_latex_code = re.sub(r"\n\s*\n", "\n", minimal_latex_code)

    return minimal_latex_code.strip()
128
+
129
+
130
def handle_latex_error(
    e: Exception,
    original_latex_code: str,
    assets_path: str,
    crop: bool,
    resize_to: Optional[Tuple[int, int]],
    num_try_remaining: int,
) -> Tuple[Image, Dict[str, Any]]:
    """Decide whether a failed LaTeX compilation can be repaired, and retry if so.

    Errors caused by the LaTeX code itself are re-raised as RuntimeError.
    Two kinds of errors are repaired while tries remain: content that is not
    in math mode, and an undefined environment (missing package).

    Args:
        e (Exception): The error raised while compiling or rendering.
        original_latex_code (str): The LaTeX code that failed.
        assets_path (str): Forwarded to latex_to_image on retry.
        crop (bool): Forwarded to latex_to_image on retry.
        resize_to (Optional[Tuple[int, int]]): Forwarded to latex_to_image on retry.
        num_try_remaining (int): Number of retries still allowed.

    Returns:
        The result of latex_to_image on the repaired code.

    Raises:
        RuntimeError: If the error is not fixable, no tries remain, or no
            repair changed the code.
    """
    # Check for errors that are caused by the original LaTeX code itself
    # and should not be fixed by trying again with a different code.
    # Newlines are removed so that substring checks are not defeated by line breaks.
    # TODO #2346: Make this list more exhaustive
    str_e: str = str(e).replace("\n", "")
    # Source of the descriptions:
    # - https://www.overleaf.com/learn/latex/Errors
    # - https://tex.stackexchange.com/
    unfixable_error_messages = [
        # This error occurs when LaTeX encounters an undefined control sequence
        # Example: \blabla
        r"""Undefined control sequence""",
        # This error appears when you have forgotten to include an \item command.
        # It can also appear from trying to use lists inside a table incorrectly.
        # Example:
        # \begin{itemize}
        # First item without the \item command
        # \end{itemize}
        r"""LaTeX Error: Lonely \item--perhaps a missing list environment.""",
        # This error occurs when a { or } is missing.
        # Example: \sum_{i=1 ^n
        r"""Missing } inserted""",
        r"""Missing { inserted""",
        # This error occurs when LaTeX encounters a double subscript.
        # Example: a_b_c
        r"""Double subscript.""",
        # This error occurs when an environment or $ is added around something that cannot be typeset
        # in the given mode.
        # Example:
        # $
        # \begin{table}
        # ...
        # \end{table}
        # $
        r"""LaTeX Error: Not in outer par mode.""",
        # This error occurs when LaTeX is typesetting a table and detects
        # an alignment character ( & ) where it did not expect to find one
        r"""Extra alignment tab has been changed to \cr.""",
        # Missing control sequence other than $ (which is handled elsewhere).
        # Example: \left( without
        "Missing \\",
        # LaTeX Error: \begin{<env>} on input line <line> ended by \end{<diff_env>}
        # This error occurs when LaTeX encounters an environment that is not properly closed.
        # Example:
        # \begin{table}
        # ...
        # \end{document}
        r"""LaTeX Error: \begin{""",
        # This error occurs when LaTeX encounters a \noalign command in the wrong place.
        # Example:
        # \begin{tabular}
        # \noalign{\hrule}
        # ...
        # \end{tabular}
        r"""Misplaced \noalign""",
        # LaTeX Error: Command <command> already defined.
        # This error occurs when two packages define the same command.
        # We cannot fix this as we would have to try to find the conflicting packages.
        # Example:
        # \usepackage{algorithmic}
        # \usepackage{algorithmicx}
        r""" already defined.""",
    ]
    if any(error_message in str_e for error_message in unfixable_error_messages):
        raise RuntimeError(str(e)) from e

    if num_try_remaining > 0:
        # Check if the error is easily fixable
        fixed_code: str = original_latex_code

        # Equation not in math mode
        # We correct this error as the prompt might not be obvious if the output should be:
        # <EQUATION_CODE> or $<EQUATION_CODE>$.
        # We only handle this case and that is why we add the $ at the beginning and end of the equation.
        # The missing $ might come from elsewhere but then, it is a problem of the generated code,
        # and not some unclear instructions, so we do not handle it.
        # Error format: "Missing $ inserted" or "<command> allowed only in math mode"
        # Both checks use the flattened message so a line break cannot hide the error.
        if "Missing $ inserted" in str_e or " allowed only in math mode" in str_e:
            # Only wrap the content after \begin{document} and before \end{document}.
            # $$...$$ is used rather than $...$ to avoid inline mode.
            fixed_code = re.sub(
                r"(?<=\\begin{document})(.*?)(?=\\end{document})",
                r"$$\1$$",
                fixed_code,
                flags=re.DOTALL,
            )

        # Missing include
        # Missing includes are tolerated as the prompt suggests that it is not necessary to include them,
        # and our TEX_INCLUDES might lack some packages.
        # Error format: "LaTeX Error: Environment <env> undefined."
        # The match is non-greedy: the message was flattened to a single line, so a greedy
        # group could run up to a later " undefined" and capture a bogus name.
        undefined_search = re.search(r"LaTeX Error: Environment (.*?) undefined", str_e)
        if undefined_search:
            # If a package is missing and this is our first retry, then simply include TEX_INCLUDES
            if num_try_remaining == MAX_NUM_TRIES:
                fixed_code = fixed_code.replace(TEX_BEGIN_FILE, TEX_BEGIN_FILE + "\n" + TEX_INCLUDES + "\n")
            if num_try_remaining < MAX_NUM_TRIES or fixed_code == original_latex_code:
                # Here we try to manually solve the missing environment.
                # This is either executed on the second retry or the first if no changes
                # were made in the first retry.
                assert TEX_INCLUDES in fixed_code, "TEX_INCLUDES should be present in the code"
                # TEX_INCLUDES is already present, so we add the missing package
                # Since we cannot know the name of the package that contains the missing environment,
                # we simply hope that they are named the same way.
                env_undefined: str = undefined_search.group(1)

                if f"\\usepackage{{{env_undefined}}}" in fixed_code:
                    # We already tried to include the missing package, but it probably
                    # does not exist, so we raise an error
                    raise RuntimeError(str(e)) from e

                fixed_code = fixed_code.replace(TEX_BEGIN_FILE, TEX_BEGIN_FILE + f"\n\\usepackage{{{env_undefined}}}\n")

        # Try again with the fixed code (if the fixed code is different from the original code)
        if fixed_code != original_latex_code:
            return latex_to_image(
                fixed_code,
                assets_path=assets_path,
                crop=crop,
                resize_to=resize_to,
                num_try_remaining=num_try_remaining - 1,
            )

    # TODO #2346: Ideally we should never reach this point
    # All errors should be either detected as:
    # - generation error: should not be fixed and raised
    # - easily fixable: should be fixed and tried again
    # If we reach this point, it means that none of the above cases were detected.
    raise RuntimeError(str(e)) from e
265
+
266
+
267
def latex_to_image(
    original_latex_code: str,
    assets_path: str,
    crop: bool = False,
    resize_to: Optional[Tuple[int, int]] = None,
    num_try_remaining: int = MAX_NUM_TRIES,
) -> Tuple[Image, Dict[str, Any]]:
    """Convert a LaTeX code to an image.

    Args:
        original_latex_code (str): The LaTeX code to convert to an image.
        assets_path (str): The path to the assets.
        crop (bool, optional): Whether to crop the image. Defaults to False.
        resize_to (Optional[Tuple[int, int]], optional): The size to resize the image to. Defaults to None.
        num_try_remaining (int, optional): The number of tries remaining. Defaults to MAX_NUM_TRIES.

    Returns:
        image (Image): The image of the LaTeX code.
        infos (Dict[str, Any]): a dictionary containing:
            image_size (Tuple[int, int]): The size of the image.
            latex_code (str): The modified LaTeX code that was successfully compiled.

    Raises:
        OptionalDependencyNotInstalled: If LaTeX is not installed.
        RuntimeError: If the LaTeX code cannot be converted to an image.
    """
    # Basic LaTeX processing
    # These changes cannot break the original LaTeX code.
    # Other processing will be done in the handle_latex_error function
    # but it might break the original LaTeX code, so it is only applied
    # if the original LaTeX code does not compile.

    # 0. Remove all environments that might cause numbering
    # This is important because the numbering of the equations might change
    # the bounding box of the image.
    for numbered, unnumbered in TEX_REPLACE_NUMBERING:
        original_latex_code = original_latex_code.replace(numbered, unnumbered)
    # Also removes all \label commands
    # If it is followed by a \n, it should be removed as well
    original_latex_code = re.sub(r"\\label\{.*?\}[\t ]*(\n)?", "", original_latex_code)

    # Matches \documentclass{...} and \documentclass[options]{...}.
    # The groups stop at the first closing bracket/brace so that text following the
    # declaration on the same line is never captured.
    documentclass_pattern = r"\\documentclass(\[[^\]]*\])?\{[^}]*\}"
    has_documentclass = re.search(documentclass_pattern, original_latex_code) is not None

    # 1. Add begin/end document if not present
    if TEX_BEGIN_DOCUMENT not in original_latex_code and not has_documentclass:
        original_latex_code = TEX_BEGIN_DOCUMENT + original_latex_code
    if TEX_END_DOCUMENT not in original_latex_code:
        original_latex_code = original_latex_code + TEX_END_DOCUMENT

    # 2. Add preamble
    # 2.1. Replace \documentclass if present to make sure we use our own
    if has_documentclass:
        # A callable replacement is used so the backslash in TEX_BEGIN_FILE is not
        # interpreted as a regex escape.
        original_latex_code = re.sub(documentclass_pattern, lambda _match: TEX_BEGIN_FILE, original_latex_code)
    else:
        # If there is no \documentclass, we add our own
        original_latex_code = TEX_BEGIN_FILE + "\n\n" + original_latex_code

    # 2.2. Add includes. In this first step, we only add includes if none are present.
    # We do this because if some are present, we might define them twice which can cause errors
    # and this section should not make the original LaTeX code fail if it was compilable.
    # If there are missing packages, handle_latex_error will add TEX_INCLUDES after the document class,
    # which might define some packages twice, but often solves the problem.
    # Packages loaded with options (\usepackage[opt]{pkg}) count as present too.
    if not re.search(r"\\usepackage(\[.*?\])?\{.*?\}", original_latex_code):
        original_latex_code = original_latex_code.replace(TEX_BEGIN_FILE, TEX_BEGIN_FILE + "\n" + TEX_INCLUDES + "\n")

    latex_code: str = original_latex_code
    try:
        pdf_stream = latex_to_pdf(latex_code, assets_path=assets_path)
        image = pdf_to_image(pdf_stream, crop=crop, resize_to=resize_to)
        return image, {"image_size": image.size, "latex_code": latex_code}
    except RuntimeError as e:
        # This exact message is the one checked for to detect that no LaTeX builder is available.
        if str(e) == "No available builder could be instantiated. Please make sure LaTeX is installed.":
            raise OptionalDependencyNotInstalled(
                "Optional dependency LaTeX is not installed. "
                "Please install LaTeX and make sure it is available in your PATH. "
                "You can install LaTeX on Ubuntu with `sudo apt-get install texlive-full`."
            ) from e
        return handle_latex_error(e, original_latex_code, assets_path, crop, resize_to, num_try_remaining)
    except Exception as e:
        return handle_latex_error(e, original_latex_code, assets_path, crop, resize_to, num_try_remaining)
@@ -0,0 +1,84 @@
1
+ from typing import Tuple, Dict, Any
2
+
3
+ from helm.common.optional_dependencies import handle_module_not_found_error
4
+
5
+ try:
6
+ from selenium import webdriver
7
+ import selenium.common.exceptions
8
+ except ModuleNotFoundError as e:
9
+ handle_module_not_found_error(e, suggestions=["image2structure"])
10
+
11
+
12
def init_driver(url: str, resolution: Tuple[int, int] = (1920, 1080)) -> webdriver.Chrome:
    """Initialize the WebDriver and load the given URL

    Args:
        url (str): The URL of the website. Usually "http://localhost:{port}".
        resolution (tuple[int, int], optional): The resolution of the WebDriver. Defaults to (1920, 1080).

    Returns:
        webdriver.Chrome: The Chrome WebDriver, with `url` already loaded

    Raises:
        Exception: Whatever the driver raises while starting or loading `url`.
            If loading fails, the driver is quit before the error propagates.
    """
    options = webdriver.ChromeOptions()
    options.add_argument(f"--window-size={resolution[0]},{resolution[1]}")
    options.add_argument("--headless")  # Optional: run in headless mode
    options.add_argument("--no-sandbox")  # Optional: for certain environments
    options.add_argument("--disable-dev-shm-usage")  # Optional: overcome limited resource problems
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
    except Exception:
        # The caller never receives the driver in this case, so it must be
        # released here or the browser process would be leaked.
        driver.quit()
        raise
    return driver
30
+
31
+
32
def close_driver(driver: webdriver.Chrome):
    """Shut down the given WebDriver by calling its `quit` method.

    Args:
        driver (webdriver.Chrome): The Chrome WebDriver to shut down
    """
    driver.quit()
39
+
40
+
41
class ScreenshotOptions:
    """A class to store the parameters for taking a screenshot"""

    # The resolution of the screenshot
    resolution: Tuple[int, int] = (1920, 1080)

    # The delay between each action in milliseconds
    delay_between_each_action_ms: int = 1000
49
+
50
+
51
def save_random_screenshot(path: str, port: int, options: ScreenshotOptions = ScreenshotOptions()) -> Dict[str, Any]:
    """Save a screenshot of the page served at http://localhost:{port}

    Args:
        path (str): The path to save the screenshot
        port (int): The port to use for the website.
        options (ScreenshotOptions, optional): The options to use for taking the screenshot.
            Only `options.resolution` is read here. Defaults to ScreenshotOptions().

    Returns:
        infos (Dict[str, Any]): Additional information about the screenshot.
            Contains the key "html" with the page source.

    Raises:
        ValueError: If the path does not end with .png
        Exception: If the driver cannot be initialized
    """
    if not path.endswith(".png"):
        raise ValueError("The path should end with .png")

    driver: webdriver.Chrome
    try:
        driver = init_driver(url=f"http://localhost:{port}", resolution=options.resolution)
    except selenium.common.exceptions.WebDriverException as e:
        # Chain the cause so the original traceback is preserved
        raise Exception(f"Failed to initialize the driver: {e}") from e
    except Exception as e:
        raise Exception(f"An unknown error occurred while initializing the driver: {e}") from e

    try:
        # Extract the HTML of the page
        html = driver.page_source

        # Take a screenshot of the page
        driver.save_screenshot(path)
    finally:
        # Always release the browser, even if reading the page or saving the screenshot fails
        close_driver(driver)

    return {"html": html}
@@ -0,0 +1,182 @@
1
+ import subprocess
2
+ import os
3
+ import signal
4
+ from typing import Optional
5
+ import time
6
+ import socket
7
+ import threading
8
+
9
+ from helm.common.hierarchical_logger import hlog
10
+
11
+
12
class JekyllServer:
    """A class to start and stop a Jekyll server in a separate process."""

    def __init__(self, repo_path: str, port: int, verbose: bool = False):
        """Store the configuration; no process is started until `start` is called.

        Args:
            repo_path (str): Path of the repository to serve.
            port (int): Port the server should listen on.
            verbose (bool, optional): Whether to log progress with hlog. Defaults to False.
        """
        self.repo_path: str = repo_path
        self.verbose: bool = verbose
        self.port: int = port
        self.process: Optional[subprocess.Popen] = None
        self.success: bool = False  # Shared flag to indicate if the server started successfully

    def __del__(self):
        """Stop the server and free the port if something is still listening on it."""
        self.stop()
        if JekyllServer.is_port_in_use(self.port):
            if self.verbose:
                hlog(f"Port {self.port} is in use. Attempting to free it.")
            self.kill_process_using_port(self.port)
        if self.verbose:
            hlog("JekyllServer object deleted.")

    def setup_gemfile(self):
        """Make sure the repository has a Gemfile that mentions jekyll.

        Raises:
            OSError: If the default Gemfile cannot be copied, or the Gemfile
                cannot be read or written.
        """
        import shutil

        gemfile_path = os.path.join(self.repo_path, "Gemfile")

        # Check if Gemfile exists, if not, copy Gemfile.default to Gemfile
        if not os.path.exists(gemfile_path):
            default_gemfile_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "Gemfile.default")
            # Copied without a shell so paths containing spaces or shell metacharacters are safe
            shutil.copy(default_gemfile_path, gemfile_path)
            if self.verbose:
                hlog("Copied Gemfile.default to Gemfile")
            return

        # Gemfile exists, check if it has the jekyll gem
        with open(gemfile_path) as file:
            gemfile_content = file.read()
        if "jekyll" in gemfile_content:
            # TODO: figure out if we need to do anything here
            return

        # Gemfile exists, but doesn't have jekyll gem
        with open(gemfile_path, "a") as file:
            # Start on a fresh line, otherwise the gem would be glued to the last
            # existing statement when the file has no trailing newline.
            if gemfile_content and not gemfile_content.endswith("\n"):
                file.write("\n")
            file.write('gem "jekyll", "~> 4.3.3"\n')
        if self.verbose:
            hlog("Added jekyll gem to Gemfile")

    def setup_config(self):
        """Make sure `_config.yml` exists and that its top-level `port` entry is `self.port`.

        Raises:
            OSError: If the default config cannot be copied, or the config
                cannot be read or written.
        """
        import shutil

        config_path = os.path.join(self.repo_path, "_config.yml")

        # Check if _config.yml exists, if not, copy _config.default.yml to _config.yml
        if not os.path.exists(config_path):
            default_config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "_config.default.yml")
            shutil.copy(default_config_path, config_path)
            if self.verbose:
                hlog("Copied _config.default.yml to _config.yml")

        # Search for the line defining the "port" key and replace it with "port: <port>"
        with open(config_path, "r") as file:
            lines = file.readlines()
        with open(config_path, "w") as file:
            for line in lines:
                key, separator, _ = line.partition(":")
                # Only the exact key "port" is rewritten, so other keys that merely
                # start with "port" (e.g. "portfolio") are left untouched.
                if separator and key.rstrip() == "port":
                    file.write(f"port: {self.port}\n")
                else:
                    file.write(line)

    @staticmethod
    def is_port_in_use(port: int) -> bool:
        """Check if a port is in use on localhost."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            return s.connect_ex(("localhost", port)) == 0

    def kill_process_using_port(self, port: int):
        """Find and kill the process using the specified port."""
        command = f"lsof -ti:{port} | grep '[0-9]' | xargs -r kill -9"
        os.system(command)
        if self.verbose:
            hlog(f"Killed process using port {port}.")

    def stream_output(self, process: subprocess.Popen):
        """Read from stdout and stderr streams and hlog.

        Reading stops when the server announces that it is running (success),
        when stdout is exhausted and a line is available on stderr (failure),
        or when both streams are exhausted.
        """
        assert process.stdout is not None
        assert process.stderr is not None
        while True:
            output = process.stdout.readline()
            if not output:
                err = process.stderr.readline()
                if err:
                    decoded_line = err.decode("utf-8").strip()
                    if self.verbose:
                        hlog(f"\t> \033[91mStderr: {decoded_line}\033[0m")
                    self.success = False
                    break
                else:
                    # No more output
                    break
            else:
                decoded_line = output.decode("utf-8").strip()
                if self.verbose:
                    hlog(f"\t> Stdout: {decoded_line}")
                if "Server running... press ctrl-c to stop." in decoded_line:
                    self.success = True
                    break

    def start(self, timeout: int = 30) -> bool:
        """Start the Jekyll server in a separate process and monitor the output.

        Args:
            timeout (int, optional): Seconds to wait for the server to report that it is running.
                Defaults to 30.

        Returns:
            bool: True if the server reported that it is running, False otherwise.
        """
        import shlex

        if JekyllServer.is_port_in_use(self.port):
            if self.verbose:
                hlog(f"Port {self.port} is in use. Attempting to free it.")
            self.kill_process_using_port(self.port)

        self.setup_gemfile()
        self.setup_config()

        # The path is quoted because the commands are run through a shell
        quoted_repo_path = shlex.quote(self.repo_path)
        command_install = f"cd {quoted_repo_path} && bundle install"
        subprocess.run(command_install, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        # Reset the flag so a result from a previous start cannot leak into this one
        self.success = False
        command_serve = f"cd {quoted_repo_path} && bundle exec jekyll serve --port {self.port}"
        self.process = subprocess.Popen(
            command_serve,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Run in its own session so that stop() can signal the whole process group
            start_new_session=True,
        )

        # Start thread to read output (daemon, so it can never block interpreter exit)
        output_thread = threading.Thread(target=self.stream_output, args=(self.process,), daemon=True)
        output_thread.start()

        # Wait for the thread to complete or timeout
        output_thread.join(timeout=timeout)

        if output_thread.is_alive():
            # If the thread is still alive after the timeout, the server did not start
            # successfully within the timeout period
            hlog("Timeout reached without detecting server start.")
            # Stop the whole process group: terminating only the shell could leave
            # children holding the pipes open, which would keep the reader blocked.
            self.stop()
            output_thread.join(timeout=timeout)  # Bounded wait for the reader to finish
            return False
        else:
            if self.verbose:
                if self.success:
                    hlog("Jekyll server started successfully.")
                else:
                    hlog("Jekyll server failed to start.")
            return self.success  # Return the success flag

    def stop(self, timeout=5):
        """Stop the Jekyll server and terminate the process with a timeout.

        Args:
            timeout (int, optional): Time to wait for the server to gracefully shut down. Defaults to 5 seconds.
        """
        if self.process:
            # Try to terminate the process group gracefully
            try:
                os.killpg(os.getpgid(self.process.pid), signal.SIGTERM)
            except ProcessLookupError:
                # The process has already exited; there is nothing left to signal
                pass
            self.process.terminate()

            # Wait for the process to end, checking periodically
            try:
                # Wait up to `timeout` seconds for process to terminate
                for _ in range(timeout):
                    if self.process.poll() is not None:  # Process has terminated
                        break
                    time.sleep(1)  # Wait a bit before checking again
                else:
                    # If the process is still alive after the timeout, kill it
                    os.killpg(os.getpgid(self.process.pid), signal.SIGKILL)
                    self.process.kill()
                    self.process.wait()  # Wait for process to be killed
                    if self.verbose:
                        hlog("Jekyll server forcefully stopped.")
            except Exception as e:
                if self.verbose:
                    hlog(f"Error stopping the Jekyll server: {e}")

            self.process = None
            if self.verbose:
                hlog("Jekyll server stopped.")
        elif self.verbose:
            hlog("Jekyll server is not running.")
@@ -0,0 +1,31 @@
1
+ import re
2
+
3
+ from helm.common.optional_dependencies import handle_module_not_found_error
4
+
5
+ try:
6
+ from html2text import HTML2Text
7
+ except ModuleNotFoundError as e:
8
+ handle_module_not_found_error(e, suggestions=["image2structure"])
9
+
10
+
11
def convert_html_to_text(handler: HTML2Text, html: str) -> str:
    """Render HTML as text with the given handler, then tidy up the whitespace.

    Args:
        handler (HTML2Text): The handler whose `handle` method produces the text
        html (str): The HTML to convert

    Returns:
        str: The converted text, without leading or trailing whitespace
    """
    rendered: str = handler.handle(html)

    # The order of the passes matters: runs of spaces are collapsed before tabs
    # become spaces, so a tab next to a space still leaves two spaces behind.
    spaces_collapsed = re.sub(r" +", " ", rendered)
    tabs_replaced = re.sub(r"\t", " ", spaces_collapsed)
    # Trim the start and end of every line
    lines_trimmed = re.sub(r"^[ \t]+|[ \t]+$", "", tabs_replaced, flags=re.MULTILINE)
    # Collapse blank (or whitespace-only) lines into a single line break
    blank_lines_removed = re.sub(r"\n\s*\n", "\n", lines_trimmed)

    return blank_lines_removed.strip()