crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (499)
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
  2. crfm_helm-0.5.1.dist-info/RECORD +654 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  11. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  12. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  13. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  14. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  15. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  16. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  17. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  18. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  19. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  20. helm/benchmark/adaptation/request_state.py +6 -1
  21. helm/benchmark/adaptation/scenario_state.py +6 -2
  22. helm/benchmark/annotation/annotator.py +43 -0
  23. helm/benchmark/annotation/annotator_factory.py +61 -0
  24. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  25. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  26. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  27. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  28. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  29. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  30. helm/benchmark/annotation_executor.py +124 -0
  31. helm/benchmark/augmentations/data_augmenter.py +0 -2
  32. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  33. helm/benchmark/augmentations/perturbation.py +25 -3
  34. helm/benchmark/augmentations/perturbation_description.py +1 -1
  35. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  36. helm/benchmark/augmentations/test_perturbation.py +41 -7
  37. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  38. helm/benchmark/config_registry.py +7 -1
  39. helm/benchmark/executor.py +46 -16
  40. helm/benchmark/huggingface_registration.py +20 -7
  41. helm/benchmark/metrics/basic_metrics.py +169 -664
  42. helm/benchmark/metrics/bbq_metrics.py +3 -4
  43. helm/benchmark/metrics/bias_metrics.py +6 -6
  44. helm/benchmark/metrics/classification_metrics.py +11 -8
  45. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  46. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  47. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  48. helm/benchmark/metrics/common_metric_specs.py +167 -0
  49. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  50. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  51. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  52. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  53. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  54. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  55. helm/benchmark/metrics/efficiency_metrics.py +213 -0
  56. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  57. helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
  58. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  59. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  60. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  61. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  62. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  63. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  64. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  65. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  66. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  68. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  69. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  70. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  71. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  72. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  73. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  74. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  75. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  76. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  77. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  78. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  79. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  80. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  81. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  82. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  83. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  84. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  85. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  86. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  87. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  88. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  89. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  90. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  91. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  92. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  93. helm/benchmark/metrics/metric.py +93 -172
  94. helm/benchmark/metrics/metric_name.py +0 -1
  95. helm/benchmark/metrics/metric_service.py +16 -0
  96. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  97. helm/benchmark/metrics/ranking_metrics.py +2 -2
  98. helm/benchmark/metrics/reference_metric.py +148 -0
  99. helm/benchmark/metrics/summac/model_summac.py +0 -2
  100. helm/benchmark/metrics/summarization_metrics.py +2 -2
  101. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  102. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  103. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  104. helm/benchmark/metrics/test_metric.py +2 -2
  105. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  106. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  107. helm/benchmark/metrics/toxicity_utils.py +23 -0
  108. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  109. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  110. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  111. helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
  112. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  113. helm/benchmark/model_deployment_registry.py +74 -0
  114. helm/benchmark/model_metadata_registry.py +41 -1
  115. helm/benchmark/multi_gpu_runner.py +133 -0
  116. helm/benchmark/presentation/create_plots.py +8 -7
  117. helm/benchmark/presentation/run_display.py +26 -10
  118. helm/benchmark/presentation/schema.py +15 -40
  119. helm/benchmark/presentation/summarize.py +119 -79
  120. helm/benchmark/presentation/table.py +8 -8
  121. helm/benchmark/presentation/test_contamination.py +2 -2
  122. helm/benchmark/presentation/test_run_entry.py +1 -2
  123. helm/benchmark/presentation/test_summarize.py +3 -3
  124. helm/benchmark/run.py +54 -26
  125. helm/benchmark/run_expander.py +205 -35
  126. helm/benchmark/run_spec.py +93 -0
  127. helm/benchmark/run_spec_factory.py +163 -0
  128. helm/benchmark/run_specs/__init__.py +0 -0
  129. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  130. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  131. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  132. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  133. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  134. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  135. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  136. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  137. helm/benchmark/run_specs/vlm_run_specs.py +757 -0
  138. helm/benchmark/runner.py +51 -57
  139. helm/benchmark/runner_config_registry.py +21 -0
  140. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  141. helm/benchmark/scenarios/bold_scenario.py +2 -2
  142. helm/benchmark/scenarios/code_scenario.py +1 -0
  143. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  144. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  145. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  146. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  147. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  148. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  149. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  150. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  151. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  152. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  153. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  154. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  155. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  156. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  157. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  158. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  159. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  160. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  162. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  163. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  164. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  165. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  166. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  167. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  168. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  169. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  170. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  171. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  172. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  173. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  174. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  175. helm/benchmark/scenarios/math_scenario.py +19 -2
  176. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  177. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  178. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  179. helm/benchmark/scenarios/scenario.py +4 -0
  180. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  181. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  182. helm/benchmark/scenarios/test_scenario.py +6 -3
  183. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  184. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  185. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  186. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  187. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  188. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  189. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  190. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  191. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  192. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  193. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
  194. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  198. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  199. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  200. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  201. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  202. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  203. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  204. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  205. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  206. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  207. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  208. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  209. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  210. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  211. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  212. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  213. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  214. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  215. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  216. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  217. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  218. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  219. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
  220. helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
  221. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  222. helm/benchmark/server.py +24 -1
  223. helm/benchmark/slurm_runner.py +70 -49
  224. helm/benchmark/static/benchmarking.js +1 -1
  225. helm/benchmark/static/schema_classic.yaml +258 -1066
  226. helm/benchmark/static/schema_image2structure.yaml +304 -0
  227. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  228. helm/benchmark/static/schema_lite.yaml +2 -227
  229. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  230. helm/benchmark/static/schema_unitxt.yaml +428 -0
  231. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  232. helm/benchmark/static/schema_vlm.yaml +823 -0
  233. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  234. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  235. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  236. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  237. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  238. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  239. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  240. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  241. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  242. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  243. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  244. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  245. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  246. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  247. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  248. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  249. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  250. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  251. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  252. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  253. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  254. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  255. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  256. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  257. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  258. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  259. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  260. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  261. helm/benchmark/static_build/config.js +4 -0
  262. helm/benchmark/static_build/index.html +20 -0
  263. helm/benchmark/test_data_preprocessor.py +3 -3
  264. helm/benchmark/test_run_expander.py +1 -1
  265. helm/benchmark/window_services/ai21_window_service.py +22 -33
  266. helm/benchmark/window_services/cohere_window_service.py +1 -63
  267. helm/benchmark/window_services/default_window_service.py +2 -44
  268. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  269. helm/benchmark/window_services/ice_window_service.py +0 -34
  270. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  271. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  272. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  273. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  274. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  275. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  276. helm/benchmark/window_services/local_window_service.py +21 -4
  277. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  278. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  279. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  280. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  281. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  282. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  283. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  284. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  285. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  286. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  287. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  288. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  289. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  290. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  291. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  292. helm/benchmark/window_services/test_utils.py +3 -2
  293. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  294. helm/benchmark/window_services/window_service.py +42 -0
  295. helm/benchmark/window_services/window_service_factory.py +4 -1
  296. helm/benchmark/window_services/yalm_window_service.py +0 -27
  297. helm/clients/__init__.py +0 -0
  298. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  299. helm/clients/aleph_alpha_client.py +112 -0
  300. helm/{proxy/clients → clients}/anthropic_client.py +233 -18
  301. helm/{proxy/clients → clients}/auto_client.py +59 -31
  302. helm/clients/bedrock_client.py +128 -0
  303. helm/clients/bedrock_utils.py +72 -0
  304. helm/{proxy/clients → clients}/client.py +65 -7
  305. helm/clients/clip_score_client.py +49 -0
  306. helm/clients/clip_scorers/__init__.py +0 -0
  307. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  308. helm/clients/clip_scorers/clip_scorer.py +50 -0
  309. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  310. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  311. helm/clients/gcs_client.py +82 -0
  312. helm/{proxy/clients → clients}/google_client.py +5 -5
  313. helm/clients/google_translate_client.py +35 -0
  314. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  315. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  316. helm/clients/image_generation/__init__.py +0 -0
  317. helm/clients/image_generation/adobe_vision_client.py +78 -0
  318. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  319. helm/clients/image_generation/cogview2/__init__.py +0 -0
  320. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  321. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  322. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  323. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  324. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  325. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  326. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  327. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  328. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  329. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  330. helm/clients/image_generation/cogview2_client.py +191 -0
  331. helm/clients/image_generation/dalle2_client.py +192 -0
  332. helm/clients/image_generation/dalle3_client.py +108 -0
  333. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  334. helm/clients/image_generation/dalle_mini/data.py +442 -0
  335. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  336. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  337. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  338. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  339. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  340. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  341. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  342. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  343. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  344. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  345. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  346. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  347. helm/clients/image_generation/dalle_mini_client.py +190 -0
  348. helm/clients/image_generation/deep_floyd_client.py +78 -0
  349. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  350. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  351. helm/clients/image_generation/lexica_client.py +86 -0
  352. helm/clients/image_generation/mindalle/__init__.py +0 -0
  353. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  354. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  355. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  356. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  357. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  358. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  359. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  360. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  361. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  362. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  363. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  364. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  365. helm/clients/image_generation/mindalle_client.py +115 -0
  366. helm/clients/image_generation/nudity_check_client.py +64 -0
  367. helm/clients/image_generation/together_image_generation_client.py +111 -0
  368. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  369. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  370. helm/clients/mistral_client.py +134 -0
  371. helm/clients/moderation_api_client.py +109 -0
  372. helm/clients/open_lm_client.py +43 -0
  373. helm/clients/openai_client.py +301 -0
  374. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  375. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  376. helm/clients/simple_client.py +64 -0
  377. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  378. helm/clients/test_client.py +100 -0
  379. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  380. helm/clients/test_simple_client.py +19 -0
  381. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  382. helm/{proxy/clients → clients}/together_client.py +104 -73
  383. helm/clients/vertexai_client.py +400 -0
  384. helm/clients/vision_language/__init__.py +0 -0
  385. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  386. helm/clients/vision_language/huggingface_vlm_client.py +111 -0
  387. helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
  388. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  389. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  390. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  391. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  392. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  393. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  394. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  395. helm/clients/vision_language/open_flamingo_client.py +155 -0
  396. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  397. helm/clients/vllm_client.py +46 -0
  398. helm/common/cache.py +16 -4
  399. helm/common/cache_backend_config.py +47 -0
  400. helm/common/clip_score_request.py +41 -0
  401. helm/common/file_caches/__init__.py +0 -0
  402. helm/common/file_caches/file_cache.py +16 -0
  403. helm/common/file_caches/local_file_cache.py +61 -0
  404. helm/common/file_caches/test_local_file_cache.py +25 -0
  405. helm/common/file_upload_request.py +27 -0
  406. helm/common/general.py +1 -1
  407. helm/common/image_generation_parameters.py +25 -0
  408. helm/common/images_utils.py +33 -3
  409. helm/common/key_value_store.py +35 -4
  410. helm/common/media_object.py +13 -0
  411. helm/common/moderations_api_request.py +71 -0
  412. helm/common/mongo_key_value_store.py +3 -3
  413. helm/common/multimodal_request_utils.py +31 -0
  414. helm/common/nudity_check_request.py +29 -0
  415. helm/common/request.py +15 -17
  416. helm/common/test_general.py +6 -0
  417. helm/common/tokenization_request.py +1 -1
  418. helm/config/model_deployments.yaml +1159 -538
  419. helm/config/model_metadata.yaml +868 -41
  420. helm/config/tokenizer_configs.yaml +149 -43
  421. helm/proxy/accounts.py +31 -4
  422. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  423. helm/proxy/critique/model_critique_client.py +8 -6
  424. helm/proxy/example_queries.py +29 -17
  425. helm/proxy/server.py +70 -5
  426. helm/proxy/services/remote_service.py +31 -0
  427. helm/proxy/services/server_service.py +96 -16
  428. helm/proxy/services/service.py +30 -0
  429. helm/proxy/services/test_remote_service.py +4 -3
  430. helm/proxy/services/test_service.py +0 -12
  431. helm/proxy/test_accounts.py +32 -0
  432. helm/proxy/token_counters/auto_token_counter.py +37 -37
  433. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  434. helm/proxy/token_counters/token_counter.py +3 -5
  435. helm/tokenizers/__init__.py +0 -0
  436. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  437. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  438. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  439. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  440. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  441. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  442. helm/tokenizers/simple_tokenizer.py +33 -0
  443. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  444. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  445. helm/tokenizers/test_simple_tokenizer.py +33 -0
  446. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  447. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  448. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  449. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  450. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  451. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  452. helm/benchmark/run_specs.py +0 -2762
  453. helm/benchmark/test_model_deployment_definition.py +0 -92
  454. helm/benchmark/test_model_properties.py +0 -1570
  455. helm/benchmark/vlm_run_specs.py +0 -97
  456. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  457. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  458. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  459. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  460. helm/benchmark/window_services/t511b_window_service.py +0 -30
  461. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  462. helm/benchmark/window_services/ul2_window_service.py +0 -30
  463. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  464. helm/common/cache_utils.py +0 -14
  465. helm/proxy/clients/aleph_alpha_client.py +0 -95
  466. helm/proxy/clients/goose_ai_client.py +0 -99
  467. helm/proxy/clients/microsoft_client.py +0 -180
  468. helm/proxy/clients/openai_client.py +0 -206
  469. helm/proxy/clients/simple_client.py +0 -60
  470. helm/proxy/clients/test_client.py +0 -49
  471. helm/proxy/clients/vertexai_client.py +0 -115
  472. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  473. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  474. helm/proxy/token_counters/free_token_counter.py +0 -12
  475. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  476. helm/proxy/token_counters/openai_token_counter.py +0 -22
  477. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  478. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  479. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  480. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  481. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  482. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
  483. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  484. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  485. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  486. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  487. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  488. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  489. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  490. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  491. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  492. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  493. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  494. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  495. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  496. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  497. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  498. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  499. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -0,0 +1,155 @@
1
+ """
2
+ Source: https://github.com/mlfoundations/open_flamingo
3
+ """
4
+
5
+ import torch.nn as nn
6
+ from .helpers import GatedCrossAttentionBlock
7
+ from .utils import getattr_recursive, setattr_recursive
8
+
9
+
10
class FlamingoLayer(nn.Module):
    """
    FlamingoLayer is a wrapper around the GatedCrossAttentionBlock and DecoderLayer.

    The visual features, the media-location mask and the ``use_cached_media``
    flag are not arguments of ``forward()``; they are stored on the layer ahead
    of time through the ``condition_*`` setters.
    """

    def __init__(self, gated_cross_attn_layer, decoder_layer, gradient_checkpointing=False):
        """
        Args:
            gated_cross_attn_layer: cross-attention block applied before the decoder
                layer, or None when this layer has no cross attention.
            decoder_layer: the wrapped decoder layer.
            gradient_checkpointing: value stored as ``_use_gradient_checkpointing``
                on both wrapped layers.
        """
        super().__init__()
        self.gated_cross_attn_layer = gated_cross_attn_layer
        self.decoder_layer = decoder_layer
        self.vis_x = None
        self.media_locations = None
        # forward() reads this attribute, so give it a value up front instead of
        # relying on condition_use_cached_media() having been called first.
        self.use_cached_media = False
        if self.gated_cross_attn_layer is not None:
            self.gated_cross_attn_layer._use_gradient_checkpointing = gradient_checkpointing
        self.decoder_layer._use_gradient_checkpointing = gradient_checkpointing

    def is_conditioned(self) -> bool:
        """Check whether the layer is conditioned."""
        return self.vis_x is not None and self.media_locations is not None

    # Used this great idea from this implementation of Flamingo (https://github.com/dhansmair/flamingo-mini/)
    def condition_vis_x(self, vis_x):
        """Store the visual features used by the cross-attention block."""
        self.vis_x = vis_x

    def condition_media_locations(self, media_locations):
        """Store the media-location mask used by the cross-attention block."""
        self.media_locations = media_locations

    def condition_use_cached_media(self, use_cached_media):
        """Store the flag forwarded to the cross-attention block as ``use_cached_media``."""
        self.use_cached_media = use_cached_media

    def forward(
        self,
        lang_x,
        attention_mask=None,
        **decoder_layer_kwargs,
    ):
        """
        Apply the cross-attention block (if any), then the decoder layer.

        Raises:
            ValueError: if a cross-attention block is present but ``vis_x`` or
                ``media_locations`` has not been conditioned.
        """
        # Cross attention
        if self.gated_cross_attn_layer is not None:
            if self.vis_x is None:
                raise ValueError("vis_x must be conditioned before forward pass")

            if self.media_locations is None:
                raise ValueError("media_locations must be conditioned before forward pass")

            lang_x = self.gated_cross_attn_layer(
                lang_x,
                self.vis_x,
                media_locations=self.media_locations,
                use_cached_media=self.use_cached_media,
            )

        # Normal decoder layer
        lang_x = self.decoder_layer(lang_x, attention_mask=attention_mask, **decoder_layer_kwargs)
        return lang_x
63
+
64
+
65
class FlamingoLMMixin(nn.Module):
    """
    Mixin to add cross-attention layers to a language model.
    """

    def set_decoder_layers_attr_name(self, decoder_layers_attr_name):
        """Record the (possibly dotted) attribute path under which the decoder layers live."""
        self.decoder_layers_attr_name = decoder_layers_attr_name

    def _get_decoder_layers(self):
        """Return the object stored at ``decoder_layers_attr_name``."""
        return getattr_recursive(self, self.decoder_layers_attr_name)

    def _set_decoder_layers(self, value):
        """Replace the object stored at ``decoder_layers_attr_name``."""
        setattr_recursive(self, self.decoder_layers_attr_name, value)

    def init_flamingo(
        self,
        media_token_id,
        lang_hidden_size,
        vis_hidden_size,
        cross_attn_every_n_layers,
        gradient_checkpointing,
    ):
        """
        Initialize Flamingo by adding a new gated cross attn to the decoder.
        Store the media token id for computing the media locations.
        """
        self.old_decoder_blocks = self._get_decoder_layers()
        # One slot per decoder layer; only every n-th slot holds a real block.
        self.gated_cross_attn_layers = nn.ModuleList(
            [
                (
                    GatedCrossAttentionBlock(dim=lang_hidden_size, dim_visual=vis_hidden_size)
                    if (layer_idx + 1) % cross_attn_every_n_layers == 0
                    else None
                )
                for layer_idx, _ in enumerate(self._get_decoder_layers())
            ]
        )
        self.init_flamingo_layers(gradient_checkpointing)
        self.media_token_id = media_token_id
        self.initialized_flamingo = True
        self._use_cached_vision_x = False

    def init_flamingo_layers(self, gradient_checkpointing):
        """
        Re initializes the FlamingoLayers.
        Propagates any changes made to self.gated_cross_attn_layers or self.old_decoder_blocks
        """
        self._set_decoder_layers(
            nn.ModuleList(
                [
                    FlamingoLayer(gated_cross_attn_layer, decoder_layer, gradient_checkpointing)
                    for gated_cross_attn_layer, decoder_layer in zip(
                        self.gated_cross_attn_layers, self.old_decoder_blocks
                    )
                ]
            )
        )

    def forward(self, input_ids, attention_mask, **kwargs):
        """
        Condition the Flamingo layers on the media locations before forward()

        Raises:
            ValueError: if ``init_flamingo`` has not been called on this instance.
        """
        # `initialized_flamingo` is only ever assigned by init_flamingo(); use a
        # default so the intended ValueError is raised instead of AttributeError.
        if not getattr(self, "initialized_flamingo", False):
            raise ValueError("Flamingo layers are not initialized. Please call `init_flamingo` first.")

        media_locations = input_ids == self.media_token_id

        # if there are media already cached and we're generating and there are no media tokens in the input,
        # we'll assume that ALL input tokens should attend to the last previous media that is cached.
        # this is especially important for HF generate() compatibility, since generate() calls forward()
        # repeatedly one token at a time (with no media tokens).
        # without this check, the model would not attend to any images when generating (after the first token)
        use_cached_media_locations = self._use_cached_vision_x and self.is_conditioned() and not media_locations.any()

        for layer in self._get_decoder_layers():
            if not use_cached_media_locations:
                layer.condition_media_locations(media_locations)
            layer.condition_use_cached_media(use_cached_media_locations)

        # package arguments for the other parent's forward. since we don't know the order of the arguments,
        # make them all kwargs
        kwargs["input_ids"] = input_ids
        kwargs["attention_mask"] = attention_mask
        return super().forward(**kwargs)  # Call the other parent's forward method

    def is_conditioned(self) -> bool:
        """Check whether all decoder layers are already conditioned."""
        return all(layer.is_conditioned() for layer in self._get_decoder_layers())

    def clear_conditioned_layers(self):
        """Reset the conditioned state (vis_x, media locations, cache flag) on every decoder layer."""
        for layer in self._get_decoder_layers():
            layer.condition_vis_x(None)
            layer.condition_media_locations(None)
            layer.condition_use_cached_media(None)
@@ -0,0 +1,267 @@
1
+ """
2
+ Based on: https://github.com/lucidrains/flamingo-pytorch
3
+ """
4
+
5
+ import torch
6
+ from einops import rearrange, repeat
7
+ from einops_exts import rearrange_many
8
+ from torch import einsum, nn
9
+
10
+
11
def exists(val):
    """Return True for any value other than None (falsy values such as 0 or "" count as existing)."""
    is_missing = val is None
    return not is_missing
13
+
14
+
15
def FeedForward(dim, mult=4):
    """
    Build a LayerNorm -> Linear -> GELU -> Linear stack as an ``nn.Sequential``.

    Args:
        dim: input and output feature size.
        mult: the hidden size is ``int(dim * mult)``.
    """
    hidden_dim = int(dim * mult)
    stages = [
        nn.LayerNorm(dim),
        nn.Linear(dim, hidden_dim, bias=False),
        nn.GELU(),
        nn.Linear(hidden_dim, dim, bias=False),
    ]
    return nn.Sequential(*stages)
23
+
24
+
25
class PerceiverAttention(nn.Module):
    """Multi-head attention where the latents query the media features concatenated with the latents."""

    def __init__(self, *, dim, dim_head=64, heads=8):
        super().__init__()
        projection_dim = dim_head * heads
        self.heads = heads
        self.scale = dim_head**-0.5

        self.norm_media = nn.LayerNorm(dim)
        self.norm_latents = nn.LayerNorm(dim)

        self.to_q = nn.Linear(dim, projection_dim, bias=False)
        # Keys and values come out of one projection and are split in forward().
        self.to_kv = nn.Linear(dim, projection_dim * 2, bias=False)
        self.to_out = nn.Linear(projection_dim, dim, bias=False)

    def forward(self, x, latents):
        """
        Args:
            x (torch.Tensor): image features
                shape (b, T, n1, D)
            latents (torch.Tensor): latent features
                shape (b, T, n2, D)
        """
        media = self.norm_media(x)
        latents = self.norm_latents(latents)
        num_heads = self.heads

        queries = self.to_q(latents)
        # Keys/values are computed over the media features followed by the latents.
        keys_values = self.to_kv(torch.cat((media, latents), dim=-2))
        keys, values = keys_values.chunk(2, dim=-1)
        queries, keys, values = rearrange_many((queries, keys, values), "b t n (h d) -> b h t n d", h=num_heads)
        queries = queries * self.scale

        # attention
        scores = einsum("... i d, ... j d -> ... i j", queries, keys)
        # Subtract the detached row maximum before the softmax.
        scores = scores - scores.amax(dim=-1, keepdim=True).detach()
        weights = scores.softmax(dim=-1)

        attended = einsum("... i j, ... j d -> ... i d", weights, values)
        attended = rearrange(attended, "b h t n d -> b t n (h d)", h=num_heads)
        return self.to_out(attended)
66
+
67
+
68
class PerceiverResampler(nn.Module):
    """Stack of PerceiverAttention + FeedForward layers that updates a set of learned latents."""

    def __init__(
        self,
        *,
        dim,
        depth=6,
        dim_head=64,
        heads=8,
        num_latents=64,
        max_num_media=None,
        max_num_frames=None,
        ff_mult=4,
    ):
        super().__init__()
        self.latents = nn.Parameter(torch.randn(num_latents, dim))

        # Both embedding tables are optional and only created when a maximum is given.
        if exists(max_num_frames):
            self.frame_embs = nn.Parameter(torch.randn(max_num_frames, dim))
        else:
            self.frame_embs = None

        if exists(max_num_media):
            self.media_time_embs = nn.Parameter(torch.randn(max_num_media, 1, dim))
        else:
            self.media_time_embs = None

        self.layers = nn.ModuleList(
            [
                nn.ModuleList(
                    [
                        PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads),
                        FeedForward(dim=dim, mult=ff_mult),
                    ]
                )
                for _ in range(depth)
            ]
        )

        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): image features
                shape (b, T, F, v, D)
        Returns:
            shape (b, T, n, D) where n is the number of latents
        """
        batch, num_media, num_frames, num_tokens = x.shape[:4]

        # frame and media time embeddings
        if exists(self.frame_embs):
            per_frame = repeat(
                self.frame_embs[:num_frames], "F d -> b T F v d", b=batch, T=num_media, v=num_tokens
            )
            x = x + per_frame
        # flatten the frame and spatial dimensions
        x = rearrange(x, "b T F v d -> b T (F v) d")
        if exists(self.media_time_embs):
            x = x + self.media_time_embs[:num_media]

        # blocks
        latents = repeat(self.latents, "n d -> b T n d", b=batch, T=num_media)
        for attention, feed_forward in self.layers:
            latents = attention(x, latents) + latents
            latents = feed_forward(latents) + latents
        return self.norm(latents)
123
+
124
+
125
+ # gated cross attention
126
class MaskedCrossAttention(nn.Module):
    """Multi-head cross attention from text features to media features, optionally masked by media position."""

    def __init__(
        self,
        *,
        dim,
        dim_visual,
        dim_head=64,
        heads=8,
        only_attend_immediate_media=True,
    ):
        super().__init__()
        self.scale = dim_head**-0.5
        self.heads = heads
        inner_dim = dim_head * heads

        self.norm = nn.LayerNorm(dim)

        self.to_q = nn.Linear(dim, inner_dim, bias=False)
        self.to_kv = nn.Linear(dim_visual, inner_dim * 2, bias=False)
        self.to_out = nn.Linear(inner_dim, dim, bias=False)

        # whether for text to only attend to immediate preceding image, or all previous images
        self.only_attend_immediate_media = only_attend_immediate_media

    def forward(self, x, media, media_locations=None, use_cached_media=False):
        """
        Args:
            x (torch.Tensor): text features
                shape (B, T_txt, D_txt)
            media (torch.Tensor): image features
                shape (B, T_img, n, D_img) where n is the dim of the latents
            media_locations: boolean mask identifying the media tokens in x
                shape (B, T_txt). When None, no text-to-media masking is applied.
            use_cached_media: bool
                If true, treat all of x as if they occur after the last media
                registered in media_locations. T_txt does not need to exactly
                equal media_locations.shape[1] in this case
        """
        has_locations = media_locations is not None

        # Only validate the mask when one was supplied: media_locations defaults
        # to None and every later use of it is already guarded.
        if has_locations and not use_cached_media:
            assert (
                media_locations.shape[1] == x.shape[1]
            ), f"media_location.shape is {media_locations.shape} but x.shape is {x.shape}"

        T_txt = x.shape[1]
        _, T_img, n = media.shape[:3]
        h = self.heads

        x = self.norm(x)

        q = self.to_q(x)
        media = rearrange(media, "b t n d -> b (t n) d")

        k, v = self.to_kv(media).chunk(2, dim=-1)
        q, k, v = rearrange_many((q, k, v), "b n (h d) -> b h n d", h=h)

        q = q * self.scale

        sim = einsum("... i d, ... j d -> ... i j", q, k)

        if has_locations:
            # media are numbered 1..T_img so that 0 can mean "no media seen yet"
            media_time = torch.arange(T_img, device=x.device) + 1

            if use_cached_media:
                # text time is set to the last cached media location
                text_time = repeat(
                    torch.count_nonzero(media_locations, dim=1),
                    "b -> b i",
                    i=T_txt,
                )
            else:
                # at each boolean of True, increment the time counter (relative to media time)
                text_time = media_locations.cumsum(dim=-1)

            # text time must equal media time if only attending to most immediate image
            # otherwise, as long as text time is greater than media time (if attending to all previous images / media)
            mask_op = torch.eq if self.only_attend_immediate_media else torch.ge

            text_to_media_mask = mask_op(
                rearrange(text_time, "b i -> b 1 i 1"),
                repeat(media_time, "j -> 1 1 1 (j n)", n=n),
            )
            sim = sim.masked_fill(~text_to_media_mask, -torch.finfo(sim.dtype).max)

        sim = sim - sim.amax(dim=-1, keepdim=True).detach()
        attn = sim.softmax(dim=-1)

        if has_locations and self.only_attend_immediate_media:
            # any text without a preceding media needs to have attention zeroed out
            text_without_media_mask = text_time == 0
            text_without_media_mask = rearrange(text_without_media_mask, "b i -> b 1 i 1")
            attn = attn.masked_fill(text_without_media_mask, 0.0)

        out = einsum("... i j, ... j d -> ... i d", attn, v)
        out = rearrange(out, "b h n d -> b n (h d)")
        return self.to_out(out)
222
+
223
+
224
class GatedCrossAttentionBlock(nn.Module):
    """
    Masked cross attention followed by a feed-forward stack, each added to the
    input through a learned tanh gate. Both gate parameters start at 0.0.
    """

    def __init__(
        self,
        *,
        dim,
        dim_visual,
        dim_head=64,
        heads=8,
        ff_mult=4,
        only_attend_immediate_media=True,
    ):
        super().__init__()
        attention_options = dict(
            dim=dim,
            dim_visual=dim_visual,
            dim_head=dim_head,
            heads=heads,
            only_attend_immediate_media=only_attend_immediate_media,
        )
        self.attn = MaskedCrossAttention(**attention_options)
        self.attn_gate = nn.Parameter(torch.tensor([0.0]))

        self.ff = FeedForward(dim, mult=ff_mult)
        self.ff_gate = nn.Parameter(torch.tensor([0.0]))

    def forward(
        self,
        x,
        media,
        media_locations=None,
        use_cached_media=False,
    ):
        """Return x after the gated cross-attention and gated feed-forward residual updates."""
        attended = self.attn(
            x,
            media,
            media_locations=media_locations,
            use_cached_media=use_cached_media,
        )
        x = attended * self.attn_gate.tanh() + x

        transformed = self.ff(x)
        x = transformed * self.ff_gate.tanh() + x

        return x
@@ -0,0 +1,47 @@
1
+ """
2
+ Source: https://github.com/mlfoundations/open_flamingo
3
+ """
4
+
5
+
6
def extend_instance(obj, mixin):
    """
    Apply a mixin to an already-created instance.

    The instance's class is swapped for a new class that has the same name
    and inherits from ``mixin`` and the original class, in that order.
    """
    original_cls = obj.__class__
    # mixin needs to go first for our forward() logic to work
    bases = (mixin, original_cls)
    obj.__class__ = type(original_cls.__name__, bases, {})
13
+
14
+
15
def getattr_recursive(obj, att):
    """
    Return nested attribute of obj
    Example: getattr_recursive(obj, 'a.b.c') is equivalent to obj.a.b.c

    An empty path returns obj itself.
    """
    current = obj
    remaining = att
    # Consume one dotted segment per iteration until the path is used up.
    while remaining != "":
        segment, _, remaining = remaining.partition(".")
        current = getattr(current, segment)
    return current
27
+
28
+
29
def setattr_recursive(obj, att, val):
    """
    Set nested attribute of obj
    Example: setattr_recursive(obj, 'a.b.c', val) is equivalent to obj.a.b.c = val
    """
    # Everything before the last dot names the owner; the rest is the attribute.
    owner_path, dot, leaf = att.rpartition(".")
    owner = getattr_recursive(obj, owner_path) if dot else obj
    setattr(owner, leaf, val)
37
+
38
+
39
def apply_with_stopping_condition(module, apply_fn, apply_condition=None, stopping_condition=None, **other_args):
    """
    Walk ``module`` and its ``children()`` depth-first, calling ``apply_fn`` on the way.

    Args:
        module: root object; must provide a ``children()`` iterable.
        apply_fn: called as ``apply_fn(module, **other_args)``.
        apply_condition: predicate deciding whether ``apply_fn`` is called on a
            module. None means "always apply".
        stopping_condition: predicate; when it returns true for a module, that
            module and its whole subtree are skipped. None means "never stop".
        **other_args: forwarded unchanged to every ``apply_fn`` call.
    """
    # Both predicates default to None, so they must not be called unconditionally.
    if stopping_condition is not None and stopping_condition(module):
        return
    if apply_condition is None or apply_condition(module):
        apply_fn(module, **other_args)
    for child in module.children():
        apply_with_stopping_condition(
            child, apply_fn, apply_condition=apply_condition, stopping_condition=stopping_condition, **other_args
        )
@@ -0,0 +1,155 @@
1
+ from threading import Lock
2
+ from typing import List, Optional, Tuple
3
+
4
+ import torch
5
+ from huggingface_hub import hf_hub_download
6
+
7
+ from helm.common.cache import CacheConfig
8
+ from helm.common.hierarchical_logger import hlog, htrack_block
9
+ from helm.common.images_utils import open_image
10
+ from helm.common.gpu_utils import get_torch_device_name
11
+ from helm.common.media_object import TEXT_TYPE
12
+ from helm.common.optional_dependencies import handle_module_not_found_error
13
+ from helm.common.request import Request, RequestResult, GeneratedOutput, Token
14
+ from helm.common.request import wrap_request_time
15
+ from helm.clients.vision_language.open_flamingo import create_model_and_transforms
16
+ from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt
17
+
18
+ try:
19
+ from PIL import Image
20
+ except ModuleNotFoundError as e:
21
+ handle_module_not_found_error(e, ["images"])
22
+
23
+
24
class OpenFlamingoClient(CachingClient):
    """
    OpenFlamingo is an open source implementation of DeepMind's Flamingo models.
    Implementation following:
        https://github.com/mlfoundations/open_flamingo
        https://huggingface.co/openflamingo/OpenFlamingo-9B-vitl-mpt7b
    """

    END_OF_CHUNK_TOKEN: str = "<|endofchunk|>"
    IMAGE_TOKEN: str = "<image>"

    # Class-level lock: shared by every client instance while a model is being loaded.
    _model_lock: Lock = Lock()

    def __init__(
        self,
        cache_config: CacheConfig,
        checkpoint_path: Optional[str] = None,
        tokenizer_name: Optional[str] = None,
        cross_attn_every_n_layers: int = 4,
    ):
        super().__init__(cache_config)
        self._device: str = get_torch_device_name()
        self._checkpoint_path: Optional[str] = checkpoint_path
        self._tokenizer_name: Optional[str] = tokenizer_name
        self._cross_attn_every_n_layers: int = cross_attn_every_n_layers

        # Model
        # The model is only initialized when the first request is made
        # This is to avoid loading the model if it is not used
        self._model: Optional[torch.nn.Module] = None

    def _get_model(self):
        """
        Load the model, image processor and tokenizer onto this client (once).

        Raises:
            ValueError: if no checkpoint path or tokenizer name was configured.
        """
        if not self._checkpoint_path:
            raise ValueError("OpenFlamingoClient requires a checkpoint path")
        if not self._tokenizer_name:
            raise ValueError("OpenFlamingoClient requires a tokenizer name")
        with self._model_lock:
            # Re-check under the lock: another thread may have finished loading
            # while this one was waiting.
            if self._model is not None:
                return
            with htrack_block("Initializing OpenFlamingo model"):
                model, image_processor, tokenizer = create_model_and_transforms(
                    clip_vision_encoder_path="ViT-L-14",
                    clip_vision_encoder_pretrained="openai",
                    lang_encoder_path=self._tokenizer_name,
                    tokenizer_path=self._tokenizer_name,
                    cross_attn_every_n_layers=self._cross_attn_every_n_layers,
                )
                tokenizer.padding_side = "left"
                checkpoint_path = hf_hub_download(self._checkpoint_path, "checkpoint.pt")
                # NOTE(review): torch.load unpickles the file; the checkpoint repository must be trusted.
                # The weights are read on CPU and moved together with the model by .to() below,
                # so loading does not depend on the device the checkpoint was saved from.
                model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"), strict=False)
                model = model.to(self._device)

                self.image_processor = image_processor
                self.tokenizer = tokenizer
                # Publish the model last so `_model is not None` implies a fully loaded client.
                self._model = model
                hlog(f"Loaded model to {self._device}.")

    def make_request(self, request: Request) -> RequestResult:
        """
        Generate completions for a multimodal (image + text) prompt.

        Returns a failed RequestResult when generation raises RuntimeError.

        Raises:
            ValueError: for a text media object without text, or an unsupported media object.
        """
        assert request.multimodal_prompt is not None, "Multimodal prompt is required"

        # Load model if needed
        if self._model is None:
            self._get_model()

        # Build the prompt
        prompt_text: str = ""
        images: List[Image.Image] = []
        for media_object in request.multimodal_prompt.media_objects:
            if media_object.is_type("image") and media_object.location:
                images.append(open_image(media_object.location))
                prompt_text += self.IMAGE_TOKEN
            elif media_object.is_type(TEXT_TYPE):
                if media_object.text is None:
                    raise ValueError("MediaObject of text type has missing text field value")
                prompt_text += media_object.text
            else:
                raise ValueError(f"Unrecognized MediaObject type {media_object.type}")

        # Preprocess
        # NOTE(review): a prompt without any image reaches torch.cat with an empty list — confirm
        # whether text-only prompts are expected here.
        vision_x: torch.Tensor = torch.cat([self.image_processor(image).unsqueeze(0) for image in images], dim=0)
        vision_x = vision_x.unsqueeze(1).unsqueeze(0)
        lang_x = self.tokenizer([prompt_text], return_tensors="pt")

        # Generate
        try:
            generation_args = {
                "max_new_tokens": request.max_tokens,
                "n": request.num_completions,
            }

            def do_it():
                tensors = self._model.generate(
                    vision_x=vision_x.to(self._device),
                    lang_x=lang_x["input_ids"].to(self._device),
                    attention_mask=lang_x["attention_mask"].to(self._device),
                    max_new_tokens=generation_args["max_new_tokens"],
                    num_beams=generation_args["n"],
                    num_return_sequences=generation_args["n"],
                )
                generated_completions: List[Tuple[str, List[str]]] = []
                for tensor in tensors:
                    generated_text: str = self.tokenizer.decode(tensor)
                    raw_tokens: List[str] = self.tokenizer.tokenize(generated_text)
                    generated_completions.append((generated_text, raw_tokens))

                return {"output": generated_completions}

            cache_key = CachingClient.make_cache_key(
                raw_request={
                    "model": request.model,
                    "prompt": generate_uid_for_multimodal_prompt(request.multimodal_prompt),
                    **generation_args,
                },
                request=request,
            )
            result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
        except RuntimeError as ex:
            return RequestResult(success=False, cached=False, error=str(ex), completions=[], embedding=[])

        completions: List[GeneratedOutput] = []
        for text, tokens in result["output"]:
            # Remove the prompt from the generated text
            text = (
                text[len(prompt_text) :].replace(self.END_OF_CHUNK_TOKEN, "").strip()
                if len(text) >= len(prompt_text)
                # Slice instead of index so an empty decoded string yields "" rather than IndexError.
                else text[-1:]
            )
            completions.append(
                GeneratedOutput(text=text, logprob=0, tokens=[Token(text=token, logprob=0) for token in tokens])
            )

        return RequestResult(
            success=True,
            cached=cached,
            request_time=result["request_time"],
            completions=completions,
            embedding=[],
        )