crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (482)
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +134 -31
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  11. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  12. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  13. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  14. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  15. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  16. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  17. helm/benchmark/adaptation/request_state.py +6 -1
  18. helm/benchmark/adaptation/scenario_state.py +6 -2
  19. helm/benchmark/annotation/annotator.py +43 -0
  20. helm/benchmark/annotation/annotator_factory.py +61 -0
  21. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  22. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  23. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  24. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  25. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  26. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  27. helm/benchmark/annotation_executor.py +124 -0
  28. helm/benchmark/augmentations/data_augmenter.py +0 -2
  29. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  30. helm/benchmark/augmentations/perturbation.py +8 -2
  31. helm/benchmark/augmentations/perturbation_description.py +1 -1
  32. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  33. helm/benchmark/augmentations/test_perturbation.py +11 -7
  34. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  35. helm/benchmark/config_registry.py +7 -1
  36. helm/benchmark/executor.py +46 -16
  37. helm/benchmark/huggingface_registration.py +20 -7
  38. helm/benchmark/metrics/basic_metrics.py +169 -664
  39. helm/benchmark/metrics/bbq_metrics.py +3 -4
  40. helm/benchmark/metrics/bias_metrics.py +6 -6
  41. helm/benchmark/metrics/classification_metrics.py +11 -8
  42. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  43. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  44. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  45. helm/benchmark/metrics/common_metric_specs.py +167 -0
  46. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  47. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  48. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  49. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  50. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  51. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  52. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  53. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  54. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  55. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  56. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  57. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  58. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  59. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  60. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  61. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  62. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  63. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  64. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  65. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  66. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  68. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  69. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  70. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  71. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  72. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  73. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  74. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  75. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  76. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  77. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  78. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  79. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  80. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  81. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  82. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  83. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  84. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  85. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  86. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  87. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  88. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  89. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  90. helm/benchmark/metrics/metric.py +93 -172
  91. helm/benchmark/metrics/metric_name.py +0 -1
  92. helm/benchmark/metrics/metric_service.py +16 -0
  93. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  94. helm/benchmark/metrics/ranking_metrics.py +2 -2
  95. helm/benchmark/metrics/reference_metric.py +148 -0
  96. helm/benchmark/metrics/summac/model_summac.py +0 -2
  97. helm/benchmark/metrics/summarization_metrics.py +2 -2
  98. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  99. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  100. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  101. helm/benchmark/metrics/test_metric.py +2 -2
  102. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  103. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  104. helm/benchmark/metrics/toxicity_utils.py +23 -0
  105. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  106. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  107. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  108. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  109. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  110. helm/benchmark/model_deployment_registry.py +74 -0
  111. helm/benchmark/model_metadata_registry.py +36 -0
  112. helm/benchmark/multi_gpu_runner.py +133 -0
  113. helm/benchmark/presentation/create_plots.py +8 -7
  114. helm/benchmark/presentation/run_display.py +26 -10
  115. helm/benchmark/presentation/schema.py +15 -40
  116. helm/benchmark/presentation/summarize.py +119 -79
  117. helm/benchmark/presentation/table.py +8 -8
  118. helm/benchmark/presentation/test_contamination.py +2 -2
  119. helm/benchmark/presentation/test_run_entry.py +1 -2
  120. helm/benchmark/presentation/test_summarize.py +3 -3
  121. helm/benchmark/run.py +54 -26
  122. helm/benchmark/run_expander.py +214 -16
  123. helm/benchmark/run_spec.py +93 -0
  124. helm/benchmark/run_spec_factory.py +162 -0
  125. helm/benchmark/run_specs/__init__.py +0 -0
  126. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  127. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  128. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  129. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  130. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  131. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  132. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  133. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  134. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  135. helm/benchmark/runner.py +51 -57
  136. helm/benchmark/runner_config_registry.py +21 -0
  137. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  138. helm/benchmark/scenarios/bold_scenario.py +2 -2
  139. helm/benchmark/scenarios/code_scenario.py +1 -0
  140. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  141. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  142. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  143. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  144. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  145. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  146. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  147. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  148. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  149. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  150. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  151. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  152. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  153. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  154. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  155. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  156. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  157. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  158. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  159. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  160. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  162. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  163. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  164. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  165. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  166. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  167. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  168. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  169. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  170. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  171. helm/benchmark/scenarios/math_scenario.py +19 -2
  172. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  173. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  174. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  175. helm/benchmark/scenarios/scenario.py +4 -0
  176. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  177. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  178. helm/benchmark/scenarios/test_scenario.py +6 -3
  179. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  180. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  181. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  182. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  183. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  184. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  185. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  186. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  187. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  188. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  189. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  190. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  191. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  192. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  193. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  194. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  198. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  199. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  200. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  201. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  202. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  203. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  204. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  205. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -2
  206. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  207. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  208. helm/benchmark/server.py +24 -1
  209. helm/benchmark/slurm_runner.py +70 -49
  210. helm/benchmark/static/benchmarking.js +1 -1
  211. helm/benchmark/static/schema_classic.yaml +258 -1066
  212. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  213. helm/benchmark/static/schema_lite.yaml +2 -227
  214. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  215. helm/benchmark/static/schema_unitxt.yaml +428 -0
  216. helm/benchmark/static/schema_vlm.yaml +576 -0
  217. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  218. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  219. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  220. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  221. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  222. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  223. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  224. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  225. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  226. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  227. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  228. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  229. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  230. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  231. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  232. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  233. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  234. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  235. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  236. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  237. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  238. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  239. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  240. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  241. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  242. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  243. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  244. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  245. helm/benchmark/static_build/config.js +4 -0
  246. helm/benchmark/static_build/index.html +20 -0
  247. helm/benchmark/test_data_preprocessor.py +3 -3
  248. helm/benchmark/test_model_deployment_definition.py +14 -16
  249. helm/benchmark/test_run_expander.py +1 -1
  250. helm/benchmark/window_services/ai21_window_service.py +22 -33
  251. helm/benchmark/window_services/cohere_window_service.py +1 -63
  252. helm/benchmark/window_services/default_window_service.py +2 -44
  253. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  254. helm/benchmark/window_services/ice_window_service.py +0 -34
  255. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  256. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  257. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  258. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  259. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  260. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  261. helm/benchmark/window_services/local_window_service.py +21 -4
  262. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  263. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  264. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  265. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  266. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  267. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  268. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  269. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  270. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  271. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  272. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  273. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  274. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  275. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  276. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  277. helm/benchmark/window_services/test_utils.py +3 -2
  278. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  279. helm/benchmark/window_services/window_service.py +42 -0
  280. helm/benchmark/window_services/window_service_factory.py +4 -1
  281. helm/benchmark/window_services/yalm_window_service.py +0 -27
  282. helm/clients/__init__.py +0 -0
  283. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  284. helm/clients/aleph_alpha_client.py +112 -0
  285. helm/{proxy/clients → clients}/anthropic_client.py +203 -18
  286. helm/{proxy/clients → clients}/auto_client.py +59 -31
  287. helm/clients/bedrock_client.py +128 -0
  288. helm/clients/bedrock_utils.py +72 -0
  289. helm/{proxy/clients → clients}/client.py +65 -7
  290. helm/clients/clip_score_client.py +49 -0
  291. helm/clients/clip_scorers/__init__.py +0 -0
  292. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  293. helm/clients/clip_scorers/clip_scorer.py +50 -0
  294. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  295. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  296. helm/clients/gcs_client.py +82 -0
  297. helm/{proxy/clients → clients}/google_client.py +5 -5
  298. helm/clients/google_translate_client.py +35 -0
  299. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  300. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  301. helm/clients/image_generation/__init__.py +0 -0
  302. helm/clients/image_generation/adobe_vision_client.py +78 -0
  303. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  304. helm/clients/image_generation/cogview2/__init__.py +0 -0
  305. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  306. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  307. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  308. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  309. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  310. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  311. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  312. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  313. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  314. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  315. helm/clients/image_generation/cogview2_client.py +191 -0
  316. helm/clients/image_generation/dalle2_client.py +192 -0
  317. helm/clients/image_generation/dalle3_client.py +108 -0
  318. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  319. helm/clients/image_generation/dalle_mini/data.py +442 -0
  320. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  321. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  322. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  323. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  324. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  325. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  326. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  327. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  328. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  329. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  330. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  331. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  332. helm/clients/image_generation/dalle_mini_client.py +190 -0
  333. helm/clients/image_generation/deep_floyd_client.py +78 -0
  334. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  335. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  336. helm/clients/image_generation/lexica_client.py +86 -0
  337. helm/clients/image_generation/mindalle/__init__.py +0 -0
  338. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  339. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  340. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  341. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  342. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  343. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  344. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  345. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  346. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  347. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  348. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  349. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  350. helm/clients/image_generation/mindalle_client.py +115 -0
  351. helm/clients/image_generation/nudity_check_client.py +64 -0
  352. helm/clients/image_generation/together_image_generation_client.py +111 -0
  353. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  354. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  355. helm/clients/mistral_client.py +134 -0
  356. helm/clients/moderation_api_client.py +109 -0
  357. helm/clients/open_lm_client.py +43 -0
  358. helm/clients/openai_client.py +302 -0
  359. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  360. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  361. helm/clients/simple_client.py +64 -0
  362. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  363. helm/clients/test_client.py +100 -0
  364. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  365. helm/clients/test_simple_client.py +19 -0
  366. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  367. helm/{proxy/clients → clients}/together_client.py +12 -72
  368. helm/clients/vertexai_client.py +391 -0
  369. helm/clients/vision_language/__init__.py +0 -0
  370. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  371. helm/{proxy/clients → clients}/vision_language/idefics_client.py +53 -48
  372. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  373. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  374. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  375. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  376. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  377. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  378. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  379. helm/clients/vision_language/open_flamingo_client.py +155 -0
  380. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  381. helm/clients/vllm_client.py +46 -0
  382. helm/common/cache.py +16 -4
  383. helm/common/cache_backend_config.py +47 -0
  384. helm/common/clip_score_request.py +41 -0
  385. helm/common/file_caches/__init__.py +0 -0
  386. helm/common/file_caches/file_cache.py +16 -0
  387. helm/common/file_caches/local_file_cache.py +61 -0
  388. helm/common/file_caches/test_local_file_cache.py +25 -0
  389. helm/common/file_upload_request.py +27 -0
  390. helm/common/general.py +1 -1
  391. helm/common/image_generation_parameters.py +25 -0
  392. helm/common/images_utils.py +24 -1
  393. helm/common/key_value_store.py +35 -4
  394. helm/common/media_object.py +13 -0
  395. helm/common/moderations_api_request.py +71 -0
  396. helm/common/mongo_key_value_store.py +3 -3
  397. helm/common/multimodal_request_utils.py +31 -0
  398. helm/common/nudity_check_request.py +29 -0
  399. helm/common/request.py +15 -17
  400. helm/common/test_general.py +6 -0
  401. helm/common/tokenization_request.py +1 -1
  402. helm/config/model_deployments.yaml +1069 -546
  403. helm/config/model_metadata.yaml +753 -31
  404. helm/config/tokenizer_configs.yaml +142 -43
  405. helm/proxy/accounts.py +31 -4
  406. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  407. helm/proxy/critique/model_critique_client.py +8 -6
  408. helm/proxy/example_queries.py +29 -17
  409. helm/proxy/server.py +70 -5
  410. helm/proxy/services/remote_service.py +31 -0
  411. helm/proxy/services/server_service.py +96 -16
  412. helm/proxy/services/service.py +30 -0
  413. helm/proxy/services/test_remote_service.py +4 -3
  414. helm/proxy/services/test_service.py +0 -12
  415. helm/proxy/test_accounts.py +32 -0
  416. helm/proxy/token_counters/auto_token_counter.py +37 -37
  417. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  418. helm/proxy/token_counters/token_counter.py +3 -5
  419. helm/tokenizers/__init__.py +0 -0
  420. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  421. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  422. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  423. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  424. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  425. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  426. helm/tokenizers/simple_tokenizer.py +33 -0
  427. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  428. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  429. helm/tokenizers/test_simple_tokenizer.py +33 -0
  430. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  431. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  432. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  433. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  434. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  435. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  436. helm/benchmark/run_specs.py +0 -2762
  437. helm/benchmark/test_model_properties.py +0 -1570
  438. helm/benchmark/vlm_run_specs.py +0 -97
  439. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  440. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  441. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  442. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  443. helm/benchmark/window_services/t511b_window_service.py +0 -30
  444. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  445. helm/benchmark/window_services/ul2_window_service.py +0 -30
  446. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  447. helm/common/cache_utils.py +0 -14
  448. helm/proxy/clients/aleph_alpha_client.py +0 -95
  449. helm/proxy/clients/goose_ai_client.py +0 -99
  450. helm/proxy/clients/microsoft_client.py +0 -180
  451. helm/proxy/clients/openai_client.py +0 -206
  452. helm/proxy/clients/simple_client.py +0 -60
  453. helm/proxy/clients/test_client.py +0 -49
  454. helm/proxy/clients/vertexai_client.py +0 -115
  455. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  456. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  457. helm/proxy/token_counters/free_token_counter.py +0 -12
  458. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  459. helm/proxy/token_counters/openai_token_counter.py +0 -22
  460. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  461. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  462. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  463. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  464. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  465. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  466. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  467. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  468. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  469. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  470. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  471. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  472. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  473. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  474. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  475. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  476. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  477. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  478. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  479. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  480. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  481. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  482. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -10,129 +10,155 @@
10
10
 
11
11
  tokenizer_configs:
12
12
 
13
- - name: simple/model1
13
+ - name: simple/tokenizer1
14
14
  tokenizer_spec:
15
- class_name: "helm.proxy.tokenizers.simple_tokenizer.SimpleTokenizer"
15
+ class_name: "helm.tokenizers.simple_tokenizer.SimpleTokenizer"
16
16
  end_of_text_token: "</s>"
17
17
  prefix_token: "<s>"
18
18
 
19
19
  # AI21
20
20
  - name: ai21/j1
21
21
  tokenizer_spec:
22
- class_name: "helm.proxy.tokenizers.ai21_tokenizer.AI21Tokenizer"
22
+ class_name: "helm.tokenizers.ai21_tokenizer.AI21Tokenizer"
23
23
  end_of_text_token: " "
24
24
  prefix_token: ""
25
25
 
26
26
  # AlephAlpha
27
27
  - name: AlephAlpha/luminous-base
28
28
  tokenizer_spec:
29
- class_name: "helm.proxy.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
29
+ class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
30
30
  end_of_text_token: ""
31
31
  prefix_token: ""
32
32
  - name: AlephAlpha/luminous-extended
33
33
  tokenizer_spec:
34
- class_name: "helm.proxy.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
34
+ class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
35
35
  end_of_text_token: ""
36
36
  prefix_token: ""
37
37
  - name: AlephAlpha/luminous-supreme
38
38
  tokenizer_spec:
39
- class_name: "helm.proxy.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
39
+ class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
40
40
  end_of_text_token: ""
41
41
  prefix_token: ""
42
42
  - name: AlephAlpha/luminous-world
43
43
  tokenizer_spec:
44
- class_name: "helm.proxy.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
44
+ class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer"
45
45
  end_of_text_token: ""
46
46
  prefix_token: ""
47
47
 
48
48
  # Anthropic
49
49
  - name: anthropic/claude
50
50
  tokenizer_spec:
51
- class_name: "helm.proxy.tokenizers.anthropic_tokenizer.AnthropicTokenizer"
51
+ class_name: "helm.tokenizers.anthropic_tokenizer.AnthropicTokenizer"
52
52
  end_of_text_token: "<|endoftext|>"
53
53
  prefix_token: "<|endoftext|>"
54
54
 
55
55
  # Bigcode
56
56
  - name: bigcode/santacoder
57
57
  tokenizer_spec:
58
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
58
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
59
59
  end_of_text_token: "<|endoftext|>"
60
60
  prefix_token: "<|endoftext|>"
61
61
  - name: bigcode/starcoder
62
62
  tokenizer_spec:
63
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
63
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
64
64
  end_of_text_token: "<|endoftext|>"
65
65
  prefix_token: "<|endoftext|>"
66
66
 
67
67
  # Bigscience
68
68
  - name: bigscience/bloom
69
69
  tokenizer_spec:
70
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
70
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
71
71
  end_of_text_token: "</s>"
72
72
  prefix_token: "</s>"
73
73
  - name: bigscience/T0pp
74
74
  tokenizer_spec:
75
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
75
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
76
76
  end_of_text_token: "</s>"
77
77
  prefix_token: ""
78
78
 
79
79
  # Cohere
80
80
  - name: cohere/cohere
81
81
  tokenizer_spec:
82
- class_name: "helm.proxy.tokenizers.cohere_tokenizer.CohereTokenizer"
82
+ class_name: "helm.tokenizers.cohere_tokenizer.CohereTokenizer"
83
83
  end_of_text_token: ""
84
84
  prefix_token: ":"
85
85
 
86
+ # Databricks
87
+ - name: databricks/dbrx-instruct
88
+ tokenizer_spec:
89
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
90
+ end_of_text_token: "<|endoftext|>"
91
+ prefix_token: "<|endoftext|>"
92
+
93
+ # DeepSeek
94
+ - name: deepseek-ai/deepseek-llm-67b-chat
95
+ tokenizer_spec:
96
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
97
+ end_of_text_token: "<|end▁of▁sentence|>"
98
+ prefix_token: "<|begin▁of▁sentence|>"
99
+
86
100
  # EleutherAI
87
101
  - name: EleutherAI/gpt-j-6B
88
102
  tokenizer_spec:
89
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
103
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
90
104
  end_of_text_token: "<|endoftext|>"
91
105
  prefix_token: "<|endoftext|>"
92
106
  - name: EleutherAI/gpt-neox-20b
93
107
  tokenizer_spec:
94
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
108
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
95
109
  end_of_text_token: "<|endoftext|>"
96
110
  prefix_token: "<|endoftext|>"
97
111
 
98
112
  # Facebook
99
113
  - name: facebook/opt-66b
100
114
  tokenizer_spec:
101
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
115
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
102
116
  end_of_text_token: "</s>"
103
117
  prefix_token: "</s>"
104
118
 
105
119
  # Google
106
120
  - name: google/t5-11b
107
121
  tokenizer_spec:
108
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
122
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
123
+ args:
124
+ pretrained_model_name_or_path: google-t5/t5-11b
109
125
  end_of_text_token: "</s>"
110
126
  prefix_token: ""
111
127
  - name: google/flan-t5-xxl
112
128
  tokenizer_spec:
113
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
129
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
114
130
  end_of_text_token: "</s>"
115
131
  prefix_token: ""
116
132
  - name: google/ul2
117
133
  tokenizer_spec:
118
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
134
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
119
135
  end_of_text_token: "</s>"
120
136
  prefix_token: ""
121
137
  - name: google/mt5-base
122
138
  tokenizer_spec:
123
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
139
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
124
140
  end_of_text_token: "</s>"
125
141
  prefix_token: ""
126
142
  - name: google/text-bison@001
127
143
  tokenizer_spec:
128
- class_name: "helm.proxy.tokenizers.vertexai_tokenizer.VertexAITokenizer"
144
+ class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer"
145
+ end_of_text_token: "</s>"
146
+ prefix_token: ""
147
+ - name: google/text-bison@002
148
+ tokenizer_spec:
149
+ class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer"
129
150
  end_of_text_token: "</s>"
130
151
  prefix_token: ""
131
152
  - name: google/text-unicorn@001
132
153
  tokenizer_spec:
133
- class_name: "helm.proxy.tokenizers.vertexai_tokenizer.VertexAITokenizer"
154
+ class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer"
134
155
  end_of_text_token: "</s>"
135
156
  prefix_token: ""
157
+ - name: google/gemma-2b
158
+ tokenizer_spec:
159
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
160
+ end_of_text_token: "<eos>"
161
+ prefix_token: "<bos>"
136
162
 
137
163
  # Hf-internal-testing
138
164
 
@@ -140,43 +166,51 @@ tokenizer_configs:
140
166
  # https://huggingface.co/docs/transformers/main/en/model_doc/llama#transformers.LlamaTokenizerFast.example
141
167
  - name: hf-internal-testing/llama-tokenizer
142
168
  tokenizer_spec:
143
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
169
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
144
170
  end_of_text_token: "</s>"
145
171
  prefix_token: "<s>"
146
172
 
147
173
  # HuggingFaceM4
148
174
  - name: HuggingFaceM4/idefics-9b
149
175
  tokenizer_spec:
150
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
176
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
151
177
  end_of_text_token: "</s>"
152
178
  prefix_token: "<s>"
153
179
  - name: HuggingFaceM4/idefics-9b-instruct
154
180
  tokenizer_spec:
155
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
181
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
156
182
  end_of_text_token: "</s>"
157
183
  prefix_token: "<s>"
158
184
  - name: HuggingFaceM4/idefics-80b
159
185
  tokenizer_spec:
160
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
186
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
161
187
  end_of_text_token: "</s>"
162
188
  prefix_token: "<s>"
163
189
  - name: HuggingFaceM4/idefics-80b-instruct
164
190
  tokenizer_spec:
165
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
191
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
166
192
  end_of_text_token: "</s>"
167
193
  prefix_token: "<s>"
194
+
195
+ - name: anas-awadalla/mpt-7b
196
+ tokenizer_spec:
197
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
198
+ end_of_text_token: "<|endoftext|>"
199
+ prefix_token: ""
168
200
 
169
201
  # Huggingface
170
202
  - name: huggingface/gpt2
171
203
  tokenizer_spec:
172
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
204
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
205
+ args:
206
+ pretrained_model_name_or_path: openai-community/gpt2
173
207
  end_of_text_token: "<|endoftext|>"
174
208
  prefix_token: "<|endoftext|>"
175
209
 
176
210
  # Lighting AI
177
211
  - name: lightningai/lit-gpt
178
212
  tokenizer_spec:
179
- class_name: "helm.proxy.tokenizers.lit_gpt_tokenizer.LitGPTTokenizer"
213
+ class_name: "helm.tokenizers.lit_gpt_tokenizer.LitGPTTokenizer"
180
214
  end_of_text_token: "<|endoftext|>"
181
215
  prefix_token: "<|endoftext|>"
182
216
 
@@ -194,70 +228,135 @@ tokenizer_configs:
194
228
  # 'https://huggingface.co/models'
195
229
  - name: meta-llama/Llama-2-7b-hf
196
230
  tokenizer_spec:
197
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
231
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
198
232
  end_of_text_token: "</s>"
199
233
  prefix_token: "<s>"
200
234
 
235
+ - name: meta/llama-3-8b
236
+ tokenizer_spec:
237
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
238
+ args:
239
+ pretrained_model_name_or_path: meta-llama/Meta-Llama-3-8B
240
+ prefix_token: "<|begin_of_text|>"
241
+ end_of_text_token: "<|end_of_text|>"
201
242
 
202
243
  # 01-ai
203
244
  - name: 01-ai/Yi-6B
204
245
  tokenizer_spec:
205
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
246
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
206
247
  end_of_text_token: "</s>"
207
248
  prefix_token: "<s>"
208
249
 
209
- # Microsoft
210
- - name: microsoft/gpt2
211
- tokenizer_spec:
212
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
250
+
251
+ # Allen Institute for AI
252
+ # The allenai/olmo-7b requires Python 3.9 or newer.
253
+ # To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first.
254
+ - name: allenai/olmo-7b
255
+ tokenizer_spec:
256
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
257
+ args:
258
+ trust_remote_code: true
213
259
  end_of_text_token: "<|endoftext|>"
214
- prefix_token: "<<"
260
+ prefix_token: ""
261
+
262
+
263
+ # Microsoft
264
+ - name: microsoft/phi-2
265
+ tokenizer_spec:
266
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
267
+ end_of_text_token: "<|endoftext|>"
268
+ prefix_token: "<|endoftext|>"
215
269
 
216
270
  # Mistralai
217
271
  - name: mistralai/Mistral-7B-v0.1
218
272
  tokenizer_spec:
219
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
273
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
220
274
  end_of_text_token: "</s>"
221
275
  prefix_token: "<s>"
222
276
 
223
277
  # Neurips
224
278
  - name: neurips/local
225
279
  tokenizer_spec:
226
- class_name: "helm.proxy.tokenizers.http_model_tokenizer.HTTPModelTokenizer"
280
+ class_name: "helm.tokenizers.http_model_tokenizer.HTTPModelTokenizer"
227
281
  end_of_text_token: "<|endoftext|>"
228
282
  prefix_token: "<|endoftext|>"
229
283
 
230
284
  # Openai
231
285
  - name: openai/cl100k_base
232
286
  tokenizer_spec:
233
- class_name: "helm.proxy.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"
287
+ class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"
234
288
  end_of_text_token: "<|endoftext|>"
235
289
  prefix_token: "<|endoftext|>"
236
290
 
291
+ - name: openai/clip-vit-large-patch14
292
+ tokenizer_spec:
293
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
294
+ end_of_text_token: ""
295
+ prefix_token: ""
296
+
297
+ - name: qwen/qwen-7b
298
+ tokenizer_spec:
299
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
300
+ args:
301
+ pretrained_model_name_or_path: Qwen/Qwen-7B
302
+ trust_remote_code: true
303
+ end_of_text_token: "<|endoftext|>"
304
+ prefix_token: ""
305
+
306
+ - name: qwen/qwen1.5-7b
307
+ tokenizer_spec:
308
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
309
+ args:
310
+ pretrained_model_name_or_path: Qwen/Qwen1.5-7B
311
+ end_of_text_token: "<|endoftext|>"
312
+ prefix_token: ""
313
+
314
+ - name: qwen/qwen-vl
315
+ tokenizer_spec:
316
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
317
+ args:
318
+ pretrained_model_name_or_path: Qwen/Qwen-VL
319
+ trust_remote_code: true
320
+ # Source: https://github.com/QwenLM/Qwen-VL
321
+ end_of_text_token: "<|endoftext|>"
322
+ prefix_token: ""
323
+
324
+ - name: qwen/qwen-vl-chat
325
+ tokenizer_spec:
326
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
327
+ args:
328
+ pretrained_model_name_or_path: Qwen/Qwen-VL-Chat
329
+ trust_remote_code: true
330
+ # Source: https://github.com/QwenLM/Qwen-VL
331
+ end_of_text_token: "<|endoftext|>"
332
+ prefix_token: ""
333
+
237
334
  # Tiiuae
238
335
  - name: tiiuae/falcon-7b
239
336
  tokenizer_spec:
240
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
337
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
241
338
  end_of_text_token: "<|endoftext|>"
242
339
  prefix_token: ""
243
340
 
244
341
  # TsinghuaKEG
245
342
  - name: TsinghuaKEG/ice
246
343
  tokenizer_spec:
247
- class_name: "helm.proxy.tokenizers.ice_tokenizer.ICETokenizer"
344
+ class_name: "helm.tokenizers.ice_tokenizer.ICETokenizer"
248
345
  end_of_text_token: "</s>"
249
346
  prefix_token: ""
250
347
 
251
348
  # Writer
252
349
  - name: writer/gpt2
253
350
  tokenizer_spec:
254
- class_name: "helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
351
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
352
+ args:
353
+ pretrained_model_name_or_path: openai-community/gpt2
255
354
  end_of_text_token: ""
256
355
  prefix_token: ""
257
356
 
258
357
  # Yandex
259
358
  - name: Yandex/yalm
260
359
  tokenizer_spec:
261
- class_name: "helm.proxy.tokenizers.yalm_tokenizer.YaLMTokenizer"
360
+ class_name: "helm.tokenizers.yalm_tokenizer.YaLMTokenizer"
262
361
  end_of_text_token: "</s>"
263
362
  prefix_token: "</s>"
helm/proxy/accounts.py CHANGED
@@ -23,6 +23,9 @@ DEFAULT_QUOTAS = {
23
23
  "jurassic": {"daily": 10000},
24
24
  "gooseai": {"daily": 10000},
25
25
  "cohere": {"daily": 10000},
26
+ "dall_e": {"daily": 5}, # In terms of the number of generated images
27
+ "together_vision": {"daily": 30},
28
+ "simple": {"daily": 10000},
26
29
  }
27
30
 
28
31
 
@@ -303,7 +306,7 @@ class Accounts:
303
306
  model_group: str,
304
307
  granularity: str,
305
308
  compute_period: Callable[[], str],
306
- ):
309
+ ) -> None:
307
310
  """Helper that checks the usage at a certain granularity (e.g., daily, monthly, total)."""
308
311
 
309
312
  model_group_usages = account.usages.get(model_group)
@@ -321,14 +324,38 @@ class Accounts:
321
324
  if not usage.can_use():
322
325
  raise InsufficientQuotaError(f"{granularity} quota ({usage.quota}) for {model_group} already used up")
323
326
 
327
+ def check_non_empty_quota(
328
+ account: Account,
329
+ model_group: str,
330
+ ) -> None:
331
+ """Helper that checks that the account has quota at some granularity.
332
+
333
+ At each granularity, a quota of None means unlimited quota.
334
+ However, if the quota is None at every granularity, it means that there is no quota.
335
+ To enforce this rule, this helper raises a InsufficientQuotaError if the quota is None
336
+ at every granularity."""
337
+ model_group_usages = account.usages.get(model_group)
338
+ if model_group_usages is None:
339
+ raise InsufficientQuotaError(f"No quota for {model_group}")
340
+ if all(
341
+ [
342
+ granularity_usage.quota is None or granularity_usage.quota <= 0
343
+ for granularity_usage in model_group_usages.values()
344
+ ]
345
+ ):
346
+ raise InsufficientQuotaError(f"No quota for {model_group}")
347
+
324
348
  if self.root_mode:
325
349
  return
326
350
 
327
351
  with SqliteDict(self.path) as cache:
328
352
  account: Account = from_dict(Account, cache[api_key])
329
- granular_check_can_use(account, model_group, "daily", compute_daily_period)
330
- granular_check_can_use(account, model_group, "monthly", compute_monthly_period)
331
- granular_check_can_use(account, model_group, "total", compute_total_period)
353
+ if account.is_admin:
354
+ return
355
+ granular_check_can_use(account, model_group, "daily", compute_daily_period)
356
+ granular_check_can_use(account, model_group, "monthly", compute_monthly_period)
357
+ granular_check_can_use(account, model_group, "total", compute_total_period)
358
+ check_non_empty_quota(account, model_group)
332
359
 
333
360
  def use(self, api_key: str, model_group: str, delta: int):
334
361
  """
@@ -4,6 +4,7 @@ import os
4
4
  from threading import Lock
5
5
  from typing import Dict, List, Optional, Tuple, Union
6
6
  import re
7
+ import sys
7
8
 
8
9
  from helm.common.critique_request import (
9
10
  CritiqueRequest,
@@ -15,6 +16,8 @@ from helm.common.critique_request import (
15
16
  from helm.common.hierarchical_logger import hlog
16
17
  from helm.proxy.critique.mechanical_turk_utils import replace_emoji_characters
17
18
 
19
+ csv.field_size_limit(sys.maxsize)
20
+
18
21
  # A representation of fields that can be used as a dict key.
19
22
  _CritiqueRequestKey = Tuple[Tuple[str, str], ...]
20
23
 
@@ -2,7 +2,7 @@ from typing import Dict, List, Union, Optional
2
2
  import string
3
3
  import dataclasses
4
4
 
5
- from helm.benchmark.run_specs import get_default_model_deployment_for_model
5
+ from helm.benchmark.run_spec_factory import get_default_model_deployment_for_model
6
6
  from helm.common.critique_request import (
7
7
  CritiqueRequest,
8
8
  CritiqueRequestResult,
@@ -12,8 +12,8 @@ from helm.common.critique_request import (
12
12
  )
13
13
  from helm.common.hierarchical_logger import hlog
14
14
  from helm.common.optional_dependencies import handle_module_not_found_error
15
- from helm.common.request import Request, RequestResult, Sequence
16
- from helm.proxy.clients.client import Client
15
+ from helm.common.request import Request, RequestResult, GeneratedOutput
16
+ from helm.clients.client import Client
17
17
  from helm.proxy.critique.critique_client import CritiqueClient
18
18
 
19
19
 
@@ -114,7 +114,7 @@ class ModelCritiqueClient(CritiqueClient):
114
114
  return answers
115
115
 
116
116
  def _multiple_choice_completion_to_answer(
117
- self, question: CritiqueQuestionTemplate, completion: Sequence
117
+ self, question: CritiqueQuestionTemplate, completion: GeneratedOutput
118
118
  ) -> Optional[str]:
119
119
  """Convert a multiple choice completion to an answer."""
120
120
  assert question.question_type == "multiple_choice"
@@ -131,7 +131,7 @@ class ModelCritiqueClient(CritiqueClient):
131
131
  return None
132
132
 
133
133
  def _checkbox_completion_to_answer(
134
- self, question: CritiqueQuestionTemplate, completion: Sequence
134
+ self, question: CritiqueQuestionTemplate, completion: GeneratedOutput
135
135
  ) -> Optional[List[str]]:
136
136
  """Convert a checkbox completion to an answer."""
137
137
  assert question.question_type == "checkbox"
@@ -147,7 +147,9 @@ class ModelCritiqueClient(CritiqueClient):
147
147
  hlog(f"Error parsing answer: {e}. Skipping question (and so the respondent entirely)")
148
148
  return None
149
149
 
150
- def _free_response_completion_to_answer(self, question: CritiqueQuestionTemplate, completion: Sequence) -> str:
150
+ def _free_response_completion_to_answer(
151
+ self, question: CritiqueQuestionTemplate, completion: GeneratedOutput
152
+ ) -> str:
151
153
  """Convert a free response completion to an answer."""
152
154
  assert question.question_type == "free_response"
153
155
  return completion.text
@@ -21,6 +21,8 @@ example_queries = [
21
21
  """
22
22
  temperature: 0.5 # Medium amount of randomness
23
23
  stop_sequences: [.] # Stop when you hit a period
24
+ model: openai/gpt-3.5-turbo-0613
25
+ model_deployment: openai/gpt-3.5-turbo-0613
24
26
  """
25
27
  ),
26
28
  environments="",
@@ -31,7 +33,9 @@ example_queries = [
31
33
  """
32
34
  temperature: 0.5 # Medium amount of randomness
33
35
  stop_sequences: [\\n] # Stop when you hit a newline
34
- num_completions: 10 # Generate many samples
36
+ num_completions: 5 # Generate many samples
37
+ model: openai/gpt-3.5-turbo-0613
38
+ model_deployment: openai/gpt-3.5-turbo-0613
35
39
  """
36
40
  ),
37
41
  environments="",
@@ -42,7 +46,9 @@ example_queries = [
42
46
  """
43
47
  echo_prompt: true # Analyze the prompt
44
48
  max_tokens: 0 # Don't generate any more
45
- top_k_per_token: 10 # Show alternatives for each position
49
+ top_k_per_token: 5 # Show alternatives for each position
50
+ model: openai/davinci-002
51
+ model_deployment: openai/davinci-002
46
52
  """
47
53
  ),
48
54
  environments=dedent(""),
@@ -53,6 +59,8 @@ example_queries = [
53
59
  """
54
60
  temperature: 0 # Deterministic
55
61
  max_tokens: 50
62
+ model: openai/gpt-3.5-turbo-0613
63
+ model_deployment: openai/gpt-3.5-turbo-0613
56
64
  """
57
65
  ),
58
66
  environments="",
@@ -63,13 +71,15 @@ example_queries = [
63
71
  """
64
72
  temperature: 0
65
73
  stop_sequences: [.]
66
- model_deployment: ${model_deployment} # Try out multiple models
74
+ # Try out multiple models
75
+ model: ${model}
76
+ model_deployment: ${model}
67
77
  """
68
78
  ),
69
79
  environments=dedent(
70
80
  """
71
81
  occupation: [mathematician, lawyer, doctor]
72
- model_deployment: [openai/davinci, ai21/j1-jumbo]
82
+ model: [openai/gpt-3.5-turbo-0613, openai/gpt-3.5-turbo-1106]
73
83
  """
74
84
  ),
75
85
  ),
@@ -88,12 +98,14 @@ example_queries = [
88
98
  temperature: 0.5
89
99
  stop_sequences: [\\n]
90
100
  num_completions: 5
91
- model_deployment: ${model_deployment} # Try out GPT-3 and Jurassic
101
+ # Try out multiple models
102
+ model: ${model}
103
+ model_deployment: ${model}
92
104
  """
93
105
  ),
94
106
  environments=dedent(
95
107
  """
96
- model_deployment: [openai/davinci, ai21/j1-jumbo]
108
+ model: [openai/gpt-3.5-turbo-0613, openai/gpt-3.5-turbo-1106]
97
109
  """
98
110
  ),
99
111
  ),
@@ -122,20 +134,23 @@ example_queries = [
122
134
  temperature: 0
123
135
  max_tokens: 1
124
136
  top_k_per_token: 4
125
- model_deployment: ${model_deployment} # Try out GPT-3 and Jurassic
137
+ # Try out multiple models
138
+ model: ${model}
139
+ model_deployment: ${model}
126
140
  """
127
141
  ),
128
142
  environments=dedent(
129
143
  """
130
- model_deployment: [openai/davinci, ai21/j1-jumbo]
144
+ model: [openai/gpt-3.5-turbo-0613, openai/gpt-3.5-turbo-1106]
131
145
  """
132
146
  ),
133
147
  ),
134
148
  Query(
135
- prompt="Takes two vectors a and b and returns their Euclidean distance",
149
+ prompt="Write a Python function that takes two vectors a and b and returns their Euclidean distance.",
136
150
  settings=dedent(
137
151
  """
138
- model_deployment: openai/code-davinci-001 # Codex for code generation
152
+ model: openai/gpt-3.5-turbo-0613
153
+ model_deployment: openai/gpt-3.5-turbo-0613
139
154
  """
140
155
  ),
141
156
  environments="",
@@ -144,19 +159,16 @@ example_queries = [
144
159
  prompt="The quick brown fox",
145
160
  settings=dedent(
146
161
  """
147
- model_deployment: ${model_deployment}
148
162
  temperature: 0.3
149
163
  stop_sequences: [\\n]
164
+ # Try out multiple models
165
+ model: ${model}
166
+ model_deployment: ${model}
150
167
  """
151
168
  ),
152
169
  environments=dedent(
153
170
  """
154
- model_deployment: [
155
- "openai/davinci", "openai/text-davinci-002",
156
- "openai/text-davinci-003", "ai21/j1-grande-v2-beta",
157
- "together/gpt-j-6b", "together/gpt-jt-6b-v1",
158
- "together/bloom", "together/opt-175b"
159
- ]
171
+ model: [openai/gpt-3.5-turbo-0613, openai/gpt-3.5-turbo-1106]
160
172
  """
161
173
  ),
162
174
  ),