crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -1,7 +1,8 @@
1
1
  import os
2
- from typing import Any, Dict, Optional
2
+ from typing import Any, Dict, Optional, cast
3
3
  from threading import Lock
4
4
  from helm.common.cache import CacheConfig
5
+ from helm.common.concurrency import ThreadSafeWrapper
5
6
 
6
7
  from transformers import AutoTokenizer, PreTrainedTokenizerBase
7
8
 
@@ -10,46 +11,36 @@ from .caching_tokenizer import CachingTokenizer
10
11
  from .tokenizer import cleanup_tokens
11
12
 
12
13
 
13
- # TODO: Delete this.
14
- _MODEL_NAME_ALIASES: Dict[str, str] = {
15
- "google/t5-11b": "t5-11b",
16
- "huggingface/gpt2": "gpt2",
17
- "huggingface/santacoder": "bigcode/santacoder",
18
- "huggingface/starcoder": "bigcode/starcoder",
19
- }
20
- """Mapping of some HELM model names to Hugging Face pretrained model name."""
14
+ WrappedPreTrainedTokenizer = ThreadSafeWrapper[PreTrainedTokenizerBase]
15
+ """Thread safe wrapper around Hugging Face PreTrainedTokenizerBase.
21
16
 
17
+ Hugging Face PreTrainedTokenizerBase is thread-hostile and using it from multiple threads
18
+ simultaneously can result in an "Already borrowed" error (#1421). This wrapper ensures
19
+ that a lock is held when using the PreTrainedTokenizerBase.
22
20
 
23
- # TODO: Delete this.
24
- def resolve_alias(model_name: str) -> str:
25
- """Resolve some HELM model names to Hugging Face pretrained model name."""
26
- return _MODEL_NAME_ALIASES.get(model_name, model_name)
21
+ Example usage:
22
+
23
+ with wrapped_tokenizer as tokenizer:
24
+ tokenizer.encode("...")
25
+ """
27
26
 
28
27
 
29
28
  class HuggingFaceTokenizer(CachingTokenizer):
30
- _tokenizers: Dict[str, PreTrainedTokenizerBase] = {}
29
+ _tokenizers: Dict[str, WrappedPreTrainedTokenizer] = {}
31
30
  _tokenizers_lock: Lock = Lock()
32
31
 
33
- def __init__(
34
- self,
35
- cache_config: CacheConfig,
36
- pretrained_model_name_or_path: Optional[str] = None,
37
- revision: Optional[str] = None,
38
- ):
32
+ def __init__(self, cache_config: CacheConfig, pretrained_model_name_or_path: Optional[str] = None, **kwargs):
39
33
  super().__init__(cache_config=cache_config)
40
34
  self._pretrained_model_name_or_path = pretrained_model_name_or_path
41
- self._revision = revision
35
+ self._kwargs = kwargs
42
36
 
43
37
  @staticmethod
44
- def create_tokenizer(pretrained_model_name_or_path: str, revision: Optional[str] = None) -> PreTrainedTokenizerBase:
38
+ def create_tokenizer(pretrained_model_name_or_path: str, **kwargs) -> WrappedPreTrainedTokenizer:
45
39
  """Loads tokenizer using files from disk if they exist. Otherwise, downloads from HuggingFace."""
46
40
  # To avoid deadlocks when using HuggingFace tokenizers with multiple processes
47
41
  # TODO: Figure out if we actually need this.
48
42
  os.environ["TOKENIZERS_PARALLELISM"] = "False"
49
43
 
50
- tokenizer_kwargs = {}
51
- if revision is not None:
52
- tokenizer_kwargs["revision"] = revision
53
44
  try:
54
45
  # From the Hugging Face documentation, "local_files_only(defaults to False) —
55
46
  # Whether or not to only look at local files".
@@ -60,19 +51,23 @@ class HuggingFaceTokenizer(CachingTokenizer):
60
51
  # From https://huggingface.co/course/chapter6/3, "slow tokenizers are those written in Python inside
61
52
  # the Hugging Face Transformers library, while the fast versions are the ones provided by Hugging Face
62
53
  # Tokenizers, which are written in Rust." So, use the "fast" version of the tokenizers if available.
63
- return AutoTokenizer.from_pretrained(
64
- pretrained_model_name_or_path, local_files_only=True, use_fast=True, **tokenizer_kwargs
54
+ return WrappedPreTrainedTokenizer(
55
+ AutoTokenizer.from_pretrained(
56
+ pretrained_model_name_or_path, local_files_only=True, use_fast=True, **kwargs
57
+ )
65
58
  )
66
59
  except OSError:
67
60
  hlog(f"Local files do not exist for HuggingFace tokenizer: {pretrained_model_name_or_path}. Downloading...")
68
- return AutoTokenizer.from_pretrained(
69
- pretrained_model_name_or_path, local_files_only=False, use_fast=True, **tokenizer_kwargs
61
+ return WrappedPreTrainedTokenizer(
62
+ AutoTokenizer.from_pretrained(
63
+ pretrained_model_name_or_path, local_files_only=False, use_fast=True, **kwargs
64
+ )
70
65
  )
71
66
 
72
67
  @staticmethod
73
68
  def get_tokenizer(
74
- helm_tokenizer_name: str, pretrained_model_name_or_path: str, revision: Optional[str] = None
75
- ) -> PreTrainedTokenizerBase:
69
+ helm_tokenizer_name: str, pretrained_model_name_or_path: str, **kwargs
70
+ ) -> WrappedPreTrainedTokenizer:
76
71
  """
77
72
  Checks if the desired tokenizer is cached. Creates the tokenizer if it's not cached.
78
73
  Returns the tokenizer.
@@ -80,42 +75,39 @@ class HuggingFaceTokenizer(CachingTokenizer):
80
75
  with HuggingFaceTokenizer._tokenizers_lock:
81
76
  if helm_tokenizer_name not in HuggingFaceTokenizer._tokenizers:
82
77
  with htrack_block(
83
- f"Loading {pretrained_model_name_or_path} (revision={revision}) "
78
+ f"Loading {pretrained_model_name_or_path} (kwargs={kwargs}) "
84
79
  f"for HELM tokenizer {helm_tokenizer_name} with Hugging Face Transformers"
85
80
  ):
86
81
  # Keep the tokenizer in memory, so we don't recreate it for future requests
87
82
  HuggingFaceTokenizer._tokenizers[helm_tokenizer_name] = HuggingFaceTokenizer.create_tokenizer(
88
- pretrained_model_name_or_path, revision
83
+ pretrained_model_name_or_path, **kwargs
89
84
  )
90
85
  return HuggingFaceTokenizer._tokenizers[helm_tokenizer_name]
91
86
 
92
- def _get_tokenizer_for_request(self, request: Dict[str, Any]) -> PreTrainedTokenizerBase:
87
+ def _get_tokenizer_for_request(self, request: Dict[str, Any]) -> WrappedPreTrainedTokenizer:
93
88
  """Method used in both _tokenize_do_it and _decode_do_it to get the tokenizer."""
94
- pretrained_model_name_or_path: str
95
- if self._pretrained_model_name_or_path:
96
- pretrained_model_name_or_path = self._pretrained_model_name_or_path
97
- else:
98
- pretrained_model_name_or_path = resolve_alias(request["tokenizer"])
99
- _tokenizer = HuggingFaceTokenizer.get_tokenizer(
89
+ pretrained_model_name_or_path = (
90
+ self._pretrained_model_name_or_path if self._pretrained_model_name_or_path else request["tokenizer"]
91
+ )
92
+ return HuggingFaceTokenizer.get_tokenizer(
100
93
  helm_tokenizer_name=request["tokenizer"],
101
94
  pretrained_model_name_or_path=pretrained_model_name_or_path,
102
- revision=self._revision,
95
+ **self._kwargs,
103
96
  )
104
- return _tokenizer
105
97
 
106
98
  def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
107
- _tokenizer = self._get_tokenizer_for_request(request)
108
-
109
99
  if request["encode"]:
110
100
  if request["truncation"]:
111
- tokens = _tokenizer.encode(
112
- request["text"],
113
- truncation=request["truncation"],
114
- max_length=request["max_length"],
115
- add_special_tokens=False,
116
- )
101
+ with self._get_tokenizer_for_request(request) as tokenizer:
102
+ tokens = tokenizer.encode(
103
+ request["text"],
104
+ truncation=request["truncation"],
105
+ max_length=request["max_length"],
106
+ add_special_tokens=False,
107
+ )
117
108
  else:
118
- tokens = _tokenizer.encode(request["text"], add_special_tokens=False)
109
+ with self._get_tokenizer_for_request(request) as tokenizer:
110
+ tokens = tokenizer.encode(request["text"], add_special_tokens=False)
119
111
  else:
120
112
  if "gpt" in request["tokenizer"] or request["tokenizer"] in [
121
113
  "bigscience/bloom",
@@ -126,9 +118,10 @@ class HuggingFaceTokenizer(CachingTokenizer):
126
118
  # convert_tokens_to_string method. We prefer to use this method instead
127
119
  # of the hacky cleanup_tokens method below as it might handle cases
128
120
  # we haven't thought of in cleanup_tokens.
129
- tokens = [
130
- _tokenizer.convert_tokens_to_string([token]) for token in _tokenizer.tokenize(request["text"])
131
- ]
121
+ with self._get_tokenizer_for_request(request) as tokenizer:
122
+ tokens = [
123
+ tokenizer.convert_tokens_to_string([token]) for token in tokenizer.tokenize(request["text"])
124
+ ]
132
125
  else:
133
126
  # Tokenizes the text and returns the tokens as a list of strings,
134
127
  # not a list of token objects (otherwise "Hello world" would be"
@@ -138,14 +131,17 @@ class HuggingFaceTokenizer(CachingTokenizer):
138
131
  # But this replaces all the "▁" characters by "", which is not what we want.
139
132
  # This would be problematic as tokenize(" Hello", encode=False) would return ["Hello"]
140
133
  # Just like tokenize("Hello", encode=False) would return ["Hello"].
141
- tokens = _tokenizer.tokenize(request["text"])
134
+ with self._get_tokenizer_for_request(request) as tokenizer:
135
+ tokens = tokenizer.tokenize(request["text"])
136
+ # Some tokenizers (e.g. Qwen/Qwen-7B) return the tokens as bytes, so we have to decode them to strings.
137
+ if tokens and type(tokens[0]) == bytes:
138
+ tokens = [cast(bytes, token).decode(errors="ignore") for token in tokens]
142
139
  tokens = cleanup_tokens(tokens, request["tokenizer"])
143
140
  return {"tokens": tokens}
144
141
 
145
142
  def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
146
- _tokenizer = self._get_tokenizer_for_request(request)
147
-
148
- text = _tokenizer.decode(
149
- request["tokens"], clean_up_tokenization_spaces=request["clean_up_tokenization_spaces"]
150
- )
143
+ with self._get_tokenizer_for_request(request) as tokenizer:
144
+ text = tokenizer.decode(
145
+ request["tokens"], clean_up_tokenization_spaces=request["clean_up_tokenization_spaces"]
146
+ )
151
147
  return {"text": text}
@@ -0,0 +1,33 @@
1
+ from helm.common.tokenization_request import (
2
+ DecodeRequest,
3
+ DecodeRequestResult,
4
+ TokenizationRequest,
5
+ TokenizationRequestResult,
6
+ TokenizationToken,
7
+ )
8
+ from helm.tokenizers.tokenizer import Tokenizer
9
+
10
+
11
+ class SimpleTokenizer(Tokenizer):
12
+ """Simple tokenizer for tutorials and for debugging."""
13
+
14
+ def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
15
+ if request.encode:
16
+ return TokenizationRequestResult(
17
+ success=True,
18
+ cached=False,
19
+ tokens=[TokenizationToken(ord(character)) for character in request.text],
20
+ text=request.text,
21
+ )
22
+ else:
23
+ return TokenizationRequestResult(
24
+ success=True,
25
+ cached=False,
26
+ tokens=[TokenizationToken(character) for character in request.text],
27
+ text=request.text,
28
+ )
29
+
30
+ def decode(self, request: DecodeRequest) -> DecodeRequestResult:
31
+ return DecodeRequestResult(
32
+ success=True, cached=False, text="".join([chr(code_point) for code_point in request.tokens])
33
+ )
@@ -0,0 +1,82 @@
1
+ import os
2
+ import tempfile
3
+ from typing import List
4
+
5
+ from helm.common.cache import SqliteCacheConfig
6
+ from helm.common.general import parallel_map
7
+ from helm.common.tokenization_request import (
8
+ DecodeRequest,
9
+ DecodeRequestResult,
10
+ TokenizationRequest,
11
+ TokenizationRequestResult,
12
+ )
13
+ from helm.tokenizers.anthropic_tokenizer import AnthropicTokenizer
14
+
15
+
16
+ class TestAnthropicTokenizer:
17
+ TEST_PROMPT: str = "I am a computer scientist."
18
+ TEST_ENCODED: List[int] = [45, 1413, 269, 6797, 22228, 18]
19
+ TEST_TOKENS: List[str] = ["I", " am", " a", " computer", " scientist", "."]
20
+
21
+ def setup_method(self, method):
22
+ cache_file = tempfile.NamedTemporaryFile(delete=False)
23
+ self.cache_path: str = cache_file.name
24
+ self.tokenizer = AnthropicTokenizer(SqliteCacheConfig(self.cache_path))
25
+
26
+ def teardown_method(self, method):
27
+ os.remove(self.cache_path)
28
+
29
+ def test_tokenize(self):
30
+ request = TokenizationRequest(text=self.TEST_PROMPT, tokenizer="anthropic/claude")
31
+ result: TokenizationRequestResult = self.tokenizer.tokenize(request)
32
+ assert not result.cached, "First time making the tokenize request. Result should not be cached"
33
+ assert result.raw_tokens == self.TEST_TOKENS
34
+ result = self.tokenizer.tokenize(request)
35
+ assert result.cached, "Result should be cached"
36
+ assert result.raw_tokens == self.TEST_TOKENS
37
+
38
+ def test_encode(self):
39
+ request = TokenizationRequest(
40
+ text=self.TEST_PROMPT, tokenizer="anthropic/claude", encode=True, truncation=True, max_length=1
41
+ )
42
+ result: TokenizationRequestResult = self.tokenizer.tokenize(request)
43
+ assert not result.cached, "First time making the tokenize request. Result should not be cached"
44
+ assert result.raw_tokens == [self.TEST_ENCODED[0]]
45
+ result = self.tokenizer.tokenize(request)
46
+ assert result.cached, "Result should be cached"
47
+ assert result.raw_tokens == [self.TEST_ENCODED[0]]
48
+
49
+ request = TokenizationRequest(
50
+ text=self.TEST_PROMPT, tokenizer="anthropic/claude", encode=True, truncation=True, max_length=1024
51
+ )
52
+ result = self.tokenizer.tokenize(request)
53
+ assert not result.cached, "First time making this particular request. Result should not be cached"
54
+ assert result.raw_tokens == self.TEST_ENCODED
55
+
56
+ def test_decode(self):
57
+ request = DecodeRequest(tokens=self.TEST_ENCODED, tokenizer="anthropic/claude")
58
+ result: DecodeRequestResult = self.tokenizer.decode(request)
59
+ assert not result.cached, "First time making the decode request. Result should not be cached"
60
+ assert result.text == self.TEST_PROMPT
61
+ result = self.tokenizer.decode(request)
62
+ assert result.cached, "Result should be cached"
63
+ assert result.text == self.TEST_PROMPT
64
+
65
+ def test_already_borrowed(self):
66
+ """Test workaround of the "Already borrowed" bug (#1421) caused by the thread-hostile Anthropic tokenizer,
67
+ which is a thin wrapper around a Hugging Face FastTokenizer"""
68
+
69
+ def make_tokenize_request(seed: int) -> None:
70
+ request_length = 10
71
+ truncation = bool(seed % 2)
72
+ self.tokenizer.tokenize(
73
+ # The truncation parameter requires setting a flag on the Rust FastTokenizer.
74
+                 # Concurrent requests cause concurrent mutations, which results in a Rust concurrency error.
75
+ TokenizationRequest(
76
+ text=str(seed) * request_length, tokenizer="anthropic/claude", encode=True, truncation=truncation
77
+ )
78
+ )
79
+
80
+ num_requests = 100
81
+ # Should not raise "Already borrowed" error
82
+ parallel_map(make_tokenize_request, list(range(num_requests)), parallelism=8)
@@ -0,0 +1,136 @@
1
+ import os
2
+ import tempfile
3
+ from typing import Optional
4
+
5
+ from helm.common.cache import SqliteCacheConfig
6
+ from helm.common.general import parallel_map, singleton
7
+ from helm.common.tokenization_request import (
8
+ DecodeRequest,
9
+ DecodeRequestResult,
10
+ TokenizationRequest,
11
+ TokenizationRequestResult,
12
+ )
13
+ from .huggingface_tokenizer import HuggingFaceTokenizer
14
+
15
+
16
+ class TestHuggingFaceGPT2Tokenizer:
17
+ def setup_method(self, method):
18
+ cache_file = tempfile.NamedTemporaryFile(delete=False)
19
+ self.cache_path: str = cache_file.name
20
+ self.tokenizer = HuggingFaceTokenizer(SqliteCacheConfig(self.cache_path))
21
+
22
+ def teardown_method(self, method):
23
+ os.remove(self.cache_path)
24
+
25
+ def test_tokenize(self):
26
+ request = TokenizationRequest(text="I am a computer scientist.", tokenizer="huggingface/gpt2")
27
+ result: TokenizationRequestResult = self.tokenizer.tokenize(request)
28
+ assert not result.cached, "First time making the tokenize request. Result should not be cached"
29
+ result = self.tokenizer.tokenize(request)
30
+ assert result.cached, "Result should be cached"
31
+ assert result.raw_tokens == ["I", " am", " a", " computer", " scientist", "."]
32
+
33
+ def test_encode(self):
34
+ request = TokenizationRequest(
35
+ text="I am a computer scientist.", tokenizer="huggingface/gpt2", encode=True, truncation=True, max_length=1
36
+ )
37
+ result: TokenizationRequestResult = self.tokenizer.tokenize(request)
38
+ assert not result.cached, "First time making the tokenize request. Result should not be cached"
39
+ result = self.tokenizer.tokenize(request)
40
+ assert result.cached, "Result should be cached"
41
+ assert result.raw_tokens == [40]
42
+
43
+ request = TokenizationRequest(
44
+ text="I am a computer scientist.",
45
+ tokenizer="huggingface/gpt2",
46
+ encode=True,
47
+ truncation=True,
48
+ max_length=1024,
49
+ )
50
+ result = self.tokenizer.tokenize(request)
51
+ assert not result.cached, "First time making this particular request. Result should not be cached"
52
+ assert result.raw_tokens == [40, 716, 257, 3644, 11444, 13]
53
+
54
+ def test_decode(self):
55
+ request = DecodeRequest(tokens=[40, 716, 257, 3644, 11444, 13], tokenizer="huggingface/gpt2")
56
+ result: DecodeRequestResult = self.tokenizer.decode(request)
57
+ assert not result.cached, "First time making the decode request. Result should not be cached"
58
+ result = self.tokenizer.decode(request)
59
+ assert result.cached, "Result should be cached"
60
+ assert result.text == "I am a computer scientist."
61
+
62
+ def test_already_borrowed(self):
63
+ """Test workaround of the "Already borrowed" bug (#1421) caused by the thread-hostile Hugging Face tokenizer"""
64
+
65
+ def make_tokenize_request(seed: int) -> None:
66
+ request_length = 10
67
+ truncation = bool(seed % 2)
68
+ self.tokenizer.tokenize(
69
+ # The truncation parameter requires setting a flag on the Rust FastTokenizer.
70
+                 # Concurrent requests cause concurrent mutations, which results in a Rust concurrency error.
71
+ TokenizationRequest(
72
+ text=str(seed) * request_length, tokenizer="huggingface/gpt2", encode=True, truncation=truncation
73
+ )
74
+ )
75
+
76
+ num_requests = 100
77
+ # Should not raise "Already borrowed" error
78
+ parallel_map(make_tokenize_request, list(range(num_requests)), parallelism=8)
79
+
80
+
81
+ class TestHuggingFaceTokenizer:
82
+ # The following prompt has 51 tokens according to the GPT-2 tokenizer
83
+ TEST_PROMPT: str = (
84
+ "The Center for Research on Foundation Models (CRFM) is "
85
+ "an interdisciplinary initiative born out of the Stanford "
86
+ "Institute for Human-Centered Artificial Intelligence (HAI) "
87
+ "that aims to make fundamental advances in the study, development, "
88
+ "and deployment of foundation models."
89
+ )
90
+
91
+ @staticmethod
92
+ def verify_get_tokenizer(
93
+ tokenizer_name: str, expected_num_tokens: int, pretrained_model_name_or_path: Optional[str] = None
94
+ ):
95
+ wrapped_tokenizer = HuggingFaceTokenizer.get_tokenizer(
96
+ helm_tokenizer_name=tokenizer_name,
97
+ pretrained_model_name_or_path=pretrained_model_name_or_path or tokenizer_name,
98
+ )
99
+ assert tokenizer_name in HuggingFaceTokenizer._tokenizers, "Tokenizer should be cached"
100
+ with wrapped_tokenizer as tokenizer:
101
+ assert len(tokenizer.encode(TestHuggingFaceTokenizer.TEST_PROMPT)) == expected_num_tokens
102
+
103
+ def test_get_tokenizer_gpt2(self):
104
+ TestHuggingFaceTokenizer.verify_get_tokenizer("huggingface/gpt2", 51, pretrained_model_name_or_path="gpt2")
105
+
106
+ def test_get_tokenizer_gptj(self):
107
+ TestHuggingFaceTokenizer.verify_get_tokenizer("EleutherAI/gpt-j-6B", 51)
108
+
109
+ def test_get_tokenizer_gptneox(self):
110
+ TestHuggingFaceTokenizer.verify_get_tokenizer("EleutherAI/gpt-neox-20b", 52)
111
+
112
+ def test_get_tokenizer_bloom(self):
113
+ TestHuggingFaceTokenizer.verify_get_tokenizer("bigscience/bloom", 51)
114
+
115
+ def test_get_tokenizer_t0pp(self):
116
+ TestHuggingFaceTokenizer.verify_get_tokenizer("bigscience/T0pp", 58)
117
+
118
+ def test_get_tokenizer_t511b(self):
119
+ TestHuggingFaceTokenizer.verify_get_tokenizer("google/t5-11b", 58, pretrained_model_name_or_path="t5-11b")
120
+
121
+ def test_get_tokenizer_ul2(self):
122
+ TestHuggingFaceTokenizer.verify_get_tokenizer("google/ul2", 58)
123
+
124
+ def test_get_santacoder(self):
125
+ TestHuggingFaceTokenizer.verify_get_tokenizer("bigcode/santacoder", 62)
126
+
127
+ def test_get_clip_tokenizer(self):
128
+ TestHuggingFaceTokenizer.verify_get_tokenizer("openai/clip-vit-large-patch14", 50)
129
+
130
+ def test_gpt2_tokenize_eos(self):
131
+ eos_token: str = "<|endoftext|>"
132
+ wrapped_tokenizer = HuggingFaceTokenizer.get_tokenizer("huggingface/gpt2", pretrained_model_name_or_path="gpt2")
133
+ with wrapped_tokenizer as tokenizer:
134
+ token_ids = tokenizer.encode(eos_token)
135
+ assert singleton(token_ids) == 50256
136
+ assert tokenizer.decode(token_ids) == eos_token
@@ -0,0 +1,33 @@
1
+ from helm.common.tokenization_request import (
2
+ DecodeRequest,
3
+ TokenizationRequest,
4
+ TokenizationToken,
5
+ )
6
+ from helm.tokenizers.simple_tokenizer import SimpleTokenizer
7
+
8
+
9
+ def test_simple_tokenizer_tokenize():
10
+ tokenizer = SimpleTokenizer()
11
+ request = TokenizationRequest(tokenizer="simple/tokenizer1", text="otter 🦦")
12
+ result = tokenizer.tokenize(request)
13
+ assert result.success
14
+ assert not result.cached
15
+ assert result.tokens == [TokenizationToken(token) for token in ["o", "t", "t", "e", "r", " ", "🦦"]]
16
+
17
+
18
+ def test_simple_tokenizer_encode():
19
+ tokenizer = SimpleTokenizer()
20
+ request = TokenizationRequest(tokenizer="simple/tokenizer1", text="otter 🦦", encode=True)
21
+ result = tokenizer.tokenize(request)
22
+ assert result.success
23
+ assert not result.cached
24
+ assert result.tokens == [TokenizationToken(token) for token in [111, 116, 116, 101, 114, 32, 129446]]
25
+
26
+
27
+ def test_simple_tokenizer_decode():
28
+ tokenizer = SimpleTokenizer()
29
+ request = DecodeRequest(tokenizer="simple/tokenizer1", tokens=[111, 116, 116, 101, 114, 32, 129446])
30
+ result = tokenizer.decode(request)
31
+ assert result.success
32
+ assert not result.cached
33
+ assert result.text == "otter 🦦"
@@ -0,0 +1,97 @@
1
+ import base64
2
+ import dataclasses
3
+ import requests
4
+ from typing import Any, Dict, List, Union, Optional
5
+
6
+ from helm.common.cache import CacheConfig
7
+ from helm.common.optional_dependencies import handle_module_not_found_error
8
+ from helm.common.tokenization_request import (
9
+ TokenizationRequest,
10
+ TokenizationToken,
11
+ )
12
+ from helm.tokenizers.caching_tokenizer import CachingTokenizer
13
+ from helm.proxy.retry import NonRetriableException
14
+
15
+ try:
16
+ import google.auth
17
+ import google.auth.transport.requests
18
+ from google.auth.exceptions import DefaultCredentialsError
19
+ except ModuleNotFoundError as e:
20
+ handle_module_not_found_error(e, ["google"])
21
+
22
+
23
+ class VertexAIAuthenticationException(NonRetriableException):
24
+ pass
25
+
26
+
27
+ class VertexAITokenizer(CachingTokenizer):
28
+ """Google Vertex AI API for tokenization.
29
+
30
+ Doc: https://cloud.google.com/vertex-ai/docs/generative-ai/compute-token"""
31
+
32
+ def __init__(self, project_id: Optional[str], location: Optional[str], cache_config: CacheConfig) -> None:
33
+ super().__init__(cache_config)
34
+ if not project_id:
35
+ raise VertexAIAuthenticationException("credentials.conf is missing googleProjectId")
36
+ if not location:
37
+ raise VertexAIAuthenticationException("credentials.conf is missing googleLocation")
38
+ self.project_id = project_id
39
+ self.location = location
40
+ try:
41
+ creds, _ = google.auth.default(quota_project_id=self.project_id)
42
+ auth_req = google.auth.transport.requests.Request()
43
+ creds.refresh(auth_req)
44
+ except DefaultCredentialsError as e:
45
+ raise VertexAIAuthenticationException(
46
+ "Log in using `gcloud auth application-default login` to use the Google Vertex tokenizer API"
47
+ ) from e
48
+ self.access_token = creds.token
49
+
50
+ def _tokenization_request_to_cache_key(self, request: TokenizationRequest) -> Dict[str, Any]:
51
+ cache_key = dataclasses.asdict(request)
52
+ # Delete encode because the Google Vertex AI API simulateously gives string and integer tokens.
53
+ del cache_key["encode"]
54
+ return cache_key
55
+
56
+ def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
57
+ text: str = request["text"]
58
+ tokenizer_name = request["tokenizer"].split("/", maxsplit=1)[1]
59
+ url = (
60
+ f"https://{self.location}-aiplatform.googleapis.com/v1/projects/{self.project_id}/"
61
+ f"locations/{self.location}/publishers/google/models/{tokenizer_name}:computeTokens"
62
+ )
63
+
64
+ headers = {"Authorization": f"Bearer {self.access_token}"}
65
+ body = {
66
+ "instances": [{"prompt": text}],
67
+ }
68
+ response = requests.post(url, headers=headers, json=body)
69
+ response.raise_for_status()
70
+ return response.json()
71
+
72
+ def _tokenization_raw_response_to_tokens(
73
+ self, response: Dict[str, Any], request: TokenizationRequest
74
+ ) -> List[TokenizationToken]:
75
+ tokens: List[Union[int, str]]
76
+ response_instance = response["tokensInfo"][0]
77
+ if not response_instance:
78
+ # Response was empty
79
+ tokens = []
80
+ else:
81
+ if request.encode:
82
+ tokens = [int(token) for token in response_instance["tokenIds"]]
83
+ else:
84
+ # errors="ignore" is needed because the tokenizer is not guaranteed to tokenize on
85
+ # the boundary of UTF-8 characters. The tokenization boundary can be within the bytes of
86
+ # a UTF-8 character.
87
+ #
88
+ # TODO(#2141): Come up with a more correct way of doing this.
89
+ tokens = [
90
+ base64.decodebytes(token.encode()).decode("utf-8", errors="ignore")
91
+ for token in response_instance["tokens"]
92
+ ]
93
+ return [TokenizationToken(token) for token in tokens]
94
+
95
+ def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
96
+ # Defined for mypy but decode() already raises NotImplementedError
97
+ raise NotImplementedError("The Google Vertex AI API does not support decoding.")
@@ -21,9 +21,11 @@ class YaLMTokenizer(CachingTokenizer):
21
21
  # This is a problem because then tokenize(" Hello", encode=False) == tokenize("Hello", encode=False)
22
22
  # That is why we manually replace "▁" with a space.
23
23
  return {
24
- "tokens": token_ids
25
- if request["encode"]
26
- else cleanup_tokens(self._tokenizer.convert_ids_to_tokens(token_ids), request["tokenizer"])
24
+ "tokens": (
25
+ token_ids
26
+ if request["encode"]
27
+ else cleanup_tokens(self._tokenizer.convert_ids_to_tokens(token_ids), request["tokenizer"])
28
+ )
27
29
  }
28
30
 
29
31
  def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
File without changes
@@ -16,7 +16,7 @@ adapted from https://github.com/yandex/YaLM-100B/blob/main/megatron_lm/megatron/
16
16
  """
17
17
 
18
18
 
19
- YALM_TOKENIZER_PACKAGE: str = "helm.proxy.tokenizers.yalm_tokenizer_data"
19
+ YALM_TOKENIZER_PACKAGE: str = "helm.tokenizers.yalm_tokenizer_data"
20
20
  YALM_TOKENIZER_VOCAB_FILENAME: str = "voc_100b.sp"
21
21
 
22
22