crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (499)
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
  2. crfm_helm-0.5.1.dist-info/RECORD +654 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  11. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  12. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  13. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  14. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  15. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  16. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  17. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  18. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  19. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  20. helm/benchmark/adaptation/request_state.py +6 -1
  21. helm/benchmark/adaptation/scenario_state.py +6 -2
  22. helm/benchmark/annotation/annotator.py +43 -0
  23. helm/benchmark/annotation/annotator_factory.py +61 -0
  24. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  25. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  26. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  27. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  28. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  29. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  30. helm/benchmark/annotation_executor.py +124 -0
  31. helm/benchmark/augmentations/data_augmenter.py +0 -2
  32. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  33. helm/benchmark/augmentations/perturbation.py +25 -3
  34. helm/benchmark/augmentations/perturbation_description.py +1 -1
  35. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  36. helm/benchmark/augmentations/test_perturbation.py +41 -7
  37. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  38. helm/benchmark/config_registry.py +7 -1
  39. helm/benchmark/executor.py +46 -16
  40. helm/benchmark/huggingface_registration.py +20 -7
  41. helm/benchmark/metrics/basic_metrics.py +169 -664
  42. helm/benchmark/metrics/bbq_metrics.py +3 -4
  43. helm/benchmark/metrics/bias_metrics.py +6 -6
  44. helm/benchmark/metrics/classification_metrics.py +11 -8
  45. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  46. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  47. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  48. helm/benchmark/metrics/common_metric_specs.py +167 -0
  49. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  50. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  51. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  52. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  53. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  54. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  55. helm/benchmark/metrics/efficiency_metrics.py +213 -0
  56. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  57. helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
  58. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  59. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  60. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  61. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  62. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  63. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  64. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  65. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  66. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  68. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  69. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  70. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  71. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  72. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  73. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  74. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  75. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  76. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  77. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  78. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  79. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  80. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  81. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  82. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  83. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  84. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  85. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  86. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  87. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  88. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  89. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  90. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  91. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  92. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  93. helm/benchmark/metrics/metric.py +93 -172
  94. helm/benchmark/metrics/metric_name.py +0 -1
  95. helm/benchmark/metrics/metric_service.py +16 -0
  96. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  97. helm/benchmark/metrics/ranking_metrics.py +2 -2
  98. helm/benchmark/metrics/reference_metric.py +148 -0
  99. helm/benchmark/metrics/summac/model_summac.py +0 -2
  100. helm/benchmark/metrics/summarization_metrics.py +2 -2
  101. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  102. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  103. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  104. helm/benchmark/metrics/test_metric.py +2 -2
  105. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  106. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  107. helm/benchmark/metrics/toxicity_utils.py +23 -0
  108. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  109. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  110. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  111. helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
  112. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  113. helm/benchmark/model_deployment_registry.py +74 -0
  114. helm/benchmark/model_metadata_registry.py +41 -1
  115. helm/benchmark/multi_gpu_runner.py +133 -0
  116. helm/benchmark/presentation/create_plots.py +8 -7
  117. helm/benchmark/presentation/run_display.py +26 -10
  118. helm/benchmark/presentation/schema.py +15 -40
  119. helm/benchmark/presentation/summarize.py +119 -79
  120. helm/benchmark/presentation/table.py +8 -8
  121. helm/benchmark/presentation/test_contamination.py +2 -2
  122. helm/benchmark/presentation/test_run_entry.py +1 -2
  123. helm/benchmark/presentation/test_summarize.py +3 -3
  124. helm/benchmark/run.py +54 -26
  125. helm/benchmark/run_expander.py +205 -35
  126. helm/benchmark/run_spec.py +93 -0
  127. helm/benchmark/run_spec_factory.py +163 -0
  128. helm/benchmark/run_specs/__init__.py +0 -0
  129. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  130. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  131. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  132. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  133. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  134. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  135. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  136. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  137. helm/benchmark/run_specs/vlm_run_specs.py +757 -0
  138. helm/benchmark/runner.py +51 -57
  139. helm/benchmark/runner_config_registry.py +21 -0
  140. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  141. helm/benchmark/scenarios/bold_scenario.py +2 -2
  142. helm/benchmark/scenarios/code_scenario.py +1 -0
  143. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  144. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  145. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  146. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  147. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  148. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  149. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  150. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  151. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  152. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  153. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  154. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  155. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  156. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  157. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  158. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  159. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  160. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  162. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  163. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  164. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  165. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  166. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  167. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  168. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  169. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  170. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  171. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  172. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  173. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  174. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  175. helm/benchmark/scenarios/math_scenario.py +19 -2
  176. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  177. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  178. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  179. helm/benchmark/scenarios/scenario.py +4 -0
  180. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  181. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  182. helm/benchmark/scenarios/test_scenario.py +6 -3
  183. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  184. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  185. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  186. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  187. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  188. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  189. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  190. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  191. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  192. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  193. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
  194. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  198. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  199. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  200. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  201. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  202. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  203. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  204. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  205. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  206. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  207. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  208. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  209. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  210. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  211. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  212. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  213. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  214. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  215. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  216. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  217. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  218. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  219. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
  220. helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
  221. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  222. helm/benchmark/server.py +24 -1
  223. helm/benchmark/slurm_runner.py +70 -49
  224. helm/benchmark/static/benchmarking.js +1 -1
  225. helm/benchmark/static/schema_classic.yaml +258 -1066
  226. helm/benchmark/static/schema_image2structure.yaml +304 -0
  227. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  228. helm/benchmark/static/schema_lite.yaml +2 -227
  229. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  230. helm/benchmark/static/schema_unitxt.yaml +428 -0
  231. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  232. helm/benchmark/static/schema_vlm.yaml +823 -0
  233. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  234. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  235. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  236. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  237. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  238. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  239. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  240. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  241. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  242. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  243. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  244. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  245. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  246. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  247. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  248. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  249. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  250. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  251. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  252. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  253. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  254. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  255. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  256. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  257. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  258. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  259. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  260. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  261. helm/benchmark/static_build/config.js +4 -0
  262. helm/benchmark/static_build/index.html +20 -0
  263. helm/benchmark/test_data_preprocessor.py +3 -3
  264. helm/benchmark/test_run_expander.py +1 -1
  265. helm/benchmark/window_services/ai21_window_service.py +22 -33
  266. helm/benchmark/window_services/cohere_window_service.py +1 -63
  267. helm/benchmark/window_services/default_window_service.py +2 -44
  268. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  269. helm/benchmark/window_services/ice_window_service.py +0 -34
  270. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  271. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  272. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  273. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  274. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  275. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  276. helm/benchmark/window_services/local_window_service.py +21 -4
  277. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  278. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  279. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  280. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  281. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  282. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  283. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  284. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  285. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  286. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  287. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  288. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  289. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  290. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  291. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  292. helm/benchmark/window_services/test_utils.py +3 -2
  293. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  294. helm/benchmark/window_services/window_service.py +42 -0
  295. helm/benchmark/window_services/window_service_factory.py +4 -1
  296. helm/benchmark/window_services/yalm_window_service.py +0 -27
  297. helm/clients/__init__.py +0 -0
  298. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  299. helm/clients/aleph_alpha_client.py +112 -0
  300. helm/{proxy/clients → clients}/anthropic_client.py +233 -18
  301. helm/{proxy/clients → clients}/auto_client.py +59 -31
  302. helm/clients/bedrock_client.py +128 -0
  303. helm/clients/bedrock_utils.py +72 -0
  304. helm/{proxy/clients → clients}/client.py +65 -7
  305. helm/clients/clip_score_client.py +49 -0
  306. helm/clients/clip_scorers/__init__.py +0 -0
  307. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  308. helm/clients/clip_scorers/clip_scorer.py +50 -0
  309. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  310. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  311. helm/clients/gcs_client.py +82 -0
  312. helm/{proxy/clients → clients}/google_client.py +5 -5
  313. helm/clients/google_translate_client.py +35 -0
  314. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  315. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  316. helm/clients/image_generation/__init__.py +0 -0
  317. helm/clients/image_generation/adobe_vision_client.py +78 -0
  318. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  319. helm/clients/image_generation/cogview2/__init__.py +0 -0
  320. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  321. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  322. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  323. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  324. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  325. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  326. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  327. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  328. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  329. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  330. helm/clients/image_generation/cogview2_client.py +191 -0
  331. helm/clients/image_generation/dalle2_client.py +192 -0
  332. helm/clients/image_generation/dalle3_client.py +108 -0
  333. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  334. helm/clients/image_generation/dalle_mini/data.py +442 -0
  335. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  336. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  337. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  338. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  339. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  340. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  341. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  342. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  343. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  344. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  345. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  346. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  347. helm/clients/image_generation/dalle_mini_client.py +190 -0
  348. helm/clients/image_generation/deep_floyd_client.py +78 -0
  349. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  350. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  351. helm/clients/image_generation/lexica_client.py +86 -0
  352. helm/clients/image_generation/mindalle/__init__.py +0 -0
  353. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  354. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  355. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  356. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  357. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  358. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  359. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  360. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  361. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  362. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  363. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  364. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  365. helm/clients/image_generation/mindalle_client.py +115 -0
  366. helm/clients/image_generation/nudity_check_client.py +64 -0
  367. helm/clients/image_generation/together_image_generation_client.py +111 -0
  368. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  369. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  370. helm/clients/mistral_client.py +134 -0
  371. helm/clients/moderation_api_client.py +109 -0
  372. helm/clients/open_lm_client.py +43 -0
  373. helm/clients/openai_client.py +301 -0
  374. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  375. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  376. helm/clients/simple_client.py +64 -0
  377. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  378. helm/clients/test_client.py +100 -0
  379. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  380. helm/clients/test_simple_client.py +19 -0
  381. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  382. helm/{proxy/clients → clients}/together_client.py +104 -73
  383. helm/clients/vertexai_client.py +400 -0
  384. helm/clients/vision_language/__init__.py +0 -0
  385. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  386. helm/clients/vision_language/huggingface_vlm_client.py +111 -0
  387. helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
  388. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  389. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  390. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  391. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  392. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  393. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  394. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  395. helm/clients/vision_language/open_flamingo_client.py +155 -0
  396. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  397. helm/clients/vllm_client.py +46 -0
  398. helm/common/cache.py +16 -4
  399. helm/common/cache_backend_config.py +47 -0
  400. helm/common/clip_score_request.py +41 -0
  401. helm/common/file_caches/__init__.py +0 -0
  402. helm/common/file_caches/file_cache.py +16 -0
  403. helm/common/file_caches/local_file_cache.py +61 -0
  404. helm/common/file_caches/test_local_file_cache.py +25 -0
  405. helm/common/file_upload_request.py +27 -0
  406. helm/common/general.py +1 -1
  407. helm/common/image_generation_parameters.py +25 -0
  408. helm/common/images_utils.py +33 -3
  409. helm/common/key_value_store.py +35 -4
  410. helm/common/media_object.py +13 -0
  411. helm/common/moderations_api_request.py +71 -0
  412. helm/common/mongo_key_value_store.py +3 -3
  413. helm/common/multimodal_request_utils.py +31 -0
  414. helm/common/nudity_check_request.py +29 -0
  415. helm/common/request.py +15 -17
  416. helm/common/test_general.py +6 -0
  417. helm/common/tokenization_request.py +1 -1
  418. helm/config/model_deployments.yaml +1159 -538
  419. helm/config/model_metadata.yaml +868 -41
  420. helm/config/tokenizer_configs.yaml +149 -43
  421. helm/proxy/accounts.py +31 -4
  422. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  423. helm/proxy/critique/model_critique_client.py +8 -6
  424. helm/proxy/example_queries.py +29 -17
  425. helm/proxy/server.py +70 -5
  426. helm/proxy/services/remote_service.py +31 -0
  427. helm/proxy/services/server_service.py +96 -16
  428. helm/proxy/services/service.py +30 -0
  429. helm/proxy/services/test_remote_service.py +4 -3
  430. helm/proxy/services/test_service.py +0 -12
  431. helm/proxy/test_accounts.py +32 -0
  432. helm/proxy/token_counters/auto_token_counter.py +37 -37
  433. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  434. helm/proxy/token_counters/token_counter.py +3 -5
  435. helm/tokenizers/__init__.py +0 -0
  436. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  437. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  438. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  439. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  440. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  441. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  442. helm/tokenizers/simple_tokenizer.py +33 -0
  443. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  444. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  445. helm/tokenizers/test_simple_tokenizer.py +33 -0
  446. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  447. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  448. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  449. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  450. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  451. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  452. helm/benchmark/run_specs.py +0 -2762
  453. helm/benchmark/test_model_deployment_definition.py +0 -92
  454. helm/benchmark/test_model_properties.py +0 -1570
  455. helm/benchmark/vlm_run_specs.py +0 -97
  456. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  457. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  458. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  459. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  460. helm/benchmark/window_services/t511b_window_service.py +0 -30
  461. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  462. helm/benchmark/window_services/ul2_window_service.py +0 -30
  463. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  464. helm/common/cache_utils.py +0 -14
  465. helm/proxy/clients/aleph_alpha_client.py +0 -95
  466. helm/proxy/clients/goose_ai_client.py +0 -99
  467. helm/proxy/clients/microsoft_client.py +0 -180
  468. helm/proxy/clients/openai_client.py +0 -206
  469. helm/proxy/clients/simple_client.py +0 -60
  470. helm/proxy/clients/test_client.py +0 -49
  471. helm/proxy/clients/vertexai_client.py +0 -115
  472. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  473. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  474. helm/proxy/token_counters/free_token_counter.py +0 -12
  475. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  476. helm/proxy/token_counters/openai_token_counter.py +0 -22
  477. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  478. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  479. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  480. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  481. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  482. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
  483. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  484. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  485. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  486. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  487. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  488. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  489. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  490. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  491. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  492. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  493. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  494. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  495. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  496. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  497. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  498. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  499. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -1,6 +1,7 @@
1
1
  import shutil
2
2
  import tempfile
3
3
 
4
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
4
5
  from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS
5
6
  from .tokenizer_service import TokenizerService
6
7
  from .window_service_factory import WindowServiceFactory
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
9
10
  class TestOpenAIWindowService:
10
11
  def setup_method(self):
11
12
  self.path: str = tempfile.mkdtemp()
12
- service: TokenizerService = get_tokenizer_service(self.path)
13
+ service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
13
14
  self.window_service = WindowServiceFactory.get_window_service("openai/davinci", service)
14
15
 
15
16
  def teardown_method(self, method):
@@ -1,6 +1,7 @@
1
1
  import shutil
2
2
  import tempfile
3
3
 
4
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
4
5
  from .test_utils import get_tokenizer_service, TEST_PROMPT
5
6
  from .tokenizer_service import TokenizerService
6
7
  from .window_service_factory import WindowServiceFactory
@@ -9,8 +10,8 @@ from .window_service_factory import WindowServiceFactory
9
10
  class TestOPTWindowService:
10
11
  def setup_method(self):
11
12
  self.path: str = tempfile.mkdtemp()
12
- service: TokenizerService = get_tokenizer_service(self.path)
13
- self.window_service = WindowServiceFactory.get_window_service("together/opt-175b", service)
13
+ service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
14
+ self.window_service = WindowServiceFactory.get_window_service("huggingface/opt-175b", service)
14
15
 
15
16
  def teardown_method(self, method):
16
17
  shutil.rmtree(self.path)
@@ -1,6 +1,7 @@
1
1
  from tempfile import TemporaryDirectory
2
2
  from typing import List
3
3
 
4
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
4
5
  from .tokenizer_service import TokenizerService
5
6
  from .window_service_factory import WindowServiceFactory
6
7
  from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -117,7 +118,7 @@ class TestPalmyraWindowService:
117
118
 
118
119
  def setup_method(self):
119
120
  self.temporary_directory = TemporaryDirectory()
120
- service: TokenizerService = get_tokenizer_service(self.temporary_directory.name)
121
+ service: TokenizerService = get_tokenizer_service(self.temporary_directory.name, BlackHoleCacheBackendConfig())
121
122
  self.window_service = WindowServiceFactory.get_window_service("writer/palmyra-large", service)
122
123
 
123
124
  def teardown_method(self, method):
@@ -2,6 +2,7 @@ import shutil
2
2
  import tempfile
3
3
  from typing import List
4
4
 
5
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
5
6
  from .tokenizer_service import TokenizerService
6
7
  from .window_service_factory import WindowServiceFactory
7
8
  from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -70,7 +71,7 @@ class TestT0ppWindowService:
70
71
 
71
72
  def setup_method(self):
72
73
  self.path: str = tempfile.mkdtemp()
73
- service: TokenizerService = get_tokenizer_service(self.path)
74
+ service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
74
75
  self.window_service = WindowServiceFactory.get_window_service("together/t0pp", service)
75
76
 
76
77
  def teardown_method(self, method):
@@ -2,6 +2,7 @@ import shutil
2
2
  import tempfile
3
3
  from typing import List
4
4
 
5
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
5
6
  from .tokenizer_service import TokenizerService
6
7
  from .window_service_factory import WindowServiceFactory
7
8
  from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -70,7 +71,7 @@ class TestT511bWindowService:
70
71
 
71
72
  def setup_method(self):
72
73
  self.path: str = tempfile.mkdtemp()
73
- service: TokenizerService = get_tokenizer_service(self.path)
74
+ service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
74
75
  self.window_service = WindowServiceFactory.get_window_service("together/t5-11b", service)
75
76
 
76
77
  def teardown_method(self, method):
@@ -2,6 +2,7 @@ import shutil
2
2
  import tempfile
3
3
  from typing import List
4
4
 
5
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
5
6
  from .tokenizer_service import TokenizerService
6
7
  from .window_service_factory import WindowServiceFactory
7
8
  from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -70,7 +71,7 @@ class TestUL2WindowService:
70
71
 
71
72
  def setup_method(self):
72
73
  self.path: str = tempfile.mkdtemp()
73
- service: TokenizerService = get_tokenizer_service(self.path)
74
+ service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
74
75
  self.window_service = WindowServiceFactory.get_window_service("together/ul2", service)
75
76
 
76
77
  def teardown_method(self, method):
@@ -1,6 +1,7 @@
1
1
  from typing import List
2
2
 
3
3
  from helm.common.authentication import Authentication
4
+ from helm.common.cache_backend_config import CacheBackendConfig
4
5
  from helm.proxy.services.server_service import ServerService
5
6
  from helm.benchmark.metrics.metric_service import MetricService
6
7
  from .tokenizer_service import TokenizerService
@@ -227,6 +228,6 @@ GPT4_TEST_TOKENS: List[str] = [
227
228
  ]
228
229
 
229
230
 
230
- def get_tokenizer_service(local_path: str) -> TokenizerService:
231
- service = ServerService(base_path=local_path, root_mode=True)
231
+ def get_tokenizer_service(local_path: str, cache_backend_config: CacheBackendConfig) -> TokenizerService:
232
+ service = ServerService(base_path=local_path, root_mode=True, cache_backend_config=cache_backend_config)
232
233
  return MetricService(service, Authentication("test"))
@@ -1,6 +1,7 @@
1
1
  import shutil
2
2
  import tempfile
3
3
 
4
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
4
5
  from .test_utils import get_tokenizer_service, TEST_PROMPT
5
6
  from .tokenizer_service import TokenizerService
6
7
  from .window_service_factory import WindowServiceFactory
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
9
10
  class TestYaLMWindowService:
10
11
  def setup_method(self):
11
12
  self.path: str = tempfile.mkdtemp()
12
- service: TokenizerService = get_tokenizer_service(self.path)
13
+ service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
13
14
  self.window_service = WindowServiceFactory.get_window_service("together/yalm", service)
14
15
 
15
16
  def teardown_method(self, method):
@@ -110,3 +110,45 @@ class WindowService(ABC):
110
110
  minus the expected completion length (defaults to 0).
111
111
  """
112
112
  pass
113
+
114
+
115
+ class ConfigurableWindowService(WindowService, ABC):
116
+ def __init__(
117
+ self,
118
+ tokenizer_name: str,
119
+ max_sequence_length: int,
120
+ max_request_length: Optional[int] = None,
121
+ max_sequence_and_generated_tokens_length: Optional[int] = None,
122
+ end_of_text_token: Optional[str] = None,
123
+ prefix_token: Optional[str] = None,
124
+ ):
125
+ self._tokenizer_name = tokenizer_name
126
+ self._max_sequence_length = max_sequence_length
127
+ self._max_request_length = max_request_length or max_sequence_length
128
+ self._max_sequence_and_generated_tokens_length = max_sequence_and_generated_tokens_length or INT_MAX
129
+ self._end_of_text_token = end_of_text_token or ""
130
+ self._prefix_token = prefix_token or ""
131
+
132
+ @property
133
+ def tokenizer_name(self) -> str:
134
+ return self._tokenizer_name
135
+
136
+ @property
137
+ def max_sequence_length(self) -> int:
138
+ return self._max_sequence_length
139
+
140
+ @property
141
+ def max_request_length(self) -> int:
142
+ return self._max_request_length
143
+
144
+ @property
145
+ def max_sequence_and_generated_tokens_length(self) -> int:
146
+ return self._max_sequence_and_generated_tokens_length
147
+
148
+ @property
149
+ def end_of_text_token(self) -> str:
150
+ return self._end_of_text_token
151
+
152
+ @property
153
+ def prefix_token(self) -> str:
154
+ return self._prefix_token
@@ -43,7 +43,7 @@ class WindowServiceFactory:
43
43
  # in the users configuration file. Instead, they have to be constructed dynamically at runtime.
44
44
  window_service_spec = inject_object_spec_args(
45
45
  window_service_spec,
46
- {
46
+ constant_bindings={
47
47
  "service": service,
48
48
  "tokenizer_name": model_deployment.tokenizer_name,
49
49
  "max_sequence_length": model_deployment.max_sequence_length,
@@ -52,6 +52,9 @@ class WindowServiceFactory:
52
52
  "end_of_text_token": end_of_text_token,
53
53
  "prefix_token": prefix_token,
54
54
  },
55
+ provider_bindings={
56
+ "gpt2_window_service": lambda: WindowServiceFactory.get_window_service("huggingface/gpt2", service)
57
+ },
55
58
  )
56
59
  return create_object(window_service_spec)
57
60
 
@@ -1,34 +1,7 @@
1
- from helm.proxy.tokenizers.yalm_tokenizer_data.yalm_tokenizer import YaLMTokenizer
2
1
  from .local_window_service import LocalWindowService
3
- from .tokenizer_service import TokenizerService
4
2
 
5
3
 
6
4
  class YaLMWindowService(LocalWindowService):
7
- def __init__(self, service: TokenizerService):
8
- super().__init__(service)
9
-
10
- @property
11
- def tokenizer_name(self) -> str:
12
- return "Yandex/yalm"
13
-
14
- @property
15
- def max_sequence_length(self) -> int:
16
- return YaLMTokenizer.MAX_SEQUENCE_LENGTH
17
-
18
- @property
19
- def max_request_length(self) -> int:
20
- return self.max_sequence_length + 1
21
-
22
- @property
23
- def end_of_text_token(self) -> str:
24
- """The end of text token."""
25
- return YaLMTokenizer.EOS_TOKEN
26
-
27
- @property
28
- def prefix_token(self) -> str:
29
- """The prefix token"""
30
- return self.end_of_text_token
31
-
32
5
  def truncate_from_right(self, text: str, expected_completion_token_length: int = 0) -> str:
33
6
  """
34
7
  Truncates text from the right to fit within the context window given by `max_request_length`
File without changes
@@ -7,7 +7,7 @@ from helm.common.request import (
7
7
  EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
8
8
  Request,
9
9
  RequestResult,
10
- Sequence,
10
+ GeneratedOutput,
11
11
  Token,
12
12
  )
13
13
  from .client import CachingClient, truncate_sequence, cleanup_str
@@ -97,25 +97,19 @@ class AI21Client(CachingClient):
97
97
  # Compute the actual length of the token text
98
98
  # e.g. "▁burying"(0,8) -> 8 - 0 = 8; "▁burying"(0,7) -> 7 - 0 = 7
99
99
  text_length: int = raw["textRange"]["end"] - raw["textRange"]["start"]
100
- # "topTokens" can be None when sending a request with topKReturn=0
101
- # AI21 sends unscaled logprobs as `raw_logprob` so use this instead of `logprob`.
102
- top_logprobs: Dict[str, float] = dict(
103
- (fix_text(x["token"], first), x["raw_logprob"]) for x in raw["topTokens"] or []
104
- )
105
100
 
106
101
  return Token(
107
102
  # Text should not be longer than text_length. Since "▁" is always inserted
108
103
  # in the beginning, we truncate the text from the right.
109
104
  text=fix_text(raw["generatedToken"]["token"], first)[-text_length:] if text_length else "",
110
105
  logprob=raw["generatedToken"]["raw_logprob"],
111
- top_logprobs=top_logprobs,
112
106
  )
113
107
 
114
- def parse_sequence(raw: Dict, first: bool, finish_reason: Optional[Dict] = None) -> Sequence:
108
+ def parse_sequence(raw: Dict, first: bool, finish_reason: Optional[Dict] = None) -> GeneratedOutput:
115
109
  text = raw["text"]
116
110
  tokens = [parse_token(token, first and i == 0) for i, token in enumerate(raw["tokens"])]
117
111
  logprob = sum(token.logprob for token in tokens)
118
- return Sequence(text=text, logprob=logprob, tokens=tokens, finish_reason=finish_reason)
112
+ return GeneratedOutput(text=text, logprob=logprob, tokens=tokens, finish_reason=finish_reason)
119
113
 
120
114
  prompt = parse_sequence(response["prompt"], True)
121
115
  completions = []
@@ -0,0 +1,112 @@
1
+ from typing import List
2
+
3
+ from helm.common.cache import CacheConfig
4
+ from helm.common.media_object import TEXT_TYPE
5
+ from helm.common.optional_dependencies import handle_module_not_found_error
6
+ from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
7
+ from .client import CachingClient, truncate_sequence, generate_uid_for_multimodal_prompt
8
+
9
+ try:
10
+ from aleph_alpha_client import Client, CompletionRequest, CompletionResponse, Image, Prompt
11
+ except ModuleNotFoundError as e:
12
+ handle_module_not_found_error(e, ["aleph-alpha"])
13
+
14
+
15
+ class AlephAlphaClient(CachingClient):
16
+ def __init__(self, api_key: str, cache_config: CacheConfig):
17
+ super().__init__(cache_config=cache_config)
18
+ self._api_key: str = api_key
19
+ self._aleph_alpha_client = Client(token=self._api_key) if self._api_key else None
20
+
21
+ def make_request(self, request: Request) -> RequestResult:
22
+ """Make a request following https://docs.aleph-alpha.com/api/complete."""
23
+ assert self._aleph_alpha_client is not None
24
+
25
+ model: str = request.model_engine
26
+ prompt: Prompt
27
+
28
+ # The prompt key is a unique identifier for the prompt
29
+ prompt_key: str = request.prompt
30
+
31
+ # Contents can either be text or a list of multimodal content made up of text, images or other content
32
+ if request.multimodal_prompt is not None:
33
+ from helm.common.images_utils import encode_base64
34
+
35
+ items = []
36
+ for media_object in request.multimodal_prompt.media_objects:
37
+ if media_object.is_type("image") and media_object.location:
38
+ items.append(Image(base_64=encode_base64(media_object.location), cropping=None, controls=[]))
39
+ elif media_object.is_type(TEXT_TYPE):
40
+ if media_object.text is None:
41
+ raise ValueError("MediaObject of text type has missing text field value")
42
+ items.append(media_object.text)
43
+ else:
44
+ raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
45
+
46
+ prompt = Prompt(items=items)
47
+ prompt_key = generate_uid_for_multimodal_prompt(request.multimodal_prompt)
48
+ else:
49
+ prompt = Prompt.from_text(request.prompt)
50
+
51
+ parameters = {
52
+ "maximum_tokens": request.max_tokens,
53
+ "temperature": request.temperature,
54
+ "top_k": request.top_k_per_token,
55
+ "top_p": request.top_p,
56
+ "presence_penalty": request.presence_penalty,
57
+ "frequency_penalty": request.frequency_penalty,
58
+ "n": request.num_completions,
59
+ "stop_sequences": request.stop_sequences,
60
+ "log_probs": request.top_k_per_token,
61
+ "echo": request.echo_prompt,
62
+ "tokens": True, # Setting to True returns individual tokens of the completion
63
+ }
64
+
65
+ try:
66
+
67
+ def do_it():
68
+ assert self._aleph_alpha_client is not None
69
+ completion_response: CompletionResponse = self._aleph_alpha_client.complete(
70
+ request=CompletionRequest(prompt=prompt, **parameters), model=model
71
+ )
72
+ result = dict(completion_response.to_json())
73
+ assert "completions" in result, f"Invalid response: {result}"
74
+ return result
75
+
76
+ cache_key = CachingClient.make_cache_key({"model": model, "prompt": prompt_key, **parameters}, request)
77
+ response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
78
+ except Exception as e:
79
+ error: str = f"AlephAlphaClient error: {e}"
80
+ return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
81
+
82
+ completions: List[GeneratedOutput] = []
83
+ for completion in response["completions"]:
84
+ sequence_logprob: float = 0
85
+ tokens: List[Token] = []
86
+
87
+ # `completion_tokens` is the list of selected tokens.
88
+ for i, token in enumerate(completion.get("completion_tokens", [])):
89
+ # Use the selected token value to get the logprob
90
+ logprob: float = completion["log_probs"][i][token]
91
+ sequence_logprob += logprob
92
+ tokens.append(
93
+ Token(
94
+ text=token,
95
+ logprob=logprob,
96
+ )
97
+ )
98
+
99
+ sequence: GeneratedOutput = GeneratedOutput(
100
+ text=completion["completion"], logprob=sequence_logprob, tokens=tokens
101
+ )
102
+ sequence = truncate_sequence(sequence, request)
103
+ completions.append(sequence)
104
+
105
+ return RequestResult(
106
+ success=True,
107
+ cached=cached,
108
+ request_time=response["request_time"],
109
+ request_datetime=response["request_datetime"],
110
+ completions=completions,
111
+ embedding=[],
112
+ )