crfm-helm 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (580)
  1. crfm_helm-0.5.5.dist-info/METADATA +413 -0
  2. crfm_helm-0.5.5.dist-info/RECORD +894 -0
  3. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  22. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  23. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  24. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  25. helm/benchmark/adaptation/prompt.py +1 -1
  26. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  27. helm/benchmark/annotation/air_bench_annotator.py +20 -5
  28. helm/benchmark/annotation/annotator.py +5 -0
  29. helm/benchmark/annotation/annotator_factory.py +3 -20
  30. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  31. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  32. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  33. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  34. helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
  35. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  36. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  37. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  38. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  39. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  40. helm/benchmark/annotation/live_qa_annotator.py +1 -1
  41. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  42. helm/benchmark/annotation/medalign_annotator.py +100 -0
  43. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  44. helm/benchmark/annotation/medication_qa_annotator.py +87 -63
  45. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  46. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  47. helm/benchmark/annotation/model_as_judge.py +218 -6
  48. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  49. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  50. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  51. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  52. helm/benchmark/annotation/omni_math_annotator.py +132 -0
  53. helm/benchmark/annotation/spider_annotator.py +18 -0
  54. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  55. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  56. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  57. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  58. helm/benchmark/annotation_executor.py +35 -15
  59. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  60. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  61. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  62. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  63. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  64. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  65. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  66. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  67. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  68. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  69. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  70. helm/benchmark/augmentations/perturbation.py +1 -1
  71. helm/benchmark/augmentations/space_perturbation.py +2 -2
  72. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  73. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  74. helm/benchmark/augmentations/test_perturbation.py +16 -13
  75. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  76. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  77. helm/benchmark/data_preprocessor.py +2 -2
  78. helm/benchmark/huggingface_registration.py +2 -7
  79. helm/benchmark/metrics/aci_bench_metrics.py +34 -0
  80. helm/benchmark/metrics/basic_metrics.py +6 -6
  81. helm/benchmark/metrics/bbq_metrics.py +2 -2
  82. helm/benchmark/metrics/bias_metrics.py +12 -3
  83. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  84. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  85. helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
  86. helm/benchmark/metrics/classification_metrics.py +76 -12
  87. helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
  88. helm/benchmark/metrics/code_metrics.py +5 -5
  89. helm/benchmark/metrics/comet_metric.py +125 -0
  90. helm/benchmark/metrics/common_metric_specs.py +9 -2
  91. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  92. helm/benchmark/metrics/copyright_metrics.py +4 -4
  93. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  94. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  95. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  96. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  97. helm/benchmark/metrics/dischargeme_metrics.py +34 -0
  98. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  99. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  100. helm/benchmark/metrics/efficiency_metrics.py +3 -3
  101. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  102. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  103. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  104. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  105. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  106. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  107. helm/benchmark/metrics/ifeval/__init__.py +0 -0
  108. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  109. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  110. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  111. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  112. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  113. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  114. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  115. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  116. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  117. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  118. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  120. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  121. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  122. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  123. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  124. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  125. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  126. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  127. helm/benchmark/metrics/med_dialog_metrics.py +34 -0
  128. helm/benchmark/metrics/medalign_metrics.py +34 -0
  129. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  130. helm/benchmark/metrics/medec_metrics.py +101 -0
  131. helm/benchmark/metrics/medi_qa_metrics.py +34 -0
  132. helm/benchmark/metrics/medication_qa_metrics.py +15 -4
  133. helm/benchmark/metrics/mental_health_metrics.py +34 -0
  134. helm/benchmark/metrics/metric.py +3 -3
  135. helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
  136. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  137. helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
  138. helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
  139. helm/benchmark/metrics/nltk_helper.py +32 -0
  140. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  141. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  142. helm/benchmark/metrics/output_processing_metric.py +60 -0
  143. helm/benchmark/metrics/output_processors.py +15 -0
  144. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  145. helm/benchmark/metrics/ranking_metrics.py +3 -3
  146. helm/benchmark/metrics/reference_metric.py +3 -3
  147. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  148. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  149. helm/benchmark/metrics/spider_metrics.py +7 -0
  150. helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
  151. helm/benchmark/metrics/statistic.py +1 -1
  152. helm/benchmark/metrics/summac/model_summac.py +1 -1
  153. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  154. helm/benchmark/metrics/summarization_metrics.py +19 -9
  155. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  156. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  157. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  158. helm/benchmark/metrics/test_metric.py +1 -1
  159. helm/benchmark/metrics/test_statistic.py +2 -2
  160. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  161. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  162. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  163. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  164. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  165. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  166. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  167. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
  168. helm/benchmark/metrics/toxicity_metrics.py +4 -4
  169. helm/benchmark/metrics/unitxt_metrics.py +4 -1
  170. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  171. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  172. helm/benchmark/model_metadata_registry.py +16 -0
  173. helm/benchmark/presentation/summarize.py +23 -10
  174. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  175. helm/benchmark/reeval_run.py +203 -0
  176. helm/benchmark/reeval_runner.py +355 -0
  177. helm/benchmark/run.py +8 -17
  178. helm/benchmark/run_expander.py +78 -8
  179. helm/benchmark/run_spec_factory.py +12 -0
  180. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  181. helm/benchmark/run_specs/audio_run_specs.py +613 -0
  182. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  183. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  184. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  185. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  186. helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
  187. helm/benchmark/run_specs/experimental_run_specs.py +112 -3
  188. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  189. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  190. helm/benchmark/run_specs/long_context_run_specs.py +89 -0
  191. helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
  192. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  193. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  194. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  195. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
  196. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  197. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  198. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  199. helm/benchmark/run_specs/vlm_run_specs.py +75 -2
  200. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  201. helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
  202. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  203. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  204. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  205. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  206. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
  207. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  208. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  209. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  210. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  211. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  212. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  213. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  214. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  215. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  216. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  217. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  218. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  219. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  220. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  221. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  222. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  223. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  224. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  225. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  226. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
  227. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  228. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
  229. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  230. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  231. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  232. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  233. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  234. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  235. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  236. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  237. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  238. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  239. helm/benchmark/scenarios/bold_scenario.py +1 -1
  240. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  241. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  242. helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
  243. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  244. helm/benchmark/scenarios/clear_scenario.py +153 -0
  245. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  246. helm/benchmark/scenarios/code_scenario.py +17 -4
  247. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  248. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  249. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  250. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  251. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  252. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  253. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  254. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  255. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  256. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  257. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  258. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  259. helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
  260. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  261. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  262. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  263. helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
  264. helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
  265. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  266. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  267. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  268. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  269. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  270. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  271. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  272. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  273. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  274. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  275. helm/benchmark/scenarios/headqa_scenario.py +131 -0
  276. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  277. helm/benchmark/scenarios/ice_scenario.py +8 -4
  278. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  279. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  280. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  281. helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
  282. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  283. helm/benchmark/scenarios/koala_scenario.py +1 -1
  284. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  285. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  286. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  287. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  288. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  289. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  290. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  291. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  292. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  293. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  294. helm/benchmark/scenarios/math_scenario.py +9 -1
  295. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  296. helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
  297. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  298. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  299. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  300. helm/benchmark/scenarios/medalign_scenario.py +88 -0
  301. helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
  302. helm/benchmark/scenarios/medbullets_scenario.py +140 -0
  303. helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
  304. helm/benchmark/scenarios/medec_scenario.py +120 -0
  305. helm/benchmark/scenarios/medhallu_scenario.py +66 -0
  306. helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
  307. helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
  308. helm/benchmark/scenarios/mental_health_scenario.py +112 -0
  309. helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
  310. helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
  311. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
  312. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  313. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  314. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  315. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  316. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
  317. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
  318. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
  319. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  320. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  321. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  322. helm/benchmark/scenarios/numeracy_scenario.py +10 -1
  323. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  324. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  325. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  326. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  327. helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
  328. helm/benchmark/scenarios/quac_scenario.py +10 -1
  329. helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
  330. helm/benchmark/scenarios/raft_scenario.py +17 -2
  331. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  332. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  333. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  334. helm/benchmark/scenarios/scenario.py +9 -1
  335. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
  336. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  337. helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
  338. helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
  339. helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
  340. helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
  341. helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
  342. helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
  343. helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
  344. helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
  345. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  346. helm/benchmark/scenarios/spider_scenario.py +91 -0
  347. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
  348. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  349. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  350. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  351. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  352. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  353. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  354. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  355. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  356. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  357. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  358. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  359. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  360. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  361. helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
  362. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  363. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  364. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  365. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  366. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  367. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  368. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  369. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  370. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  371. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  372. helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
  373. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  374. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  375. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  376. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  377. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  378. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  379. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  380. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  381. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  382. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  383. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  384. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  385. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  386. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  387. helm/benchmark/server.py +11 -0
  388. helm/benchmark/slurm_runner.py +1 -1
  389. helm/benchmark/static/schema_audio.yaml +752 -0
  390. helm/benchmark/static/schema_autobencher.yaml +150 -0
  391. helm/benchmark/static/schema_call_center.yaml +97 -60
  392. helm/benchmark/static/schema_capabilities.yaml +254 -0
  393. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  394. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  395. helm/benchmark/static/schema_enterprise.yaml +298 -0
  396. helm/benchmark/static/schema_finance.yaml +14 -12
  397. helm/benchmark/static/schema_heim.yaml +1389 -0
  398. helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
  399. helm/benchmark/static/schema_medhelm.yaml +1081 -0
  400. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  401. helm/benchmark/static/schema_safety.yaml +18 -1
  402. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
  403. helm/benchmark/static/schema_social_audio.yaml +224 -0
  404. helm/benchmark/static/schema_sql.yaml +171 -0
  405. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
  406. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  407. helm/benchmark/static/schema_vhelm.yaml +109 -36
  408. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  409. helm/benchmark/static_build/assets/index-262903c1.js +10 -0
  410. helm/benchmark/static_build/assets/index-42060d71.css +1 -0
  411. helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
  412. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  413. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  414. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
  415. helm/benchmark/static_build/config.js +1 -1
  416. helm/benchmark/static_build/index.html +5 -5
  417. helm/benchmark/window_services/default_window_service.py +1 -1
  418. helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
  419. helm/benchmark/window_services/ice_window_service.py +1 -1
  420. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  421. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  422. helm/benchmark/window_services/local_window_service.py +2 -2
  423. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  424. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  425. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  426. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  427. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  428. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  429. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  430. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  431. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  432. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  433. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  434. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  435. helm/benchmark/window_services/test_utils.py +1 -1
  436. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  437. helm/benchmark/window_services/yalm_window_service.py +1 -1
  438. helm/clients/ai21_client.py +3 -3
  439. helm/clients/aleph_alpha_client.py +1 -1
  440. helm/clients/audio_language/__init__.py +0 -0
  441. helm/clients/audio_language/diva_llama_client.py +118 -0
  442. helm/clients/audio_language/llama_omni_client.py +198 -0
  443. helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
  444. helm/clients/audio_language/qwen_audiolm_client.py +150 -0
  445. helm/clients/auto_client.py +4 -2
  446. helm/clients/azure_openai_client.py +55 -0
  447. helm/clients/bedrock_client.py +201 -7
  448. helm/clients/bedrock_utils.py +33 -0
  449. helm/clients/clip_scorers/clip_scorer.py +1 -1
  450. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  451. helm/clients/cohere_client.py +3 -3
  452. helm/clients/google_client.py +1 -1
  453. helm/clients/http_model_client.py +1 -1
  454. helm/clients/huggingface_client.py +10 -18
  455. helm/clients/ibm_client.py +267 -0
  456. helm/clients/image_generation/adobe_vision_client.py +1 -1
  457. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  458. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  459. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  460. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  461. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  462. helm/clients/image_generation/cogview2_client.py +1 -1
  463. helm/clients/image_generation/dalle2_client.py +1 -1
  464. helm/clients/image_generation/dalle3_client.py +2 -2
  465. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  466. helm/clients/image_generation/dalle_mini/data.py +1 -1
  467. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  468. helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
  469. helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
  470. helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
  471. helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
  472. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  473. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  474. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  475. helm/clients/image_generation/dalle_mini_client.py +1 -1
  476. helm/clients/image_generation/deep_floyd_client.py +1 -1
  477. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  478. helm/clients/image_generation/lexica_client.py +1 -1
  479. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  480. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  481. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  482. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  483. helm/clients/image_generation/mindalle_client.py +1 -1
  484. helm/clients/image_generation/together_image_generation_client.py +1 -1
  485. helm/clients/lit_gpt_client.py +2 -2
  486. helm/clients/mistral_client.py +62 -18
  487. helm/clients/nvidia_nim_client.py +0 -3
  488. helm/clients/openai_client.py +241 -22
  489. helm/clients/palmyra_client.py +1 -4
  490. helm/clients/reka_client.py +1 -1
  491. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  492. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  493. helm/clients/stanfordhealthcare_google_client.py +43 -0
  494. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  495. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  496. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  497. helm/clients/test_client.py +1 -1
  498. helm/clients/test_together_client.py +6 -1
  499. helm/clients/together_client.py +47 -7
  500. helm/clients/upstage_client.py +23 -0
  501. helm/clients/vertexai_client.py +39 -13
  502. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  503. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  504. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  505. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  506. helm/clients/vision_language/qwen2_vlm_client.py +175 -0
  507. helm/clients/vllm_client.py +4 -6
  508. helm/clients/yi_client.py +0 -3
  509. helm/common/audio_utils.py +111 -0
  510. helm/common/file_caches/local_file_cache.py +1 -1
  511. helm/common/file_caches/test_local_file_cache.py +1 -1
  512. helm/common/images_utils.py +2 -2
  513. helm/common/media_object.py +2 -2
  514. helm/common/multimodal_request_utils.py +26 -0
  515. helm/common/reeval_parameters.py +12 -0
  516. helm/common/request.py +6 -2
  517. helm/common/response_format.py +18 -0
  518. helm/common/test_media_object.py +1 -1
  519. helm/config/model_deployments.yaml +1112 -19
  520. helm/config/model_metadata.yaml +985 -44
  521. helm/config/tokenizer_configs.yaml +379 -3
  522. helm/proxy/cli.py +2 -2
  523. helm/proxy/example_queries.py +1 -1
  524. helm/proxy/server.py +11 -4
  525. helm/proxy/services/remote_service.py +1 -1
  526. helm/proxy/services/server_service.py +1 -1
  527. helm/proxy/services/test_remote_service.py +2 -2
  528. helm/proxy/services/test_service.py +1 -1
  529. helm/proxy/static/general.js +122 -0
  530. helm/proxy/static/help.html +99 -0
  531. helm/proxy/static/index.css +57 -0
  532. helm/proxy/static/index.html +40 -0
  533. helm/proxy/static/index.js +456 -0
  534. helm/proxy/static/info-icon.png +0 -0
  535. helm/proxy/test_retry.py +1 -1
  536. helm/proxy/token_counters/auto_token_counter.py +1 -1
  537. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  538. helm/tokenizers/caching_tokenizer.py +2 -30
  539. helm/tokenizers/http_model_tokenizer.py +1 -1
  540. helm/tokenizers/huggingface_tokenizer.py +2 -2
  541. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  542. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  543. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  544. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  545. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  546. helm/tokenizers/tokenizer.py +3 -1
  547. helm/tokenizers/yalm_tokenizer.py +3 -3
  548. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  549. crfm_helm-0.5.4.dist-info/METADATA +0 -350
  550. crfm_helm-0.5.4.dist-info/RECORD +0 -697
  551. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  552. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  553. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  554. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  555. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  556. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  557. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  558. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  559. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  560. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  561. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  562. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  563. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  564. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  565. helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
  566. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  567. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  568. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  569. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  570. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  571. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  572. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  573. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  574. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  575. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  576. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  577. helm/tokenizers/anthropic_tokenizer.py +0 -52
  578. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
  579. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
  580. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,99 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <title>CRFM Models API</title>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
7
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta/css/bootstrap.min.css" integrity="sha384-/Y6pD6FV/Vv2HJnA6t+vslU6fwYXjCFtcEpHbNJ0lyAFsXTsjBbfaDjzALeQsN6M" crossorigin="anonymous">
8
+ <link rel="stylesheet" type="text/css" href="index.css">
9
+ </head>
10
+
11
+ <body>
12
+ <div class="container-fluid">
13
+ <nav class="navbar navbar-expand-sm navbar-light bg-faded">
14
+ <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#nav-content" aria-controls="nav-content" aria-expanded="false" aria-label="Toggle navigation">
15
+ <span class="navbar-toggler-icon"></span>
16
+ </button>
17
+
18
+ <div class="collapse navbar-collapse" id="nav-content">
19
+ <ul class="navbar-nav">
20
+ <li class="nav-item"><a class="nav-link" href="index.html">Query</a></li>
21
+ <li class="nav-item"><a class="nav-link active" href="help.html">Help</a></li>
22
+ <li class="nav-item" id="loginInfo"></li>
23
+ </ul>
24
+ </div>
25
+ </nav>
26
+
27
+ <div class="row">
28
+ <div class="col-sm-12">
29
+ <p>
30
+ This web interface and API allow you to experiment with multiple large language models using a unified interface.
31
+ To log in, you need an API key.
32
+ </p>
33
+ <h2><a name="query">Query</a></h2>
34
+ <ul>
35
+ <li>In the web interface, you can enter a <b>query</b>, which consists of the following components:
36
+ <ul>
37
+ <li><b>prompt</b>, which is what text we want to feed into the language model. The prompt can have variables (e.g., <tt>${name}</tt>) which are filled in later.</li>
38
+ <li><b>settings</b>, which configures how we're going to call the backend API (HOCON format):
39
+ <ul>
40
+ <li><tt>model</tt>: which model to query; options are:
41
+ <div id="help-models"></div>
42
+ </li>
43
+ <li><tt>temperature</tt>: a non-negative number determining amount of stochasticity (e.g., <tt>1</tt> is sampling from the model, <tt>0</tt> is returning the maximum probability output)</li>
44
+ <li><tt>num_completions</tt>: number of completions (sequences, independently sampled) to return</li>
45
+ <li><tt>top_k_per_token</tt>: number of candidates per token position in each completion</li>
46
+ <li><tt>max_tokens</tt>: maximum number of tokens before generation stops</li>
47
+ <li><tt>stop_sequences</tt>: list of strings that will stop generation (e.g., <tt>'.'</tt> or <tt>'\n'</tt>)</li>
48
+ <li><tt>echo_prompt</tt>: Whether to include the prompt as a prefix of the completion</li>
49
+ <li><tt>top_p</tt>: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.</li>
50
+ <li><tt>presence_penalty</tt>: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. (OpenAI only)</li>
51
+ <li><tt>frequency_penalty</tt>: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. (OpenAI only)</li>
52
+ </ul>
53
+ Settings can also have variables in them (e.g., <tt>temperature: ${temperature}</tt>).
54
+ </li>
55
+ <li><b>environments</b>, which specifies for each variable, a list of values (HOCON format).
56
+ </ul>
57
+ </li>
58
+ <li>
59
+ When the query is submitted,
60
+ we consider all possible assignments of values to variables.
61
+ For example:
62
+ <ul>
63
+ <li>environments has <tt>name: [Boston, New York]</tt> and <tt>temperature: [0, 1]</tt></li>
64
+ <li>prompt is <tt>${name} is a</tt></li>
65
+ <li>settings is <tt>temperature: ${temperature}</tt></li>
66
+ </ul>
67
+ This gives rise to 4 <b>requests</b>:
68
+ <ul>
69
+ <li>prompt: <tt>Boston is a</tt>, temperature: <tt>0</tt></li>
70
+ <li>prompt: <tt>Boston is a</tt>, temperature: <tt>1</tt></li>
71
+ <li>prompt: <tt>New York is a</tt>, temperature: <tt>0</tt></li>
72
+ <li>prompt: <tt>New York is a</tt>, temperature: <tt>1</tt></li>
73
+ </ul>
74
+ </li>
75
+ </ul>
76
+
77
+ <h2><a name="quotas">Quotas</a></h2>
78
+ <p>
79
+ For each model group (e.g., <tt>gpt3</tt>) and time granularity
80
+ (e.g., <tt>daily</tt>, <tt>monthly</tt>, <tt>total</tt>), you are
81
+ given a quota of a certain number of tokens.
82
+ Once you go over that number, you won't be able to use the API.
83
+ However, note that requests that have already been made (by you or
84
+ anyone) that are cached are not counted towards your quota.
85
+ For example, if your daily quota for <tt>gpt3</tt> is <tt>10000</tt>,
86
+ that means each day, you will get 10000 tokens.
87
+ </p>
88
+ </div>
89
+ </div>
90
+ </div>
91
+
92
+ <script src="https://code.jquery.com/jquery-3.2.1.min.js"></script>
93
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.11.0/umd/popper.min.js" integrity="sha384-b/U6ypiBEHpOf/4+1nzFpr53nxSS+GLCkfwBdFNTxtclqqenISfwAzpKaMNFNmj4" crossorigin="anonymous"></script>
94
+ <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta/js/bootstrap.min.js" integrity="sha384-h0AbiXch4ZDo7tp9hKZ4TsHbi047NrKGLO3SEJAg45jXxnGIfYzk4Si90RDIqNm1" crossorigin="anonymous"></script>
95
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.tablesorter/2.29.0/js/jquery.tablesorter.min.js"></script>
96
+ <script src="general.js"></script>
97
+ <script src="index.js"></script>
98
+ </body>
99
+ </html>
@@ -0,0 +1,57 @@
1
+ .warning {
2
+ color: red;
3
+ }
4
+
5
+ .active {
6
+ background-color: #f0f0f0;
7
+ }
8
+
9
+ .examples-block {
10
+ margin-left: 120px;
11
+ margin-top: 10px;
12
+ margin-bottom: 10px;
13
+ }
14
+
15
+ .request {
16
+ background-color: #f0f0f0;
17
+ margin-top: 20px;
18
+ margin-bottom: 20px;
19
+ }
20
+
21
+ .completion {
22
+ border: solid 1px;
23
+ border-color: #c0c0c0;
24
+ padding: 3px;
25
+ margin-top: 5px;
26
+ margin-bottom: 5px;
27
+ }
28
+
29
+ .completion:hover {
30
+ background-color: #e8e8e8;
31
+ }
32
+
33
+ .metadata {
34
+ float: right;
35
+ font-style: italic;
36
+ }
37
+
38
+ .token:hover {
39
+ background-color: lightgreen;
40
+ }
41
+
42
+ .block {
43
+ margin: 15px;
44
+ padding: 15px;
45
+ border: solid 1px;
46
+ border-color: lightgray;
47
+ }
48
+
49
+ td {
50
+ text-align: left;
51
+ vertical-align: top;
52
+ }
53
+
54
+ .help-icon {
55
+ margin-left: 3px;
56
+ margin-right: 3px;
57
+ }
@@ -0,0 +1,40 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <title>CRFM Models API</title>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
7
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta/css/bootstrap.min.css" integrity="sha384-/Y6pD6FV/Vv2HJnA6t+vslU6fwYXjCFtcEpHbNJ0lyAFsXTsjBbfaDjzALeQsN6M" crossorigin="anonymous">
8
+ <link rel="stylesheet" type="text/css" href="index.css">
9
+ </head>
10
+
11
+ <body>
12
+ <div class="container-fluid">
13
+ <nav class="navbar navbar-expand-sm navbar-light bg-faded">
14
+ <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#nav-content" aria-controls="nav-content" aria-expanded="false" aria-label="Toggle navigation">
15
+ <span class="navbar-toggler-icon"></span>
16
+ </button>
17
+
18
+ <div class="collapse navbar-collapse" id="nav-content">
19
+ <ul class="navbar-nav">
20
+ <li class="nav-item"><a class="nav-link active" href="index.html">Query</a></li>
21
+ <li class="nav-item"><a class="nav-link" href="help.html">Help</a></li>
22
+ <li class="nav-item" id="loginInfo"></li>
23
+ </ul>
24
+ </div>
25
+ </nav>
26
+
27
+ <div class="row">
28
+ <div class="col-sm-12" id="main">
29
+ </div>
30
+ </div>
31
+ </div>
32
+
33
+ <script src="https://code.jquery.com/jquery-3.2.1.min.js"></script>
34
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.11.0/umd/popper.min.js" integrity="sha384-b/U6ypiBEHpOf/4+1nzFpr53nxSS+GLCkfwBdFNTxtclqqenISfwAzpKaMNFNmj4" crossorigin="anonymous"></script>
35
+ <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta/js/bootstrap.min.js" integrity="sha384-h0AbiXch4ZDo7tp9hKZ4TsHbi047NrKGLO3SEJAg45jXxnGIfYzk4Si90RDIqNm1" crossorigin="anonymous"></script>
36
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.tablesorter/2.29.0/js/jquery.tablesorter.min.js"></script>
37
+ <script src="general.js"></script>
38
+ <script src="index.js"></script>
39
+ </body>
40
+ </html>
@@ -0,0 +1,456 @@
1
+ /**
2
+ * This is a very quick and dirty frontend for just interacting with the models.
3
+ * Please refrain from adding additional functionality to this.
4
+ * TODO: Write this in React.
5
+ */
6
+ $(function () {
7
+ const urlParams = decodeUrlParams(window.location.search);
8
+ let auth = null;
9
+
10
+ function censor(api_key) {
11
+ // Show only the first k letters
12
+ const k = 2;
13
+ if (api_key.length <= k) {
14
+ return api_key;
15
+ }
16
+ return api_key.substring(0, k) + "*".repeat(api_key.length - k);
17
+ }
18
+
19
+ // Logging in and out
20
+ function updateLogin() {
21
+ const $loginInfo = $("#loginInfo");
22
+ $loginInfo.empty();
23
+ let api_key = readCookie("api_key");
24
+ if (api_key) {
25
+ auth = { api_key };
26
+ $loginInfo.append(
27
+ $("<a>", { class: "nav-link", href: "#" })
28
+ .append("Logout of API key " + censor(api_key))
29
+ .click(() => {
30
+ eraseCookie("api_key");
31
+ updateLogin();
32
+ }),
33
+ );
34
+ } else {
35
+ auth = null;
36
+ $loginInfo.append(
37
+ $("<a>", { class: "nav-link", href: "#" })
38
+ .append("Login")
39
+ .click(() => {
40
+ api_key = prompt("Enter your API key:");
41
+ if (!api_key) {
42
+ return;
43
+ }
44
+
45
+ // Check the API key the user entered using endpoint /api/account
46
+ const args = { auth: JSON.stringify({ api_key }) };
47
+ $.getJSON("/api/account", args, (response) => {
48
+ console.log("/api/account", response);
49
+ if ("error" in response) {
50
+ alert("The API key you've entered is invalid. Try again.");
51
+ } else {
52
+ createCookie("api_key", api_key);
53
+ updateLogin();
54
+ }
55
+ });
56
+ }),
57
+ );
58
+ }
59
+ }
60
+
61
+ updateLogin();
62
+
63
+ ////////////////////////////////////////////////////////////
64
+ // Rendering functions
65
+
66
+ function renderExampleQueries(updateQuery) {
67
+ // Show links for each example query, so when you click on them, they populate the textboxes.
68
+ const $examplesBlock = $("<div>", { class: "examples-block" });
69
+ $examplesBlock.append($("<span>").append("Examples:"));
70
+ generalInfo.example_queries.forEach((query, i) => {
71
+ const href = "#";
72
+ const title =
73
+ "[Prompt]\n" +
74
+ query.prompt +
75
+ "\n[Settings]\n" +
76
+ query.settings +
77
+ "\n[Environments]\n" +
78
+ query.environments;
79
+ const $link = $("<a>", { href, title }).append(`[${i}]`);
80
+ $link.click(() => {
81
+ // Populate the query with the example
82
+ updateQuery(query);
83
+ urlParams.prompt = query.prompt;
84
+ urlParams.settings = query.settings;
85
+ urlParams.environments = query.environments;
86
+ updateBrowserLocation(urlParams);
87
+ });
88
+ $examplesBlock.append("&nbsp;");
89
+ $examplesBlock.append($link);
90
+ });
91
+ return $examplesBlock;
92
+ }
93
+
94
+ function renderQuery(handleQueryResult) {
95
+ // Render the textboxes for entering the query (which includes the prompt, settings, and environment)
96
+ const $queryBlock = $("<div>", { class: "block" });
97
+ const $prompt = $("<textarea>", {
98
+ cols: 90,
99
+ rows: 7,
100
+ placeholder: "Enter prompt",
101
+ }).val(urlParams.prompt);
102
+ const $settings = $("<textarea>", {
103
+ cols: 90,
104
+ rows: 5,
105
+ placeholder:
106
+ "Enter settings (e.g., model: openai/text-davinci-002 for Instruct GPT-3); click Help at the top to learn more",
107
+ }).val(urlParams.settings);
108
+ const $environments = $("<textarea>", {
109
+ cols: 90,
110
+ rows: 3,
111
+ placeholder:
112
+ "Enter environment variables (e.g., city: [Boston, New York]); click Help at the top to learn more",
113
+ }).val(urlParams.environments);
114
+
115
+ $queryBlock.data("prompt", $prompt);
116
+ $queryBlock.data("settings", $settings);
117
+ $queryBlock.data("environments", $environments);
118
+
119
+ function bindSubmit($text) {
120
+ $text.keypress((e) => {
121
+ if ((event.keyCode === 10 || event.keyCode === 13) && event.ctrlKey) {
122
+ submit();
123
+ }
124
+ });
125
+ }
126
+
127
+ bindSubmit($prompt);
128
+ bindSubmit($settings);
129
+ bindSubmit($environments);
130
+
131
+ function submit() {
132
+ if (!auth) {
133
+ alert("You must log in first.");
134
+ return;
135
+ }
136
+
137
+ const query = {
138
+ prompt: $prompt.val(),
139
+ settings: $settings.val(),
140
+ environments: $environments.val(),
141
+ };
142
+
143
+ urlParams.prompt = query.prompt;
144
+ urlParams.settings = query.settings;
145
+ urlParams.environments = query.environments;
146
+ updateBrowserLocation(urlParams);
147
+
148
+ $.getJSON("/api/query", query, handleQueryResult);
149
+ }
150
+
151
+ // Show examples of queries
152
+ const $exampleQueries = renderExampleQueries((query) => {
153
+ $queryBlock.data("prompt").val(query.prompt);
154
+ $queryBlock.data("settings").val(query.settings);
155
+ $queryBlock.data("environments").val(query.environments);
156
+ urlParams.prompt = query.prompt;
157
+ urlParams.settings = query.settings;
158
+ urlParams.environments = query.environments;
159
+ updateBrowserLocation();
160
+ });
161
+
162
+ const promptHelp =
163
+ "This is the text you feed into the language model to complete.\nExample:\n Life is like";
164
+ const settingsHelp =
165
+ "Specifies what information we want from the language model (see [Help] for more details).\nExample:\n temperature: ${temperature}\n model: openai/davinci\n max_tokens: 10\n num_completions: 5";
166
+ const environmentsHelp =
167
+ "Specifies a list of values to try for each variable that appears in the prompt or settings.\nExample:\n temperature: [0, 0.5, 1]";
168
+
169
+ const $promptLabel = $("<span>")
170
+ .append(helpIcon(promptHelp, "help.html#query"))
171
+ .append("Prompt");
172
+ const $settingsLabel = $("<span>")
173
+ .append(helpIcon(settingsHelp, "help.html#query"))
174
+ .append("Settings");
175
+ const $environmentsLabel = $("<span>")
176
+ .append(helpIcon(environmentsHelp, "help.html#query"))
177
+ .append("Environments");
178
+
179
+ $queryBlock.append($("<h4>").append("Query"));
180
+ $queryBlock.append($exampleQueries);
181
+ const $table = $("<table>", { class: "query-table" });
182
+ $table.append(
183
+ $("<tr>")
184
+ .append($("<td>").append($promptLabel))
185
+ .append($("<td>").append($prompt)),
186
+ );
187
+ $table.append(
188
+ $("<tr>")
189
+ .append($("<td>").append($settingsLabel))
190
+ .append($("<td>").append($settings)),
191
+ );
192
+ $table.append(
193
+ $("<tr>")
194
+ .append($("<td>").append($environmentsLabel))
195
+ .append($("<td>").append($environments)),
196
+ );
197
+ $queryBlock.append($table);
198
+ $queryBlock.append($("<button>").append("Submit").click(submit));
199
+
200
+ return $queryBlock;
201
+ }
202
+
203
+ function getChangingKeys(items) {
204
+ // Return the list of keys whose values vary across `items.`
205
+ if (items.length === 0) {
206
+ return [];
207
+ }
208
+ return Object.keys(items[0]).filter((key) => {
209
+ return !items.every(
210
+ (item) => JSON.stringify(item[key]) === JSON.stringify(items[0][key]),
211
+ );
212
+ });
213
+ }
214
+
215
+ function renderRequest(changingKeys, request) {
216
+ // Render the request metadata (e.g., temperature if it is changing)
217
+ const title = JSON.stringify(request);
218
+ // Always include model, never prompt (since that's shown right after).
219
+ const showKeys = ["model"].concat(
220
+ changingKeys.filter((key) => key !== "prompt" && key !== "model"),
221
+ );
222
+ const summary =
223
+ "[" + showKeys.map((key) => key + ":" + request[key]).join(", ") + "]";
224
+ return $("<div>", { title }).append(
225
+ summary + " " + multilineHtml(request.prompt),
226
+ );
227
+ }
228
+
229
+ function renderTime(time) {
230
+ return Math.round(time * 10) / 10 + "s";
231
+ }
232
+
233
+ function constructTokenGroups(tokens) {
234
+ // Note: sometimes multiple tokens correspond to one character, for example:
235
+ // ["bytes:\xe2\x80", "bytes:\x99"] => ’
236
+ // For these, we keep these in the buffer and collapse them, and concatenate the entries.
237
+ //
238
+ // get_num_bytes() and convert_tokens_to_text() in src/helm/benchmark/basic_metrics.py are adapted from this function.
239
+ const groups = [];
240
+ for (let i = 0; i < tokens.length;) {
241
+ // Aggregate consecutive tokens while they're "bytes:..."
242
+ const group = { tokens: [] };
243
+ if (tokens[i].text.startsWith("bytes:")) {
244
+ let bytestring = "";
245
+ while (i < tokens.length && tokens[i].text.startsWith("bytes:")) {
246
+ group.tokens.push(tokens[i]);
247
+ // Extract part after : (e.g., \xe2\x80)
248
+ bytestring += tokens[i].text.split(":")[1];
249
+ i++;
250
+ }
251
+ // Convert to encoded URI (e.g., %e2%80%99) and decode
252
+ group.text = decodeURIComponent(bytestring.replaceAll("\\x", "%"));
253
+ } else {
254
+ group.tokens.push(tokens[i]);
255
+ group.text = tokens[i].text;
256
+ i++;
257
+ }
258
+ groups.push(group);
259
+ }
260
+ return groups;
261
+ }
262
+
263
+ function renderTokens(tokens) {
264
+ // Render text as a sequence of tokens that you can interact with to see more information (e.g., logprobs)
265
+ const $result = $("<div>");
266
+ const groups = constructTokenGroups(tokens);
267
+ for (const group of groups) {
268
+ const $group = $("<span>", { class: "token" }).append(
269
+ multilineHtml(group.text),
270
+ );
271
+ $result.append($group);
272
+ }
273
+ return $result;
274
+ }
275
+
276
+ function renderRequestResult(requestResult) {
277
+ // Render the list of completions.
278
+ if (requestResult.error) {
279
+ return renderError(requestResult.error);
280
+ }
281
+ const $result = $("<div>");
282
+ requestResult.completions.forEach((completion) => {
283
+ const $contents = $("<span>", {
284
+ title: `logprob: ${completion.logprob}`,
285
+ }).append(renderTokens(completion.tokens));
286
+ const $metadata = $("<span>", { class: "metadata" });
287
+ $metadata.append(
288
+ $("<span>", { title: "Log probability" }).append(
289
+ round(completion.logprob, 2),
290
+ ),
291
+ );
292
+ if (completion.finish_reason) {
293
+ const title =
294
+ "Generation finished because of this reason: " +
295
+ JSON.stringify(completion.finish_reason);
296
+ $metadata
297
+ .append(" ")
298
+ .append(
299
+ $("<span>", { title }).append(completion.finish_reason.reason),
300
+ );
301
+ }
302
+ $result.append(
303
+ $("<div>", { class: "completion" }).append($metadata).append($contents),
304
+ );
305
+ });
306
+ $result.append($("<i>").append(renderTime(requestResult.request_time)));
307
+ return $result;
308
+ }
309
+
310
+ function renderAccount() {
311
+ // Render the account information (usage, quotas).
312
+ if (!auth) {
313
+ return null;
314
+ }
315
+
316
+ const $accountBlock = $("<div>", { class: "block" });
317
+ const args = { auth: JSON.stringify(auth) };
318
+ $.getJSON("/api/account", args, ([account]) => {
319
+ console.log("/api/account", account);
320
+ const items = [];
321
+ for (modelGroup in account.usages) {
322
+ for (granularity in account.usages[modelGroup]) {
323
+ const usage = account.usages[modelGroup][granularity];
324
+ // Only print out usage for model groups and granularities where there is a quota
325
+ if (usage.quota) {
326
+ const percent = Math.round((usage.used / usage.quota) * 100);
327
+ items.push(
328
+ `<b>${modelGroup}</b>: ${usage.period} (${usage.used} / ${usage.quota} = ${percent}%)`,
329
+ );
330
+ }
331
+ }
332
+ }
333
+ if (items.length === 0) {
334
+ items.push("no restrictions");
335
+ }
336
+ $accountBlock
337
+ .empty()
338
+ .append(
339
+ helpIcon(
340
+ "Specifies your usage/quota (321/10000) for each model group (e.g., gpt3) for the current period (e.g., 2022-1-2).",
341
+ "help.html#quotas",
342
+ ),
343
+ )
344
+ .append("Usage")
345
+ .append(": ")
346
+ .append(items.join(" | "));
347
+ });
348
+ return $accountBlock;
349
+ }
350
+
351
+ ////////////////////////////////////////////////////////////
352
+ // For index.html
353
+
354
+ function renderQueryInterface() {
355
+ // For index.html
356
+ const $accountBlock = $("<div>").append(renderAccount());
357
+
358
+ // Allow editing the query
359
+ const $queryBlock = renderQuery((queryResult) => {
360
+ // Create requests
361
+ console.log("/api/query", queryResult);
362
+ $requestsBlock.empty();
363
+
364
+ if (queryResult.error) {
365
+ $requestsBlock.append(renderError(queryResult.error));
366
+ return;
367
+ }
368
+
369
+ $requestsBlock.append(
370
+ $("<h4>").append(`Requests (${queryResult.requests.length})`),
371
+ );
372
+ if (queryResult.error) {
373
+ $requestsBlock.append(renderError(queryResult.error));
374
+ return;
375
+ }
376
+ const changingKeys = getChangingKeys(queryResult.requests);
377
+ queryResult.requests.forEach((request) => {
378
+ const $request = $("<div>", { class: "request" }).append(
379
+ renderRequest(changingKeys, request),
380
+ );
381
+ const $requestResult = $("<div>").append($("<i>").append("(waiting)"));
382
+ const args = {
383
+ auth: JSON.stringify(auth),
384
+ request: JSON.stringify(request),
385
+ };
386
+ $.getJSON("/api/request", args, (requestResult) => {
387
+ console.log("/api/request", request, requestResult);
388
+ $requestResult.empty().append(renderRequestResult(requestResult));
389
+ if (!requestResult.cached) {
390
+ $accountBlock.empty().append(renderAccount());
391
+ }
392
+ });
393
+ $request.append($requestResult);
394
+ $requestsBlock.append($request);
395
+ });
396
+ });
397
+
398
+ // Where the requests and responses come in
399
+ const $requestsBlock = $("<div>", { class: "block" });
400
+
401
+ const $group = $("<div>");
402
+ $group.append($accountBlock);
403
+ $group.append($queryBlock);
404
+ $group.append($requestsBlock);
405
+ return $group;
406
+ }
407
+
408
+ ////////////////////////////////////////////////////////////
409
+ // For help.html
410
+
411
+ function renderModelsTable() {
412
+ // Render the list of models
413
+ const $table = $("<table>", { class: "table" });
414
+ const $header = $("<tr>")
415
+ .append($("<td>").append("group"))
416
+ .append($("<td>").append("name"))
417
+ .append($("<td>").append("description"))
418
+ .append($("<td>").append("tags"));
419
+ $table.append($header);
420
+ generalInfo.all_models.forEach((model) => {
421
+ const $row = $("<tr>")
422
+ .append($("<td>").append($("<tt>").append(model.group)))
423
+ .append($("<td>").append($("<tt>").append(model.name)))
424
+ .append($("<td>").append(model.description))
425
+ .append($("<td>").append(model.tags.join(" ")));
426
+ $table.append($row);
427
+ });
428
+ return $table;
429
+ }
430
+
431
+ ////////////////////////////////////////////////////////////
432
+ // Main
433
+
434
+ let generalInfo;
435
+
436
+ $.getJSON("/api/general_info", (response) => {
437
+ generalInfo = response;
438
+ console.log("/api/general_info", generalInfo);
439
+ if (generalInfo.error) {
440
+ alert(generalInfo.error);
441
+ return;
442
+ }
443
+
444
+ // For index.html
445
+ const $main = $("#main");
446
+ if ($main.length > 0) {
447
+ $main.empty().append(renderQueryInterface());
448
+ }
449
+
450
+ // For help.html
451
+ const $helpModels = $("#help-models");
452
+ if ($helpModels.length > 0) {
453
+ $helpModels.empty().append(renderModelsTable());
454
+ }
455
+ });
456
+ });
Binary file
helm/proxy/test_retry.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from retrying import RetryError
2
2
 
3
3
  from helm.common.request import RequestResult
4
- from .retry import retry_request, get_retry_decorator, retry_if_request_failed
4
+ from helm.proxy.retry import retry_request, get_retry_decorator, retry_if_request_failed
5
5
 
6
6
 
7
7
  def test_retry_for_successful_request():
@@ -4,7 +4,7 @@ from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_
4
4
  from helm.common.request import Request, GeneratedOutput
5
5
  from helm.tokenizers.auto_tokenizer import AutoTokenizer
6
6
  from helm.common.tokenization_request import TokenizationRequest, TokenizationRequestResult
7
- from .token_counter import TokenCounter
7
+ from helm.proxy.token_counters.token_counter import TokenCounter
8
8
 
9
9
 
10
10
  class AutoTokenCounter(TokenCounter):