crfm-helm: crfm_helm-0.3.0-py3-none-any.whl → crfm_helm-0.5.0-py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/proxy/server.py CHANGED
@@ -1,5 +1,3 @@
1
- # mypy: check_untyped_defs = False
2
-
3
1
  """
4
2
  Starts a REST server for the frontend to interact with.
5
3
  Look at `index.js` to see how the functionality is invoked.
@@ -16,12 +14,21 @@ import time
16
14
  from dacite import from_dict
17
15
  import bottle
18
16
 
17
+ from helm.benchmark.config_registry import (
18
+ register_configs_from_directory,
19
+ register_builtin_configs_from_helm_package,
20
+ )
21
+ from helm.benchmark.model_deployment_registry import get_default_model_deployment_for_model
19
22
  from helm.common.authentication import Authentication
23
+ from helm.common.cache_backend_config import CacheBackendConfig, MongoCacheBackendConfig, SqliteCacheBackendConfig
24
+ from helm.common.general import ensure_directory_exists
20
25
  from helm.common.hierarchical_logger import hlog
21
26
  from helm.common.optional_dependencies import handle_module_not_found_error
22
27
  from helm.common.request import Request
23
28
  from helm.common.perspective_api_request import PerspectiveAPIRequest
29
+ from helm.common.moderations_api_request import ModerationAPIRequest
24
30
  from helm.common.tokenization_request import TokenizationRequest, DecodeRequest
31
+ from helm.proxy.services.service import CACHE_DIR
25
32
  from .accounts import Account
26
33
  from .services.server_service import ServerService
27
34
  from .query import Query
@@ -35,6 +42,7 @@ except ModuleNotFoundError as e:
35
42
  bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024
36
43
 
37
44
  app = bottle.default_app()
45
+ service: ServerService
38
46
 
39
47
 
40
48
  def safe_call(func, to_json=True):
@@ -83,9 +91,16 @@ def handle_static_filename(filename):
83
91
  return resp
84
92
 
85
93
 
94
+ @app.get("/output/<filename:path>")
95
+ def handle_output_filename(filename):
96
+ resp = bottle.static_file(filename, root=app.config["crfm.proxy.outputpath"])
97
+ return resp
98
+
99
+
86
100
  @app.get("/api/general_info")
87
101
  def handle_get_general_info():
88
102
  def perform(args):
103
+ global service
89
104
  return dataclasses.asdict(service.get_general_info())
90
105
 
91
106
  return safe_call(perform)
@@ -94,6 +109,7 @@ def handle_get_general_info():
94
109
  @app.get("/api/window_service_info")
95
110
  def handle_get_window_service_info():
96
111
  def perform(args):
112
+ global service
97
113
  return dataclasses.asdict(service.get_window_service_info(args["model_name"]))
98
114
 
99
115
  return safe_call(perform)
@@ -102,6 +118,7 @@ def handle_get_window_service_info():
102
118
  @app.post("/api/account")
103
119
  def handle_create_account():
104
120
  def perform(args):
121
+ global service
105
122
  auth = Authentication(**json.loads(args["auth"]))
106
123
  return dataclasses.asdict(service.create_account(auth))
107
124
 
@@ -111,6 +128,7 @@ def handle_create_account():
111
128
  @app.delete("/api/account")
112
129
  def handle_delete_account():
113
130
  def perform(args):
131
+ global service
114
132
  auth = Authentication(**json.loads(args["auth"]))
115
133
  api_key = args["api_key"]
116
134
  return dataclasses.asdict(service.delete_account(auth, api_key))
@@ -121,6 +139,7 @@ def handle_delete_account():
121
139
  @app.get("/api/account")
122
140
  def handle_get_account():
123
141
  def perform(args):
142
+ global service
124
143
  auth = Authentication(**json.loads(args["auth"]))
125
144
  if "all" in args and args["all"].lower() == "true":
126
145
  return [dataclasses.asdict(account) for account in service.get_accounts(auth)]
@@ -133,6 +152,7 @@ def handle_get_account():
133
152
  @app.put("/api/account")
134
153
  def handle_update_account():
135
154
  def perform(args):
155
+ global service
136
156
  auth = Authentication(**json.loads(args["auth"]))
137
157
  account = from_dict(Account, json.loads(args["account"]))
138
158
  return dataclasses.asdict(service.update_account(auth, account))
@@ -143,6 +163,7 @@ def handle_update_account():
143
163
  @app.put("/api/account/api_key")
144
164
  def handle_update_api_key():
145
165
  def perform(args):
166
+ global service
146
167
  auth = Authentication(**json.loads(args["auth"]))
147
168
  account = from_dict(Account, json.loads(args["account"]))
148
169
  return dataclasses.asdict(service.rotate_api_key(auth, account))
@@ -153,6 +174,7 @@ def handle_update_api_key():
153
174
  @app.get("/api/query")
154
175
  def handle_query():
155
176
  def perform(args):
177
+ global service
156
178
  query = Query(**args)
157
179
  return dataclasses.asdict(service.expand_query(query))
158
180
 
@@ -162,9 +184,28 @@ def handle_query():
162
184
  @app.get("/api/request")
163
185
  def handle_request():
164
186
  def perform(args):
187
+ global service
165
188
  auth = Authentication(**json.loads(args["auth"]))
166
189
  request = Request(**json.loads(args["request"]))
167
- return dataclasses.asdict(service.make_request(auth, request))
190
+ # Hack to maintain backward compatibility with clients with version <= 0.3.0.
191
+ # Clients with version <= 0.3.0 do not set model_deployment, but this is now
192
+ # required by Request.
193
+ if not request.model_deployment:
194
+ model_deployment = get_default_model_deployment_for_model(request.model)
195
+ if model_deployment is None:
196
+ raise ValueError(f"Unknown model '{request.model}'")
197
+ request = dataclasses.replace(request, model_deployment=model_deployment)
198
+
199
+ raw_response = dataclasses.asdict(service.make_request(auth, request))
200
+
201
+ # Hack to maintain backward compatibility with clients with version <= 1.0.0.
202
+ # Clients with version <= 1.0.0 expect each token to contain a `top_logprobs`
203
+ # field of type dict.
204
+ for completion in raw_response["completions"]:
205
+ for token in completion["tokens"]:
206
+ token["top_logprobs"] = {}
207
+
208
+ return raw_response
168
209
 
169
210
  return safe_call(perform)
170
211
 
@@ -172,6 +213,7 @@ def handle_request():
172
213
  @app.get("/api/tokenize")
173
214
  def handle_tokenization():
174
215
  def perform(args):
216
+ global service
175
217
  auth = Authentication(**json.loads(args["auth"]))
176
218
  request = TokenizationRequest(**json.loads(args["request"]))
177
219
  return dataclasses.asdict(service.tokenize(auth, request))
@@ -182,6 +224,7 @@ def handle_tokenization():
182
224
  @app.get("/api/decode")
183
225
  def handle_decode():
184
226
  def perform(args):
227
+ global service
185
228
  auth = Authentication(**json.loads(args["auth"]))
186
229
  request = DecodeRequest(**json.loads(args["request"]))
187
230
  return dataclasses.asdict(service.decode(auth, request))
@@ -192,6 +235,7 @@ def handle_decode():
192
235
  @app.get("/api/toxicity")
193
236
  def handle_toxicity_request():
194
237
  def perform(args):
238
+ global service
195
239
  auth = Authentication(**json.loads(args["auth"]))
196
240
  request = PerspectiveAPIRequest(**json.loads(args["request"]))
197
241
  return dataclasses.asdict(service.get_toxicity_scores(auth, request))
@@ -199,9 +243,21 @@ def handle_toxicity_request():
199
243
  return safe_call(perform)
200
244
 
201
245
 
246
+ @app.get("/api/moderation")
247
+ def handle_moderation_request():
248
+ def perform(args):
249
+ global service
250
+ auth = Authentication(**json.loads(args["auth"]))
251
+ request = ModerationAPIRequest(**json.loads(args["request"]))
252
+ return dataclasses.asdict(service.get_moderation_results(auth, request))
253
+
254
+ return safe_call(perform)
255
+
256
+
202
257
  @app.get("/api/shutdown")
203
258
  def handle_shutdown():
204
259
  def perform(args):
260
+ global service
205
261
  auth = Authentication(**json.loads(args["auth"]))
206
262
  service.shutdown(auth)
207
263
 
@@ -214,6 +270,7 @@ def main():
214
270
  parser.add_argument("-p", "--port", type=int, help="What port to listen on", default=1959)
215
271
  parser.add_argument("--ssl-key-file", type=str, help="Path to SSL key file")
216
272
  parser.add_argument("--ssl-cert-file", type=str, help="Path to SSL cert file")
273
+ parser.add_argument("--ssl-ca-certs", type=str, help="Path to SSL CA certs")
217
274
  parser.add_argument("-b", "--base-path", help="What directory has credentials, etc.", default="prod_env")
218
275
  parser.add_argument("-w", "--workers", type=int, help="Number of worker processes to handle requests", default=8)
219
276
  parser.add_argument("-t", "--timeout", type=int, help="Request timeout in seconds", default=5 * 60)
@@ -225,17 +282,32 @@ def main():
225
282
  )
226
283
  args = parser.parse_args()
227
284
 
228
- service = ServerService(base_path=args.base_path, mongo_uri=args.mongo_uri)
285
+ register_builtin_configs_from_helm_package()
286
+ register_configs_from_directory(args.base_path)
287
+
288
+ cache_backend_config: CacheBackendConfig
289
+ if args.mongo_uri:
290
+ cache_backend_config = MongoCacheBackendConfig(args.mongo_uri)
291
+ else:
292
+ sqlite_cache_path = os.path.join(args.base_path, CACHE_DIR)
293
+ ensure_directory_exists(sqlite_cache_path)
294
+ cache_backend_config = SqliteCacheBackendConfig(sqlite_cache_path)
295
+
296
+ service = ServerService(base_path=args.base_path, cache_backend_config=cache_backend_config)
229
297
 
230
298
  gunicorn_args = {
231
299
  "workers": args.workers,
232
300
  "timeout": args.timeout,
233
301
  "limit_request_line": 0, # Controls the maximum size of HTTP request line in bytes. 0 = unlimited.
234
302
  }
235
- if args.ssl_key_file and args.ssl_cert_file:
303
+ if args.ssl_key_file:
236
304
  gunicorn_args["keyfile"] = args.ssl_key_file
305
+ if args.ssl_cert_file:
237
306
  gunicorn_args["certfile"] = args.ssl_cert_file
307
+ if args.ssl_ca_certs:
308
+ gunicorn_args["ca_certs"] = args.ssl_ca_certs
238
309
 
239
310
  # Clear arguments before running gunicorn as it also uses argparse
240
311
  sys.argv = [sys.argv[0]]
312
+ app.config["crfm.proxy.outputpath"] = os.path.join(os.path.realpath(args.base_path), "cache", "output")
241
313
  app.run(host="0.0.0.0", port=args.port, server="gunicorn", **gunicorn_args)
@@ -5,9 +5,15 @@ import urllib.parse
5
5
  from dataclasses import asdict
6
6
  from typing import Any, List, Optional
7
7
 
8
+ from helm.common.cache import CacheConfig
9
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
8
10
  from helm.common.authentication import Authentication
11
+ from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
9
12
  from helm.common.critique_request import CritiqueRequest, CritiqueRequestResult
13
+ from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResult
14
+ from helm.common.file_upload_request import FileUploadRequest, FileUploadResult
10
15
  from helm.common.perspective_api_request import PerspectiveAPIRequest, PerspectiveAPIRequestResult
16
+ from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
11
17
  from helm.common.tokenization_request import (
12
18
  WindowServiceInfo,
13
19
  TokenizationRequest,
@@ -27,6 +33,8 @@ class RemoteServiceError(Exception):
27
33
 
28
34
 
29
35
  class RemoteService(Service):
36
+ NOT_SUPPORTED_ERROR: str = "Not supported through the remote service."
37
+
30
38
  def __init__(self, base_url):
31
39
  self.base_url: str = base_url
32
40
 
@@ -84,6 +92,15 @@ class RemoteService(Service):
84
92
  RemoteService._check_response(response, request_json)
85
93
  return from_dict(DecodeRequestResult, response)
86
94
 
95
+ def upload(self, auth: Authentication, request: FileUploadRequest) -> FileUploadResult:
96
+ raise NotImplementedError(self.NOT_SUPPORTED_ERROR)
97
+
98
+ def check_nudity(self, auth: Authentication, request: NudityCheckRequest) -> NudityCheckResult:
99
+ raise NotImplementedError(self.NOT_SUPPORTED_ERROR)
100
+
101
+ def compute_clip_score(self, auth: Authentication, request: CLIPScoreRequest) -> CLIPScoreResult:
102
+ raise NotImplementedError(self.NOT_SUPPORTED_ERROR)
103
+
87
104
  def get_toxicity_scores(self, auth: Authentication, request: PerspectiveAPIRequest) -> PerspectiveAPIRequestResult:
88
105
  request_json: str = json.dumps(asdict(request))
89
106
  params = {
@@ -94,6 +111,16 @@ class RemoteService(Service):
94
111
  RemoteService._check_response(response, request_json)
95
112
  return from_dict(PerspectiveAPIRequestResult, response)
96
113
 
114
+ def get_moderation_results(self, auth: Authentication, request: ModerationAPIRequest) -> ModerationAPIRequestResult:
115
+ request_json: str = json.dumps(asdict(request))
116
+ params = {
117
+ "auth": json.dumps(asdict(auth)),
118
+ "request": request_json,
119
+ }
120
+ response = requests.get(f"{self.base_url}/api/moderation?{urllib.parse.urlencode(params)}").json()
121
+ RemoteService._check_response(response, request_json)
122
+ return from_dict(ModerationAPIRequestResult, response)
123
+
97
124
  def make_critique_request(self, auth: Authentication, request: CritiqueRequest) -> CritiqueRequestResult:
98
125
  raise NotImplementedError("make_critique_request is not supported by RemoteServer")
99
126
 
@@ -153,6 +180,10 @@ class RemoteService(Service):
153
180
  # A ConnectionError is expected when shutting down the server.
154
181
  pass
155
182
 
183
+ def get_cache_config(self, shard_name: str) -> CacheConfig:
184
+ """Returns a CacheConfig"""
185
+ return BlackHoleCacheBackendConfig().get_cache_config(shard_name)
186
+
156
187
 
157
188
  def add_service_args(parser: argparse.ArgumentParser):
158
189
  """Add command-line arguments to enable command-line utilities to specify how to connect to a remote server."""
@@ -1,12 +1,16 @@
1
+ import dataclasses
1
2
  import os
2
3
  import signal
3
4
  from typing import List, Optional
4
5
 
5
- from helm.benchmark.model_metadata_registry import maybe_register_model_metadata_from_base_path
6
- from helm.benchmark.model_deployment_registry import maybe_register_model_deployments_from_base_path
7
- from helm.benchmark.tokenizer_config_registry import maybe_register_tokenizer_configs_from_base_path
6
+ from helm.common.cache import CacheConfig
7
+ from helm.common.cache_backend_config import CacheBackendConfig, BlackHoleCacheBackendConfig
8
8
  from helm.common.critique_request import CritiqueRequest, CritiqueRequestResult
9
9
  from helm.common.authentication import Authentication
10
+ from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
11
+ from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
12
+ from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResult
13
+ from helm.common.file_upload_request import FileUploadRequest, FileUploadResult
10
14
  from helm.common.general import ensure_directory_exists, parse_hocon, get_credentials
11
15
  from helm.common.perspective_api_request import PerspectiveAPIRequest, PerspectiveAPIRequestResult
12
16
  from helm.common.tokenization_request import (
@@ -19,13 +23,20 @@ from helm.common.tokenization_request import (
19
23
  from helm.common.request import Request, RequestResult
20
24
  from helm.common.hierarchical_logger import hlog
21
25
  from helm.proxy.accounts import Accounts, Account
22
- from helm.proxy.clients.auto_client import AutoClient
23
- from helm.proxy.clients.toxicity_classifier_client import ToxicityClassifierClient
26
+ from helm.clients.auto_client import AutoClient
27
+ from helm.clients.moderation_api_client import ModerationAPIClient
28
+ from helm.clients.perspective_api_client import PerspectiveAPIClient
29
+ from helm.clients.image_generation.nudity_check_client import NudityCheckClient
30
+ from helm.clients.gcs_client import GCSClient
31
+ from helm.clients.clip_score_client import CLIPScoreClient
32
+ from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
24
33
  from helm.proxy.example_queries import example_queries
25
- from helm.proxy.models import ALL_MODELS, get_model_group
34
+ from helm.benchmark.model_metadata_registry import ALL_MODELS_METADATA
35
+ from helm.benchmark.model_deployment_registry import get_model_deployment_host_organization
26
36
  from helm.proxy.query import Query, QueryResult
27
37
  from helm.proxy.retry import retry_request
28
38
  from helm.proxy.token_counters.auto_token_counter import AutoTokenCounter
39
+ from helm.tokenizers.auto_tokenizer import AutoTokenizer
29
40
  from .service import (
30
41
  Service,
31
42
  CACHE_DIR,
@@ -42,24 +53,38 @@ class ServerService(Service):
42
53
  Main class that supports various functionality for the server.
43
54
  """
44
55
 
45
- def __init__(self, base_path: str = "prod_env", root_mode=False, mongo_uri: str = ""):
56
+ def __init__(
57
+ self,
58
+ base_path: str = "prod_env",
59
+ root_mode: bool = False,
60
+ cache_backend_config: CacheBackendConfig = BlackHoleCacheBackendConfig(),
61
+ ):
62
+ ensure_directory_exists(base_path)
63
+ client_file_storage_path = os.path.join(base_path, CACHE_DIR)
64
+ ensure_directory_exists(client_file_storage_path)
65
+
46
66
  credentials = get_credentials(base_path)
47
- cache_path = os.path.join(base_path, CACHE_DIR)
48
- ensure_directory_exists(cache_path)
49
67
  accounts_path = os.path.join(base_path, ACCOUNTS_FILE)
50
68
 
51
- maybe_register_model_metadata_from_base_path(base_path)
52
- maybe_register_model_deployments_from_base_path(base_path)
53
- maybe_register_tokenizer_configs_from_base_path(base_path)
54
-
55
- self.client = AutoClient(credentials, cache_path, mongo_uri)
56
- self.token_counter = AutoTokenCounter(self.client.get_huggingface_client())
69
+ self.cache_backend_config = cache_backend_config
70
+ self.client = AutoClient(credentials, client_file_storage_path, cache_backend_config)
71
+ self.tokenizer = AutoTokenizer(credentials, cache_backend_config)
72
+ self.token_counter = AutoTokenCounter(self.tokenizer)
57
73
  self.accounts = Accounts(accounts_path, root_mode=root_mode)
58
- # Lazily instantiated by get_toxicity_scores()
74
+
75
+ # Lazily instantiate the following clients
76
+ self.moderation_api_client: Optional[ModerationAPIClient] = None
59
77
  self.toxicity_classifier_client: Optional[ToxicityClassifierClient] = None
78
+ self.perspective_api_client: Optional[PerspectiveAPIClient] = None
79
+ self.nudity_check_client: Optional[NudityCheckClient] = None
80
+ self.clip_score_client: Optional[CLIPScoreClient] = None
81
+ self.gcs_client: Optional[GCSClient] = None
60
82
 
61
83
  def get_general_info(self) -> GeneralInfo:
62
- return GeneralInfo(version=VERSION, example_queries=example_queries, all_models=ALL_MODELS)
84
+ # Can't send release_dates in ModelMetadata because dates cannot be round-tripped to and from JSON easily.
85
+ # TODO(#2158): Either fix this or delete get_general_info.
86
+ all_models = [dataclasses.replace(model_metadata, release_date=None) for model_metadata in ALL_MODELS_METADATA]
87
+ return GeneralInfo(version=VERSION, example_queries=example_queries, all_models=all_models)
63
88
 
64
89
  def get_window_service_info(self, model_name) -> WindowServiceInfo:
65
90
  # The import statement is placed here to avoid two problems, please refer to the link for details
@@ -88,6 +113,21 @@ class ServerService(Service):
88
113
  requests.append(request)
89
114
  return QueryResult(requests=requests)
90
115
 
116
+ def _get_model_group_for_model_deployment(self, model_deployment: str) -> str:
117
+ if model_deployment.startswith("openai/"):
118
+ if model_deployment.startswith("openai/code-"):
119
+ return "codex"
120
+ elif model_deployment.startswith("openai/dall-e-"):
121
+ return "dall_e"
122
+ elif model_deployment.startswith("openai/gpt-4-"):
123
+ return "gpt4"
124
+ else:
125
+ return "gpt3"
126
+ elif model_deployment.startswith("ai21/"):
127
+ return "jurassic"
128
+ else:
129
+ return get_model_deployment_host_organization(model_deployment)
130
+
91
131
  def make_request(self, auth: Authentication, request: Request) -> RequestResult:
92
132
  """Actually make a request to an API."""
93
133
  # TODO: try to invoke the API even if we're not authenticated, and if
@@ -95,7 +135,7 @@ class ServerService(Service):
95
135
  # https://github.com/stanford-crfm/benchmarking/issues/56
96
136
 
97
137
  self.accounts.authenticate(auth)
98
- model_group: str = get_model_group(request.model)
138
+ model_group: str = self._get_model_group_for_model_deployment(request.model_deployment)
99
139
  # Make sure we can use
100
140
  self.accounts.check_can_use(auth.api_key, model_group)
101
141
 
@@ -113,12 +153,42 @@ class ServerService(Service):
113
153
  def tokenize(self, auth: Authentication, request: TokenizationRequest) -> TokenizationRequestResult:
114
154
  """Tokenize via an API."""
115
155
  self.accounts.authenticate(auth)
116
- return self.client.tokenize(request)
156
+ return self.tokenizer.tokenize(request)
117
157
 
118
158
  def decode(self, auth: Authentication, request: DecodeRequest) -> DecodeRequestResult:
119
159
  """Decodes to text."""
120
160
  self.accounts.authenticate(auth)
121
- return self.client.decode(request)
161
+ return self.tokenizer.decode(request)
162
+
163
+ def upload(self, auth: Authentication, request: FileUploadRequest) -> FileUploadResult:
164
+ """Uploads a file to external storage."""
165
+ self.accounts.authenticate(auth)
166
+
167
+ if not self.gcs_client:
168
+ self.gcs_client = self.client.get_gcs_client()
169
+
170
+ assert self.gcs_client
171
+ return self.gcs_client.upload(request)
172
+
173
+ def check_nudity(self, auth: Authentication, request: NudityCheckRequest) -> NudityCheckResult:
174
+ """Check for nudity."""
175
+ self.accounts.authenticate(auth)
176
+
177
+ if not self.nudity_check_client:
178
+ self.nudity_check_client = self.client.get_nudity_check_client()
179
+
180
+ assert self.nudity_check_client
181
+ return self.nudity_check_client.check_nudity(request)
182
+
183
+ def compute_clip_score(self, auth: Authentication, request: CLIPScoreRequest) -> CLIPScoreResult:
184
+ """Computes CLIPScore for a given caption and image."""
185
+ self.accounts.authenticate(auth)
186
+
187
+ if not self.clip_score_client:
188
+ self.clip_score_client = self.client.get_clip_score_client()
189
+
190
+ assert self.clip_score_client
191
+ return self.clip_score_client.compute_score(request)
122
192
 
123
193
  def get_toxicity_scores(self, auth: Authentication, request: PerspectiveAPIRequest) -> PerspectiveAPIRequestResult:
124
194
  @retry_request
@@ -130,6 +200,16 @@ class ServerService(Service):
130
200
  self.accounts.authenticate(auth)
131
201
  return get_toxicity_scores_with_retry(request)
132
202
 
203
+ def get_moderation_results(self, auth: Authentication, request: ModerationAPIRequest) -> ModerationAPIRequestResult:
204
+ @retry_request
205
+ def get_moderation_results_with_retry(request: ModerationAPIRequest) -> ModerationAPIRequestResult:
206
+ if not self.moderation_api_client:
207
+ self.moderation_api_client = self.client.get_moderation_api_client()
208
+ return self.moderation_api_client.get_moderation_results(request)
209
+
210
+ self.accounts.authenticate(auth)
211
+ return get_moderation_results_with_retry(request)
212
+
133
213
  def make_critique_request(self, auth: Authentication, request: CritiqueRequest) -> CritiqueRequestResult:
134
214
  self.accounts.authenticate(auth)
135
215
  return self.client.get_critique_client().make_critique_request(request)
@@ -165,3 +245,6 @@ class ServerService(Service):
165
245
  hlog(f"Shutting down server by killing its own process {pid}...")
166
246
  os.kill(pid, signal.SIGTERM)
167
247
  hlog("Done.")
248
+
249
+ def get_cache_config(self, shard_name: str) -> CacheConfig:
250
+ return self.cache_backend_config.get_cache_config(shard_name)
@@ -5,7 +5,11 @@ from typing import Dict, List, Tuple, Any
5
5
 
6
6
  from helm.common.general import parse_hocon
7
7
  from helm.common.critique_request import CritiqueRequest, CritiqueRequestResult
8
+ from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
9
+ from helm.common.file_upload_request import FileUploadResult, FileUploadRequest
10
+ from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResult
8
11
  from helm.common.perspective_api_request import PerspectiveAPIRequestResult, PerspectiveAPIRequest
12
+ from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
9
13
  from helm.common.tokenization_request import (
10
14
  WindowServiceInfo,
11
15
  TokenizationRequest,
@@ -14,9 +18,10 @@ from helm.common.tokenization_request import (
14
18
  DecodeRequestResult,
15
19
  )
16
20
  from helm.common.request import Request, RequestResult
17
- from helm.proxy.models import Model
21
+ from helm.benchmark.model_metadata_registry import ModelMetadata
18
22
  from helm.proxy.query import Query, QueryResult
19
23
  from helm.proxy.accounts import Authentication, Account
24
+ from helm.common.cache import CacheConfig
20
25
 
21
26
  VERSION = "1.0"
22
27
  ACCOUNTS_FILE = "accounts.sqlite"
@@ -29,7 +34,7 @@ MAX_EXPANSION = 1000
29
34
  class GeneralInfo:
30
35
  version: str
31
36
  example_queries: List[Query]
32
- all_models: List[Model]
37
+ all_models: List[ModelMetadata]
33
38
 
34
39
 
35
40
  def expand_environments(environments: Dict[str, List[str]]):
@@ -69,6 +74,8 @@ def synthesize_request(prompt: str, settings: str, environment: Dict[str, str])
69
74
  request: Dict[str, Any] = {}
70
75
  request["prompt"] = substitute_text(prompt, environment)
71
76
  request.update(parse_hocon(substitute_text(settings, environment)))
77
+ if "model_deployment" not in request and "model" not in request:
78
+ request["model_deployment"] = "openai/text-davinci-002"
72
79
  return Request(**request)
73
80
 
74
81
 
@@ -103,11 +110,31 @@ class Service(ABC):
103
110
  """Decodes to text."""
104
111
  pass
105
112
 
113
+ @abstractmethod
114
+ def upload(self, auth: Authentication, request: FileUploadRequest) -> FileUploadResult:
115
+ """Uploads a file to external storage."""
116
+ pass
117
+
118
+ @abstractmethod
119
+ def check_nudity(self, auth: Authentication, request: NudityCheckRequest) -> NudityCheckResult:
120
+ """Check for nudity for a batch of images."""
121
+ pass
122
+
123
+ @abstractmethod
124
+ def compute_clip_score(self, auth: Authentication, request: CLIPScoreRequest) -> CLIPScoreResult:
125
+ """Computes CLIPScore for a given caption and image."""
126
+ pass
127
+
106
128
  @abstractmethod
107
129
  def get_toxicity_scores(self, auth: Authentication, request: PerspectiveAPIRequest) -> PerspectiveAPIRequestResult:
108
130
  """Get toxicity scores for a batch of text."""
109
131
  pass
110
132
 
133
+ @abstractmethod
134
+ def get_moderation_results(self, auth: Authentication, request: ModerationAPIRequest) -> ModerationAPIRequestResult:
135
+ """Get OpenAI's moderation results for some text."""
136
+ pass
137
+
111
138
  @abstractmethod
112
139
  def make_critique_request(self, auth: Authentication, request: CritiqueRequest) -> CritiqueRequestResult:
113
140
  """Get responses to a critique request."""
@@ -147,3 +174,8 @@ class Service(ABC):
147
174
  def shutdown(self, auth: Authentication):
148
175
  """Shutdown server."""
149
176
  pass
177
+
178
+ @abstractmethod
179
+ def get_cache_config(self, shard_name: str) -> CacheConfig:
180
+ """Returns a CacheConfig"""
181
+ pass
@@ -17,7 +17,7 @@ from sqlitedict import SqliteDict
17
17
  from helm.common.authentication import Authentication
18
18
  from helm.common.request import Request, RequestResult
19
19
  from helm.common.tokenization_request import TokenizationRequest, TokenizationRequestResult
20
- from helm.proxy.accounts import Account
20
+ from helm.proxy.accounts import Account, set_default_quotas
21
21
  from .remote_service import RemoteService
22
22
  from .service import ACCOUNTS_FILE
23
23
 
@@ -55,6 +55,7 @@ class TestRemoteServerService:
55
55
 
56
56
  with SqliteDict(os.path.join(path, ACCOUNTS_FILE)) as cache:
57
57
  account: Account = Account(TestRemoteServerService._ADMIN_API_KEY, is_admin=True)
58
+ set_default_quotas(account)
58
59
  cache[TestRemoteServerService._ADMIN_API_KEY] = asdict(account)
59
60
  cache.commit()
60
61
  return path
@@ -85,7 +86,7 @@ class TestRemoteServerService:
85
86
 
86
87
  @staticmethod
87
88
  def query(url: str, auth: Authentication, prompt: str):
88
- request = Request(prompt=prompt, model="simple/model1")
89
+ request = Request(prompt=prompt, model="simple/model1", model_deployment="simple/model1")
89
90
  response: RequestResult = RemoteService(base_url=url).make_request(auth, request)
90
91
  response_text: str = response.completions[0].text
91
92
  # With the toy model (simple/model1), we should expect the same response as the prompt
@@ -121,18 +122,18 @@ class TestRemoteServerService:
121
122
  shutil.rmtree(cls.base_path)
122
123
 
123
124
  def test_make_request(self):
124
- request = Request(prompt="1 2 3", model="simple/model1")
125
+ request = Request(prompt="1 2 3", model="simple/model1", model_deployment="simple/model1")
125
126
  response: RequestResult = self.service.make_request(self.auth, request)
126
127
  assert response.success
127
128
 
128
129
  def test_tokenize(self):
129
- request = TokenizationRequest(text="1 2 3", tokenizer="simple/model1")
130
+ request = TokenizationRequest(text="1 2 3", tokenizer="simple/tokenizer1")
130
131
  response: TokenizationRequestResult = self.service.tokenize(self.auth, request)
131
- assert [token.value for token in response.tokens] == ["1", "2", "3"]
132
+ assert [token.value for token in response.tokens] == ["1", " ", "2", " ", "3"]
132
133
 
133
134
  def test_make_request_plus_sign(self):
134
135
  # Ensure + in prompt doesn't get replaced by a blank space
135
- request = Request(prompt="+", model="simple/model1")
136
+ request = Request(prompt="+", model="simple/model1", model_deployment="simple/model1")
136
137
  response: RequestResult = self.service.make_request(self.auth, request)
137
138
  assert response.completions[0].text == "+"
138
139
  assert response.success