crfm-helm 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of crfm-helm might be problematic.

Files changed (209)
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +81 -112
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +165 -155
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +57 -0
  10. helm/benchmark/annotation/call_center_annotator.py +258 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +55 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +37 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +36 -44
  18. helm/benchmark/annotation/model_as_judge.py +96 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +50 -0
  20. helm/benchmark/annotation/xstest_annotator.py +100 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +79 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/unitxt_metrics.py +17 -3
  30. helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
  31. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  32. helm/benchmark/model_metadata_registry.py +3 -3
  33. helm/benchmark/presentation/create_plots.py +1 -1
  34. helm/benchmark/presentation/schema.py +3 -0
  35. helm/benchmark/presentation/summarize.py +106 -256
  36. helm/benchmark/presentation/test_run_entry.py +1 -0
  37. helm/benchmark/presentation/test_summarize.py +145 -3
  38. helm/benchmark/run.py +15 -0
  39. helm/benchmark/run_expander.py +83 -30
  40. helm/benchmark/run_specs/bhasa_run_specs.py +652 -0
  41. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  42. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  43. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  44. helm/benchmark/run_specs/finance_run_specs.py +82 -1
  45. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  46. helm/benchmark/run_specs/vlm_run_specs.py +100 -24
  47. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  48. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  49. helm/benchmark/scenarios/bhasa_scenario.py +1942 -0
  50. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  51. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  52. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  53. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  54. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  55. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  56. helm/benchmark/scenarios/raft_scenario.py +1 -1
  57. helm/benchmark/scenarios/scenario.py +1 -1
  58. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  59. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  60. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  61. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  62. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  63. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  64. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  65. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  66. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  67. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  68. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  69. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  70. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  71. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  72. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  73. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  74. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  75. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  76. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  78. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  79. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  80. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  81. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  82. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  83. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  84. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  85. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  86. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  87. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  88. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  89. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  91. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  92. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  93. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  94. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  95. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  96. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  97. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  98. helm/benchmark/server.py +1 -6
  99. helm/benchmark/static/schema_air_bench.yaml +750 -750
  100. helm/benchmark/static/schema_bhasa.yaml +709 -0
  101. helm/benchmark/static/schema_call_center.yaml +232 -0
  102. helm/benchmark/static/schema_cleva.yaml +768 -0
  103. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  104. helm/benchmark/static/schema_ewok.yaml +367 -0
  105. helm/benchmark/static/schema_finance.yaml +55 -9
  106. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  107. helm/benchmark/static/schema_legal.yaml +566 -0
  108. helm/benchmark/static/schema_safety.yaml +266 -0
  109. helm/benchmark/static/schema_tables.yaml +149 -8
  110. helm/benchmark/static/schema_thai.yaml +21 -0
  111. helm/benchmark/static/schema_vhelm.yaml +137 -101
  112. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  113. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  114. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  115. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  116. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  117. helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
  118. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  119. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  120. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  121. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  122. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  123. helm/benchmark/static_build/index.html +2 -2
  124. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  125. helm/benchmark/window_services/tokenizer_service.py +0 -5
  126. helm/clients/ai21_client.py +71 -1
  127. helm/clients/anthropic_client.py +7 -19
  128. helm/clients/huggingface_client.py +38 -37
  129. helm/clients/nvidia_nim_client.py +35 -0
  130. helm/clients/openai_client.py +18 -4
  131. helm/clients/palmyra_client.py +24 -0
  132. helm/clients/perspective_api_client.py +11 -6
  133. helm/clients/test_client.py +4 -6
  134. helm/clients/together_client.py +22 -0
  135. helm/clients/vision_language/open_flamingo_client.py +1 -2
  136. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  137. helm/common/cache.py +8 -30
  138. helm/common/images_utils.py +6 -0
  139. helm/common/key_value_store.py +9 -9
  140. helm/common/mongo_key_value_store.py +5 -4
  141. helm/common/request.py +16 -0
  142. helm/common/test_cache.py +1 -48
  143. helm/common/tokenization_request.py +0 -9
  144. helm/config/model_deployments.yaml +444 -329
  145. helm/config/model_metadata.yaml +513 -111
  146. helm/config/tokenizer_configs.yaml +140 -11
  147. helm/proxy/example_queries.py +14 -21
  148. helm/proxy/server.py +0 -9
  149. helm/proxy/services/remote_service.py +0 -6
  150. helm/proxy/services/server_service.py +6 -20
  151. helm/proxy/services/service.py +0 -6
  152. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  153. helm/tokenizers/ai21_tokenizer.py +51 -59
  154. helm/tokenizers/cohere_tokenizer.py +0 -75
  155. helm/tokenizers/huggingface_tokenizer.py +0 -1
  156. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  157. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  158. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  159. helm/benchmark/data_overlap/light_scenario.py +0 -60
  160. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  161. helm/benchmark/static/benchmarking.css +0 -156
  162. helm/benchmark/static/benchmarking.js +0 -1705
  163. helm/benchmark/static/config.js +0 -3
  164. helm/benchmark/static/general.js +0 -122
  165. helm/benchmark/static/images/crfm-logo.png +0 -0
  166. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  167. helm/benchmark/static/images/helm-logo.png +0 -0
  168. helm/benchmark/static/images/language-model-helm.png +0 -0
  169. helm/benchmark/static/images/organizations/ai21.png +0 -0
  170. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  171. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  172. helm/benchmark/static/images/organizations/cohere.png +0 -0
  173. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  174. helm/benchmark/static/images/organizations/google.png +0 -0
  175. helm/benchmark/static/images/organizations/meta.png +0 -0
  176. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  177. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  178. helm/benchmark/static/images/organizations/openai.png +0 -0
  179. helm/benchmark/static/images/organizations/together.png +0 -0
  180. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  181. helm/benchmark/static/images/organizations/yandex.png +0 -0
  182. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  183. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  184. helm/benchmark/static/index.html +0 -68
  185. helm/benchmark/static/info-icon.png +0 -0
  186. helm/benchmark/static/json-urls.js +0 -69
  187. helm/benchmark/static/plot-captions.js +0 -27
  188. helm/benchmark/static/utils.js +0 -285
  189. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  190. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  191. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  192. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  193. helm/benchmark/window_services/ai21_window_service.py +0 -247
  194. helm/benchmark/window_services/cohere_window_service.py +0 -101
  195. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  196. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  197. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  198. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  199. helm/tokenizers/ice_tokenizer.py +0 -30
  200. helm/tokenizers/test_ice_tokenizer.py +0 -57
  201. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
  202. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
  203. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
  204. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  205. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  206. /helm/benchmark/{data_overlap → scenarios/vision_language/image2struct}/__init__.py +0 -0
  207. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  208. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct/webpage}/__init__.py +0 -0
  209. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
helm/config/tokenizer_configs.yaml CHANGED

@@ -17,11 +17,31 @@ tokenizer_configs:
     prefix_token: "<s>"

   # AI21
-  - name: ai21/j1
+  - name: ai21/j2-tokenizer
     tokenizer_spec:
-      class_name: "helm.tokenizers.ai21_tokenizer.AI21Tokenizer"
-    end_of_text_token: " "
-    prefix_token: ""
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-instruct-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-1.5-mini-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-1.5-large-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"

   # AlephAlpha
   - name: AlephAlpha/luminous-base

@@ -45,6 +65,24 @@ tokenizer_configs:
     end_of_text_token: ""
     prefix_token: ""

+  # Alibaba DAMO Academy
+
+  - name: damo/seallm-7b-v2
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: damo/seallm-7b-v2.5
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
+
   # Anthropic
   - name: anthropic/claude
     tokenizer_spec:

@@ -77,12 +115,6 @@ tokenizer_configs:
     prefix_token: ""

   # Cohere
-  - name: cohere/cohere
-    tokenizer_spec:
-      class_name: "helm.tokenizers.cohere_tokenizer.CohereTokenizer"
-    end_of_text_token: ""
-    prefix_token: ":"
-
   - name: cohere/command
     tokenizer_spec:
       class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer"

@@ -199,6 +231,11 @@ tokenizer_configs:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
     end_of_text_token: "<eos>"
     prefix_token: "<bos>"
+  - name: google/gemma-2-9b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"

   # Hf-internal-testing

@@ -280,6 +317,38 @@ tokenizer_configs:
     prefix_token: "<|begin_of_text|>"
     end_of_text_token: "<|end_of_text|>"

+  - name: meta/llama-3-8b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.1-8b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|end_of_text|>"
+
+  - name: meta/llama-3.2-3b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.2-11b-vision-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.2-11B-Vision-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
   # 01-ai
   - name: 01-ai/Yi-6B
     tokenizer_spec:

@@ -324,6 +393,20 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"

+  - name: microsoft/phi-3-small-8k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: microsoft/phi-3-medium-4k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<s>"
+
   # Mistralai
   - name: mistralai/Mistral-7B-v0.1
     tokenizer_spec:

@@ -349,6 +432,18 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: "<s>"

+  - name: mistralai/Mistral-Nemo-Base-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: mistralai/Mistral-Large-Instruct-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Neurips
   - name: neurips/local
     tokenizer_spec:

@@ -356,7 +451,17 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"

-  # Openai
+  # NVIDIA
+  - name: nvidia/nemotron-4-340b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: Xenova/Nemotron-4-340B-Instruct-Tokenizer
+        revision: b7aa0de92cda9f9e722d58d6ca90f46ae17d4701
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  # OpenAI
   - name: openai/cl100k_base
     tokenizer_spec:
       class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"

@@ -375,6 +480,14 @@ tokenizer_configs:
     end_of_text_token: ""
     prefix_token: ""

+  # OpenThaiGPT
+  - name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  # Qwen
   - name: qwen/qwen-7b
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"

@@ -420,6 +533,15 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""

+  # SambaLingo
+  - name: sambanova/sambalingo-thai-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Snowflake
   - name: snowflake/snowflake-arctic-instruct
     tokenizer_spec:

@@ -444,6 +566,13 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: ""

+  # Typhoon
+  - name: scb10x/typhoon-7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Writer
   - name: writer/gpt2
     tokenizer_spec:
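
Each entry above has the same shape: a HELM tokenizer name, a tokenizer_spec whose class_name names a Python class, optional args forwarded to that class, and the special tokens. A minimal sketch of resolving such an entry to its class (a generic importlib-based stand-in, not necessarily how crfm-helm's own factory works; importing the AI21 module also requires the ai21 extra to be installed):

# Illustrative only: resolve a tokenizer_configs.yaml entry to its class.
# The entry shape comes from the diff above; the loader below is a generic
# sketch, not crfm-helm's actual resolution machinery.
import importlib
from textwrap import dedent

import yaml

entry = yaml.safe_load(
    dedent(
        """
        name: ai21/jamba-tokenizer
        tokenizer_spec:
          class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
        end_of_text_token: "<|endoftext|>"
        prefix_token: "<|startoftext|>"
        """
    )
)

# Split "package.module.Class" into an importable module and a class name.
module_name, class_name = entry["tokenizer_spec"]["class_name"].rsplit(".", 1)
tokenizer_cls = getattr(importlib.import_module(module_name), class_name)
print(tokenizer_cls.__name__)  # AI21LocalTokenizer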
helm/proxy/example_queries.py CHANGED

@@ -22,7 +22,6 @@ example_queries = [
             temperature: 0.5 # Medium amount of randomness
             stop_sequences: [.] # Stop when you hit a period
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",

@@ -35,24 +34,24 @@ example_queries = [
             stop_sequences: [\\n] # Stop when you hit a newline
             num_completions: 5 # Generate many samples
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",
     ),
-    Query(
-        prompt="The quick brown fox jumps over the lazy dog.",
-        settings=dedent(
-            """
-            echo_prompt: true # Analyze the prompt
-            max_tokens: 0 # Don't generate any more
-            top_k_per_token: 5 # Show alternatives for each position
-            model: openai/davinci-002
-            model_deployment: openai/davinci-002
-            """
-        ),
-        environments=dedent(""),
-    ),
+    # Disabled because `max_tokens: 0` no longer works on the OpenAI API
+    # Query(
+    #     prompt="The quick brown fox jumps over the lazy dog.",
+    #     settings=dedent(
+    #         """
+    #         echo_prompt: true # Analyze the prompt
+    #         max_tokens: 0 # Don't generate any more
+    #         top_k_per_token: 5 # Show alternatives for each position
+    #         model: openai/text-davinci-002
+    #         model_deployment: openai/text-davinci-002
+    #         """
+    #     ),
+    #     environments=dedent(""),
+    # ),
     Query(
         prompt="Odd numbers: 1 -> 3 -> 5",
         settings=dedent(

@@ -60,7 +59,6 @@ example_queries = [
             temperature: 0 # Deterministic
             max_tokens: 50
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",

@@ -73,7 +71,6 @@ example_queries = [
             stop_sequences: [.]
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(

@@ -100,7 +97,6 @@ example_queries = [
             num_completions: 5
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(

@@ -136,7 +132,6 @@ example_queries = [
             top_k_per_token: 4
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(

@@ -150,7 +145,6 @@ example_queries = [
         settings=dedent(
             """
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",

@@ -163,7 +157,6 @@ example_queries = [
             stop_sequences: [\\n]
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(
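
The pattern across all of these hunks is the same: the example queries now specify only model, and the redundant model_deployment line is dropped. A minimal sketch of one such query after the change (the helm.proxy.query import path for Query is an assumption based on this module's usage, not shown in the diff):

# Hedged sketch: an example query after this change carries `model` only.
from textwrap import dedent

from helm.proxy.query import Query  # assumed import path

query = Query(
    prompt="Life is like a box of",
    settings=dedent(
        """
        temperature: 0.5 # Medium amount of randomness
        model: openai/gpt-3.5-turbo-0613
        """
    ),
    environments="",
)
print(query.settings)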
helm/proxy/server.py CHANGED
@@ -106,15 +106,6 @@ def handle_get_general_info():
     return safe_call(perform)


-@app.get("/api/window_service_info")
-def handle_get_window_service_info():
-    def perform(args):
-        global service
-        return dataclasses.asdict(service.get_window_service_info(args["model_name"]))
-
-    return safe_call(perform)
-
-
 @app.post("/api/account")
 def handle_create_account():
     def perform(args):
helm/proxy/services/remote_service.py CHANGED

@@ -15,7 +15,6 @@ from helm.common.file_upload_request import FileUploadRequest, FileUploadResult
 from helm.common.perspective_api_request import PerspectiveAPIRequest, PerspectiveAPIRequestResult
 from helm.common.clip_score_request import CLIPScoreRequest, CLIPScoreResult
 from helm.common.tokenization_request import (
-    WindowServiceInfo,
     TokenizationRequest,
     TokenizationRequestResult,
     DecodeRequestResult,

@@ -51,11 +50,6 @@ class RemoteService(Service):
         response = requests.get(f"{self.base_url}/api/general_info").json()
         return from_dict(GeneralInfo, response)

-    def get_window_service_info(self, model_name) -> WindowServiceInfo:
-        params = {"model_name": model_name}
-        response = requests.get(f"{self.base_url}/api/window_service_info?{urllib.parse.urlencode(params)}").json()
-        return from_dict(WindowServiceInfo, response)
-
     def expand_query(self, query: Query) -> QueryResult:
         params = asdict(query)
         response = requests.get(f"{self.base_url}/api/query?{urllib.parse.urlencode(params)}").json()
helm/proxy/services/server_service.py CHANGED

@@ -14,7 +14,6 @@ from helm.common.file_upload_request import FileUploadRequest, FileUploadResult
 from helm.common.general import ensure_directory_exists, parse_hocon, get_credentials
 from helm.common.perspective_api_request import PerspectiveAPIRequest, PerspectiveAPIRequestResult
 from helm.common.tokenization_request import (
-    WindowServiceInfo,
     TokenizationRequest,
     TokenizationRequestResult,
     DecodeRequest,

@@ -25,7 +24,6 @@ from helm.common.hierarchical_logger import hlog
 from helm.proxy.accounts import Accounts, Account
 from helm.clients.auto_client import AutoClient
 from helm.clients.moderation_api_client import ModerationAPIClient
-from helm.clients.perspective_api_client import PerspectiveAPIClient
 from helm.clients.image_generation.nudity_check_client import NudityCheckClient
 from helm.clients.gcs_client import GCSClient
 from helm.clients.clip_score_client import CLIPScoreClient

@@ -75,7 +73,7 @@ class ServerService(Service):
         # Lazily instantiate the following clients
         self.moderation_api_client: Optional[ModerationAPIClient] = None
         self.toxicity_classifier_client: Optional[ToxicityClassifierClient] = None
-        self.perspective_api_client: Optional[PerspectiveAPIClient] = None
+        self.perspective_api_client: Optional[ToxicityClassifierClient] = None
         self.nudity_check_client: Optional[NudityCheckClient] = None
         self.clip_score_client: Optional[CLIPScoreClient] = None
         self.gcs_client: Optional[GCSClient] = None

@@ -86,22 +84,6 @@ class ServerService(Service):
         all_models = [dataclasses.replace(model_metadata, release_date=None) for model_metadata in ALL_MODELS_METADATA]
         return GeneralInfo(version=VERSION, example_queries=example_queries, all_models=all_models)

-    def get_window_service_info(self, model_name) -> WindowServiceInfo:
-        # The import statement is placed here to avoid two problems, please refer to the link for details
-        # https://github.com/stanford-crfm/helm/pull/1430#discussion_r1156686624
-        from helm.benchmark.window_services.tokenizer_service import TokenizerService
-        from helm.benchmark.window_services.window_service_factory import WindowServiceFactory
-
-        token_service = TokenizerService(self, Authentication(""))
-        window_service = WindowServiceFactory.get_window_service(model_name, token_service)
-        return WindowServiceInfo(
-            tokenizer_name=window_service.tokenizer_name,
-            max_sequence_length=window_service.max_sequence_length,
-            max_request_length=window_service.max_request_length,
-            end_of_text_token=window_service.end_of_text_token,
-            prefix_token=window_service.prefix_token,
-        )
-
     def expand_query(self, query: Query) -> QueryResult:
         """Turn the `query` into requests."""
         prompt = query.prompt

@@ -121,8 +103,12 @@ class ServerService(Service):
                 return "dall_e"
             elif model_deployment.startswith("openai/gpt-4"):
                 return "gpt4"
-            else:
+            elif model_deployment.startswith("openai/gpt-3"):
                 return "gpt3"
+            elif model_deployment.startswith("openai/o1"):
+                return "o1"
+            else:
+                return "openai"
         elif model_deployment.startswith("ai21/"):
             return "jurassic"
         else:
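
The last hunk replaces a bare else: return "gpt3" with an explicit chain, so o1 and other OpenAI deployments no longer fall into the gpt3 bucket. Extracted as a standalone function for clarity (a sketch: the "dall_e" condition and the final fallback are assumed from context, since only the gpt-4, gpt-3, o1, and ai21 branches appear in the diff):

# Hedged sketch of the routing logic above, pulled out as a free function.
def model_group(model_deployment: str) -> str:
    if model_deployment.startswith("openai/"):
        if model_deployment.startswith("openai/dall-e"):  # assumed condition
            return "dall_e"
        elif model_deployment.startswith("openai/gpt-4"):
            return "gpt4"
        elif model_deployment.startswith("openai/gpt-3"):
            return "gpt3"
        elif model_deployment.startswith("openai/o1"):
            return "o1"
        else:
            return "openai"
    elif model_deployment.startswith("ai21/"):
        return "jurassic"
    else:
        return model_deployment.split("/")[0]  # assumed fallback


assert model_group("openai/o1-preview") == "o1"
assert model_group("openai/gpt-4o") == "gpt4"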
helm/proxy/services/service.py CHANGED

@@ -11,7 +11,6 @@ from helm.common.nudity_check_request import NudityCheckRequest, NudityCheckResu
 from helm.common.perspective_api_request import PerspectiveAPIRequestResult, PerspectiveAPIRequest
 from helm.common.moderations_api_request import ModerationAPIRequest, ModerationAPIRequestResult
 from helm.common.tokenization_request import (
-    WindowServiceInfo,
     TokenizationRequest,
     TokenizationRequestResult,
     DecodeRequest,

@@ -85,11 +84,6 @@ class Service(ABC):
         """Get general info."""
         pass

-    @abstractmethod
-    def get_window_service_info(self, model_name: str) -> WindowServiceInfo:
-        """Get window service info."""
-        pass
-
     @abstractmethod
     def expand_query(self, query: Query) -> QueryResult:
         """Turn the `query` into requests."""
helm/proxy/token_counters/test_auto_token_counter.py CHANGED

@@ -13,8 +13,8 @@ class TestAutoTokenCounter:
         )
         # The following prompt has 51 tokens according to the GPT-2 tokenizer
         request = Request(
-            model="openai/text-davinci-002",
-            model_deployment="openai/text-davinci-002",
+            model="openai/gpt2",
+            model_deployment="huggingface/gpt2",
             prompt="The Center for Research on Foundation Models (CRFM) is "
             "an interdisciplinary initiative born out of the Stanford "
             "Institute for Human-Centered Artificial Intelligence (HAI) "
helm/tokenizers/ai21_tokenizer.py CHANGED

@@ -1,60 +1,52 @@
-from typing import Any, Dict, List
-import requests
-
-from dacite import from_dict
-
-from helm.common.cache import Cache, CacheConfig
-from helm.common.tokenization_request import (
-    TokenizationRequest,
-    TokenizationRequestResult,
-    TokenizationToken,
-    TextRange,
-    DecodeRequest,
-    DecodeRequestResult,
-)
-from helm.clients.ai21_utils import AI21RequestError, handle_failed_request
-from .tokenizer import Tokenizer
-
-
-class AI21Tokenizer(Tokenizer):
-    def __init__(self, api_key: str, cache_config: CacheConfig) -> None:
-        self.cache = Cache(cache_config)
-        self.api_key: str = api_key
-
-    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
-        """
-        Tokenizes the text by using the AI21 endpoint: https://api.ai21.com/studio/v1/tokenize.
-        """
-        # TODO: Does not support encoding
-        raw_request: Dict[str, str] = {"text": request.text}
-
-        def do_it() -> Dict[str, Any]:
-            response = requests.post(
-                "https://api.ai21.com/studio/v1/tokenize",
-                headers={"Authorization": f"Bearer {self.api_key}"},
-                json=raw_request,
-            ).json()
-
-            # If 'tokens' is not present in the response, assume request failed.
-            if "tokens" not in response:
-                handle_failed_request(api_type="tokenizer", response=response)
-
-            return response
-
-        try:
-            response, cached = self.cache.get(raw_request, do_it)
-        except AI21RequestError:
-            return TokenizationRequestResult(success=False, cached=False, text="", tokens=[])
-
-        # Each token is represented like this in the response:
-        # {'token': '▁Hello', 'textRange': {'start': 0, 'end': 5}}
-        tokens: List[TokenizationToken] = []
-        for token_dict in response["tokens"]:
-            tokens.append(
-                TokenizationToken(value=token_dict["token"], text_range=from_dict(TextRange, token_dict["textRange"]))
+import threading
+from typing import Any, Dict
+
+from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.tokenizers.caching_tokenizer import CachingTokenizer
+
+try:
+    from ai21_tokenizer import Tokenizer as SDKTokenizer
+    from ai21_tokenizer.base_tokenizer import BaseTokenizer
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["ai21"])
+
+
+class AI21LocalTokenizer(CachingTokenizer):
+    """AI21 tokenizer using the AI21 Python library."""
+
+    def __init__(self, cache_config: CacheConfig) -> None:
+        super().__init__(cache_config)
+        self._tokenizers_lock = threading.Lock()
+        self.tokenizers: Dict[str, BaseTokenizer] = {}
+
+    def _get_tokenizer(self, tokenizer_name: str) -> BaseTokenizer:
+        with self._tokenizers_lock:
+            if tokenizer_name not in self.tokenizers:
+                self.tokenizers[tokenizer_name] = SDKTokenizer.get_tokenizer(tokenizer_name)
+            return self.tokenizers[tokenizer_name]
+
+    def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        tokenizer_name = request["tokenizer"].split("/")[1]
+        tokenizer = self._get_tokenizer(tokenizer_name)
+        if request["truncation"]:
+            token_ids = tokenizer.encode(
+                text=request["text"],
+                truncation=request["truncation"],
+                max_length=request["max_length"],
+                add_special_tokens=False,
             )
-        text: str = response["text"]
-        return TokenizationRequestResult(success=True, cached=cached, tokens=tokens, text=text)
-
-    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        raise NotImplementedError("Not supported")
+        else:
+            token_ids = tokenizer.encode(
+                text=request["text"],
+                add_special_tokens=False,
+            )
+        if request["encode"]:
+            return {"tokens": token_ids}
+        else:
+            return {"tokens": tokenizer.convert_ids_to_tokens(token_ids)}
+
+    def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        tokenizer_name = request["tokenizer"].split("/")[1]
+        tokenizer = self._get_tokenizer(tokenizer_name)
+        return {"text": tokenizer.decode(request["tokens"])}
helm/tokenizers/cohere_tokenizer.py CHANGED

@@ -1,5 +1,3 @@
-import json
-import requests
 from typing import Any, Dict, List, Optional

 import cohere

@@ -8,84 +6,11 @@ from cohere.manually_maintained.tokenizers import get_hf_tokenizer
 from helm.common.cache import CacheConfig
 from helm.common.tokenization_request import (
     TokenizationRequest,
-    DecodeRequest,
-    DecodeRequestResult,
     TokenizationToken,
 )
-from helm.clients.cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
 from helm.tokenizers.caching_tokenizer import CachingTokenizer


-class CohereTokenizer(CachingTokenizer):
-    # From "https://docs.cohere.ai/versioning-reference",
-    # "this version [2021-11-08] introduces multiple generations, meaning that the generations endpoint will
-    # now accept a num_generations argument in the JSON and will always return an array of generations"
-    # Note that the API version is decoupled from the model version.
-    DEFAULT_API_VERSION: str = "2021-11-08"
-
-    TOKENIZE_ENDPOINT: str = "tokenize"
-
-    # According to https://docs.cohere.ai/tokenize-reference#request, for tokenize, text: "the string to
-    # be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters."
-    # However, even sending a request with 60,000 characters sometimes fails, so we set the
-    # maximum length to 50,000, which is about 8,333 tokens.
-    # TODO: followed up with Cohere support with an example of a failure case
-    TOKENIZE_API_MAX_TEXT_LENGTH: int = 50_000
-
-    def __init__(self, api_key: str, cache_config: CacheConfig) -> None:
-        super().__init__(cache_config)
-        self.api_key: str = api_key
-
-    def _tokenization_request_to_cache_key(self, request: TokenizationRequest) -> Dict[str, Any]:
-        # This cache key is used to preserve our existing Cache (10/17/2023)
-        return {"text": request.text}
-
-    def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Send the request to the Cohere Tokenize API.
-
-        From https://docs.cohere.ai/tokenize-reference, for text "tokenize me! :D", the response will be:
-
-        {
-            "tokens": [34160, 974, 514, 34, 1420, 69]
-            "token_strings": ["token", "ize", " me", "!", " :", "D"]
-        }
-        """
-        text: str = request["text"]
-        assert (
-            1 <= len(text) <= CohereTokenizer.TOKENIZE_API_MAX_TEXT_LENGTH
-        ), f"Invalid text length: {len(text)}. Valid length: [1..{CohereTokenizer.TOKENIZE_API_MAX_TEXT_LENGTH:,d}]"
-
-        response = requests.request(
-            method="POST",
-            url=get_cohere_url(CohereTokenizer.TOKENIZE_ENDPOINT),
-            headers={
-                "Authorization": f"BEARER {self.api_key}",
-                "Content-Type": "application/json",
-                "Cohere-Version": DEFAULT_COHERE_API_VERSION,
-            },
-            data=json.dumps(request),
-        )
-        result = json.loads(response.text)
-        assert "message" not in result.keys(), f"Request failed with error {result['message']}"
-        assert "tokens" in result and "token_strings" in result, f"Invalid response: {result}"
-        # This output format is used to preserve our existing Cache (10/17/2023)
-        return result
-
-    def _tokenization_raw_response_to_tokens(
-        self, response: Dict[str, Any], request: TokenizationRequest
-    ) -> List[TokenizationToken]:
-        tokens = response["tokens" if request.encode else "token_strings"]
-        return [TokenizationToken(token) for token in tokens]
-
-    def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        # Defined for mypy but decode() already raises NotImplementedError
-        raise NotImplementedError("The Cohere API does not support decoding.")
-
-    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        raise NotImplementedError("The Cohere API does not support decoding.")
-
-
 class CohereLocalTokenizer(CachingTokenizer):
     """Cohere tokenizer using the Cohere Python library."""

helm/tokenizers/huggingface_tokenizer.py CHANGED

@@ -53,7 +53,6 @@ class HuggingFaceTokenizer(CachingTokenizer):
         # If unspecified, set `use_fast=True` by default.
         if "use_fast" not in from_pretrained_kwargs:
             from_pretrained_kwargs["use_fast"] = True
-        print(from_pretrained_kwargs)
         try:
             # From the Hugging Face documentation, "local_files_only(defaults to False) —
             # Whether or not to only look at local files".