crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crfm-helm might be problematic.

Files changed (184):
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  10. helm/benchmark/annotation/call_center_annotator.py +247 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +32 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +31 -44
  18. helm/benchmark/annotation/model_as_judge.py +45 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  20. helm/benchmark/annotation/xstest_annotator.py +110 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +57 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  30. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  31. helm/benchmark/model_metadata_registry.py +3 -3
  32. helm/benchmark/presentation/test_run_entry.py +1 -0
  33. helm/benchmark/run.py +15 -0
  34. helm/benchmark/run_expander.py +56 -30
  35. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  36. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  37. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  38. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  39. helm/benchmark/run_specs/finance_run_specs.py +78 -1
  40. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +92 -21
  42. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  43. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  44. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  45. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  46. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  47. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  48. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  49. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  50. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  51. helm/benchmark/scenarios/scenario.py +1 -1
  52. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  53. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  54. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  55. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  56. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  57. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  58. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  59. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  60. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  61. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  62. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  63. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  64. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  65. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  66. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  67. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  68. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  69. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  70. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  71. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  72. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  73. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  74. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  75. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  76. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  78. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  79. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  80. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  81. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  82. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  83. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  84. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  85. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  86. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  87. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  88. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  89. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  91. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  92. helm/benchmark/server.py +1 -6
  93. helm/benchmark/static/schema_air_bench.yaml +750 -750
  94. helm/benchmark/static/schema_bhasa.yaml +709 -0
  95. helm/benchmark/static/schema_call_center.yaml +232 -0
  96. helm/benchmark/static/schema_cleva.yaml +768 -0
  97. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  98. helm/benchmark/static/schema_ewok.yaml +367 -0
  99. helm/benchmark/static/schema_finance.yaml +55 -9
  100. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  101. helm/benchmark/static/schema_safety.yaml +247 -0
  102. helm/benchmark/static/schema_tables.yaml +124 -7
  103. helm/benchmark/static/schema_thai.yaml +21 -0
  104. helm/benchmark/static/schema_vhelm.yaml +96 -91
  105. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  106. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  107. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  108. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  109. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  110. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  111. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  112. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  113. helm/benchmark/static_build/index.html +2 -2
  114. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  115. helm/clients/ai21_client.py +71 -1
  116. helm/clients/anthropic_client.py +7 -19
  117. helm/clients/huggingface_client.py +38 -37
  118. helm/clients/nvidia_nim_client.py +35 -0
  119. helm/clients/openai_client.py +2 -3
  120. helm/clients/palmyra_client.py +25 -0
  121. helm/clients/perspective_api_client.py +11 -6
  122. helm/clients/test_client.py +4 -6
  123. helm/clients/vision_language/open_flamingo_client.py +1 -2
  124. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  125. helm/common/images_utils.py +6 -0
  126. helm/common/mongo_key_value_store.py +2 -1
  127. helm/common/request.py +16 -0
  128. helm/config/model_deployments.yaml +315 -332
  129. helm/config/model_metadata.yaml +384 -110
  130. helm/config/tokenizer_configs.yaml +116 -11
  131. helm/proxy/example_queries.py +14 -21
  132. helm/proxy/services/server_service.py +1 -2
  133. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  134. helm/tokenizers/ai21_tokenizer.py +51 -59
  135. helm/tokenizers/cohere_tokenizer.py +0 -75
  136. helm/tokenizers/huggingface_tokenizer.py +0 -1
  137. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  138. helm/benchmark/static/benchmarking.css +0 -156
  139. helm/benchmark/static/benchmarking.js +0 -1705
  140. helm/benchmark/static/config.js +0 -3
  141. helm/benchmark/static/general.js +0 -122
  142. helm/benchmark/static/images/crfm-logo.png +0 -0
  143. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  144. helm/benchmark/static/images/helm-logo.png +0 -0
  145. helm/benchmark/static/images/language-model-helm.png +0 -0
  146. helm/benchmark/static/images/organizations/ai21.png +0 -0
  147. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  148. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  149. helm/benchmark/static/images/organizations/cohere.png +0 -0
  150. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  151. helm/benchmark/static/images/organizations/google.png +0 -0
  152. helm/benchmark/static/images/organizations/meta.png +0 -0
  153. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  154. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  155. helm/benchmark/static/images/organizations/openai.png +0 -0
  156. helm/benchmark/static/images/organizations/together.png +0 -0
  157. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  158. helm/benchmark/static/images/organizations/yandex.png +0 -0
  159. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  160. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  161. helm/benchmark/static/index.html +0 -68
  162. helm/benchmark/static/info-icon.png +0 -0
  163. helm/benchmark/static/json-urls.js +0 -69
  164. helm/benchmark/static/plot-captions.js +0 -27
  165. helm/benchmark/static/utils.js +0 -285
  166. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  167. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  168. helm/benchmark/window_services/ai21_window_service.py +0 -247
  169. helm/benchmark/window_services/cohere_window_service.py +0 -101
  170. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  171. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  172. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  173. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  174. helm/tokenizers/ice_tokenizer.py +0 -30
  175. helm/tokenizers/test_ice_tokenizer.py +0 -57
  176. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  177. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  178. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  179. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  180. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  181. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  182. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  183. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  184. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0

helm/config/tokenizer_configs.yaml
@@ -17,11 +17,31 @@ tokenizer_configs:
     prefix_token: "<s>"
 
   # AI21
-  - name: ai21/j1
+  - name: ai21/j2-tokenizer
     tokenizer_spec:
-      class_name: "helm.tokenizers.ai21_tokenizer.AI21Tokenizer"
-    end_of_text_token: " "
-    prefix_token: ""
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-instruct-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-1.5-mini-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-1.5-large-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
 
   # AlephAlpha
   - name: AlephAlpha/luminous-base
@@ -45,6 +65,24 @@ tokenizer_configs:
     end_of_text_token: ""
     prefix_token: ""
 
+  # Alibaba DAMO Academy
+
+  - name: damo/seallm-7b-v2
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: damo/seallm-7b-v2.5
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
+
   # Anthropic
   - name: anthropic/claude
     tokenizer_spec:
@@ -77,12 +115,6 @@ tokenizer_configs:
     prefix_token: ""
 
   # Cohere
-  - name: cohere/cohere
-    tokenizer_spec:
-      class_name: "helm.tokenizers.cohere_tokenizer.CohereTokenizer"
-    end_of_text_token: ""
-    prefix_token: ":"
-
   - name: cohere/command
     tokenizer_spec:
       class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer"
@@ -199,6 +231,11 @@ tokenizer_configs:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
     end_of_text_token: "<eos>"
     prefix_token: "<bos>"
+  - name: google/gemma-2-9b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
 
   # Hf-internal-testing
 
@@ -280,6 +317,14 @@ tokenizer_configs:
     prefix_token: "<|begin_of_text|>"
     end_of_text_token: "<|end_of_text|>"
 
+  - name: meta/llama-3.1-8b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|end_of_text|>"
+
   # 01-ai
   - name: 01-ai/Yi-6B
     tokenizer_spec:
@@ -324,6 +369,20 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"
 
+  - name: microsoft/phi-3-small-8k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: microsoft/phi-3-medium-4k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<s>"
+
   # Mistralai
   - name: mistralai/Mistral-7B-v0.1
     tokenizer_spec:
@@ -349,6 +408,18 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: "<s>"
 
+  - name: mistralai/Mistral-Nemo-Base-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: mistralai/Mistral-Large-Instruct-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Neurips
   - name: neurips/local
     tokenizer_spec:
@@ -356,7 +427,17 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"
 
-  # Openai
+  # NVIDIA
+  - name: nvidia/nemotron-4-340b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: Xenova/Nemotron-4-340B-Instruct-Tokenizer
+        revision: b7aa0de92cda9f9e722d58d6ca90f46ae17d4701
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  # OpenAI
   - name: openai/cl100k_base
     tokenizer_spec:
       class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"
@@ -375,6 +456,14 @@ tokenizer_configs:
     end_of_text_token: ""
     prefix_token: ""
 
+  # OpenThaiGPT
+  - name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  # Qwen
   - name: qwen/qwen-7b
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -420,6 +509,15 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  # SambaLingo
+  - name: sambanova/sambalingo-thai-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Snowflake
   - name: snowflake/snowflake-arctic-instruct
     tokenizer_spec:
@@ -444,6 +542,13 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: ""
 
+  # Typhoon
+  - name: scb10x/typhoon-7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Writer
   - name: writer/gpt2
     tokenizer_spec:
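
Most of the new entries above are backed by HuggingFaceTokenizer. As a rough sketch (not HELM's actual loading code, which adds caching on top), an entry that pins pretrained_model_name_or_path resolves to a standard transformers call along these lines; the repo name comes from the config above, and the assumption that HuggingFaceTokenizer ultimately defers to AutoTokenizer is consistent with the use_fast default seen in the huggingface_tokenizer.py diff further down:

    # Approximate equivalent of the damo/seallm-7b-v2 entry above.
    from transformers import AutoTokenizer

    # use_fast=True mirrors HELM's default when the config passes no override.
    tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2", use_fast=True)
    print(tokenizer.tokenize("Hello world"))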

helm/proxy/example_queries.py
@@ -22,7 +22,6 @@ example_queries = [
             temperature: 0.5 # Medium amount of randomness
             stop_sequences: [.] # Stop when you hit a period
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",
@@ -35,24 +34,24 @@ example_queries = [
             stop_sequences: [\n] # Stop when you hit a newline
             num_completions: 5 # Generate many samples
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",
     ),
-    Query(
-        prompt="The quick brown fox jumps over the lazy dog.",
-        settings=dedent(
-            """
-            echo_prompt: true # Analyze the prompt
-            max_tokens: 0 # Don't generate any more
-            top_k_per_token: 5 # Show alternatives for each position
-            model: openai/davinci-002
-            model_deployment: openai/davinci-002
-            """
-        ),
-        environments=dedent(""),
-    ),
+    # Disabled because `max_tokens: 0` no longer works on the OpenAI API
+    # Query(
+    #     prompt="The quick brown fox jumps over the lazy dog.",
+    #     settings=dedent(
+    #         """
+    #         echo_prompt: true # Analyze the prompt
+    #         max_tokens: 0 # Don't generate any more
+    #         top_k_per_token: 5 # Show alternatives for each position
+    #         model: openai/text-davinci-002
+    #         model_deployment: openai/text-davinci-002
+    #         """
+    #     ),
+    #     environments=dedent(""),
+    # ),
     Query(
         prompt="Odd numbers: 1 -> 3 -> 5",
         settings=dedent(
@@ -60,7 +59,6 @@ example_queries = [
             temperature: 0 # Deterministic
             max_tokens: 50
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",
@@ -73,7 +71,6 @@ example_queries = [
             stop_sequences: [.]
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(
@@ -100,7 +97,6 @@ example_queries = [
             num_completions: 5
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(
@@ -136,7 +132,6 @@ example_queries = [
             top_k_per_token: 4
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(
@@ -150,7 +145,6 @@ example_queries = [
         settings=dedent(
             """
             model: openai/gpt-3.5-turbo-0613
-            model_deployment: openai/gpt-3.5-turbo-0613
             """
         ),
         environments="",
@@ -163,7 +157,6 @@ example_queries = [
             stop_sequences: [\n]
             # Try out multiple models
             model: ${model}
-            model_deployment: ${model}
             """
         ),
         environments=dedent(

helm/proxy/services/server_service.py
@@ -25,7 +25,6 @@ from helm.common.hierarchical_logger import hlog
 from helm.proxy.accounts import Accounts, Account
 from helm.clients.auto_client import AutoClient
 from helm.clients.moderation_api_client import ModerationAPIClient
-from helm.clients.perspective_api_client import PerspectiveAPIClient
 from helm.clients.image_generation.nudity_check_client import NudityCheckClient
 from helm.clients.gcs_client import GCSClient
 from helm.clients.clip_score_client import CLIPScoreClient
@@ -75,7 +74,7 @@ class ServerService(Service):
         # Lazily instantiate the following clients
         self.moderation_api_client: Optional[ModerationAPIClient] = None
         self.toxicity_classifier_client: Optional[ToxicityClassifierClient] = None
-        self.perspective_api_client: Optional[PerspectiveAPIClient] = None
+        self.perspective_api_client: Optional[ToxicityClassifierClient] = None
         self.nudity_check_client: Optional[NudityCheckClient] = None
         self.clip_score_client: Optional[CLIPScoreClient] = None
         self.gcs_client: Optional[GCSClient] = None

helm/proxy/token_counters/test_auto_token_counter.py
@@ -13,8 +13,8 @@ class TestAutoTokenCounter:
         )
         # The following prompt has 51 tokens according to the GPT-2 tokenizer
         request = Request(
-            model="openai/text-davinci-002",
-            model_deployment="openai/text-davinci-002",
+            model="openai/gpt2",
+            model_deployment="huggingface/gpt2",
             prompt="The Center for Research on Foundation Models (CRFM) is "
             "an interdisciplinary initiative born out of the Stanford "
             "Institute for Human-Centered Artificial Intelligence (HAI) "

helm/tokenizers/ai21_tokenizer.py
@@ -1,60 +1,52 @@
-from typing import Any, Dict, List
-import requests
-
-from dacite import from_dict
-
-from helm.common.cache import Cache, CacheConfig
-from helm.common.tokenization_request import (
-    TokenizationRequest,
-    TokenizationRequestResult,
-    TokenizationToken,
-    TextRange,
-    DecodeRequest,
-    DecodeRequestResult,
-)
-from helm.clients.ai21_utils import AI21RequestError, handle_failed_request
-from .tokenizer import Tokenizer
-
-
-class AI21Tokenizer(Tokenizer):
-    def __init__(self, api_key: str, cache_config: CacheConfig) -> None:
-        self.cache = Cache(cache_config)
-        self.api_key: str = api_key
-
-    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
-        """
-        Tokenizes the text by using the AI21 endpoint: https://api.ai21.com/studio/v1/tokenize.
-        """
-        # TODO: Does not support encoding
-        raw_request: Dict[str, str] = {"text": request.text}
-
-        def do_it() -> Dict[str, Any]:
-            response = requests.post(
-                "https://api.ai21.com/studio/v1/tokenize",
-                headers={"Authorization": f"Bearer {self.api_key}"},
-                json=raw_request,
-            ).json()
-
-            # If 'tokens' is not present in the response, assume request failed.
-            if "tokens" not in response:
-                handle_failed_request(api_type="tokenizer", response=response)
-
-            return response
-
-        try:
-            response, cached = self.cache.get(raw_request, do_it)
-        except AI21RequestError:
-            return TokenizationRequestResult(success=False, cached=False, text="", tokens=[])
-
-        # Each token is represented like this in the response:
-        # {'token': '▁Hello', 'textRange': {'start': 0, 'end': 5}}
-        tokens: List[TokenizationToken] = []
-        for token_dict in response["tokens"]:
-            tokens.append(
-                TokenizationToken(value=token_dict["token"], text_range=from_dict(TextRange, token_dict["textRange"]))
+import threading
+from typing import Any, Dict
+
+from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.tokenizers.caching_tokenizer import CachingTokenizer
+
+try:
+    from ai21_tokenizer import Tokenizer as SDKTokenizer
+    from ai21_tokenizer.base_tokenizer import BaseTokenizer
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["ai21"])
+
+
+class AI21LocalTokenizer(CachingTokenizer):
+    """AI21 tokenizer using the AI21 Python library."""
+
+    def __init__(self, cache_config: CacheConfig) -> None:
+        super().__init__(cache_config)
+        self._tokenizers_lock = threading.Lock()
+        self.tokenizers: Dict[str, BaseTokenizer] = {}
+
+    def _get_tokenizer(self, tokenizer_name: str) -> BaseTokenizer:
+        with self._tokenizers_lock:
+            if tokenizer_name not in self.tokenizers:
+                self.tokenizers[tokenizer_name] = SDKTokenizer.get_tokenizer(tokenizer_name)
+            return self.tokenizers[tokenizer_name]
+
+    def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        tokenizer_name = request["tokenizer"].split("/")[1]
+        tokenizer = self._get_tokenizer(tokenizer_name)
+        if request["truncation"]:
+            token_ids = tokenizer.encode(
+                text=request["text"],
+                truncation=request["truncation"],
+                max_length=request["max_length"],
+                add_special_tokens=False,
             )
-        text: str = response["text"]
-        return TokenizationRequestResult(success=True, cached=cached, tokens=tokens, text=text)
-
-    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        raise NotImplementedError("Not supported")
+        else:
+            token_ids = tokenizer.encode(
+                text=request["text"],
+                add_special_tokens=False,
+            )
+        if request["encode"]:
+            return {"tokens": token_ids}
+        else:
+            return {"tokens": tokenizer.convert_ids_to_tokens(token_ids)}
+
+    def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        tokenizer_name = request["tokenizer"].split("/")[1]
+        tokenizer = self._get_tokenizer(tokenizer_name)
+        return {"text": tokenizer.decode(request["tokens"])}
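
For context, a minimal sketch of the SDK calls that the new AI21LocalTokenizer wraps, assuming the optional ai21-tokenizer package is installed; HELM strips the "ai21/" org prefix from the tokenizer name before the lookup, as in _tokenize_do_it above:

    from ai21_tokenizer import Tokenizer

    # "ai21/jamba-instruct-tokenizer" in HELM maps to this SDK tokenizer name.
    tokenizer = Tokenizer.get_tokenizer("jamba-instruct-tokenizer")
    token_ids = tokenizer.encode(text="otter", add_special_tokens=False)
    print(token_ids)  # raw ids, as returned when encode=True
    print(tokenizer.convert_ids_to_tokens(token_ids))  # token strings, as when encode=False
    print(tokenizer.decode(token_ids))  # round-trips back to "otter"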

helm/tokenizers/cohere_tokenizer.py
@@ -1,5 +1,3 @@
-import json
-import requests
 from typing import Any, Dict, List, Optional
 
 import cohere
@@ -8,84 +6,11 @@ from cohere.manually_maintained.tokenizers import get_hf_tokenizer
 from helm.common.cache import CacheConfig
 from helm.common.tokenization_request import (
     TokenizationRequest,
-    DecodeRequest,
-    DecodeRequestResult,
     TokenizationToken,
 )
-from helm.clients.cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
 from helm.tokenizers.caching_tokenizer import CachingTokenizer
 
 
-class CohereTokenizer(CachingTokenizer):
-    # From "https://docs.cohere.ai/versioning-reference",
-    # "this version [2021-11-08] introduces multiple generations, meaning that the generations endpoint will
-    # now accept a num_generations argument in the JSON and will always return an array of generations"
-    # Note that the API version is decoupled from the model version.
-    DEFAULT_API_VERSION: str = "2021-11-08"
-
-    TOKENIZE_ENDPOINT: str = "tokenize"
-
-    # According to https://docs.cohere.ai/tokenize-reference#request, for tokenize, text: "the string to
-    # be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters."
-    # However, even sending a request with 60,000 characters sometimes fails, so we set the
-    # maximum length to 50,000, which is about 8,333 tokens.
-    # TODO: followed up with Cohere support with an example of a failure case
-    TOKENIZE_API_MAX_TEXT_LENGTH: int = 50_000
-
-    def __init__(self, api_key: str, cache_config: CacheConfig) -> None:
-        super().__init__(cache_config)
-        self.api_key: str = api_key
-
-    def _tokenization_request_to_cache_key(self, request: TokenizationRequest) -> Dict[str, Any]:
-        # This cache key is used to preserve our existing Cache (10/17/2023)
-        return {"text": request.text}
-
-    def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Send the request to the Cohere Tokenize API.
-
-        From https://docs.cohere.ai/tokenize-reference, for text "tokenize me! :D", the response will be:
-
-        {
-            "tokens": [34160, 974, 514, 34, 1420, 69]
-            "token_strings": ["token", "ize", " me", "!", " :", "D"]
-        }
-        """
-        text: str = request["text"]
-        assert (
-            1 <= len(text) <= CohereTokenizer.TOKENIZE_API_MAX_TEXT_LENGTH
-        ), f"Invalid text length: {len(text)}. Valid length: [1..{CohereTokenizer.TOKENIZE_API_MAX_TEXT_LENGTH:,d}]"
-
-        response = requests.request(
-            method="POST",
-            url=get_cohere_url(CohereTokenizer.TOKENIZE_ENDPOINT),
-            headers={
-                "Authorization": f"BEARER {self.api_key}",
-                "Content-Type": "application/json",
-                "Cohere-Version": DEFAULT_COHERE_API_VERSION,
-            },
-            data=json.dumps(request),
-        )
-        result = json.loads(response.text)
-        assert "message" not in result.keys(), f"Request failed with error {result['message']}"
-        assert "tokens" in result and "token_strings" in result, f"Invalid response: {result}"
-        # This output format is used to preserve our existing Cache (10/17/2023)
-        return result
-
-    def _tokenization_raw_response_to_tokens(
-        self, response: Dict[str, Any], request: TokenizationRequest
-    ) -> List[TokenizationToken]:
-        tokens = response["tokens" if request.encode else "token_strings"]
-        return [TokenizationToken(token) for token in tokens]
-
-    def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        # Defined for mypy but decode() already raises NotImplementedError
-        raise NotImplementedError("The Cohere API does not support decoding.")
-
-    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        raise NotImplementedError("The Cohere API does not support decoding.")
-
-
 class CohereLocalTokenizer(CachingTokenizer):
     """Cohere tokenizer using the Cohere Python library."""
 

helm/tokenizers/huggingface_tokenizer.py
@@ -53,7 +53,6 @@ class HuggingFaceTokenizer(CachingTokenizer):
         # If unspecified, set `use_fast=True` by default.
         if "use_fast" not in from_pretrained_kwargs:
             from_pretrained_kwargs["use_fast"] = True
-        print(from_pretrained_kwargs)
         try:
             # From the Hugging Face documentation, "local_files_only(defaults to False) —
             # Whether or not to only look at local files".

helm/tokenizers/test_ai21_tokenizer.py
@@ -0,0 +1,48 @@
+import pytest
+
+from helm.common.cache import BlackHoleCacheConfig
+from helm.common.tokenization_request import (
+    DecodeRequest,
+    TokenizationRequest,
+    TokenizationToken,
+)
+
+
+@pytest.mark.models
+def test_tokenize():
+    from helm.tokenizers.ai21_tokenizer import AI21LocalTokenizer
+
+    tokenizer = AI21LocalTokenizer(cache_config=BlackHoleCacheConfig())
+    request = TokenizationRequest(tokenizer="ai21/jamba-instruct-tokenizer", text="otter 🦦")
+    result = tokenizer.tokenize(request)
+    assert result.success
+    assert not result.cached
+    assert result.tokens == [
+        TokenizationToken(token) for token in ["ot", "ter", "▁", "<0xF0>", "<0x9F>", "<0xA6>", "<0xA6>"]
+    ]
+
+
+@pytest.mark.models
+def test_encode():
+    from helm.tokenizers.ai21_tokenizer import AI21LocalTokenizer
+
+    tokenizer = AI21LocalTokenizer(cache_config=BlackHoleCacheConfig())
+    request = TokenizationRequest(tokenizer="ai21/jamba-instruct-tokenizer", text="otter 🦦", encode=True)
+    result = tokenizer.tokenize(request)
+    assert result.success
+    assert not result.cached
+    assert result.tokens == [TokenizationToken(token) for token in [1860, 1901, 62934, 1784, 1703, 1710, 1710]]
+
+
+@pytest.mark.models
+def test_decode():
+    from helm.tokenizers.ai21_tokenizer import AI21LocalTokenizer
+
+    tokenizer = AI21LocalTokenizer(cache_config=BlackHoleCacheConfig())
+    request = DecodeRequest(
+        tokenizer="ai21/jamba-instruct-tokenizer", tokens=[1860, 1901, 62934, 1784, 1703, 1710, 1710]
+    )
+    result = tokenizer.decode(request)
+    assert result.success
+    assert not result.cached
+    assert result.text == "otter 🦦"
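
Note that these new tests are gated behind the models pytest marker, so a default test run skips them; assuming a standard pytest setup, an invocation like `pytest -m models helm/tokenizers/test_ai21_tokenizer.py` would select just the AI21 tokenizer tests above.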