crfm-helm 0.5.1-py3-none-any.whl → 0.5.2-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (98)
  1. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +13 -3
  2. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +96 -63
  3. helm/benchmark/adaptation/adapter_spec.py +32 -31
  4. helm/benchmark/annotation/air_bench_annotator.py +64 -0
  5. helm/benchmark/annotation/annotator_factory.py +6 -0
  6. helm/benchmark/annotation/live_qa_annotator.py +84 -0
  7. helm/benchmark/annotation/medication_qa_annotator.py +81 -0
  8. helm/benchmark/augmentations/translate_perturbation.py +1 -0
  9. helm/benchmark/huggingface_registration.py +16 -6
  10. helm/benchmark/metrics/air_bench_metrics.py +56 -0
  11. helm/benchmark/metrics/fin_qa_metrics.py +60 -0
  12. helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
  13. helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
  14. helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
  15. helm/benchmark/metrics/live_qa_metrics.py +23 -0
  16. helm/benchmark/metrics/medication_qa_metrics.py +23 -0
  17. helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
  18. helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
  19. helm/benchmark/metrics/unitxt_metrics.py +20 -10
  20. helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
  21. helm/benchmark/metrics/vision_language/image_metrics.py +29 -71
  22. helm/benchmark/presentation/schema.py +54 -4
  23. helm/benchmark/presentation/test_schema.py +11 -0
  24. helm/benchmark/run.py +16 -2
  25. helm/benchmark/run_expander.py +77 -0
  26. helm/benchmark/run_spec_factory.py +4 -0
  27. helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
  28. helm/benchmark/run_specs/classic_run_specs.py +15 -11
  29. helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
  30. helm/benchmark/run_specs/experimental_run_specs.py +33 -0
  31. helm/benchmark/run_specs/finance_run_specs.py +33 -0
  32. helm/benchmark/run_specs/vlm_run_specs.py +168 -45
  33. helm/benchmark/scenarios/air_bench_scenario.py +50 -0
  34. helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
  35. helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
  36. helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
  37. helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
  38. helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
  39. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
  40. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
  41. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +0 -4
  42. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +4 -2
  43. helm/benchmark/scenarios/vision_language/pairs_scenario.py +6 -5
  44. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
  45. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
  46. helm/benchmark/static/schema_air_bench.yaml +3149 -0
  47. helm/benchmark/static/schema_classic.yaml +3 -59
  48. helm/benchmark/static/schema_finance.yaml +143 -0
  49. helm/benchmark/static/schema_image2structure.yaml +254 -111
  50. helm/benchmark/static/schema_instruction_following.yaml +3 -52
  51. helm/benchmark/static/schema_lite.yaml +3 -61
  52. helm/benchmark/static/schema_medical.yaml +255 -0
  53. helm/benchmark/static/schema_mmlu.yaml +3 -61
  54. helm/benchmark/static/schema_tables.yaml +200 -0
  55. helm/benchmark/static/schema_thai.yaml +223 -0
  56. helm/benchmark/static/schema_unitxt.yaml +3 -61
  57. helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +294 -293
  58. helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
  59. helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
  60. helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
  61. helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
  62. helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
  63. helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
  64. helm/benchmark/static_build/index.html +2 -2
  65. helm/clients/anthropic_client.py +43 -9
  66. helm/clients/auto_client.py +11 -0
  67. helm/clients/client.py +24 -7
  68. helm/clients/cohere_client.py +98 -3
  69. helm/clients/huggingface_client.py +71 -12
  70. helm/clients/openai_client.py +9 -2
  71. helm/clients/reka_client.py +189 -0
  72. helm/clients/test_client.py +3 -3
  73. helm/clients/test_huggingface_client.py +19 -3
  74. helm/clients/test_together_client.py +72 -2
  75. helm/clients/together_client.py +129 -23
  76. helm/clients/vertexai_client.py +62 -18
  77. helm/clients/vision_language/huggingface_vlm_client.py +1 -0
  78. helm/clients/vision_language/paligemma_client.py +146 -0
  79. helm/clients/vision_language/palmyra_vision_client.py +84 -0
  80. helm/clients/yi_client.py +31 -0
  81. helm/common/critique_request.py +10 -1
  82. helm/common/images_utils.py +19 -0
  83. helm/config/model_deployments.yaml +412 -18
  84. helm/config/model_metadata.yaml +447 -25
  85. helm/config/tokenizer_configs.yaml +93 -1
  86. helm/proxy/critique/model_critique_client.py +32 -4
  87. helm/proxy/services/server_service.py +1 -1
  88. helm/tokenizers/auto_tokenizer.py +1 -1
  89. helm/tokenizers/cohere_tokenizer.py +44 -2
  90. helm/tokenizers/huggingface_tokenizer.py +36 -13
  91. helm/tokenizers/test_cohere_tokenizer.py +39 -0
  92. helm/tokenizers/test_huggingface_tokenizer.py +5 -1
  93. helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
  94. helm/benchmark/static_build/assets/index-878a1094.css +0 -1
  95. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
  96. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
  97. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
  98. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
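The bulk of this release's configuration churn is in helm/config/model_deployments.yaml (item 83 above), whose diff follows. For orientation, the sketch below is a representative deployment entry assembled from the fields that appear in this diff; the placeholder names and comments are illustrative, not taken from the release, and which optional fields appear varies per deployment:

    model_deployments:
      - name: example-org/example-model                 # deployment name: <host>/<deployment id>
        model_name: example-creator/example-model      # the model this deployment serves
        tokenizer_name: example-creator/example-tokenizer  # tokenizer used for token counting; sometimes a substitute
        max_sequence_length: 4095                      # prompt token budget
        max_request_length: 4096                       # optional: prompt plus special tokens
        max_sequence_and_generated_tokens_length: 8192 # optional: prompt plus output budget
        client_spec:
          class_name: "helm.clients.example_client.ExampleClient"  # client class to instantiate
          args:                                        # optional client-specific keyword arguments
            trust_remote_code: true
        window_service_spec:                           # optional: override the default window service
          class_name: "helm.benchmark.window_services.example_window_service.ExampleWindowService"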
helm/config/model_deployments.yaml

@@ -307,7 +307,7 @@ model_deployments:
 
   - name: cohere/command
     model_name: cohere/command
-    tokenizer_name: cohere/cohere
+    tokenizer_name: cohere/command
     max_sequence_length: 2019 # TODO: verify this
     max_request_length: 2020 # TODO: verify this
     client_spec:
@@ -317,7 +317,7 @@ model_deployments:
 
   - name: cohere/command-light
     model_name: cohere/command-light
-    tokenizer_name: cohere/cohere
+    tokenizer_name: cohere/command-light
     max_sequence_length: 2019 # TODO: verify this
     max_request_length: 2020 # TODO: verify this
     client_spec:
@@ -325,6 +325,25 @@ model_deployments:
     window_service_spec:
       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
 
+  - name: cohere/command-r
+    model_name: cohere/command-r
+    tokenizer_name: cohere/command-r
+    max_sequence_length: 128000
+    max_request_length: 128000
+    client_spec:
+      class_name: "helm.clients.cohere_client.CohereChatClient"
+
+  - name: cohere/command-r-plus
+    model_name: cohere/command-r-plus
+    tokenizer_name: cohere/command-r-plus
+    # "We have a known issue where prompts between 112K - 128K in length
+    # result in bad generations."
+    # Source: https://docs.cohere.com/docs/command-r-plus
+    max_sequence_length: 110000
+    max_request_length: 110000
+    client_spec:
+      class_name: "helm.clients.cohere_client.CohereChatClient"
+
   # Craiyon
 
   - name: craiyon/dalle-mini
@@ -352,7 +371,7 @@ model_deployments:
     tokenizer_name: databricks/dbrx-instruct
     max_sequence_length: 32767
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
 
   # DeepFloyd
 
@@ -390,7 +409,7 @@ model_deployments:
     tokenizer_name: deepseek-ai/deepseek-llm-67b-chat
     max_sequence_length: 4095
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
 
   # Gooseai
 
@@ -434,6 +453,14 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
 
+  - name: google/gemini-1.0-pro-002
+    model_name: google/gemini-1.0-pro-002
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 30720
+    max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
   - name: google/gemini-pro-vision
     model_name: google/gemini-pro-vision
     tokenizer_name: openai/cl100k_base
@@ -450,6 +477,22 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
 
+  - name: google/gemini-1.5-flash-001
+    model_name: google/gemini-1.5-flash-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-pro-001
+    model_name: google/gemini-1.5-pro-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
   - name: google/gemini-1.5-pro-preview-0409
     model_name: google/gemini-1.5-pro-preview-0409
     tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
@@ -458,6 +501,63 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
 
+  - name: google/gemini-1.5-pro-preview-0514
+    model_name: google/gemini-1.5-pro-preview-0514
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-flash-preview-0514
+    model_name: google/gemini-1.5-flash-preview-0514
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  ## Gemini with different safety settings
+  - name: google/gemini-1.5-pro-001-safety-default
+    model_name: google/gemini-1.5-pro-001-safety-default
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        safety_settings_preset: default
+
+  - name: google/gemini-1.5-pro-001-safety-block-none
+    model_name: google/gemini-1.5-pro-001-safety-block-none
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        safety_settings_preset: block_none
+
+  - name: google/gemini-1.5-flash-001-safety-default
+    model_name: google/gemini-1.5-flash-001-safety-default
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        safety_settings_preset: default
+
+  - name: google/gemini-1.5-flash-001-safety-block-none
+    model_name: google/gemini-1.5-flash-001-safety-block-none
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        safety_settings_preset: block_none
+
   ## Gemma
   - name: together/gemma-2b
     model_name: google/gemma-2b
@@ -487,6 +587,42 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
 
+  ## MedLM
+  - name: google/medlm-medium
+    model_name: google/medlm-medium
+    tokenizer_name: google/text-bison@001
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/medlm-large
+    model_name: google/medlm-large
+    tokenizer_name: google/text-bison@001
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  ## PaliGemma
+  - name: google/paligemma-3b-mix-224
+    model_name: google/paligemma-3b-mix-224
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient"
+
+  - name: google/paligemma-3b-mix-448
+    model_name: google/paligemma-3b-mix-448
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient"
+
   ## PaLM 2
   - name: google/text-bison@001
     model_name: google/text-bison@001
@@ -504,7 +640,7 @@ model_deployments:
     max_sequence_length: 6000 # Officially 8192
     max_sequence_and_generated_tokens_length: 9216
     client_spec:
-      class_name: "helm.proxy.clients.vertexai_client.VertexAITextClient"
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
     window_service_spec:
       class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
 
@@ -545,7 +681,7 @@ model_deployments:
     max_sequence_length: 6000 # Officially 6144
     max_sequence_and_generated_tokens_length: 7168
     client_spec:
-      class_name: "helm.proxy.clients.vertexai_client.VertexAITextClient"
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
     window_service_spec:
       class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
 
@@ -561,6 +697,25 @@ model_deployments:
 
   # HuggingFace
 
+  ## AI Singapore
+  - name: huggingface/sea-lion-7b
+    model_name: aisingapore/sea-lion-7b
+    tokenizer_name: aisingapore/sea-lion-7b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+
+  - name: huggingface/sea-lion-7b-instruct
+    model_name: aisingapore/sea-lion-7b-instruct
+    tokenizer_name: aisingapore/sea-lion-7b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+
   ## Bigcode
   - name: huggingface/santacoder
     model_name: bigcode/santacoder
@@ -576,6 +731,15 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.huggingface_client.HuggingFaceClient"
 
+  ## Biomistral
+
+  - name: huggingface/biomistral-7b
+    model_name: biomistral/biomistral-7b
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
   ## Databricks
   - name: huggingface/dolly-v2-3b
     model_name: databricks/dolly-v2-3b
@@ -658,6 +822,15 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.huggingface_client.HuggingFaceClient"
 
+  ## Meditron
+
+  - name: huggingface/meditron-7b
+    model_name: epfl-llm/meditron-7b
+    tokenizer_name: meta-llama/Llama-2-7b-hf
+    max_sequence_length: 4094
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
   ## Meta
   - name: huggingface/opt-175b
     model_name: meta/opt-175b
@@ -738,6 +911,14 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
 
+  ## KAIST AI
+  - name: huggingface/prometheus-vision-13b-v1.0-hf
+    model_name: kaistai/prometheus-vision-13b-v1.0-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
   ## OpenFlamingo
   - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
     model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
@@ -811,6 +992,50 @@ model_deployments:
       args:
         pretrained_model_name_or_path: openai-community/gpt2
 
+  ## SAIL (SEA AI Lab)
+  - name: sail/sailor-7b
+    model_name: sail/sailor-7b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: sail/sailor-7b-chat
+    model_name: sail/sailor-7b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: sail/sailor-14b
+    model_name: sail/sailor-14b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: sail/sailor-14b-chat
+    model_name: sail/sailor-14b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## SCB10X
+  - name: huggingface/typhoon-v1.5-72b
+    model_name: scb10x/typhoon-v1.5-72b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/typhoon-v1.5-72b-instruct
+    model_name: scb10x/typhoon-v1.5-72b-instruct
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
   ## StabilityAI
   - name: huggingface/stablelm-base-alpha-3b
     model_name: stabilityai/stablelm-base-alpha-3b
@@ -1348,6 +1573,13 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.openai_client.OpenAIClient"
 
+  - name: openai/gpt-4o-2024-05-13
+    model_name: openai/gpt-4o-2024-05-13
+    tokenizer_name: openai/o200k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAIClient"
+
   - name: openai/gpt-4-vision-preview
     model_name: openai/gpt-4-vision-preview
     tokenizer_name: openai/cl100k_base
@@ -1640,18 +1872,36 @@ model_deployments:
     tokenizer_name: meta/llama-3-8b
     max_sequence_length: 8191
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
       args:
-        together_model: meta-llama/Meta-Llama-3-8B
+        together_model: meta-llama/Llama-3-8b-chat-hf
 
   - name: together/llama-3-70b-chat
     model_name: meta/llama-3-70b-chat
     tokenizer_name: meta/llama-3-8b
     max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: meta-llama/Llama-3-70b-chat-hf
+
+  - name: together/llama-guard-7b
+    model_name: meta/llama-guard-7b
+    tokenizer_name: meta-llama/Llama-2-7b-hf
+    max_sequence_length: 2047
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
       args:
-        together_model: meta-llama/Meta-Llama-3-70B
+        together_model: meta-llama/llama-guard-7b
+
+  - name: together/llama-guard-2-8b
+    model_name: meta/llama-guard-2-8b
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 4094
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/llamaguard-2-8b
 
   # 01.AI
   - name: together/yi-6b
@@ -1677,18 +1927,32 @@ model_deployments:
     tokenizer_name: 01-ai/Yi-6B
     max_sequence_length: 4095
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
      args:
-        together_model: zero-one-ai/Yi-6B
+        together_model: zero-one-ai/Yi-6B-Chat
 
   - name: together/yi-34b-chat
     model_name: 01-ai/yi-34b-chat
     tokenizer_name: 01-ai/Yi-6B
     max_sequence_length: 4095
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
       args:
-        together_model: zero-one-ai/Yi-34B
+        together_model: zero-one-ai/Yi-34B-Chat
+
+  - name: 01-ai/yi-large
+    model_name: 01-ai/yi-large
+    tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute
+    max_sequence_length: 16000
+    client_spec:
+      class_name: "helm.clients.yi_client.YiChatClient"
+
+  - name: 01-ai/yi-large-preview
+    model_name: 01-ai/yi-large-preview
+    tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute
+    max_sequence_length: 16000
+    client_spec:
+      class_name: "helm.clients.yi_client.YiChatClient"
 
 
 # Allen Institute for AI
@@ -1711,8 +1975,16 @@ model_deployments:
     tokenizer_name: allenai/olmo-7b
     max_sequence_length: 2047
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
 
+  - name: huggingface/olmo-1.7-7b
+    model_name: allenai/olmo-1.7-7b
+    tokenizer_name: allenai/OLMo-1.7-7B-hf
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: allenai/OLMo-1.7-7B-hf
 
   ## MistralAI
   - name: together/mistral-7b-v0.1
@@ -1724,6 +1996,27 @@ model_deployments:
       args:
         together_model: mistralai/Mistral-7B-v0.1
 
+  - name: together/mistral-7b-instruct-v0.1
+    model_name: mistralai/mistral-7b-instruct-v0.1
+    tokenizer_name: mistralai/Mistral-7B-Instruct-v0.1
+    max_sequence_length: 4000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/mistral-7b-instruct-v0.2
+    model_name: mistralai/mistral-7b-instruct-v0.2
+    tokenizer_name: mistralai/Mistral-7B-Instruct-v0.2
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/mistral-7b-instruct-v0.3
+    model_name: mistralai/mistral-7b-instruct-v0.3
+    tokenizer_name: mistralai/Mistral-7B-Instruct-v0.3
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
   - name: together/mixtral-8x7b-32kseqlen
     model_name: mistralai/mixtral-8x7b-32kseqlen
     tokenizer_name: mistralai/Mistral-7B-v0.1
@@ -1738,7 +2031,7 @@ model_deployments:
     tokenizer_name: mistralai/Mistral-7B-v0.1
     max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
 
   - name: together/mixtral-8x22b
     model_name: mistralai/mixtral-8x22b
@@ -1752,7 +2045,7 @@ model_deployments:
     tokenizer_name: mistralai/Mistral-7B-v0.1
     max_sequence_length: 65535
     client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
+      class_name: "helm.clients.together_client.TogetherChatClient"
 
 
   ## Snowflake
@@ -1978,12 +2271,21 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.palmyra_client.PalmyraClient"
 
+  - name: writer/palmyra-vision-003
+    model_name: writer/palmyra-vision-003
+    tokenizer_name: writer/gpt2
+    max_sequence_length: 2048
+    max_sequence_and_generated_tokens_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.palmyra_vision_client.PalmyraVisionClient"
+
+
   # Qwen
 
   - name: together/qwen-7b
     model_name: qwen/qwen-7b
     tokenizer_name: qwen/qwen-7b
-    max_sequence_length: 8191
+    max_sequence_length: 32767
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
       args:
@@ -2019,12 +2321,54 @@ model_deployments:
   - name: together/qwen1.5-72b
     model_name: qwen/qwen1.5-72b
     tokenizer_name: qwen/qwen1.5-7b
-    max_sequence_length: 4095
+    max_sequence_length: 32767
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
      args:
         together_model: Qwen/Qwen1.5-72B
 
+  - name: together/qwen1.5-7b-chat
+    model_name: qwen/qwen1.5-7b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/qwen1.5-14b-chat
+    model_name: qwen/qwen1.5-14b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/qwen1.5-32b-chat
+    model_name: qwen/qwen1.5-32b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/qwen1.5-72b-chat
+    model_name: qwen/qwen1.5-72b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/qwen1.5-110b-chat
+    model_name: qwen/qwen1.5-110b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/qwen2-72b-instruct
+    model_name: qwen/qwen2-72b-instruct
+    tokenizer_name: qwen/qwen2-72b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
   - name: huggingface/qwen-vl
     model_name: qwen/qwen-vl
     tokenizer_name: qwen/qwen-vl
@@ -2038,3 +2382,53 @@ model_deployments:
     max_sequence_length: 8191
     client_spec:
       class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"
+
+  # Reka
+  - name: reka/reka-core
+    model_name: reka/reka-core
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"
+
+  - name: reka/reka-core-20240415
+    model_name: reka/reka-core-20240415
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"
+
+  - name: reka/reka-core-20240501
+    model_name: reka/reka-core-20240501
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"
+
+  - name: reka/reka-flash
+    model_name: reka/reka-flash
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"
+
+  - name: reka/reka-flash-20240226
+    model_name: reka/reka-flash-20240226
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"
+
+  - name: reka/reka-edge
+    model_name: reka/reka-edge
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 64000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"
+
+  - name: reka/reka-edge-20240208
+    model_name: reka/reka-edge-20240208
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 64000
+    client_spec:
+      class_name: "helm.clients.reka_client.RekaClient"