crfm-helm 0.5.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (665)
  1. {crfm_helm-0.5.0/src/crfm_helm.egg-info → crfm_helm-0.5.1}/PKG-INFO +7 -3
  2. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/setup.cfg +8 -3
  3. {crfm_helm-0.5.0 → crfm_helm-0.5.1/src/crfm_helm.egg-info}/PKG-INFO +7 -3
  4. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/crfm_helm.egg-info/SOURCES.txt +15 -3
  5. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/crfm_helm.egg-info/requires.txt +7 -2
  6. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  7. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  8. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  9. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +1 -1
  10. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/perturbation.py +17 -1
  11. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/test_perturbation.py +30 -0
  12. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/efficiency_metrics.py +9 -2
  13. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/evaluate_reference_metrics.py +16 -0
  14. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/vision_language/image_metrics.py +142 -17
  15. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/model_metadata_registry.py +5 -1
  16. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_expander.py +35 -63
  17. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_spec_factory.py +11 -10
  18. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/vlm_run_specs.py +294 -38
  19. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  20. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/math_scenario.py +1 -1
  21. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  22. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  23. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  24. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  25. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +4 -2
  26. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +1 -1
  27. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +1 -1
  28. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  29. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  30. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  31. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  32. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  33. crfm_helm-0.5.1/src/helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  34. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +2 -2
  35. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/vqa_scenario.py +4 -2
  36. crfm_helm-0.5.1/src/helm/benchmark/static/schema_image2structure.yaml +304 -0
  37. crfm_helm-0.5.1/src/helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  38. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/schema_vlm.yaml +257 -10
  39. crfm_helm-0.5.1/src/helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  40. crfm_helm-0.5.1/src/helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  41. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/index.html +2 -2
  42. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/anthropic_client.py +36 -6
  43. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/openai_client.py +2 -3
  44. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/together_client.py +93 -2
  45. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vertexai_client.py +59 -50
  46. crfm_helm-0.5.1/src/helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  47. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/huggingface_vlm_client.py +11 -4
  48. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/idefics_client.py +2 -2
  49. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/images_utils.py +10 -3
  50. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/config/model_deployments.yaml +100 -2
  51. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/config/model_metadata.yaml +136 -31
  52. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/config/tokenizer_configs.yaml +7 -0
  53. crfm_helm-0.5.0/src/helm/benchmark/static_build/assets/index-5088afcb.css +0 -1
  54. crfm_helm-0.5.0/src/helm/benchmark/static_build/assets/index-d839df55.js +0 -9
  55. crfm_helm-0.5.0/src/helm/benchmark/test_model_deployment_definition.py +0 -90
  56. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/LICENSE +0 -0
  57. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/MANIFEST.in +0 -0
  58. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/README.md +0 -0
  59. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/docs/tutorial.md +0 -0
  60. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/pyproject.toml +0 -0
  61. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/crfm_helm.egg-info/dependency_links.txt +0 -0
  62. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/crfm_helm.egg-info/entry_points.txt +0 -0
  63. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/crfm_helm.egg-info/not-zip-safe +0 -0
  64. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/crfm_helm.egg-info/top_level.txt +0 -0
  65. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/__init__.py +0 -0
  66. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/__init__.py +0 -0
  67. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/__init__.py +0 -0
  68. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapter_spec.py +0 -0
  69. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/__init__.py +0 -0
  70. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/adapter.py +0 -0
  71. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/adapter_factory.py +0 -0
  72. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +0 -0
  73. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/generation_adapter.py +0 -0
  74. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +0 -0
  75. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/language_modeling_adapter.py +0 -0
  76. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/__init__.py +0 -0
  77. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +0 -0
  78. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +0 -0
  79. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +0 -0
  80. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +0 -0
  81. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +0 -0
  82. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +0 -0
  83. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/test_adapter.py +0 -0
  84. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/test_generation_adapter.py +0 -0
  85. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +0 -0
  86. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +0 -0
  87. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/common_adapter_specs.py +0 -0
  88. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/prompt.py +0 -0
  89. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/request_state.py +0 -0
  90. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/adaptation/scenario_state.py +0 -0
  91. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/__init__.py +0 -0
  92. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/annotator.py +0 -0
  93. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/annotator_factory.py +0 -0
  94. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/image2structure/__init__.py +0 -0
  95. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/image2structure/image_compiler_annotator.py +0 -0
  96. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +0 -0
  97. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +0 -0
  98. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/test_annotator_factory.py +0 -0
  99. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation/test_dummy_annotator.py +0 -0
  100. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/annotation_executor.py +0 -0
  101. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/__init__.py +0 -0
  102. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/cleva_perturbation.py +0 -0
  103. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/contraction_expansion_perturbation.py +0 -0
  104. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -0
  105. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/correct_to_misspelling.json +0 -0
  106. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/data_augmenter.py +0 -0
  107. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/dialect_perturbation.py +0 -0
  108. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/extra_space_perturbation.py +0 -0
  109. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/filler_words_perturbation.py +0 -0
  110. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/gender_perturbation.py +0 -0
  111. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/lowercase_perturbation.py +0 -0
  112. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/mild_mix_perturbation.py +0 -0
  113. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/misspelling_perturbation.py +0 -0
  114. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/person_name_perturbation.py +0 -0
  115. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/perturbation_description.py +0 -0
  116. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/space_perturbation.py +0 -0
  117. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/suffix_perturbation.py +0 -0
  118. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/synonym_perturbation.py +0 -0
  119. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/translate_perturbation.py +0 -0
  120. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/augmentations/typos_perturbation.py +0 -0
  121. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/config_registry.py +0 -0
  122. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/data_overlap/__init__.py +0 -0
  123. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/data_overlap/data_overlap_spec.py +0 -0
  124. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/data_overlap/export_scenario_text.py +0 -0
  125. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/data_overlap/light_scenario.py +0 -0
  126. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/data_preprocessor.py +0 -0
  127. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/efficiency_data/inference_denoised_runtimes.json +0 -0
  128. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/efficiency_data/inference_idealized_runtimes.json +0 -0
  129. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/efficiency_data/training_efficiency.json +0 -0
  130. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/executor.py +0 -0
  131. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/huggingface_registration.py +0 -0
  132. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/__init__.py +0 -0
  133. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/basic_metrics.py +0 -0
  134. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/bbq_metrics.py +0 -0
  135. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/bias_metrics.py +0 -0
  136. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/bias_word_lists.py +0 -0
  137. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/classification_metrics.py +0 -0
  138. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/cleva_accuracy_metrics.py +0 -0
  139. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/cleva_harms_metrics.py +0 -0
  140. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/cleva_metrics_helper.py +0 -0
  141. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/code_metrics.py +0 -0
  142. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/code_metrics_helper.py +0 -0
  143. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/common_metric_specs.py +0 -0
  144. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/copyright_metrics.py +0 -0
  145. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/decodingtrust_fairness_metrics.py +0 -0
  146. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +0 -0
  147. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/decodingtrust_privacy_metrics.py +0 -0
  148. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +0 -0
  149. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/disinformation_metrics.py +0 -0
  150. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/dry_run_metrics.py +0 -0
  151. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/evaluate_instances_metric.py +0 -0
  152. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/__init__.py +0 -0
  153. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/aesthetics_metrics.py +0 -0
  154. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/aesthetics_scorer.py +0 -0
  155. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/clip_score_metrics.py +0 -0
  156. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +0 -0
  157. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/detection_metrics.py +0 -0
  158. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/detectors/__init__.py +0 -0
  159. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/detectors/base_detector.py +0 -0
  160. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/detectors/vitdet.py +0 -0
  161. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/efficiency_metrics.py +0 -0
  162. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/fidelity_metrics.py +0 -0
  163. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  164. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +0 -0
  165. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +0 -0
  166. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +0 -0
  167. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/gender_metrics.py +0 -0
  168. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/image_critique_metrics.py +0 -0
  169. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/lpips_metrics.py +0 -0
  170. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +0 -0
  171. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/nsfw_detector.py +0 -0
  172. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/nsfw_metrics.py +0 -0
  173. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/nudity_metrics.py +0 -0
  174. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +0 -0
  175. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/psnr_metrics.py +0 -0
  176. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  177. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +0 -0
  178. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/q16/test_q16.py +0 -0
  179. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +0 -0
  180. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/skin_tone_metrics.py +0 -0
  181. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/uiqi_metrics.py +0 -0
  182. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  183. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +0 -0
  184. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +0 -0
  185. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/image_generation/watermark_metrics.py +0 -0
  186. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/instruction_following_critique_metrics.py +0 -0
  187. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/language_modeling_metrics.py +0 -0
  188. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/machine_translation_metrics.py +0 -0
  189. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/metric.py +0 -0
  190. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/metric_name.py +0 -0
  191. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/metric_service.py +0 -0
  192. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/numeracy_metrics.py +0 -0
  193. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/paraphrase_generation_metrics.py +0 -0
  194. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/ranking_metrics.py +0 -0
  195. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/reference_metric.py +0 -0
  196. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/statistic.py +0 -0
  197. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/summac/__init__.py +0 -0
  198. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/summac/model_summac.py +0 -0
  199. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/summac/utils_misc.py +0 -0
  200. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/summarization_critique_metrics.py +0 -0
  201. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/summarization_metrics.py +0 -0
  202. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_bias_metrics.py +0 -0
  203. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_classification_metrics.py +0 -0
  204. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_disinformation_metrics.py +0 -0
  205. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_evaluate_reference_metrics.py +0 -0
  206. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_metric.py +0 -0
  207. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_numeracy_metrics.py +0 -0
  208. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/test_statistic.py +0 -0
  209. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/__init__.py +0 -0
  210. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +0 -0
  211. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +0 -0
  212. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +0 -0
  213. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/free_token_cost_estimator.py +0 -0
  214. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +0 -0
  215. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +0 -0
  216. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +0 -0
  217. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +0 -0
  218. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/tokens/token_cost_estimator.py +0 -0
  219. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/toxicity_metrics.py +0 -0
  220. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/toxicity_utils.py +0 -0
  221. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/unitxt_metrics.py +0 -0
  222. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/vision_language/__init__.py +0 -0
  223. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/vision_language/emd_utils.py +0 -0
  224. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/metrics/vision_language/image_utils.py +0 -0
  225. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/model_deployment_registry.py +0 -0
  226. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/multi_gpu_runner.py +0 -0
  227. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/__init__.py +0 -0
  228. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/contamination.py +0 -0
  229. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/create_plots.py +0 -0
  230. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/run_display.py +0 -0
  231. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/run_entry.py +0 -0
  232. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/schema.py +0 -0
  233. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/summarize.py +0 -0
  234. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/table.py +0 -0
  235. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/test_contamination.py +0 -0
  236. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/test_create_plots.py +0 -0
  237. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/test_run_entry.py +0 -0
  238. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/presentation/test_summarize.py +0 -0
  239. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run.py +0 -0
  240. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_spec.py +0 -0
  241. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/__init__.py +0 -0
  242. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/classic_run_specs.py +0 -0
  243. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/cleva_run_specs.py +0 -0
  244. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/decodingtrust_run_specs.py +0 -0
  245. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/heim_run_specs.py +0 -0
  246. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/instruction_following_run_specs.py +0 -0
  247. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/lite_run_specs.py +0 -0
  248. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/simple_run_specs.py +0 -0
  249. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/run_specs/unitxt_run_specs.py +0 -0
  250. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/runner.py +0 -0
  251. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/runner_config_registry.py +0 -0
  252. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/__init__.py +0 -0
  253. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +0 -0
  254. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/babi_qa_scenario.py +0 -0
  255. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/bbq_scenario.py +0 -0
  256. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/big_bench_scenario.py +0 -0
  257. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/blimp_scenario.py +0 -0
  258. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/bold_scenario.py +0 -0
  259. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/boolq_scenario.py +0 -0
  260. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/civil_comments_scenario.py +0 -0
  261. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/cleva_scenario.py +0 -0
  262. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/code_scenario.py +0 -0
  263. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py +0 -0
  264. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/code_scenario_helper.py +0 -0
  265. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/commonsense_scenario.py +0 -0
  266. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/copyright_scenario.py +0 -0
  267. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/covid_dialog_scenario.py +0 -0
  268. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/custom_mcqa_scenario.py +0 -0
  269. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +0 -0
  270. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +0 -0
  271. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +0 -0
  272. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +0 -0
  273. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +0 -0
  274. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +0 -0
  275. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +0 -0
  276. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +0 -0
  277. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/dialogue_scenarios.py +0 -0
  278. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/disinformation_scenario.py +0 -0
  279. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/dyck_language_scenario.py +0 -0
  280. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/entity_data_imputation_scenario.py +0 -0
  281. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/entity_matching_scenario.py +0 -0
  282. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py +0 -0
  283. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/grammar.py +0 -0
  284. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/grammar_scenario.py +0 -0
  285. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/gsm_scenario.py +0 -0
  286. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/ice_scenario.py +0 -0
  287. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/ice_scenario_pinned_file_order.py +0 -0
  288. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  289. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +0 -0
  290. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/cub200_scenario.py +0 -0
  291. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +0 -0
  292. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +0 -0
  293. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/detection_scenario.py +0 -0
  294. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +0 -0
  295. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/i2p_scenario.py +0 -0
  296. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/landing_page_scenario.py +0 -0
  297. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/logos_scenario.py +0 -0
  298. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +0 -0
  299. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +0 -0
  300. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/mscoco_scenario.py +0 -0
  301. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +0 -0
  302. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +0 -0
  303. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/radiology_scenario.py +0 -0
  304. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +0 -0
  305. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +0 -0
  306. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/image_generation/winoground_scenario.py +0 -0
  307. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/imdb_scenario.py +0 -0
  308. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py +0 -0
  309. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +0 -0
  310. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/koala_scenario.py +0 -0
  311. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/legal_summarization_scenario.py +0 -0
  312. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/legal_support_scenario.py +0 -0
  313. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/lex_glue_scenario.py +0 -0
  314. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/lextreme_scenario.py +0 -0
  315. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/live_qa_scenario.py +0 -0
  316. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/lm_entry_scenario.py +0 -0
  317. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/lsat_qa_scenario.py +0 -0
  318. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/me_q_sum_scenario.py +0 -0
  319. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/med_dialog_scenario.py +0 -0
  320. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/med_mcqa_scenario.py +0 -0
  321. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +0 -0
  322. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/med_qa_scenario.py +0 -0
  323. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/medication_qa_scenario.py +0 -0
  324. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/mmlu_scenario.py +0 -0
  325. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/msmarco_scenario.py +0 -0
  326. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/narrativeqa_scenario.py +0 -0
  327. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/natural_qa_scenario.py +0 -0
  328. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/newsqa_scenario.py +0 -0
  329. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/numeracy_scenario.py +0 -0
  330. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/open_assistant_scenario.py +0 -0
  331. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/opinions_qa_scenario.py +0 -0
  332. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/pubmed_qa_scenario.py +0 -0
  333. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/quac_scenario.py +0 -0
  334. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/raft_scenario.py +0 -0
  335. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +0 -0
  336. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/scenario.py +0 -0
  337. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/self_instruct_scenario.py +0 -0
  338. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/simple_scenarios.py +0 -0
  339. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/summarization_scenario.py +0 -0
  340. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/synthetic_efficiency_scenario.py +0 -0
  341. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +0 -0
  342. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/synthetic_reasoning_scenario.py +0 -0
  343. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/test_grammar.py +0 -0
  344. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/test_math_scenario.py +0 -0
  345. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/test_scenario.py +0 -0
  346. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/test_simple_scenarios.py +0 -0
  347. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/thai_exam_scenario.py +0 -0
  348. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/the_pile_scenario.py +0 -0
  349. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/truthful_qa_scenario.py +0 -0
  350. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/twitter_aae_scenario.py +0 -0
  351. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/unitxt_scenario.py +0 -0
  352. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/verifiability_judgment_scenario.py +0 -0
  353. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vicuna_scenario.py +0 -0
  354. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/__init__.py +0 -0
  355. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/bingo_scenario.py +0 -0
  356. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +0 -0
  357. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  358. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +0 -0
  359. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +0 -0
  360. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +0 -0
  361. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +0 -0
  362. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  363. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +0 -0
  364. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +0 -0
  365. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +0 -0
  366. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/mementos_scenario.py +0 -0
  367. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/mme_scenario.py +0 -0
  368. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/mmmu_scenario.py +0 -0
  369. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +0 -0
  370. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/pope_scenario.py +0 -0
  371. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +0 -0
  372. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/vision_language/unicorn_scenario.py +0 -0
  373. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/wikifact_scenario.py +0 -0
  374. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/wikitext_103_scenario.py +0 -0
  375. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/scenarios/wmt_14_scenario.py +0 -0
  376. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/server.py +0 -0
  377. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/slurm_jobs.py +0 -0
  378. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/slurm_runner.py +0 -0
  379. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/benchmarking.css +0 -0
  380. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/benchmarking.js +0 -0
  381. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/config.js +0 -0
  382. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/contamination.yaml +0 -0
  383. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/general.js +0 -0
  384. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/crfm-logo.png +0 -0
  385. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/helm-logo-simple.png +0 -0
  386. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/helm-logo.png +0 -0
  387. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/language-model-helm.png +0 -0
  388. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/ai21.png +0 -0
  389. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/anthropic.png +0 -0
  390. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/bigscience.png +0 -0
  391. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/cohere.png +0 -0
  392. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  393. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/google.png +0 -0
  394. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/meta.png +0 -0
  395. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/microsoft.png +0 -0
  396. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/nvidia.png +0 -0
  397. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/openai.png +0 -0
  398. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/together.png +0 -0
  399. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  400. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/organizations/yandex.png +0 -0
  401. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  402. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  403. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/index.html +0 -0
  404. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/info-icon.png +0 -0
  405. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/json-urls.js +0 -0
  406. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/plot-captions.js +0 -0
  407. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/schema_classic.yaml +0 -0
  408. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/schema_instruction_following.yaml +0 -0
  409. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/schema_lite.yaml +0 -0
  410. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/schema_mmlu.yaml +0 -0
  411. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/schema_unitxt.yaml +0 -0
  412. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static/utils.js +0 -0
  413. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  414. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  415. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  416. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  417. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  418. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  419. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  420. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  421. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  422. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  423. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  424. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  425. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  426. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  427. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  428. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  429. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  430. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/react-d4a0b69b.js +0 -0
  431. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/recharts-6d337683.js +0 -0
  432. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  433. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  434. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/tremor-54a99cc4.js +0 -0
  435. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  436. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  437. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  438. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  439. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/static_build/config.js +0 -0
  440. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/test_data_preprocessor.py +0 -0
  441. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/test_run_expander.py +0 -0
  442. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/tokenizer_config_registry.py +0 -0
  443. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/__init__.py +0 -0
  444. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/ai21_window_service.py +0 -0
  445. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/cohere_window_service.py +0 -0
  446. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/default_window_service.py +0 -0
  447. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/encoder_decoder_window_service.py +0 -0
  448. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/ice_window_service.py +0 -0
  449. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/image_generation/__init__.py +0 -0
  450. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/image_generation/clip_window_service.py +0 -0
  451. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/image_generation/lexica_search_window_service.py +0 -0
  452. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +0 -0
  453. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/image_generation/test_clip_window_service.py +0 -0
  454. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +0 -0
  455. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/local_window_service.py +0 -0
  456. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/no_decoding_window_service.py +0 -0
  457. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_ai21_window_service.py +0 -0
  458. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_anthropic_window_service.py +0 -0
  459. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_bloom_window_service.py +0 -0
  460. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_cohere_window_service.py +0 -0
  461. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -0
  462. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_flan_t5_window_service.py +0 -0
  463. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_gpt2_window_service.py +0 -0
  464. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_gpt4_window_service.py +0 -0
  465. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_gptj_window_service.py +0 -0
  466. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_gptneox_window_service.py +0 -0
  467. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_ice_window_service.py +0 -0
  468. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_openai_window_service.py +0 -0
  469. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_opt_window_service.py +0 -0
  470. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_palmyra_window_service.py +0 -0
  471. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_t0pp_window_service.py +0 -0
  472. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_t511b_window_service.py +0 -0
  473. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_ul2_window_service.py +0 -0
  474. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_utils.py +0 -0
  475. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/test_yalm_window_service.py +0 -0
  476. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/tokenizer_service.py +0 -0
  477. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/window_service.py +0 -0
  478. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/window_service_factory.py +0 -0
  479. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/benchmark/window_services/yalm_window_service.py +0 -0
  480. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/__init__.py +0 -0
  481. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/ai21_client.py +0 -0
  482. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/ai21_utils.py +0 -0
  483. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/aleph_alpha_client.py +0 -0
  484. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/auto_client.py +0 -0
  485. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/bedrock_client.py +0 -0
  486. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/bedrock_utils.py +0 -0
  487. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/client.py +0 -0
  488. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/clip_score_client.py +0 -0
  489. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/clip_scorers/__init__.py +0 -0
  490. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/clip_scorers/base_clip_scorer.py +0 -0
  491. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/clip_scorers/clip_scorer.py +0 -0
  492. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/clip_scorers/multilingual_clip_scorer.py +0 -0
  493. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/cohere_client.py +0 -0
  494. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/cohere_utils.py +0 -0
  495. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/gcs_client.py +0 -0
  496. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/google_client.py +0 -0
  497. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/google_translate_client.py +0 -0
  498. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/http_model_client.py +0 -0
  499. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/huggingface_client.py +0 -0
  500. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/__init__.py +0 -0
  501. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/adobe_vision_client.py +0 -0
  502. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/aleph_alpha_image_generation_client.py +0 -0
  503. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/__init__.py +0 -0
  504. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/coglm_strategy.py +0 -0
  505. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/coglm_utils.py +0 -0
  506. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +0 -0
  507. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +0 -0
  508. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +0 -0
  509. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +0 -0
  510. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +0 -0
  511. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +0 -0
  512. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +0 -0
  513. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +0 -0
  514. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/cogview2_client.py +0 -0
  515. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle2_client.py +0 -0
  516. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle3_client.py +0 -0
  517. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/__init__.py +0 -0
  518. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/data.py +0 -0
  519. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/__init__.py +0 -0
  520. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/configuration.py +0 -0
  521. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/modeling.py +0 -0
  522. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/partitions.py +0 -0
  523. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/processor.py +0 -0
  524. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/text.py +0 -0
  525. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/tokenizer.py +0 -0
  526. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/model/utils.py +0 -0
  527. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +0 -0
  528. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +0 -0
  529. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +0 -0
  530. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +0 -0
  531. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/dalle_mini_client.py +0 -0
  532. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/deep_floyd_client.py +0 -0
  533. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/huggingface_diffusers_client.py +0 -0
  534. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/image_generation_client_utils.py +0 -0
  535. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/lexica_client.py +0 -0
  536. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/__init__.py +0 -0
  537. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/__init__.py +0 -0
  538. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  539. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/stage1/layers.py +0 -0
  540. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/stage1/vqgan.py +0 -0
  541. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  542. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/stage2/layers.py +0 -0
  543. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/stage2/transformer.py +0 -0
  544. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/models/tokenizer.py +0 -0
  545. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/utils/__init__.py +0 -0
  546. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/utils/config.py +0 -0
  547. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/utils/sampling.py +0 -0
  548. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle/utils/utils.py +0 -0
  549. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/mindalle_client.py +0 -0
  550. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/nudity_check_client.py +0 -0
  551. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/image_generation/together_image_generation_client.py +0 -0
  552. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/lit_gpt_client.py +0 -0
  553. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/lit_gpt_generate.py +0 -0
  554. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/megatron_client.py +0 -0
  555. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/mistral_client.py +0 -0
  556. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/moderation_api_client.py +0 -0
  557. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/open_lm_client.py +0 -0
  558. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/palmyra_client.py +0 -0
  559. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/perspective_api_client.py +0 -0
  560. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/simple_client.py +0 -0
  561. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/test_auto_client.py +0 -0
  562. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/test_client.py +0 -0
  563. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/test_huggingface_client.py +0 -0
  564. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/test_simple_client.py +0 -0
  565. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/test_together_client.py +0 -0
  566. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/toxicity_classifier_client.py +0 -0
  567. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/__init__.py +0 -0
  568. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/__init__.py +0 -0
  569. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  570. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/src/factory.py +0 -0
  571. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/src/flamingo.py +0 -0
  572. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +0 -0
  573. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/src/helpers.py +0 -0
  574. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo/src/utils.py +0 -0
  575. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/open_flamingo_client.py +0 -0
  576. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vision_language/qwen_vlm_client.py +0 -0
  577. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/clients/vllm_client.py +0 -0
  578. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/__init__.py +0 -0
  579. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/authentication.py +0 -0
  580. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/cache.py +0 -0
  581. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/cache_backend_config.py +0 -0
  582. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/clip_score_request.py +0 -0
  583. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/codec.py +0 -0
  584. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/concurrency.py +0 -0
  585. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/credentials_utils.py +0 -0
  586. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/critique_request.py +0 -0
  587. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/file_caches/__init__.py +0 -0
  588. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/file_caches/file_cache.py +0 -0
  589. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/file_caches/local_file_cache.py +0 -0
  590. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/file_caches/test_local_file_cache.py +0 -0
  591. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/file_upload_request.py +0 -0
  592. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/general.py +0 -0
  593. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/gpu_utils.py +0 -0
  594. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/hierarchical_logger.py +0 -0
  595. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/image_generation_parameters.py +0 -0
  596. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/key_value_store.py +0 -0
  597. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/media_object.py +0 -0
  598. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/moderations_api_request.py +0 -0
  599. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/mongo_key_value_store.py +0 -0
  600. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/multimodal_request_utils.py +0 -0
  601. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/nudity_check_request.py +0 -0
  602. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/object_spec.py +0 -0
  603. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/optional_dependencies.py +0 -0
  604. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/perspective_api_request.py +0 -0
  605. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/request.py +0 -0
  606. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/test_cache.py +0 -0
  607. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/test_codec.py +0 -0
  608. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/test_general.py +0 -0
  609. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/test_media_object.py +0 -0
  610. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/common/tokenization_request.py +0 -0
  611. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/config/__init__.py +0 -0
  612. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/__init__.py +0 -0
  613. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/accounts.py +0 -0
  614. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/cli.py +0 -0
  615. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/__init__.py +0 -0
  616. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/critique_client.py +0 -0
  617. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/mechanical_turk_critique_client.py +0 -0
  618. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/mechanical_turk_critique_exporter.py +0 -0
  619. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/mechanical_turk_critique_importer.py +0 -0
  620. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/mechanical_turk_utils.py +0 -0
  621. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/model_critique_client.py +0 -0
  622. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/scale_critique_client.py +0 -0
  623. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/critique/surge_ai_critique_client.py +0 -0
  624. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/example_queries.py +0 -0
  625. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/query.py +0 -0
  626. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/retry.py +0 -0
  627. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/server.py +0 -0
  628. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/services/__init__.py +0 -0
  629. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/services/remote_service.py +0 -0
  630. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/services/server_service.py +0 -0
  631. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/services/service.py +0 -0
  632. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/services/test_remote_service.py +0 -0
  633. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/services/test_service.py +0 -0
  634. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/test_accounts.py +0 -0
  635. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/test_retry.py +0 -0
  636. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/token_counters/__init__.py +0 -0
  637. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/token_counters/auto_token_counter.py +0 -0
  638. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/token_counters/test_auto_token_counter.py +0 -0
  639. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/proxy/token_counters/token_counter.py +0 -0
  640. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/py.typed +0 -0
  641. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/__init__.py +0 -0
  642. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/ai21_tokenizer.py +0 -0
  643. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/aleph_alpha_tokenizer.py +0 -0
  644. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/anthropic_tokenizer.py +0 -0
  645. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/auto_tokenizer.py +0 -0
  646. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/caching_tokenizer.py +0 -0
  647. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/cohere_tokenizer.py +0 -0
  648. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/http_model_tokenizer.py +0 -0
  649. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/huggingface_tokenizer.py +0 -0
  650. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/ice_tokenizer.py +0 -0
  651. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/lit_gpt_tokenizer.py +0 -0
  652. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/simple_tokenizer.py +0 -0
  653. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/test_anthropic_tokenizer.py +0 -0
  654. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/test_huggingface_tokenizer.py +0 -0
  655. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/test_ice_tokenizer.py +0 -0
  656. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/test_simple_tokenizer.py +0 -0
  657. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/test_yalm_tokenizer.py +0 -0
  658. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/tiktoken_tokenizer.py +0 -0
  659. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/tokenizer.py +0 -0
  660. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/vertexai_tokenizer.py +0 -0
  661. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/yalm_tokenizer.py +0 -0
  662. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  663. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
  664. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  665. {crfm_helm-0.5.0 → crfm_helm-0.5.1}/src/helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: crfm-helm
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Benchmark for language models
5
5
  Home-page: https://github.com/stanford-crfm/helm
6
6
  Author: Stanford CRFM
@@ -25,7 +25,7 @@ Requires-Dist: tqdm~=4.64
25
25
  Requires-Dist: zstandard~=0.18.0
26
26
  Requires-Dist: sqlitedict~=1.7
27
27
  Requires-Dist: bottle~=0.12.23
28
- Requires-Dist: datasets~=2.15
28
+ Requires-Dist: datasets~=2.17
29
29
  Requires-Dist: pyarrow>=11.0.0
30
30
  Requires-Dist: pyarrow-hotfix~=0.6
31
31
  Requires-Dist: nltk~=3.7
@@ -34,7 +34,7 @@ Requires-Dist: rouge-score~=0.1.2
34
34
  Requires-Dist: scipy~=1.10
35
35
  Requires-Dist: uncertainty-calibration~=0.1.4
36
36
  Requires-Dist: scikit-learn~=1.1
37
- Requires-Dist: transformers~=4.37
37
+ Requires-Dist: transformers~=4.40
38
38
  Requires-Dist: torch<3.0.0,>=1.13.1
39
39
  Requires-Dist: torchvision<3.0.0,>=0.14.1
40
40
  Requires-Dist: google-api-python-client~=2.64
@@ -94,6 +94,8 @@ Requires-Dist: tiktoken~=0.3.3; extra == "openai"
94
94
  Requires-Dist: pydantic~=2.0; extra == "openai"
95
95
  Provides-Extra: google
96
96
  Requires-Dist: google-cloud-aiplatform~=1.44; extra == "google"
97
+ Provides-Extra: together
98
+ Requires-Dist: together~=1.1; extra == "together"
97
99
  Provides-Extra: tsinghua
98
100
  Requires-Dist: icetk~=0.0.4; extra == "tsinghua"
99
101
  Provides-Extra: yandex
@@ -106,6 +108,7 @@ Requires-Dist: crfm-helm[anthropic]; extra == "models"
106
108
  Requires-Dist: crfm-helm[google]; extra == "models"
107
109
  Requires-Dist: crfm-helm[mistral]; extra == "models"
108
110
  Requires-Dist: crfm-helm[openai]; extra == "models"
111
+ Requires-Dist: crfm-helm[together]; extra == "models"
109
112
  Requires-Dist: crfm-helm[tsinghua]; extra == "models"
110
113
  Requires-Dist: crfm-helm[yandex]; extra == "models"
111
114
  Provides-Extra: vlm
@@ -119,6 +122,7 @@ Requires-Dist: scipy~=1.10; extra == "vlm"
119
122
  Requires-Dist: torchvision<3.0.0,>=0.14.1; extra == "vlm"
120
123
  Requires-Dist: crfm-helm[images]; extra == "vlm"
121
124
  Requires-Dist: crfm-helm[image2structure]; extra == "vlm"
125
+ Requires-Dist: pycocoevalcap~=1.2; extra == "vlm"
122
126
  Provides-Extra: image2structure
123
127
  Requires-Dist: crfm-helm[images]; extra == "image2structure"
124
128
  Requires-Dist: latex~=0.7.0; extra == "image2structure"
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = crfm-helm
3
- version = 0.5.0
3
+ version = 0.5.1
4
4
  author = Stanford CRFM
5
5
  author_email = contact-crfm@stanford.edu
6
6
  description = Benchmark for language models
@@ -35,7 +35,7 @@ install_requires =
35
35
  sqlitedict~=1.7
36
36
  bottle~=0.12.23
37
37
 
38
- datasets~=2.15
38
+ datasets~=2.17
39
39
  pyarrow>=11.0.0 # Pinned transitive dependency for datasets; workaround for #1026
40
40
  pyarrow-hotfix~=0.6 # Hotfix for CVE-2023-47248
41
41
 
@@ -46,7 +46,7 @@ install_requires =
46
46
  uncertainty-calibration~=0.1.4
47
47
  scikit-learn~=1.1
48
48
 
49
- transformers~=4.37 # For anthropic_client, vision_language.huggingface_vlm_client, huggingface_client, huggingface_tokenizer, test_openai_token_cost_estimator, model_summac (via summarization_metrics)
49
+ transformers~=4.40 # For anthropic_client, vision_language.huggingface_vlm_client, huggingface_client, huggingface_tokenizer, test_openai_token_cost_estimator, model_summac (via summarization_metrics)
50
50
  torch>=1.13.1,<3.0.0 # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
51
51
  torchvision>=0.14.1,<3.0.0 # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
52
52
 
@@ -109,6 +109,8 @@ openai =
109
109
  pydantic~=2.0 # For model_dump(mode="json") - openai only requires pydantic>=1.9.0
110
110
  google =
111
111
  google-cloud-aiplatform~=1.44
112
+ together =
113
+ together~=1.1
112
114
  tsinghua =
113
115
  icetk~=0.0.4
114
116
  yandex =
@@ -121,6 +123,7 @@ models =
121
123
  crfm-helm[google]
122
124
  crfm-helm[mistral]
123
125
  crfm-helm[openai]
126
+ crfm-helm[together]
124
127
  crfm-helm[tsinghua]
125
128
  crfm-helm[yandex]
126
129
  vlm =
@@ -138,6 +141,8 @@ vlm =
138
141
 
139
142
  crfm-helm[images]
140
143
  crfm-helm[image2structure]
144
+
145
+ pycocoevalcap~=1.2
141
146
  image2structure =
142
147
  crfm-helm[images]
143
148
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: crfm-helm
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Benchmark for language models
5
5
  Home-page: https://github.com/stanford-crfm/helm
6
6
  Author: Stanford CRFM
@@ -25,7 +25,7 @@ Requires-Dist: tqdm~=4.64
25
25
  Requires-Dist: zstandard~=0.18.0
26
26
  Requires-Dist: sqlitedict~=1.7
27
27
  Requires-Dist: bottle~=0.12.23
28
- Requires-Dist: datasets~=2.15
28
+ Requires-Dist: datasets~=2.17
29
29
  Requires-Dist: pyarrow>=11.0.0
30
30
  Requires-Dist: pyarrow-hotfix~=0.6
31
31
  Requires-Dist: nltk~=3.7
@@ -34,7 +34,7 @@ Requires-Dist: rouge-score~=0.1.2
34
34
  Requires-Dist: scipy~=1.10
35
35
  Requires-Dist: uncertainty-calibration~=0.1.4
36
36
  Requires-Dist: scikit-learn~=1.1
37
- Requires-Dist: transformers~=4.37
37
+ Requires-Dist: transformers~=4.40
38
38
  Requires-Dist: torch<3.0.0,>=1.13.1
39
39
  Requires-Dist: torchvision<3.0.0,>=0.14.1
40
40
  Requires-Dist: google-api-python-client~=2.64
@@ -94,6 +94,8 @@ Requires-Dist: tiktoken~=0.3.3; extra == "openai"
94
94
  Requires-Dist: pydantic~=2.0; extra == "openai"
95
95
  Provides-Extra: google
96
96
  Requires-Dist: google-cloud-aiplatform~=1.44; extra == "google"
97
+ Provides-Extra: together
98
+ Requires-Dist: together~=1.1; extra == "together"
97
99
  Provides-Extra: tsinghua
98
100
  Requires-Dist: icetk~=0.0.4; extra == "tsinghua"
99
101
  Provides-Extra: yandex
@@ -106,6 +108,7 @@ Requires-Dist: crfm-helm[anthropic]; extra == "models"
106
108
  Requires-Dist: crfm-helm[google]; extra == "models"
107
109
  Requires-Dist: crfm-helm[mistral]; extra == "models"
108
110
  Requires-Dist: crfm-helm[openai]; extra == "models"
111
+ Requires-Dist: crfm-helm[together]; extra == "models"
109
112
  Requires-Dist: crfm-helm[tsinghua]; extra == "models"
110
113
  Requires-Dist: crfm-helm[yandex]; extra == "models"
111
114
  Provides-Extra: vlm
@@ -119,6 +122,7 @@ Requires-Dist: scipy~=1.10; extra == "vlm"
119
122
  Requires-Dist: torchvision<3.0.0,>=0.14.1; extra == "vlm"
120
123
  Requires-Dist: crfm-helm[images]; extra == "vlm"
121
124
  Requires-Dist: crfm-helm[image2structure]; extra == "vlm"
125
+ Requires-Dist: pycocoevalcap~=1.2; extra == "vlm"
122
126
  Provides-Extra: image2structure
123
127
  Requires-Dist: crfm-helm[images]; extra == "image2structure"
124
128
  Requires-Dist: latex~=0.7.0; extra == "image2structure"
@@ -32,7 +32,6 @@ src/helm/benchmark/server.py
32
32
  src/helm/benchmark/slurm_jobs.py
33
33
  src/helm/benchmark/slurm_runner.py
34
34
  src/helm/benchmark/test_data_preprocessor.py
35
- src/helm/benchmark/test_model_deployment_definition.py
36
35
  src/helm/benchmark/test_run_expander.py
37
36
  src/helm/benchmark/tokenizer_config_registry.py
38
37
  src/helm/benchmark/adaptation/__init__.py
@@ -327,13 +326,23 @@ src/helm/benchmark/scenarios/image_generation/relational_understanding_scenario.
327
326
  src/helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py
328
327
  src/helm/benchmark/scenarios/image_generation/winoground_scenario.py
329
328
  src/helm/benchmark/scenarios/vision_language/__init__.py
329
+ src/helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py
330
330
  src/helm/benchmark/scenarios/vision_language/bingo_scenario.py
331
+ src/helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py
332
+ src/helm/benchmark/scenarios/vision_language/flickr30k_scenario.py
333
+ src/helm/benchmark/scenarios/vision_language/gqa_scenario.py
331
334
  src/helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py
332
335
  src/helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py
336
+ src/helm/benchmark/scenarios/vision_language/math_vista_scenario.py
333
337
  src/helm/benchmark/scenarios/vision_language/mementos_scenario.py
338
+ src/helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py
334
339
  src/helm/benchmark/scenarios/vision_language/mme_scenario.py
335
340
  src/helm/benchmark/scenarios/vision_language/mmmu_scenario.py
341
+ src/helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py
342
+ src/helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py
336
343
  src/helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py
344
+ src/helm/benchmark/scenarios/vision_language/originality_scenario.py
345
+ src/helm/benchmark/scenarios/vision_language/pairs_scenario.py
337
346
  src/helm/benchmark/scenarios/vision_language/pope_scenario.py
338
347
  src/helm/benchmark/scenarios/vision_language/seed_bench_scenario.py
339
348
  src/helm/benchmark/scenarios/vision_language/unicorn_scenario.py
@@ -360,10 +369,12 @@ src/helm/benchmark/static/info-icon.png
360
369
  src/helm/benchmark/static/json-urls.js
361
370
  src/helm/benchmark/static/plot-captions.js
362
371
  src/helm/benchmark/static/schema_classic.yaml
372
+ src/helm/benchmark/static/schema_image2structure.yaml
363
373
  src/helm/benchmark/static/schema_instruction_following.yaml
364
374
  src/helm/benchmark/static/schema_lite.yaml
365
375
  src/helm/benchmark/static/schema_mmlu.yaml
366
376
  src/helm/benchmark/static/schema_unitxt.yaml
377
+ src/helm/benchmark/static/schema_vhelm_lite.yaml
367
378
  src/helm/benchmark/static/schema_vlm.yaml
368
379
  src/helm/benchmark/static/utils.js
369
380
  src/helm/benchmark/static/images/crfm-logo.png
@@ -399,8 +410,8 @@ src/helm/benchmark/static_build/assets/google-06d997ad.png
399
410
  src/helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png
400
411
  src/helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png
401
412
  src/helm/benchmark/static_build/assets/helmhero-28e90f4d.png
402
- src/helm/benchmark/static_build/assets/index-5088afcb.css
403
- src/helm/benchmark/static_build/assets/index-d839df55.js
413
+ src/helm/benchmark/static_build/assets/index-737eef9e.js
414
+ src/helm/benchmark/static_build/assets/index-878a1094.css
404
415
  src/helm/benchmark/static_build/assets/meta-5580e9f1.png
405
416
  src/helm/benchmark/static_build/assets/microsoft-f5ee5016.png
406
417
  src/helm/benchmark/static_build/assets/mistral-18e1be23.png
@@ -545,6 +556,7 @@ src/helm/clients/image_generation/mindalle/utils/config.py
545
556
  src/helm/clients/image_generation/mindalle/utils/sampling.py
546
557
  src/helm/clients/image_generation/mindalle/utils/utils.py
547
558
  src/helm/clients/vision_language/__init__.py
559
+ src/helm/clients/vision_language/huggingface_vision2seq_client.py
548
560
  src/helm/clients/vision_language/huggingface_vlm_client.py
549
561
  src/helm/clients/vision_language/idefics_client.py
550
562
  src/helm/clients/vision_language/open_flamingo_client.py
@@ -10,7 +10,7 @@ tqdm~=4.64
10
10
  zstandard~=0.18.0
11
11
  sqlitedict~=1.7
12
12
  bottle~=0.12.23
13
- datasets~=2.15
13
+ datasets~=2.17
14
14
  pyarrow>=11.0.0
15
15
  pyarrow-hotfix~=0.6
16
16
  nltk~=3.7
@@ -19,7 +19,7 @@ rouge-score~=0.1.2
19
19
  scipy~=1.10
20
20
  uncertainty-calibration~=0.1.4
21
21
  scikit-learn~=1.1
22
- transformers~=4.37
22
+ transformers~=4.40
23
23
  torch<3.0.0,>=1.13.1
24
24
  torchvision<3.0.0,>=0.14.1
25
25
  google-api-python-client~=2.64
@@ -137,6 +137,7 @@ crfm-helm[anthropic]
137
137
  crfm-helm[google]
138
138
  crfm-helm[mistral]
139
139
  crfm-helm[openai]
140
+ crfm-helm[together]
140
141
  crfm-helm[tsinghua]
141
142
  crfm-helm[yandex]
142
143
 
@@ -167,6 +168,9 @@ simple-slurm~=0.2.6
167
168
  [summarization]
168
169
  summ-eval~=0.892
169
170
 
171
+ [together]
172
+ together~=1.1
173
+
170
174
  [tsinghua]
171
175
  icetk~=0.0.4
172
176
 
@@ -184,6 +188,7 @@ scipy~=1.10
184
188
  torchvision<3.0.0,>=0.14.1
185
189
  crfm-helm[images]
186
190
  crfm-helm[image2structure]
191
+ pycocoevalcap~=1.2
187
192
 
188
193
  [yandex]
189
194
  sentencepiece~=0.1.97
@@ -79,6 +79,7 @@ class InContextLearningMultimodalAdapter(InContextLearningAdapter, ABC):
79
79
  # Prompt
80
80
  prompt = MultimodalPrompt(
81
81
  global_prefix=self.adapter_spec.global_prefix,
82
+ global_suffix=self.adapter_spec.global_suffix,
82
83
  instructions=self.adapter_spec.instructions,
83
84
  train_instance_blocks=train_instance_blocks,
84
85
  eval_instance_block=eval_instance_block,
@@ -11,6 +11,9 @@ class MultimodalPrompt:
11
11
  # Global prefix, carried over from `AdapterSpec`
12
12
  global_prefix: str
13
13
 
14
+ # Global suffix, carried over from `AdapterSpec`
15
+ global_suffix: str
16
+
14
17
  # Instance prefix, carried over from `AdapterSpec`. What goes between the instruction and instances.
15
18
  instance_prefix: str
16
19
 
@@ -47,6 +50,10 @@ class MultimodalPrompt:
47
50
  if self.global_prefix:
48
51
  result = result.add_textual_prefix(self.global_prefix)
49
52
 
53
+ # Add the global suffix if one exists
54
+ if self.global_suffix:
55
+ result = result.add_textual_suffix(self.global_suffix)
56
+
50
57
  return result
51
58
 
52
59
  @property
@@ -32,6 +32,7 @@ class TestMultimodalContent(unittest.TestCase):
32
32
 
33
33
  prompt = MultimodalPrompt(
34
34
  global_prefix="[START]",
35
+ global_suffix="",
35
36
  instance_prefix="\n",
36
37
  instructions="Please answer the following questions about the images.",
37
38
  train_instance_blocks=train_instance_blocks,
@@ -67,6 +68,7 @@ class TestMultimodalContent(unittest.TestCase):
67
68
 
68
69
  prompt = MultimodalPrompt(
69
70
  global_prefix="",
71
+ global_suffix="",
70
72
  instance_prefix="\n",
71
73
  instructions="",
72
74
  train_instance_blocks=[],
@@ -18,7 +18,7 @@ class LilypondCompilerAnnotator(ImageCompilerAnnotator):
18
18
  """Annotator that compiles the text completions into a music sheet with LilyPond."""
19
19
 
20
20
  name: str = "lilypond_compiler"
21
- base_path = "/home/josselin/installs/lilypond-2.24.3/bin"
21
+ base_path = "lilypond-2.24.3/bin"
22
22
 
23
23
  def __init__(self, cache_config: CacheConfig, file_storage_path: str):
24
24
  super().__init__(cache_config, file_storage_path)
@@ -48,11 +48,27 @@ class TextPerturbation(Perturbation, ABC):
48
48
 
49
49
  description = replace(self.description, seed=seed)
50
50
 
51
+ perturbed_input: Input
52
+ if instance.input.multimedia_content:
53
+ perturbed_media_objects = []
54
+ for media_object in instance.input.multimedia_content.media_objects:
55
+ # Apply perturbations to the text data of the multimedia content
56
+ if media_object.is_type("text") and media_object.text is not None:
57
+ perturbed_media_objects.append(replace(media_object, text=self.perturb(media_object.text, rng)))
58
+ else:
59
+ perturbed_media_objects.append(media_object)
60
+
61
+ perturbed_input = Input(
62
+ multimedia_content=replace(instance.input.multimedia_content, media_objects=perturbed_media_objects)
63
+ )
64
+ else:
65
+ perturbed_input = Input(text=self.perturb(instance.input.text, rng))
66
+
51
67
  # Don't modify `id` of `Instance` here.
52
68
  # All the perturbed Instances generated from a single Instance should have the same ID.
53
69
  return replace(
54
70
  instance,
55
- input=Input(text=self.perturb(instance.input.text, rng)),
71
+ input=perturbed_input,
56
72
  references=references,
57
73
  perturbation=description,
58
74
  contrast_inputs=[instance.input],
@@ -2,6 +2,7 @@
2
2
  from typing import List
3
3
  import unittest
4
4
 
5
+ from helm.common.media_object import MediaObject, MultimediaObject
5
6
  from helm.benchmark.scenarios.scenario import Input, Instance, Output, Reference
6
7
  from .data_augmenter import DataAugmenter
7
8
  from .extra_space_perturbation import ExtraSpacePerturbation
@@ -33,6 +34,35 @@ def test_extra_space_perturbation():
33
34
  assert instances[1].references[0].output.text == "some name"
34
35
 
35
36
 
37
+ def test_multimodal_text_perturbation():
38
+ data_augmenter = DataAugmenter(perturbations=[ExtraSpacePerturbation(num_spaces=3)])
39
+ input: Input = Input(
40
+ multimedia_content=MultimediaObject(
41
+ [
42
+ MediaObject(text="Hello what is", content_type="text/plain"),
43
+ MediaObject(text="your name", content_type="text/plain"),
44
+ ]
45
+ )
46
+ )
47
+ instance: Instance = Instance(id="id0", input=input, references=[Reference(Output(text="some name"), tags=[])])
48
+ instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
49
+
50
+ assert len(instances) == 2
51
+
52
+ # Test that the first instance is unperturbed
53
+ assert instances[0].id == "id0"
54
+ assert instances[0].perturbation is None
55
+ media_objects = instances[0].input.multimedia_content.media_objects
56
+ assert media_objects[0].text == "Hello what is"
57
+ assert media_objects[1].text == "your name"
58
+
59
+ assert instances[1].id == "id0"
60
+ assert instances[1].perturbation.name == "extra_space"
61
+ media_objects = instances[1].input.multimedia_content.media_objects
62
+ assert media_objects[0].text == "Hello what is"
63
+ assert media_objects[1].text == "your name"
64
+
65
+
36
66
  def test_misspelling_perturbation():
37
67
  data_augmenter = DataAugmenter(perturbations=[MisspellingPerturbation(prob=1.0)])
38
68
  instance: Instance = Instance(
@@ -91,8 +91,15 @@ class EfficiencyMetric:
91
91
  window_service: WindowService = WindowServiceFactory.get_window_service(
92
92
  adapter_spec.model_deployment, tokenizer_service
93
93
  )
94
- prompt: str = request_state.request.prompt
95
- num_prompt_tokens: int = window_service.get_num_tokens(prompt)
94
+
95
+ prompt: str
96
+ num_prompt_tokens: int
97
+ if request_state.request.multimodal_prompt is not None:
98
+ prompt = request_state.request.multimodal_prompt.text
99
+ num_prompt_tokens = window_service.get_num_tokens(prompt)
100
+ else:
101
+ prompt = request_state.request.prompt
102
+ num_prompt_tokens = window_service.get_num_tokens(prompt)
96
103
 
97
104
  # Total number of tokens in the completion.
98
105
  num_completion_tokens: int = sum([len(completion.tokens) for completion in request_state.result.completions])
@@ -10,6 +10,7 @@ from helm.benchmark.metrics.metric_service import MetricService
10
10
  from helm.benchmark.metrics.statistic import Stat
11
11
  from helm.benchmark.scenarios.code_scenario import CodeReference
12
12
  from helm.benchmark.scenarios.scenario import Reference
13
+ from helm.common.optional_dependencies import handle_module_not_found_error
13
14
  from helm.common.request import GeneratedOutput
14
15
  from helm.benchmark.scenarios.math_scenario import is_equiv, is_equiv_chain_of_thought
15
16
  from nltk.metrics.scores import f_measure
@@ -21,6 +22,7 @@ import string
21
22
  from . import code_metrics_helper
22
23
  import nltk
23
24
 
25
+
24
26
  try:
25
27
  nltk.data.find("tokenizers/punkt")
26
28
  except LookupError:
@@ -188,6 +190,19 @@ def bleu_4(gold: str, pred: str) -> float:
188
190
  return sentence_bleu([word_tokenize(gold)], word_tokenize(pred), weights=(0, 0, 0, 1))
189
191
 
190
192
 
193
+ def cider(gold: str, pred: str) -> float:
194
+ try:
195
+ from pycocoevalcap.cider.cider import Cider
196
+ except ModuleNotFoundError as e:
197
+ handle_module_not_found_error(e, ["vlm"])
198
+
199
+ cider_evaluator = Cider()
200
+ candidate = {"caption": [pred]}
201
+ reference = {"caption": [gold]}
202
+ average_score, _ = cider_evaluator.compute_score(reference, candidate)
203
+ return average_score
204
+
205
+
191
206
  def extract_set_from_text(
192
207
  set_str: str,
193
208
  set_start_str: str = " is ",
@@ -325,6 +340,7 @@ def compute_reference_metrics(
325
340
  "math_equiv_chain_of_thought": is_equiv_chain_of_thought,
326
341
  "code_eval_acc": code_eval,
327
342
  "pass": code_eval,
343
+ "cider": cider,
328
344
  "f1_score": f1_score,
329
345
  "rouge_1": get_rouge_function("rouge1"),
330
346
  "rouge_2": get_rouge_function("rouge2"),
@@ -28,7 +28,7 @@ from helm.benchmark.metrics.vision_language.image_utils import (
28
28
  pixel_similarity,
29
29
  sift_similarity,
30
30
  )
31
- from helm.benchmark.metrics.vision_language.emd_utils import compute_emd_recursive
31
+ from helm.benchmark.metrics.vision_language.emd_utils import compute_emd_recursive, get_most_frequent_color
32
32
 
33
33
  try:
34
34
  from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
@@ -78,7 +78,9 @@ class AnnotatedImageMetrics(Metric):
78
78
 
79
79
  # Metric names
80
80
  COMPILE_METRIC: str = "compilation_success"
81
- EARTH_MOVER_SIMILARITY: str = "earth_mover_similarity"
81
+ BLOCK_EARTH_MOVER_SIMILARITY_NORM1: str = "block_emd_similarity_white"
82
+ BLOCK_EARTH_MOVER_SIMILARITY_NORM2: str = "block_emd_similarity_median_color"
83
+ BLOCK_EARTH_MOVER_SIMILARITY: str = "block_emd_similarity"
82
84
  PIXEL_SIMILARITY: str = "pixel_similarity"
83
85
  SIFT_SIMILARITY: str = "sift_similarity"
84
86
  LPIPS_SIMILARITY: str = "lpips_similarity"
@@ -106,7 +108,12 @@ class AnnotatedImageMetrics(Metric):
106
108
  metrics: List[AnnotatedMetric] = [
107
109
  AnnotatedMetric(self.PIXEL_SIMILARITY, pixel_similarity, "image_np_gray"),
108
110
  AnnotatedMetric(self.SIFT_SIMILARITY, sift_similarity, "image_np"),
109
- AnnotatedMetric(self.EARTH_MOVER_SIMILARITY, self.compute_emd_similarity_recursive, "image_PIL"),
111
+ # Raw block EMD
112
+ AnnotatedMetric(self.BLOCK_EARTH_MOVER_SIMILARITY, self.compute_block_emd_raw, "image_PIL"),
113
+ # Normalized block EMD against white
114
+ AnnotatedMetric(self.BLOCK_EARTH_MOVER_SIMILARITY_NORM1, self.compute_block_emd_white, "image_PIL"),
115
+ # Normalized block EMD against median
116
+ AnnotatedMetric(self.BLOCK_EARTH_MOVER_SIMILARITY_NORM2, self.compute_block_emd_median, "image_PIL"),
110
117
  AnnotatedMetric(self.LPIPS_SIMILARITY, self.lpips_similarity, "image_PIL"),
111
118
  AnnotatedMetric(self.FID_SIMILARITY, self.fid_similarity, "image_PIL"),
112
119
  AnnotatedMetric(self.SSIM_SIMILARITY, self.compute_ssim, "image_np_gray"),
@@ -407,7 +414,7 @@ class AnnotatedImageMetrics(Metric):
407
414
  result = _edit_similarity(completion_tokens, truncated_reference_tokens)
408
415
  return result
409
416
 
410
- def compute_emd_similarity_recursive(
417
+ def compute_block_emd_white(
411
418
  self,
412
419
  pred_image: Image.Image,
413
420
  ref_image: Image.Image,
@@ -417,17 +424,23 @@ class AnnotatedImageMetrics(Metric):
417
424
  weight_most_frequent_color: float = 0.001,
418
425
  use_tqdm: bool = False,
419
426
  ):
420
- emd_value = compute_emd_recursive(
421
- pred_image,
422
- ref_image,
423
- threshold_most_frequent_color,
424
- patch_size,
425
- max_num_patches,
426
- weight_most_frequent_color,
427
- use_tqdm,
428
- )
427
+ """Computes the block Earth Moving Distance (EMD). This attempts to
428
+ speed up EMD for images with huge areas by considering movement/transformation
429
+ of blocks of pixels. The score is normalized by the EMD computed against an all-white image.
430
+ """
429
431
 
430
- def do_it():
432
+ def compute_numerator():
433
+ return self.compute_block_emd_raw_wrapper(
434
+ pred_image,
435
+ ref_image,
436
+ threshold_most_frequent_color,
437
+ patch_size,
438
+ max_num_patches,
439
+ weight_most_frequent_color,
440
+ use_tqdm,
441
+ )
442
+
443
+ def compute_denominator():
431
444
  constant_image = Image.new("RGB", ref_image.size, (255, 255, 255)) # default color is white
432
445
  value = compute_emd_recursive(
433
446
  constant_image,
@@ -443,8 +456,120 @@ class AnnotatedImageMetrics(Metric):
443
456
  hash_dict = {
444
457
  "reference_image": str(AnnotatedImageMetrics.HASH_FUNC(ref_image, hash_size=self.HASH_LENGTH)),
445
458
  }
446
- cache_key = {"metric_name": f"intermediate_{self.EARTH_MOVER_SIMILARITY}", **hash_dict}
459
+ cache_key_numerator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY}", **hash_dict}
460
+ cache_key_denominator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY_NORM1}", **hash_dict}
461
+
462
+ assert self._cache is not None
463
+ emd_raw, _ = self._cache.get(cache_key_numerator, compute_numerator)
464
+ emd_base, _ = self._cache.get(cache_key_denominator, compute_denominator)
465
+
466
+ return 1.0 - emd_raw["value"] / emd_base["value"]
467
+
468
+ def compute_block_emd_median(
469
+ self,
470
+ pred_image: Image.Image,
471
+ ref_image: Image.Image,
472
+ threshold_most_frequent_color: float = 0.5,
473
+ patch_size: Tuple[int, int] = (8, 8),
474
+ max_num_patches: int = 100,
475
+ weight_most_frequent_color: float = 0.001,
476
+ use_tqdm: bool = False,
477
+ ):
478
+ """Same as compute_block_emd_white EXCEPT that
479
+ the normalization is against an image filled with the reference image's most frequent color.
480
+ """
481
+
482
+ def compute_numerator():
483
+ return self.compute_block_emd_raw_wrapper(
484
+ pred_image,
485
+ ref_image,
486
+ threshold_most_frequent_color,
487
+ patch_size,
488
+ max_num_patches,
489
+ weight_most_frequent_color,
490
+ use_tqdm,
491
+ )
492
+
493
+ def compute_denominator():
494
+ ref_img_np = np.array(ref_image)
495
+ (rgb_most_frequent_color, _) = get_most_frequent_color(ref_img_np)
496
+
497
+ # Most frequent color as base
498
+ constant_image = Image.new("RGB", ref_image.size, tuple(rgb_most_frequent_color)) # type: ignore
499
+ value = compute_emd_recursive(
500
+ constant_image,
501
+ ref_image,
502
+ threshold_most_frequent_color,
503
+ patch_size,
504
+ max_num_patches,
505
+ weight_most_frequent_color,
506
+ use_tqdm,
507
+ )
508
+ return {"value": value}
509
+
510
+ hash_dict = {
511
+ "reference_image": str(AnnotatedImageMetrics.HASH_FUNC(ref_image, hash_size=self.HASH_LENGTH)),
512
+ }
513
+ cache_key_numerator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY}", **hash_dict}
514
+ cache_key_denominator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY_NORM2}", **hash_dict}
515
+
516
+ assert self._cache is not None
517
+ emd_raw, _ = self._cache.get(cache_key_numerator, compute_numerator)
518
+ emd_base, _ = self._cache.get(cache_key_denominator, compute_denominator)
519
+
520
+ return 1.0 - emd_raw["value"] / emd_base["value"]
521
+
522
+ def compute_block_emd_raw(
523
+ self,
524
+ pred_image: Image.Image,
525
+ ref_image: Image.Image,
526
+ threshold_most_frequent_color: float = 0.5,
527
+ patch_size: Tuple[int, int] = (8, 8),
528
+ max_num_patches: int = 100,
529
+ weight_most_frequent_color: float = 0.001,
530
+ use_tqdm: bool = False,
531
+ ):
532
+ def compute():
533
+ return self.compute_block_emd_raw_wrapper(
534
+ pred_image,
535
+ ref_image,
536
+ threshold_most_frequent_color,
537
+ patch_size,
538
+ max_num_patches,
539
+ weight_most_frequent_color,
540
+ use_tqdm,
541
+ )
542
+
543
+ hash_dict = {
544
+ "reference_image": str(AnnotatedImageMetrics.HASH_FUNC(ref_image, hash_size=self.HASH_LENGTH)),
545
+ }
546
+ cache_key = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY}", **hash_dict}
447
547
  assert self._cache is not None
448
- response_metric, _ = self._cache.get(cache_key, do_it)
548
+ emd_raw, _ = self._cache.get(cache_key, compute)
549
+
550
+ return emd_raw["value"]
449
551
 
450
- return 1.0 - emd_value / response_metric["value"]
552
+ def compute_block_emd_raw_wrapper(
553
+ self,
554
+ pred_image: Image.Image,
555
+ ref_image: Image.Image,
556
+ threshold_most_frequent_color: float = 0.5,
557
+ patch_size: Tuple[int, int] = (8, 8),
558
+ max_num_patches: int = 100,
559
+ weight_most_frequent_color: float = 0.001,
560
+ use_tqdm: bool = False,
561
+ ):
562
+ """Computes the block Earth Moving Distance (EMD). This attempts to
563
+ speed up EMD for images with huge areas by considering movement/transformation
564
+ of blocks of pixels. Returns the raw (unnormalized) EMD as {"value": emd_value}.
565
+ """
566
+ emd_value = compute_emd_recursive(
567
+ pred_image,
568
+ ref_image,
569
+ threshold_most_frequent_color,
570
+ patch_size,
571
+ max_num_patches,
572
+ weight_most_frequent_color,
573
+ use_tqdm,
574
+ )
575
+ return {"value": emd_value}
@@ -32,6 +32,7 @@ ANTHROPIC_CLAUDE_3_MODEL_TAG: str = "ANTHROPIC_CLAUDE_3_MODEL_TAG"
32
32
 
33
33
  GOOGLE_PALM_2_MODEL_TAG: str = "GOOGLE_PALM_2_MODEL_TAG"
34
34
  GOOGLE_GEMINI_MODEL_TAG: str = "GOOGLE_GEMINI_MODEL_TAG"
35
+ GOOGLE_GEMINI_PRO_VISION_V1_TAG: str = "GOOGLE_GEMINI_PRO_VISION_V1_TAG"
35
36
  GOOGLE_GEMMA_INSTRUCT_MODEL_TAG: str = "GOOGLE_GEMMA_INSTRUCT_MODEL_TAG"
36
37
 
37
38
  # Models which emit garbage tokens when temperature=0.
@@ -159,7 +160,10 @@ def register_model_metadata(model_metadata: ModelMetadata) -> None:
159
160
  def get_model_metadata(model_name: str) -> ModelMetadata:
160
161
  """Return the `ModelMetadata` for the model name."""
161
162
  if model_name not in MODEL_NAME_TO_MODEL_METADATA:
162
- raise ValueError(f"No model with name: {model_name}")
163
+ raise ValueError(
164
+ f"No model metadata for model name: {model_name} - "
165
+ "did you remember to add this model to model_metadata.yaml?"
166
+ )
163
167
 
164
168
  return MODEL_NAME_TO_MODEL_METADATA[model_name]
165
169