crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (184)
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  10. helm/benchmark/annotation/call_center_annotator.py +247 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +32 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +31 -44
  18. helm/benchmark/annotation/model_as_judge.py +45 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  20. helm/benchmark/annotation/xstest_annotator.py +110 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +57 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  30. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  31. helm/benchmark/model_metadata_registry.py +3 -3
  32. helm/benchmark/presentation/test_run_entry.py +1 -0
  33. helm/benchmark/run.py +15 -0
  34. helm/benchmark/run_expander.py +56 -30
  35. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  36. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  37. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  38. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  39. helm/benchmark/run_specs/finance_run_specs.py +78 -1
  40. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +92 -21
  42. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  43. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  44. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  45. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  46. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  47. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  48. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  49. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  50. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  51. helm/benchmark/scenarios/scenario.py +1 -1
  52. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  53. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  54. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  55. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  56. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  57. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  58. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  59. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  60. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  61. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  62. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  63. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  64. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  65. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  66. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  67. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  68. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  69. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  70. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  71. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  72. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  73. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  74. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  75. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  76. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  78. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  79. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  80. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  81. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  82. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  83. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  84. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  85. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  86. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  87. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  88. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  89. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  91. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  92. helm/benchmark/server.py +1 -6
  93. helm/benchmark/static/schema_air_bench.yaml +750 -750
  94. helm/benchmark/static/schema_bhasa.yaml +709 -0
  95. helm/benchmark/static/schema_call_center.yaml +232 -0
  96. helm/benchmark/static/schema_cleva.yaml +768 -0
  97. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  98. helm/benchmark/static/schema_ewok.yaml +367 -0
  99. helm/benchmark/static/schema_finance.yaml +55 -9
  100. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  101. helm/benchmark/static/schema_safety.yaml +247 -0
  102. helm/benchmark/static/schema_tables.yaml +124 -7
  103. helm/benchmark/static/schema_thai.yaml +21 -0
  104. helm/benchmark/static/schema_vhelm.yaml +96 -91
  105. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  106. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  107. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  108. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  109. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  110. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  111. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  112. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  113. helm/benchmark/static_build/index.html +2 -2
  114. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  115. helm/clients/ai21_client.py +71 -1
  116. helm/clients/anthropic_client.py +7 -19
  117. helm/clients/huggingface_client.py +38 -37
  118. helm/clients/nvidia_nim_client.py +35 -0
  119. helm/clients/openai_client.py +2 -3
  120. helm/clients/palmyra_client.py +25 -0
  121. helm/clients/perspective_api_client.py +11 -6
  122. helm/clients/test_client.py +4 -6
  123. helm/clients/vision_language/open_flamingo_client.py +1 -2
  124. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  125. helm/common/images_utils.py +6 -0
  126. helm/common/mongo_key_value_store.py +2 -1
  127. helm/common/request.py +16 -0
  128. helm/config/model_deployments.yaml +315 -332
  129. helm/config/model_metadata.yaml +384 -110
  130. helm/config/tokenizer_configs.yaml +116 -11
  131. helm/proxy/example_queries.py +14 -21
  132. helm/proxy/services/server_service.py +1 -2
  133. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  134. helm/tokenizers/ai21_tokenizer.py +51 -59
  135. helm/tokenizers/cohere_tokenizer.py +0 -75
  136. helm/tokenizers/huggingface_tokenizer.py +0 -1
  137. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  138. helm/benchmark/static/benchmarking.css +0 -156
  139. helm/benchmark/static/benchmarking.js +0 -1705
  140. helm/benchmark/static/config.js +0 -3
  141. helm/benchmark/static/general.js +0 -122
  142. helm/benchmark/static/images/crfm-logo.png +0 -0
  143. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  144. helm/benchmark/static/images/helm-logo.png +0 -0
  145. helm/benchmark/static/images/language-model-helm.png +0 -0
  146. helm/benchmark/static/images/organizations/ai21.png +0 -0
  147. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  148. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  149. helm/benchmark/static/images/organizations/cohere.png +0 -0
  150. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  151. helm/benchmark/static/images/organizations/google.png +0 -0
  152. helm/benchmark/static/images/organizations/meta.png +0 -0
  153. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  154. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  155. helm/benchmark/static/images/organizations/openai.png +0 -0
  156. helm/benchmark/static/images/organizations/together.png +0 -0
  157. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  158. helm/benchmark/static/images/organizations/yandex.png +0 -0
  159. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  160. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  161. helm/benchmark/static/index.html +0 -68
  162. helm/benchmark/static/info-icon.png +0 -0
  163. helm/benchmark/static/json-urls.js +0 -69
  164. helm/benchmark/static/plot-captions.js +0 -27
  165. helm/benchmark/static/utils.js +0 -285
  166. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  167. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  168. helm/benchmark/window_services/ai21_window_service.py +0 -247
  169. helm/benchmark/window_services/cohere_window_service.py +0 -101
  170. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  171. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  172. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  173. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  174. helm/tokenizers/ice_tokenizer.py +0 -30
  175. helm/tokenizers/test_ice_tokenizer.py +0 -57
  176. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  177. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  178. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  179. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  180. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  181. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  182. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  183. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  184. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: crfm-helm
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: Benchmark for language models
5
5
  Home-page: https://github.com/stanford-crfm/helm
6
6
  Author: Stanford CRFM
@@ -28,8 +28,7 @@ Requires-Dist: bottle ~=0.12.23
28
28
  Requires-Dist: datasets ~=2.17
29
29
  Requires-Dist: pyarrow >=11.0.0
30
30
  Requires-Dist: pyarrow-hotfix ~=0.6
31
- Requires-Dist: nltk ~=3.7
32
- Requires-Dist: pyext ~=0.7
31
+ Requires-Dist: nltk <3.8.2,~=3.7
33
32
  Requires-Dist: rouge-score ~=0.1.2
34
33
  Requires-Dist: scipy ~=1.10
35
34
  Requires-Dist: uncertainty-calibration ~=0.1.4
@@ -37,7 +36,8 @@ Requires-Dist: scikit-learn ~=1.1
37
36
  Requires-Dist: transformers ~=4.40
38
37
  Requires-Dist: torch <3.0.0,>=1.13.1
39
38
  Requires-Dist: torchvision <3.0.0,>=0.14.1
40
- Requires-Dist: google-api-python-client ~=2.64
39
+ Provides-Extra: accelerate
40
+ Requires-Dist: accelerate ~=0.25 ; extra == 'accelerate'
41
41
  Provides-Extra: aleph-alpha
42
42
  Requires-Dist: aleph-alpha-client ~=2.14.0 ; extra == 'aleph-alpha'
43
43
  Requires-Dist: tokenizers >=0.13.3 ; extra == 'aleph-alpha'
@@ -55,6 +55,7 @@ Requires-Dist: crfm-helm[models] ; extra == 'all'
55
55
  Requires-Dist: crfm-helm[mongo] ; extra == 'all'
56
56
  Requires-Dist: crfm-helm[heim] ; extra == 'all'
57
57
  Requires-Dist: crfm-helm[vlm] ; extra == 'all'
58
+ Requires-Dist: crfm-helm[bhasa] ; extra == 'all'
58
59
  Provides-Extra: allenai
59
60
  Requires-Dist: ai2-olmo ~=0.2 ; extra == 'allenai'
60
61
  Provides-Extra: amazon
@@ -64,6 +65,10 @@ Requires-Dist: botocore ~=1.31.57 ; extra == 'amazon'
64
65
  Provides-Extra: anthropic
65
66
  Requires-Dist: anthropic ~=0.17 ; extra == 'anthropic'
66
67
  Requires-Dist: websocket-client ~=1.3.2 ; extra == 'anthropic'
68
+ Provides-Extra: bhasa
69
+ Requires-Dist: pythainlp ==5.0.0 ; extra == 'bhasa'
70
+ Requires-Dist: pyonmttok ==1.37.0 ; extra == 'bhasa'
71
+ Requires-Dist: sacrebleu ~=2.2.1 ; extra == 'bhasa'
67
72
  Provides-Extra: cleva
68
73
  Requires-Dist: unidecode ==1.3.6 ; extra == 'cleva'
69
74
  Requires-Dist: pypinyin ==0.49.0 ; extra == 'cleva'
@@ -83,8 +88,9 @@ Requires-Dist: flake8 ==5.0.4 ; extra == 'dev'
83
88
  Provides-Extra: google
84
89
  Requires-Dist: google-cloud-aiplatform ~=1.48 ; extra == 'google'
85
90
  Provides-Extra: heim
86
- Requires-Dist: gdown ~=4.4.0 ; extra == 'heim'
91
+ Requires-Dist: gdown ~=5.1 ; extra == 'heim'
87
92
  Requires-Dist: diffusers ~=0.24.0 ; extra == 'heim'
93
+ Requires-Dist: icetk ~=0.0.4 ; extra == 'heim'
88
94
  Requires-Dist: jax ~=0.4.13 ; extra == 'heim'
89
95
  Requires-Dist: jaxlib ~=0.4.13 ; extra == 'heim'
90
96
  Requires-Dist: crfm-helm[openai] ; extra == 'heim'
@@ -108,29 +114,33 @@ Requires-Dist: tensorflow ~=2.11.1 ; extra == 'heim'
108
114
  Requires-Dist: timm ~=0.6.12 ; extra == 'heim'
109
115
  Requires-Dist: torch-fidelity ~=0.3.0 ; extra == 'heim'
110
116
  Requires-Dist: torchmetrics ~=0.11.1 ; extra == 'heim'
117
+ Requires-Dist: scikit-image ~=0.21.0 ; extra == 'heim'
111
118
  Requires-Dist: crfm-helm[images] ; extra == 'heim'
112
119
  Provides-Extra: human-evaluation
113
120
  Requires-Dist: scaleapi ~=2.13.0 ; extra == 'human-evaluation'
114
121
  Requires-Dist: surge-api ~=1.1.0 ; extra == 'human-evaluation'
115
- Provides-Extra: image2structure
116
- Requires-Dist: crfm-helm[images] ; extra == 'image2structure'
117
- Requires-Dist: latex ~=0.7.0 ; extra == 'image2structure'
118
- Requires-Dist: pdf2image ~=1.16.3 ; extra == 'image2structure'
119
- Requires-Dist: selenium ~=4.17.2 ; extra == 'image2structure'
120
- Requires-Dist: html2text ~=2024.2.26 ; extra == 'image2structure'
121
- Requires-Dist: opencv-python ~=4.7.0.68 ; extra == 'image2structure'
122
- Requires-Dist: lpips ~=0.1.4 ; extra == 'image2structure'
123
- Requires-Dist: imagehash ~=4.3.1 ; extra == 'image2structure'
122
+ Provides-Extra: image2struct
123
+ Requires-Dist: crfm-helm[images] ; extra == 'image2struct'
124
+ Requires-Dist: latex ~=0.7.0 ; extra == 'image2struct'
125
+ Requires-Dist: pdf2image ~=1.16.3 ; extra == 'image2struct'
126
+ Requires-Dist: selenium ~=4.17.2 ; extra == 'image2struct'
127
+ Requires-Dist: html2text ~=2024.2.26 ; extra == 'image2struct'
128
+ Requires-Dist: opencv-python ~=4.7.0.68 ; extra == 'image2struct'
129
+ Requires-Dist: lpips ~=0.1.4 ; extra == 'image2struct'
130
+ Requires-Dist: imagehash ~=4.3.1 ; extra == 'image2struct'
124
131
  Provides-Extra: images
125
- Requires-Dist: accelerate ~=0.25.0 ; extra == 'images'
132
+ Requires-Dist: crfm-helm[accelerate] ; extra == 'images'
126
133
  Requires-Dist: pillow ~=10.2 ; extra == 'images'
127
134
  Provides-Extra: metrics
135
+ Requires-Dist: google-api-python-client ~=2.64 ; extra == 'metrics'
128
136
  Requires-Dist: numba ~=0.56.4 ; extra == 'metrics'
129
137
  Requires-Dist: pytrec-eval ==0.5 ; extra == 'metrics'
130
138
  Requires-Dist: sacrebleu ~=2.2.1 ; extra == 'metrics'
131
139
  Provides-Extra: mistral
132
140
  Requires-Dist: mistralai ~=0.0.11 ; extra == 'mistral'
133
141
  Provides-Extra: models
142
+ Requires-Dist: crfm-helm[ai21] ; extra == 'models'
143
+ Requires-Dist: crfm-helm[accelerate] ; extra == 'models'
134
144
  Requires-Dist: crfm-helm[aleph-alpha] ; extra == 'models'
135
145
  Requires-Dist: crfm-helm[allenai] ; extra == 'models'
136
146
  Requires-Dist: crfm-helm[amazon] ; extra == 'models'
@@ -141,7 +151,6 @@ Requires-Dist: crfm-helm[mistral] ; extra == 'models'
141
151
  Requires-Dist: crfm-helm[openai] ; extra == 'models'
142
152
  Requires-Dist: crfm-helm[reka] ; extra == 'models'
143
153
  Requires-Dist: crfm-helm[together] ; extra == 'models'
144
- Requires-Dist: crfm-helm[tsinghua] ; extra == 'models'
145
154
  Requires-Dist: crfm-helm[yandex] ; extra == 'models'
146
155
  Requires-Dist: crfm-helm[openvino] ; extra == 'models'
147
156
  Provides-Extra: mongo
@@ -161,7 +170,7 @@ Requires-Dist: gunicorn ~=20.1.0 ; extra == 'proxy-server'
161
170
  Provides-Extra: reka
162
171
  Requires-Dist: reka-api ~=2.0.0 ; extra == 'reka'
163
172
  Provides-Extra: scenarios
164
- Requires-Dist: gdown ~=4.4.0 ; extra == 'scenarios'
173
+ Requires-Dist: gdown ~=5.1 ; extra == 'scenarios'
165
174
  Requires-Dist: sympy ~=1.11.1 ; extra == 'scenarios'
166
175
  Requires-Dist: xlrd ~=2.0.1 ; extra == 'scenarios'
167
176
  Provides-Extra: slurm
@@ -170,22 +179,20 @@ Provides-Extra: summarization
170
179
  Requires-Dist: summ-eval ~=0.892 ; extra == 'summarization'
171
180
  Provides-Extra: together
172
181
  Requires-Dist: together ~=1.1 ; extra == 'together'
173
- Provides-Extra: tsinghua
174
- Requires-Dist: icetk ~=0.0.4 ; extra == 'tsinghua'
175
182
  Provides-Extra: unitxt
176
183
  Requires-Dist: evaluate ~=0.4.1 ; extra == 'unitxt'
177
184
  Provides-Extra: vlm
178
185
  Requires-Dist: crfm-helm[openai] ; extra == 'vlm'
179
186
  Requires-Dist: einops ~=0.7.0 ; extra == 'vlm'
180
187
  Requires-Dist: einops-exts ~=0.0.4 ; extra == 'vlm'
181
- Requires-Dist: open-clip-torch ~=2.24.0 ; extra == 'vlm'
182
- Requires-Dist: torch ~=2.1.2 ; extra == 'vlm'
188
+ Requires-Dist: open-clip-torch ~=2.24 ; extra == 'vlm'
189
+ Requires-Dist: torch ~=2.1 ; extra == 'vlm'
183
190
  Requires-Dist: transformers-stream-generator ~=0.0.4 ; extra == 'vlm'
184
191
  Requires-Dist: scipy ~=1.10 ; extra == 'vlm'
185
192
  Requires-Dist: torchvision <3.0.0,>=0.14.1 ; extra == 'vlm'
186
193
  Requires-Dist: crfm-helm[reka] ; extra == 'vlm'
187
194
  Requires-Dist: crfm-helm[images] ; extra == 'vlm'
188
- Requires-Dist: crfm-helm[image2structure] ; extra == 'vlm'
195
+ Requires-Dist: crfm-helm[image2struct] ; extra == 'vlm'
189
196
  Requires-Dist: pycocoevalcap ~=1.2 ; extra == 'vlm'
190
197
  Provides-Extra: yandex
191
198
  Requires-Dist: sentencepiece ~=0.1.97 ; extra == 'yandex'
@@ -209,39 +216,6 @@ Welcome! The **`crfm-helm`** Python package contains code used in the **Holistic
209
216
 
210
217
  To get started, refer to [the documentation on Read the Docs](https://crfm-helm.readthedocs.io/) for how to install and run the package.
211
218
 
212
- ## Directory Structure
213
-
214
- The directory structure for this repo is as follows
215
-
216
- ```
217
- ├── docs # MD used to generate readthedocs
218
-
219
- ├── scripts # Python utility scripts for HELM
220
- │ ├── cache
221
- │ ├── data_overlap # Calculate train test overlap
222
- │ │ ├── common
223
- │ │ ├── scenarios
224
- │ │ └── test
225
- │ ├── efficiency
226
- │ ├── fact_completion
227
- │ ├── offline_eval
228
- │ └── scale
229
- └── src
230
- ├── helm # Benchmarking Scripts for HELM
231
- │ │
232
- │ ├── benchmark # Main Python code for running HELM
233
- │ │ │
234
- │ │ └── static # Current JS (Jquery) code for rendering front-end
235
- │ │ │
236
- │ │ └── ...
237
- │ │
238
- │ ├── common # Additional Python code for running HELM
239
- │ │
240
- │ └── proxy # Python code for external web requests
241
-
242
- └── helm-frontend # New React Front-end
243
- ```
244
-
245
219
  # Holistic Evaluation of Text-To-Image Models
246
220
 
247
221
  <img src="https://github.com/stanford-crfm/helm/raw/heim/src/helm/benchmark/static/heim/images/heim-logo.png" alt="" width="800"/>