crfm-helm 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +13 -3
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +96 -63
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/live_qa_annotator.py +84 -0
- helm/benchmark/annotation/medication_qa_annotator.py +81 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +29 -71
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +16 -2
- helm/benchmark/run_expander.py +77 -0
- helm/benchmark/run_spec_factory.py +4 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
- helm/benchmark/run_specs/experimental_run_specs.py +33 -0
- helm/benchmark/run_specs/finance_run_specs.py +33 -0
- helm/benchmark/run_specs/vlm_run_specs.py +168 -45
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +0 -4
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +4 -2
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +6 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_finance.yaml +143 -0
- helm/benchmark/static/schema_image2structure.yaml +254 -111
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_tables.yaml +200 -0
- helm/benchmark/static/schema_thai.yaml +223 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +294 -293
- helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
- helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/clients/anthropic_client.py +43 -9
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +71 -12
- helm/clients/openai_client.py +9 -2
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +3 -3
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +129 -23
- helm/clients/vertexai_client.py +62 -18
- helm/clients/vision_language/huggingface_vlm_client.py +1 -0
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +84 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +19 -0
- helm/config/model_deployments.yaml +412 -18
- helm/config/model_metadata.yaml +447 -25
- helm/config/tokenizer_configs.yaml +93 -1
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/services/server_service.py +1 -1
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +44 -2
- helm/tokenizers/huggingface_tokenizer.py +36 -13
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
- helm/benchmark/static_build/assets/index-878a1094.css +0 -1
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: crfm-helm
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: Benchmark for language models
|
|
5
5
|
Home-page: https://github.com/stanford-crfm/helm
|
|
6
6
|
Author: Stanford CRFM
|
|
@@ -70,6 +70,8 @@ Requires-Dist: pypinyin ==0.49.0 ; extra == 'cleva'
|
|
|
70
70
|
Requires-Dist: jieba ==0.42.1 ; extra == 'cleva'
|
|
71
71
|
Requires-Dist: opencc ==1.1.6 ; extra == 'cleva'
|
|
72
72
|
Requires-Dist: langdetect ==1.0.9 ; extra == 'cleva'
|
|
73
|
+
Provides-Extra: cohere
|
|
74
|
+
Requires-Dist: cohere ~=5.3 ; extra == 'cohere'
|
|
73
75
|
Provides-Extra: decodingtrust
|
|
74
76
|
Requires-Dist: fairlearn ~=0.9.0 ; extra == 'decodingtrust'
|
|
75
77
|
Provides-Extra: dev
|
|
@@ -79,7 +81,7 @@ Requires-Dist: black ==24.3.0 ; extra == 'dev'
|
|
|
79
81
|
Requires-Dist: mypy ==1.5.1 ; extra == 'dev'
|
|
80
82
|
Requires-Dist: flake8 ==5.0.4 ; extra == 'dev'
|
|
81
83
|
Provides-Extra: google
|
|
82
|
-
Requires-Dist: google-cloud-aiplatform ~=1.
|
|
84
|
+
Requires-Dist: google-cloud-aiplatform ~=1.48 ; extra == 'google'
|
|
83
85
|
Provides-Extra: heim
|
|
84
86
|
Requires-Dist: gdown ~=4.4.0 ; extra == 'heim'
|
|
85
87
|
Requires-Dist: diffusers ~=0.24.0 ; extra == 'heim'
|
|
@@ -133,24 +135,31 @@ Requires-Dist: crfm-helm[aleph-alpha] ; extra == 'models'
|
|
|
133
135
|
Requires-Dist: crfm-helm[allenai] ; extra == 'models'
|
|
134
136
|
Requires-Dist: crfm-helm[amazon] ; extra == 'models'
|
|
135
137
|
Requires-Dist: crfm-helm[anthropic] ; extra == 'models'
|
|
138
|
+
Requires-Dist: crfm-helm[cohere] ; extra == 'models'
|
|
136
139
|
Requires-Dist: crfm-helm[google] ; extra == 'models'
|
|
137
140
|
Requires-Dist: crfm-helm[mistral] ; extra == 'models'
|
|
138
141
|
Requires-Dist: crfm-helm[openai] ; extra == 'models'
|
|
142
|
+
Requires-Dist: crfm-helm[reka] ; extra == 'models'
|
|
139
143
|
Requires-Dist: crfm-helm[together] ; extra == 'models'
|
|
140
144
|
Requires-Dist: crfm-helm[tsinghua] ; extra == 'models'
|
|
141
145
|
Requires-Dist: crfm-helm[yandex] ; extra == 'models'
|
|
146
|
+
Requires-Dist: crfm-helm[openvino] ; extra == 'models'
|
|
142
147
|
Provides-Extra: mongo
|
|
143
148
|
Requires-Dist: pymongo ~=4.2 ; extra == 'mongo'
|
|
144
149
|
Provides-Extra: openai
|
|
145
150
|
Requires-Dist: openai ~=1.0 ; extra == 'openai'
|
|
146
|
-
Requires-Dist: tiktoken ~=0.
|
|
151
|
+
Requires-Dist: tiktoken ~=0.7 ; extra == 'openai'
|
|
147
152
|
Requires-Dist: pydantic ~=2.0 ; extra == 'openai'
|
|
153
|
+
Provides-Extra: openvino
|
|
154
|
+
Requires-Dist: optimum[openvino] ~=1.19 ; extra == 'openvino'
|
|
148
155
|
Provides-Extra: plots
|
|
149
156
|
Requires-Dist: colorcet ~=3.0.1 ; extra == 'plots'
|
|
150
157
|
Requires-Dist: matplotlib ~=3.6.0 ; extra == 'plots'
|
|
151
158
|
Requires-Dist: seaborn ~=0.11.0 ; extra == 'plots'
|
|
152
159
|
Provides-Extra: proxy-server
|
|
153
160
|
Requires-Dist: gunicorn ~=20.1.0 ; extra == 'proxy-server'
|
|
161
|
+
Provides-Extra: reka
|
|
162
|
+
Requires-Dist: reka-api ~=2.0.0 ; extra == 'reka'
|
|
154
163
|
Provides-Extra: scenarios
|
|
155
164
|
Requires-Dist: gdown ~=4.4.0 ; extra == 'scenarios'
|
|
156
165
|
Requires-Dist: sympy ~=1.11.1 ; extra == 'scenarios'
|
|
@@ -174,6 +183,7 @@ Requires-Dist: torch ~=2.1.2 ; extra == 'vlm'
|
|
|
174
183
|
Requires-Dist: transformers-stream-generator ~=0.0.4 ; extra == 'vlm'
|
|
175
184
|
Requires-Dist: scipy ~=1.10 ; extra == 'vlm'
|
|
176
185
|
Requires-Dist: torchvision <3.0.0,>=0.14.1 ; extra == 'vlm'
|
|
186
|
+
Requires-Dist: crfm-helm[reka] ; extra == 'vlm'
|
|
177
187
|
Requires-Dist: crfm-helm[images] ; extra == 'vlm'
|
|
178
188
|
Requires-Dist: crfm-helm[image2structure] ; extra == 'vlm'
|
|
179
189
|
Requires-Dist: pycocoevalcap ~=1.2 ; extra == 'vlm'
|
|
@@ -5,14 +5,14 @@ helm/benchmark/annotation_executor.py,sha256=ZJCc5xT8E0E6gux8dq3HPS4YzQs2YPCNl4g
|
|
|
5
5
|
helm/benchmark/config_registry.py,sha256=Cd25a8FHriUzAgvGGU5sBAPyhisdSIjdUJR4YbYs6T4,1603
|
|
6
6
|
helm/benchmark/data_preprocessor.py,sha256=aNdM-o2t4qkLIQHiQeWUFg03DjjJ8HTBIphYCK8pXVo,2173
|
|
7
7
|
helm/benchmark/executor.py,sha256=simd7SdJ7TciUpoq3D0uz_XUSCZj5KIWCIP57FYm4js,4906
|
|
8
|
-
helm/benchmark/huggingface_registration.py,sha256=
|
|
8
|
+
helm/benchmark/huggingface_registration.py,sha256=unEBO21V8K3-Ya0xLqjO9H1oq7RmU-f1MYV0tCIbXzY,4578
|
|
9
9
|
helm/benchmark/model_deployment_registry.py,sha256=BjL0ghHgO7_Z5jZZ7kuSOj9saegI3BivaL-b699C0rc,9527
|
|
10
10
|
helm/benchmark/model_metadata_registry.py,sha256=fXRJOLUIrLOHUG5duncEqhnpmfb9hyloUlGbOM2L9ds,8194
|
|
11
11
|
helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
|
|
12
|
-
helm/benchmark/run.py,sha256=
|
|
13
|
-
helm/benchmark/run_expander.py,sha256=
|
|
12
|
+
helm/benchmark/run.py,sha256=WNj10uNCqxwS2pCmt_s5Bn_JIC-NItEjK1PyQl9SXmo,13193
|
|
13
|
+
helm/benchmark/run_expander.py,sha256=sWfcL0caHTsp1NqqsGrG-fZaIbScY8LECJqQMVIPZtE,51191
|
|
14
14
|
helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
|
|
15
|
-
helm/benchmark/run_spec_factory.py,sha256=
|
|
15
|
+
helm/benchmark/run_spec_factory.py,sha256=hp29n_Stb7RMwRm2jrP_qpyzxi8X8ojdqXTFN3KRSiY,6978
|
|
16
16
|
helm/benchmark/runner.py,sha256=zlHDJ2Ys5-HxtXcwpkXcrdfXy_i886fBcq1iNeLyC3Q,14669
|
|
17
17
|
helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
|
|
18
18
|
helm/benchmark/server.py,sha256=ysd5MT1TDu65NH-OzIGf9wmZlr8FHNRwoy2ybjSc5Yk,6140
|
|
@@ -22,7 +22,7 @@ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5
|
|
|
22
22
|
helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
|
|
23
23
|
helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
|
|
24
24
|
helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
helm/benchmark/adaptation/adapter_spec.py,sha256=
|
|
25
|
+
helm/benchmark/adaptation/adapter_spec.py,sha256=K5BwqTe2iimjswdw_SONlJo0xt-T-o5KH7VqxrPaov0,5072
|
|
26
26
|
helm/benchmark/adaptation/common_adapter_specs.py,sha256=-ILsVxWjpEE6an1ncrRRrLkdP5ky_-2GN1TxSxJo38M,10449
|
|
27
27
|
helm/benchmark/adaptation/prompt.py,sha256=n0Ka3RGSWMr3CBnJrPNPy626x9TJE3k677wKbG8hO9A,2133
|
|
28
28
|
helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
|
|
@@ -49,8 +49,11 @@ helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_a
|
|
|
49
49
|
helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=VjSqWiZEcW6K2jrokGUmky7syEOqJ6cbHImR7YZgwzU,10151
|
|
50
50
|
helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=KKOOlna6SHLJHSPgfgguPQysc2Nf4kKrqumqwlG27bs,3542
|
|
51
51
|
helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
+
helm/benchmark/annotation/air_bench_annotator.py,sha256=9W3zLO2f4OzxGdavkDI2dDUStxpExa7sgrI-ATGG7NY,3048
|
|
52
53
|
helm/benchmark/annotation/annotator.py,sha256=2UIXY71S5dRaZBLb1v4lcv8-O6pyJ9zTeSJl78AEWGI,1538
|
|
53
|
-
helm/benchmark/annotation/annotator_factory.py,sha256=
|
|
54
|
+
helm/benchmark/annotation/annotator_factory.py,sha256=3Soh0V3lbsIR_HGHLg-XTc3eKVRj7SL9lLT_AoqUVTs,2997
|
|
55
|
+
helm/benchmark/annotation/live_qa_annotator.py,sha256=IlUV4K-ddbL1XsvIgBAfsLH0_bdKx8kyDev1G3Kwyek,4364
|
|
56
|
+
helm/benchmark/annotation/medication_qa_annotator.py,sha256=7LRmx2a1JODP5puAM0IH0HFTextfeLOzK7ef4sw9XIU,4129
|
|
54
57
|
helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
|
|
55
58
|
helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
|
|
56
59
|
helm/benchmark/annotation/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -78,7 +81,7 @@ helm/benchmark/augmentations/space_perturbation.py,sha256=g4rbyoureBaOVf_lrRXIWY
|
|
|
78
81
|
helm/benchmark/augmentations/suffix_perturbation.py,sha256=P3AfJj_ajTVdjO7AJRQ9dKS-cT1PyRSt8Un57iZQDVc,785
|
|
79
82
|
helm/benchmark/augmentations/synonym_perturbation.py,sha256=komOV5M342_8unopnwN6gkPWpJIZXidywiu6PO9_riU,4151
|
|
80
83
|
helm/benchmark/augmentations/test_perturbation.py,sha256=4EooKVcyub70I81trzpNx3Ij-m1vpFa5cFIo6O52icE,13185
|
|
81
|
-
helm/benchmark/augmentations/translate_perturbation.py,sha256=
|
|
84
|
+
helm/benchmark/augmentations/translate_perturbation.py,sha256=vMXCYXGVSo8E78IAzH9HI4p2pvyLzcvO77BnvR2QB0k,1097
|
|
82
85
|
helm/benchmark/augmentations/typos_perturbation.py,sha256=_F9zwvrLie8hX7mzUtQmYq6oq6yqaFiKGsvc9LAuBr4,2798
|
|
83
86
|
helm/benchmark/data_overlap/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
87
|
helm/benchmark/data_overlap/data_overlap_spec.py,sha256=aj_l1l0qxUbUMrSWr70-Sb1j_JN-7WYop5BXPG_xj44,1998
|
|
@@ -88,6 +91,7 @@ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8
|
|
|
88
91
|
helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
|
|
89
92
|
helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
|
|
90
93
|
helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
|
+
helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
|
|
91
95
|
helm/benchmark/metrics/basic_metrics.py,sha256=7hk5PZL7d09uG1y7wHBhY_ox8hlXw-n7Yt_FDv_AIKw,20375
|
|
92
96
|
helm/benchmark/metrics/bbq_metrics.py,sha256=Dqccr7GdfKNs1S_1QSB75d8AY7moovEPAqvacGfrCAE,6157
|
|
93
97
|
helm/benchmark/metrics/bias_metrics.py,sha256=GQ4CwOk1Sa9g-LcJCxcoQLD1vWY2Hvujck9l-9qsmf4,11418
|
|
@@ -109,16 +113,23 @@ helm/benchmark/metrics/dry_run_metrics.py,sha256=d8RgltW4nGTH1tZeGOIlQRwRaJLIxL6
|
|
|
109
113
|
helm/benchmark/metrics/efficiency_metrics.py,sha256=v8Eg56HHIWEMQruODKBvwdUfR6ZLGgrNifo-senCaUo,11786
|
|
110
114
|
helm/benchmark/metrics/evaluate_instances_metric.py,sha256=EBUf0ONnNoi7pcxYab7RD0B_JqGksqDX8TOaosSmJk8,2847
|
|
111
115
|
helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=vUJavaLVfbWtrwyrIA81npK_1iirhko7_zMF1kL7Gfw,15559
|
|
112
|
-
helm/benchmark/metrics/
|
|
116
|
+
helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
|
|
117
|
+
helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
|
|
118
|
+
helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
|
|
119
|
+
helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=QJxGzyERQv_vMn3PM9fy3IxfBgSg0BjcOf_mv574lGA,9786
|
|
113
120
|
helm/benchmark/metrics/language_modeling_metrics.py,sha256=ofqwj1PMJQu16QhLDULXBmZ5iFz91ducwLRpNsRYELE,4510
|
|
121
|
+
helm/benchmark/metrics/live_qa_metrics.py,sha256=f2XFmQaohjQNqYqNg8NcDVavCzyP4cd8Cl8rLArn9EM,816
|
|
114
122
|
helm/benchmark/metrics/machine_translation_metrics.py,sha256=bp_EDXyxntIty5gORDa7va-C73quOzoTc5o8MpxFmL4,3816
|
|
123
|
+
helm/benchmark/metrics/medication_qa_metrics.py,sha256=Z939iAc0A5xn_GdnCtfiefhUZK9qk6jZjtde2-F7IH8,840
|
|
115
124
|
helm/benchmark/metrics/metric.py,sha256=dPq7ZMB0w-LgJKMzWYDJtfn-oYD4oG4jJX0yiUEziJM,14245
|
|
116
125
|
helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
|
|
117
126
|
helm/benchmark/metrics/metric_service.py,sha256=mlX_MEFSYNzME6GFS3El_VVOvzPYnOMosKI0XIxygP4,1802
|
|
118
127
|
helm/benchmark/metrics/numeracy_metrics.py,sha256=panMWD3a1NPerg3Ix7l6NhR7jGOIQOQV9i_KysBeDA8,2818
|
|
119
128
|
helm/benchmark/metrics/paraphrase_generation_metrics.py,sha256=-VkAknRhAEBmC_lpz_1aeXU8OppL8KfEPtIYCJkHTmw,1981
|
|
129
|
+
helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bzWoPWNFuVs-3fm7XJw2EC4xgiSb3gSa4,8508
|
|
120
130
|
helm/benchmark/metrics/ranking_metrics.py,sha256=5hDRapsxx_cmo-ag_80kOQnrgZn3lfVsLZVtWxuxH-s,17391
|
|
121
131
|
helm/benchmark/metrics/reference_metric.py,sha256=RlIM_PFTEkBo0_EEMq8d4_BSagNSBR_XyovMtjDeqqU,6026
|
|
132
|
+
helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
|
|
122
133
|
helm/benchmark/metrics/statistic.py,sha256=FuxNxMtAfiCkOxBS9KHlhEyxe61e0YXt2emvsufgPZQ,3424
|
|
123
134
|
helm/benchmark/metrics/summarization_critique_metrics.py,sha256=Lf7PDuce62HDzyofsyxaOvH0QvzcaS-vJvDWtIs8xKk,4694
|
|
124
135
|
helm/benchmark/metrics/summarization_metrics.py,sha256=laLMGRDy1wjcFvgSWXvzOZwBXshkmPr0S2Ofu79Z01Q,16461
|
|
@@ -131,7 +142,7 @@ helm/benchmark/metrics/test_numeracy_metrics.py,sha256=ls1ZIHDePKpHMoqAbf4HmJ1SI
|
|
|
131
142
|
helm/benchmark/metrics/test_statistic.py,sha256=AejuYLSeUwEOqpEMRKZFjnxu4HKUraeExU8TPmZEqW4,1229
|
|
132
143
|
helm/benchmark/metrics/toxicity_metrics.py,sha256=6MCpHuCXbXZqWwvO57ifKYHnHWBzszN9cZjwgPQQF2Y,4027
|
|
133
144
|
helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
|
|
134
|
-
helm/benchmark/metrics/unitxt_metrics.py,sha256=
|
|
145
|
+
helm/benchmark/metrics/unitxt_metrics.py,sha256=2F9T4iQV0_BbDMCWrZrd9sc30XHYv8MR4xSBd_dD3eI,4053
|
|
135
146
|
helm/benchmark/metrics/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
136
147
|
helm/benchmark/metrics/image_generation/aesthetics_metrics.py,sha256=AXQjWBd9zBZOoCF8vQV9FjUy33teC0IF7pdbq-XiHjM,2101
|
|
137
148
|
helm/benchmark/metrics/image_generation/aesthetics_scorer.py,sha256=ISdThDKMrx-SHQe69dCcr8qUrMCa_GsxX3BeZnd0WPA,2538
|
|
@@ -180,32 +191,37 @@ helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=l9UQZ0aAI
|
|
|
180
191
|
helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=_wJ3E3LbJB9XPLixTH82BYQbp32o3oij6Sz3lsZL30E,2648
|
|
181
192
|
helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
|
|
182
193
|
helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
|
-
helm/benchmark/metrics/vision_language/emd_utils.py,sha256=
|
|
184
|
-
helm/benchmark/metrics/vision_language/image_metrics.py,sha256=
|
|
194
|
+
helm/benchmark/metrics/vision_language/emd_utils.py,sha256=KdZdcqu3eo016FdAjAm_83v92-wWuR90EPsTogfTcok,15196
|
|
195
|
+
helm/benchmark/metrics/vision_language/image_metrics.py,sha256=HyXeZiDszSV1Q99ScqeS_xYvyrp1dlWBYahfxt42N3E,23554
|
|
185
196
|
helm/benchmark/metrics/vision_language/image_utils.py,sha256=XeYF3E6MnYyPJ5hYp4TtiTP27-y4S8LTBH5bZVcvJFg,3758
|
|
186
197
|
helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
198
|
helm/benchmark/presentation/contamination.py,sha256=PiIdcaD3-xfExjOmyL5q4Ao2ASa-OlScJAB9u1Zxe7o,2811
|
|
188
199
|
helm/benchmark/presentation/create_plots.py,sha256=2-ZOuEdRwqqF1biRmzWggMZjmODoxOQOBoz9GT7tVww,28737
|
|
189
200
|
helm/benchmark/presentation/run_display.py,sha256=tC1DciLvDTQJog4BDo8StWDdX7DbBkhrG2sX_SwXSPQ,11838
|
|
190
201
|
helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
|
|
191
|
-
helm/benchmark/presentation/schema.py,sha256=
|
|
202
|
+
helm/benchmark/presentation/schema.py,sha256=fPw-794HbacZR5z1SmYGUqYgqXbZ8-BrcexWV4h6vgc,10809
|
|
192
203
|
helm/benchmark/presentation/summarize.py,sha256=2fJ9BYOJRxe9eBylLUK3qcZZwAwRtJF_C8plEQlAPEU,67266
|
|
193
204
|
helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
|
|
194
205
|
helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
|
|
195
206
|
helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
|
|
196
207
|
helm/benchmark/presentation/test_run_entry.py,sha256=OM-027j2A0Lx-ai2zBprOxSqzZhS_dh0OKw3ThocZW0,751
|
|
208
|
+
helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
|
|
197
209
|
helm/benchmark/presentation/test_summarize.py,sha256=UfSp33Q9xvuGnPYfFmLJdH5y7KWp9qbZprRMyx8LGP0,1618
|
|
198
210
|
helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
|
-
helm/benchmark/run_specs/
|
|
211
|
+
helm/benchmark/run_specs/air_bench_run_specs.py,sha256=VdXis1HN8_KLrMHDCVi0J7WdqjRjAGbZMhrsnpzC-Kg,1604
|
|
212
|
+
helm/benchmark/run_specs/classic_run_specs.py,sha256=Cn0z-6QY-ehbLaHJMvCwjw11DFBQgUyqVCaXwTVFyJ8,58331
|
|
200
213
|
helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
|
|
201
|
-
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=
|
|
214
|
+
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=fDyIxmOdgLLWVtwBfxcnd3nFnBZNFpJHbcM4Kyq5gZA,14315
|
|
215
|
+
helm/benchmark/run_specs/experimental_run_specs.py,sha256=7aF-Ox8iBC2obfJkyKwobJaCjk1SqxtSDuRv_RxA3Eo,1310
|
|
216
|
+
helm/benchmark/run_specs/finance_run_specs.py,sha256=7DCmeBQpETQjK0fvUKS1nDIbM_wxTXb2GhXcjzIDyIE,1181
|
|
202
217
|
helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
|
|
203
218
|
helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
|
|
204
219
|
helm/benchmark/run_specs/lite_run_specs.py,sha256=ViCPJ86Aah8301GTEk6z4_MtP0g8iik33t4GudobhWQ,11113
|
|
205
220
|
helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
|
|
206
221
|
helm/benchmark/run_specs/unitxt_run_specs.py,sha256=ejp_knrcIjf0J4WiKj9LTgDTcUr29-XFZYHYz0w_dkM,1518
|
|
207
|
-
helm/benchmark/run_specs/vlm_run_specs.py,sha256=
|
|
222
|
+
helm/benchmark/run_specs/vlm_run_specs.py,sha256=uwnk9DHZKQj8nnC14kGiSN8xKiZfpigoz5S86TiHc4k,31118
|
|
208
223
|
helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
224
|
+
helm/benchmark/scenarios/air_bench_scenario.py,sha256=WUZvsUTqlsjNzQsd2baZZIgO30B4Zf3g0QjsyEaGmLc,1772
|
|
209
225
|
helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=Wyt7J5BAvAqC5JTqCW4fh7ex9-itX11P_9rLTocqvtk,4973
|
|
210
226
|
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=S1tPQY2x1I3hQL1JQ6wvUwvKyiSe7SqpRSW6N3_T0mo,5043
|
|
211
227
|
helm/benchmark/scenarios/bbq_scenario.py,sha256=lT1XKSM-PXYtENI-ryScC4yb1TtII7YoH8kt_S1dZQo,9579
|
|
@@ -213,6 +229,7 @@ helm/benchmark/scenarios/big_bench_scenario.py,sha256=bSk8Ia4u_6OqMjiyadpYQAWN-8
|
|
|
213
229
|
helm/benchmark/scenarios/blimp_scenario.py,sha256=o1MDcHT14KFDET4K9otx8pDiIgXrhsD19pvO0mR2ADU,6260
|
|
214
230
|
helm/benchmark/scenarios/bold_scenario.py,sha256=NEfECMVzlVP_yo6sOuIzj6vZ5jd72_nvtEQ1lWrq85Q,4106
|
|
215
231
|
helm/benchmark/scenarios/boolq_scenario.py,sha256=rvSp5SwXMCVzBo5BFxfhj1Xv06_ksqKrtTQR7nPiS-o,8013
|
|
232
|
+
helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
|
|
216
233
|
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=VO5G-cQ9qctmBN0O76uSewnO_mFslMo5mbR2ZTrjuds,4851
|
|
217
234
|
helm/benchmark/scenarios/cleva_scenario.py,sha256=xhwZ616iz0CN3fYIfrXHcV1XlcRQjyPSzML8fq8D3l4,57939
|
|
218
235
|
helm/benchmark/scenarios/code_scenario.py,sha256=s4AGW8eBY0gFnu6EXvVWL0xbFYO28N9sgP1V8eBO7EI,12171
|
|
@@ -233,9 +250,10 @@ helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=AI8HX
|
|
|
233
250
|
helm/benchmark/scenarios/dialogue_scenarios.py,sha256=-I7FY6q1b11zpFd1_oAgar5qlfaFcXsNCKGVln9etPI,5629
|
|
234
251
|
helm/benchmark/scenarios/disinformation_scenario.py,sha256=kQi0MVVoSDhx2vOTnUaCIttPXMf8zz7Eld2FD_77tnA,8504
|
|
235
252
|
helm/benchmark/scenarios/dyck_language_scenario.py,sha256=vMxND9wPJenrGlCLhSw5UxOw3TV2Jq8cTmIXGpzEWaA,9318
|
|
236
|
-
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=
|
|
253
|
+
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4cv7u2lmUFcigkAX_eMwIn49Pa3p-aHClkT-r-0roLU,6616
|
|
237
254
|
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=YjBX61TlL3CDQ3X6D-JyR-qlOYGLdoRXJxl9AEeqxYs,7022
|
|
238
255
|
helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
|
|
256
|
+
helm/benchmark/scenarios/fin_qa_scenario.py,sha256=pXUeJ34KiRSlEjYERgXqVSbr7zxvdXnOuMSpXvnUw5I,5782
|
|
239
257
|
helm/benchmark/scenarios/grammar.py,sha256=Pb9vEP_0Ki87UdQCj1ym7QWJ24M4DRP6TXB5d3GnhLs,5597
|
|
240
258
|
helm/benchmark/scenarios/grammar_scenario.py,sha256=bl-Cm9caDs077zSu38mzaS9maZ2gM-QazgjOEMFvxYg,1454
|
|
241
259
|
helm/benchmark/scenarios/gsm_scenario.py,sha256=9fV2SEw3ocKNAD-TrDZZTpq4l7mbttQQWbO0YNz4e6k,2613
|
|
@@ -279,6 +297,7 @@ helm/benchmark/scenarios/summarization_scenario.py,sha256=MlNMgsY369DC04nhMUdG2o
|
|
|
279
297
|
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=pzifpsJJbucmTjujNqQnwQa4Y7wpQjkS6QjNXOrgTAQ,3096
|
|
280
298
|
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=1b3e3WpFMNBV3li17-0Ug6QCSKO4qRFaWDF23bYNsvQ,16326
|
|
281
299
|
helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=k8IGK6VABOr6wuha4HynP47peoAkmIViAVhScOtCANo,8345
|
|
300
|
+
helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
|
|
282
301
|
helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
|
|
283
302
|
helm/benchmark/scenarios/test_math_scenario.py,sha256=s3-CllgCB8DL9-L4DmJ6Zcf9xi803nWYN84KlhN7PhM,1016
|
|
284
303
|
helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
|
|
@@ -314,7 +333,7 @@ helm/benchmark/scenarios/image_generation/time_most_significant_historical_figur
|
|
|
314
333
|
helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
|
|
315
334
|
helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
316
335
|
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=zXR0LmXsD2tv_ovJsbY_HP53kdiFOvty7Y_Ai3ZCrT4,3037
|
|
317
|
-
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=
|
|
336
|
+
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=jwGEouY30Yy5U9lRUbv0XAO98gUJ669g0dhdDCGQ-8w,4097
|
|
318
337
|
helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=82qplX4gJ4GsSVhBjwrsVU46TAHh-jym3F_M5A-odRE,4608
|
|
319
338
|
helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=3pBAQgOsnSyMCzt60s1m8Kf_fEJ4C7XgCDbtXatTlX0,2599
|
|
320
339
|
helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=sBQfqAxmP-Z0ifCgwTbP11aPsKA4vogcWBqSDiKlbE4,3512
|
|
@@ -329,19 +348,20 @@ helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HU
|
|
|
329
348
|
helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
|
|
330
349
|
helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
|
|
331
350
|
helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
|
|
332
|
-
helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=
|
|
351
|
+
helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=wVcTNUql4TBClgm7oyLq5cmybsnlurc0MblqRRxXRyc,9929
|
|
333
352
|
helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=uFkzMMsjhmuSYo3v_QdfJFX6RFse83JjzMfMa3ynvV4,3975
|
|
334
353
|
helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=5MwGb9BOyB2Xy70BGYZcjencf0ZskxBuzcPa7ABRuww,5106
|
|
335
|
-
helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=
|
|
354
|
+
helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=bH5FfAgwyzpVMPOJKNCmOgpX-lvJF-B42uVi4m1mY-I,4231
|
|
355
|
+
helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=2foCM7ik9RvYahauKIoNAxkGiluOYuT0w0r7FZi-MQo,3621
|
|
336
356
|
helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=hJ3sOSpPnOCwLtpVnfasI_X89oofI-2PBRjMnx8eiVA,4139
|
|
337
357
|
helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=2hY-qngKC69ZL9SHNei3IK3C2PvJDWvwLFVQ8yNSOVs,5196
|
|
338
358
|
helm/benchmark/scenarios/vision_language/image2structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
339
359
|
helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py,sha256=ImhfiC_y_hihAGvlj9zRsaoW614QFCBopBD2KxnbSs0,1805
|
|
340
|
-
helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py,sha256
|
|
341
|
-
helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py,sha256=
|
|
342
|
-
helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py,sha256=
|
|
360
|
+
helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
|
|
361
|
+
helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py,sha256=RSLYpw3BsIIxkhS-6RfVM_UhjmwJDMoA3JQl3FBjv7I,1147
|
|
362
|
+
helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py,sha256=_pgW_aNaM3E7MTl_tNExupvENdtAH3DvZuSwZIiopCg,837
|
|
343
363
|
helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py,sha256=ovg8-FfJ8_I1xbajFGSLvERZIA1fQjaUn0zd04ZbI84,15316
|
|
344
|
-
helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py,sha256=
|
|
364
|
+
helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py,sha256=dOt-gif-4Z0JekI2KAel4KS1zyvzqyqoFLP3xoe5DKY,9710
|
|
345
365
|
helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
346
366
|
helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py,sha256=i-i0mlG5oRRDNYNqP7o7Ul56iL02p_anJoThXaSvFiM,2826
|
|
347
367
|
helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
|
|
@@ -355,14 +375,19 @@ helm/benchmark/static/index.html,sha256=xIJGjMg0qn9eemfdBiNbTI0jzPfBD5x0v8HJF-dM
|
|
|
355
375
|
helm/benchmark/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
|
|
356
376
|
helm/benchmark/static/json-urls.js,sha256=AaULgfHw8OLfrQLJpBHfcC013uavQnlNNFS9vzb0qOg,1981
|
|
357
377
|
helm/benchmark/static/plot-captions.js,sha256=bTR8gYx-QqF_RJyKX-L-eQP7hSEtawfJSoADCvgjKag,3011
|
|
358
|
-
helm/benchmark/static/
|
|
359
|
-
helm/benchmark/static/
|
|
360
|
-
helm/benchmark/static/
|
|
361
|
-
helm/benchmark/static/
|
|
362
|
-
helm/benchmark/static/
|
|
363
|
-
helm/benchmark/static/
|
|
364
|
-
helm/benchmark/static/
|
|
365
|
-
helm/benchmark/static/
|
|
378
|
+
helm/benchmark/static/schema_air_bench.yaml,sha256=ePZAGL4X-yH4cAQvzS5uU44duCKwdDrMwDSvCC9y7-k,139384
|
|
379
|
+
helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
|
|
380
|
+
helm/benchmark/static/schema_finance.yaml,sha256=vZG0EssYr_BVZmyV4sZmRaeLFSX2ycjni8O_L_kGzzc,5283
|
|
381
|
+
helm/benchmark/static/schema_image2structure.yaml,sha256=IV57vHTaZakH6EupIlT6PRjK8aI14OSNFYUAHD9QBxo,15593
|
|
382
|
+
helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
|
|
383
|
+
helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
|
|
384
|
+
helm/benchmark/static/schema_medical.yaml,sha256=hDk4834FKn-5cMr6pHcu1P60sh6cXJ2J0Z1ADIj2MSc,8455
|
|
385
|
+
helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
|
|
386
|
+
helm/benchmark/static/schema_tables.yaml,sha256=i4ylaq5yZoIEUvxPS8dniPQWKHZF5bz3hMgjNbzC_MM,7064
|
|
387
|
+
helm/benchmark/static/schema_thai.yaml,sha256=25-PjBhZMHM89M01XxLQWNg0mdQnfo4H0XInF9ZzDow,7900
|
|
388
|
+
helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
|
|
389
|
+
helm/benchmark/static/schema_vhelm.yaml,sha256=IZ1oAmEjnoWQ6YtMpnwZ2IQkXx86bJS1j3686mvtAGc,29476
|
|
390
|
+
helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
|
|
366
391
|
helm/benchmark/static/utils.js,sha256=bgN0PT53Dregc-nLmEmAEmg2psufWpS8jTf74WoypHw,7681
|
|
367
392
|
helm/benchmark/static/images/crfm-logo.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
|
|
368
393
|
helm/benchmark/static/images/helm-logo-simple.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
@@ -384,9 +409,10 @@ helm/benchmark/static/images/organizations/together.png,sha256=pmWjW4r7GnlKqFhKL
|
|
|
384
409
|
helm/benchmark/static/images/organizations/tsinghua-keg.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
|
|
385
410
|
helm/benchmark/static/images/organizations/yandex.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
|
|
386
411
|
helm/benchmark/static_build/config.js,sha256=ER8utDIqVZi9uge7Qrk1gmlT88TOOkFF9xYp3j10m8U,165
|
|
387
|
-
helm/benchmark/static_build/index.html,sha256=
|
|
412
|
+
helm/benchmark/static_build/index.html,sha256=J0TrGE5-kOkopr-iSRHvvCzDL00w8Si-8OaIt9vSX0M,1149
|
|
388
413
|
helm/benchmark/static_build/assets/01-694cb9b7.png,sha256=aUy5t0DYCg4r52HDOmeNi1S2CHsnv3mE7ySokJg3Ouo,8903
|
|
389
414
|
helm/benchmark/static_build/assets/ai21-0eb91ec3.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
|
|
415
|
+
helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
|
|
390
416
|
helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png,sha256=fOEANHS8RymKaCzUWn9gQWebts2ghSmtW9Fdda_TjR8,7224
|
|
391
417
|
helm/benchmark/static_build/assets/anthropic-70d8bc39.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
|
|
392
418
|
helm/benchmark/static_build/assets/bigscience-7f0400c0.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
|
|
@@ -397,13 +423,15 @@ helm/benchmark/static_build/assets/google-06d997ad.png,sha256=BtmXrVQZHr3WH5c8c2
|
|
|
397
423
|
helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
|
|
398
424
|
helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
399
425
|
helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
|
|
400
|
-
helm/benchmark/static_build/assets/index-
|
|
401
|
-
helm/benchmark/static_build/assets/index-
|
|
426
|
+
helm/benchmark/static_build/assets/index-30dbceba.js,sha256=WXT0A-yH9f-3wCwQ3rwKWTCIOOpjETQwOQyZt2OMAwc,77064
|
|
427
|
+
helm/benchmark/static_build/assets/index-66b02d40.css,sha256=ZrAtQOMv7vRJwOA9urNRk_rs8hJljom_xhn-wI89g08,486795
|
|
402
428
|
helm/benchmark/static_build/assets/meta-5580e9f1.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
|
|
403
429
|
helm/benchmark/static_build/assets/microsoft-f5ee5016.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
|
|
404
430
|
helm/benchmark/static_build/assets/mistral-18e1be23.png,sha256=GOG-Ix7XlctGOUmvJfO2oVSBM7E5O562G88OnoxsjBw,14402
|
|
405
431
|
helm/benchmark/static_build/assets/nvidia-86fa75c1.png,sha256=hvp1wZMwYxkfrVMvJs73PX71JwY5L8ZvxIH_fL4n6Po,27945
|
|
406
432
|
helm/benchmark/static_build/assets/openai-3f8653e4.png,sha256=P4ZT5ISIlt6Dl0mOp7juSM4Y7dfyRNPqdc0PJuwNoqg,16877
|
|
433
|
+
helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
|
|
434
|
+
helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
|
|
407
435
|
helm/benchmark/static_build/assets/react-d4a0b69b.js,sha256=rNTpl8Is3LkYXqJowRMc8vc4SXQwP94Ozy4DZZWwldU,275141
|
|
408
436
|
helm/benchmark/static_build/assets/recharts-6d337683.js,sha256=rDrVmtTCCSLY2hpcxSDxhlQ6CQmTTSQOESNeO3oVQgg,432466
|
|
409
437
|
helm/benchmark/static_build/assets/tii-24de195c.png,sha256=JN4ZXAa0rbR2IlxPfd_mKtntFZcYpDcXocSiqrC2rNg,63389
|
|
@@ -454,38 +482,40 @@ helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
454
482
|
helm/clients/ai21_client.py,sha256=LIdkmzcUDR9uIF2tIk5YgDNGNmfQ9JDYmgscvFoCHDs,5509
|
|
455
483
|
helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
|
|
456
484
|
helm/clients/aleph_alpha_client.py,sha256=koPqXF6uRD905atoiCaPg5yxr6B25J0g2OTWk8geebQ,4969
|
|
457
|
-
helm/clients/anthropic_client.py,sha256=
|
|
458
|
-
helm/clients/auto_client.py,sha256=
|
|
485
|
+
helm/clients/anthropic_client.py,sha256=wptP4u4NhQknoy7VQsWqVzn9tv3IrCuJ3vUMq6fiq0E,34909
|
|
486
|
+
helm/clients/auto_client.py,sha256=uK9EWQFWBt4DoV1oytm0dIeA3YpcfGi_H0rCRZSVE8c,11438
|
|
459
487
|
helm/clients/bedrock_client.py,sha256=BsH9UopsP6ZHf-K0Yzg1PYSMLDwY0yIUmPHDhJVMUi0,5293
|
|
460
488
|
helm/clients/bedrock_utils.py,sha256=okZ6Z8pviGOUNlrdF2QquAqFs8-QYgcqci95eij8giM,2574
|
|
461
|
-
helm/clients/client.py,sha256=
|
|
489
|
+
helm/clients/client.py,sha256=InjCQi62TWhWHmfyi-mC3fSAVztd-YDyfB3BkpacHXk,9002
|
|
462
490
|
helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
|
|
463
|
-
helm/clients/cohere_client.py,sha256=
|
|
491
|
+
helm/clients/cohere_client.py,sha256=PtVrDdm_-dXBiWzu_dfwiJPt5GLGw3wdN-Qw3u8ugtU,10976
|
|
464
492
|
helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
|
|
465
493
|
helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
|
|
466
494
|
helm/clients/google_client.py,sha256=EOpPzK5_9yzWkMjK-4ILiixDF3aeOa8AbR2SPnEO-nw,2900
|
|
467
495
|
helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
|
|
468
496
|
helm/clients/http_model_client.py,sha256=DBgkVDZPmg99DCcO_1Xdf6nFQo2kyxLkgoQpwC-wkHI,2806
|
|
469
|
-
helm/clients/huggingface_client.py,sha256=
|
|
497
|
+
helm/clients/huggingface_client.py,sha256=xmdqOWoioqoYQjtBqJFN-K9Fm3oHEQrOEjyzDz4ZWBY,15847
|
|
470
498
|
helm/clients/lit_gpt_client.py,sha256=Sjec16bNODosEhDoBkRc4t-LNS-nCUY_jVivWj5zvfU,6205
|
|
471
499
|
helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
|
|
472
500
|
helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
|
|
473
501
|
helm/clients/mistral_client.py,sha256=thOLMcEfrzWR00JUabIZ_PnW2o9YZsdSmNf9z3jbYKo,5982
|
|
474
502
|
helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
|
|
475
503
|
helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
|
|
476
|
-
helm/clients/openai_client.py,sha256=
|
|
504
|
+
helm/clients/openai_client.py,sha256=tXxi9nZsxz2I4YQLrQrV-GhlgZ1Z9ifrUhC_3Aw5SPE,14238
|
|
477
505
|
helm/clients/palmyra_client.py,sha256=LBYFHNc5LdpPbiSp1AAHuMm8cUUCQ2EB03BB6XnDTYQ,6551
|
|
478
506
|
helm/clients/perspective_api_client.py,sha256=WQDArqlKVWwcK2SicnSIAgV6JGVHsxibTzkdezT3z_U,5920
|
|
507
|
+
helm/clients/reka_client.py,sha256=K8b9p7U6LLAy4PRjgYrUS06gF4G2xjhjRoMEO4XDe0o,8329
|
|
479
508
|
helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
|
|
480
509
|
helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
|
|
481
|
-
helm/clients/test_client.py,sha256=
|
|
482
|
-
helm/clients/test_huggingface_client.py,sha256=
|
|
510
|
+
helm/clients/test_client.py,sha256=V7Y56Ahqa8C2Kc2_W2QE0VfGbBEJzFmnic3LGHZkOqQ,3940
|
|
511
|
+
helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
|
|
483
512
|
helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
|
|
484
|
-
helm/clients/test_together_client.py,sha256=
|
|
485
|
-
helm/clients/together_client.py,sha256=
|
|
513
|
+
helm/clients/test_together_client.py,sha256=yYNrhU3kQjmHwhILuoP5QwUgbmkm2gg2NHiNycHjoeE,6145
|
|
514
|
+
helm/clients/together_client.py,sha256=rtYdx53ZE19ziJpBc7MYTeSHJjN3Ke51I3Uldg0IAbs,20595
|
|
486
515
|
helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
|
|
487
|
-
helm/clients/vertexai_client.py,sha256=
|
|
516
|
+
helm/clients/vertexai_client.py,sha256=K_vCanJU97o2P_WJOeLhUFJA8SdfJDlVNl7Mi1HuIrQ,21860
|
|
488
517
|
helm/clients/vllm_client.py,sha256=p9atBtq3PBOoPkOPSifkMrYZjNLnNM_sWM6tL_3N-WY,1675
|
|
518
|
+
helm/clients/yi_client.py,sha256=0t4WJ8MTLOpB1LCZ-P6UdYa-KbGB7hkDrBluSkioot0,835
|
|
489
519
|
helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
490
520
|
helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
|
|
491
521
|
helm/clients/clip_scorers/clip_scorer.py,sha256=waLI_rI6dQPjmtywvGeQKK7bGCWXyoIgIuBc8P3zSB8,1907
|
|
@@ -544,9 +574,11 @@ helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh
|
|
|
544
574
|
helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
|
|
545
575
|
helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
546
576
|
helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=hTywh5nM95BmPoDyKOSDWg9G3-QwLO3KZEJZVkmFroo,6478
|
|
547
|
-
helm/clients/vision_language/huggingface_vlm_client.py,sha256=
|
|
577
|
+
helm/clients/vision_language/huggingface_vlm_client.py,sha256=H7AE8mm506PkEcUO8VaLVtptHTwVX58nZx1A_BWdKzA,4968
|
|
548
578
|
helm/clients/vision_language/idefics_client.py,sha256=hi1VCDBegHfBssmW0C62H3OX3U2ISVRhaSkd24gb1K4,7692
|
|
549
579
|
helm/clients/vision_language/open_flamingo_client.py,sha256=CkN0JCeR742ZG9Nc4A85hp4BSE0WLU-3Rs-ZwdmDkzs,6632
|
|
580
|
+
helm/clients/vision_language/paligemma_client.py,sha256=IU_T8r1RgpGkEAqabLKBbmoUOWV6c1a9_FXgiTy8exE,6835
|
|
581
|
+
helm/clients/vision_language/palmyra_vision_client.py,sha256=mY6vj918f-tbqhOmh7PCSEgnSpHzWY8UTqAdvYgXJ8Q,3757
|
|
550
582
|
helm/clients/vision_language/qwen_vlm_client.py,sha256=6rCH4gJMDyQHyjAE_GDIrLsInH_bvd6to-4RMWbRLeM,7407
|
|
551
583
|
helm/clients/vision_language/open_flamingo/__init__.py,sha256=i1tGJj6ckeE6eS1EWV5tbQKYLmPCrdSI45mPchfv_Ic,88
|
|
552
584
|
helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -563,13 +595,13 @@ helm/common/clip_score_request.py,sha256=WnNg89owDCmG7tyy8nnQL0RdKQLsUdMWiYH9Xqq
|
|
|
563
595
|
helm/common/codec.py,sha256=gTh6AwIQ0Bbul_QSnIO7eItwMZmYtnkIrG1jkc4GOL4,7100
|
|
564
596
|
helm/common/concurrency.py,sha256=8THtHlCtXo5c8iCuz_UcBBdzZX6aiEALLc4u0M4SYL0,856
|
|
565
597
|
helm/common/credentials_utils.py,sha256=O-57nUgkWLbZF0k3lsSaVGPPHj2_OYeVuCMe0to3bRE,1118
|
|
566
|
-
helm/common/critique_request.py,sha256=
|
|
598
|
+
helm/common/critique_request.py,sha256=yo4aRe-DEjudUmydthtpTj6LdhRXfZ3JZptxTkWzZ3U,3068
|
|
567
599
|
helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
|
|
568
600
|
helm/common/general.py,sha256=nMfHNPXyAAorAMmgDClD8r8XXeJcvfF0QXTP-FgH5PQ,11690
|
|
569
601
|
helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
|
|
570
602
|
helm/common/hierarchical_logger.py,sha256=EnKLnfbQftca08EJfjGEQb4tcnCKbx-JtwLnoCnhMQs,2908
|
|
571
603
|
helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
|
|
572
|
-
helm/common/images_utils.py,sha256=
|
|
604
|
+
helm/common/images_utils.py,sha256=bsxgW9knrfa9NTa6V-O13_nDnflqrqHpnKlTRxul-aY,3187
|
|
573
605
|
helm/common/key_value_store.py,sha256=iHi1WQuWttLNJnuM48QNOAXHoneNbmbBmtXYPq-dyys,3147
|
|
574
606
|
helm/common/media_object.py,sha256=3VZqfb0py5dDKwWtnLp2kdl8svaike-Cn7Mjk-b0cvM,5130
|
|
575
607
|
helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
|
|
@@ -590,9 +622,9 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
|
|
|
590
622
|
helm/common/file_caches/local_file_cache.py,sha256=wBOAbbkGLiClaX4YdunokRfSQCKNkTYmMVx2KTLy4Lc,1921
|
|
591
623
|
helm/common/file_caches/test_local_file_cache.py,sha256=bOCWR9MglwQXV98xk8auyjgFxaOr85zRdxWwxMBQW9s,663
|
|
592
624
|
helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
593
|
-
helm/config/model_deployments.yaml,sha256=
|
|
594
|
-
helm/config/model_metadata.yaml,sha256=
|
|
595
|
-
helm/config/tokenizer_configs.yaml,sha256=
|
|
625
|
+
helm/config/model_deployments.yaml,sha256=x4j3LMGHTV3jObKK0dT5SOtKJvReWOHyyjs6jV2D2L0,89739
|
|
626
|
+
helm/config/model_metadata.yaml,sha256=M7EsOSnf4tcrSlNYBT50SiC6mReXfZ1q5rt7_OpdzpU,138011
|
|
627
|
+
helm/config/tokenizer_configs.yaml,sha256=lBGPsRPRPeqlN_j194hEVP8HAMC6J5NLrIZpN95Y8ug,15078
|
|
596
628
|
helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
597
629
|
helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
|
|
598
630
|
helm/proxy/cli.py,sha256=l8F7UYqrIOoBD9ZCIxJFA4fhxlzhae0-2Nn8A7FMkzk,8244
|
|
@@ -608,12 +640,12 @@ helm/proxy/critique/mechanical_turk_critique_client.py,sha256=OcppmFOMweBSfVTiLI
|
|
|
608
640
|
helm/proxy/critique/mechanical_turk_critique_exporter.py,sha256=taULrc_cIP0O9c5UpGz3l9DmWQadTVzN_v-qzTgMoyo,8470
|
|
609
641
|
helm/proxy/critique/mechanical_turk_critique_importer.py,sha256=NL97joO5pRkcICRdVyG4kf9JhfYRaySsxRoZ7KWDYv0,5581
|
|
610
642
|
helm/proxy/critique/mechanical_turk_utils.py,sha256=mKpUv4zz3s5ptzDY7UrwuI7Cr5HmNgSjPC10BnN9AL4,1766
|
|
611
|
-
helm/proxy/critique/model_critique_client.py,sha256=
|
|
643
|
+
helm/proxy/critique/model_critique_client.py,sha256=QMFiMpALXnneumKbJpXOZDEb3lPPdkIaSCasmdXHB8o,12806
|
|
612
644
|
helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5Vy4jMrHXcsXc_5m4,15889
|
|
613
645
|
helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
|
|
614
646
|
helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
615
647
|
helm/proxy/services/remote_service.py,sha256=emYN0qWOJLQ7q1n06V4TwlvXaqylQcUxmqDcGZXqPJ8,9097
|
|
616
|
-
helm/proxy/services/server_service.py,sha256=
|
|
648
|
+
helm/proxy/services/server_service.py,sha256=U-1g0VMjCY9bBK8BecbUxVzSx7hyC_rpwSNm67bqmCg,11534
|
|
617
649
|
helm/proxy/services/service.py,sha256=Be-Z5F6AN4vMzsJr3BS6tJ9NHHy_dc_yn2Ex9cm0ChU,6193
|
|
618
650
|
helm/proxy/services/test_remote_service.py,sha256=NFnLjg3QNHoDKdK0DlcrtylwlKXx1vdzheNZRrLEv7c,6605
|
|
619
651
|
helm/proxy/services/test_service.py,sha256=FUZoI8pGiUg5adgB1wTJ869QOgFYjPtM6yf6FGMdE64,8968
|
|
@@ -625,16 +657,17 @@ helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
625
657
|
helm/tokenizers/ai21_tokenizer.py,sha256=oXImuAY9kMohHH6Zm7BWysfT88b00NBoSELeGQ920y4,2255
|
|
626
658
|
helm/tokenizers/aleph_alpha_tokenizer.py,sha256=UlWC_SjObBvexpZ3OfKZT2yjhbSsHlKjQe_oWuRrXno,3818
|
|
627
659
|
helm/tokenizers/anthropic_tokenizer.py,sha256=d-HO9OEFkhYzFZu0VkOsHjxbqqSUseCNX0KQqgb3s2Q,2114
|
|
628
|
-
helm/tokenizers/auto_tokenizer.py,sha256=
|
|
660
|
+
helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
|
|
629
661
|
helm/tokenizers/caching_tokenizer.py,sha256=kSegrCFotRevSDgJsn0g52dWiSUCNa7_EZpRNrELeUE,8163
|
|
630
|
-
helm/tokenizers/cohere_tokenizer.py,sha256
|
|
662
|
+
helm/tokenizers/cohere_tokenizer.py,sha256=6rahykq1SxqS8vCWOzYo_oeUoVwhg_zOfWFIkQxP6GY,5632
|
|
631
663
|
helm/tokenizers/http_model_tokenizer.py,sha256=wBTtDA2UdEYspffa1wqgkT3y3YHoyLXXoucnJ5PGjhs,3109
|
|
632
|
-
helm/tokenizers/huggingface_tokenizer.py,sha256=
|
|
664
|
+
helm/tokenizers/huggingface_tokenizer.py,sha256=_XXx8uApENK7-o81qxEn0SOeJL_L2UpiiuteSYiODpE,8734
|
|
633
665
|
helm/tokenizers/ice_tokenizer.py,sha256=4ZTIRpmt2cqwcxnmrDpCRhiJ0BI3ELE-GHoBuHWgrDA,1200
|
|
634
666
|
helm/tokenizers/lit_gpt_tokenizer.py,sha256=LMrpaje64UmnDKoYjPG_RQeXVA4xQUwW5t48IJIeLaQ,1660
|
|
635
667
|
helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
|
|
636
668
|
helm/tokenizers/test_anthropic_tokenizer.py,sha256=_wzXp9FVR2Ml0s2A79TTXbSPHyTRp28i9tiEyQ9S6Ko,3792
|
|
637
|
-
helm/tokenizers/
|
|
669
|
+
helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
|
|
670
|
+
helm/tokenizers/test_huggingface_tokenizer.py,sha256=8tFyZQb4DLg6MdKg13a66bLbp0yf4Ar1fGWM_sYeSjg,6309
|
|
638
671
|
helm/tokenizers/test_ice_tokenizer.py,sha256=-xi_f8TBSkAYr5CcA56HDq7rZ9HAGd99J7twNfkLzFU,2619
|
|
639
672
|
helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
|
|
640
673
|
helm/tokenizers/test_yalm_tokenizer.py,sha256=qWpKnUuAlePd6t-UJB_mAiBwtAacnC8caKXLJ_GdTkk,2477
|
|
@@ -646,9 +679,9 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
|
|
|
646
679
|
helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=W9p5QNn1GSm-y85yVEQe_82zn5CVK_vR6jvhk7JTs_k,869
|
|
647
680
|
helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
|
|
648
681
|
helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
|
|
649
|
-
crfm_helm-0.5.
|
|
650
|
-
crfm_helm-0.5.
|
|
651
|
-
crfm_helm-0.5.
|
|
652
|
-
crfm_helm-0.5.
|
|
653
|
-
crfm_helm-0.5.
|
|
654
|
-
crfm_helm-0.5.
|
|
682
|
+
crfm_helm-0.5.2.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
|
|
683
|
+
crfm_helm-0.5.2.dist-info/METADATA,sha256=g-tT_a7wm7L7iaNCQVwNIrpUnVHK8PKfbXjel0KyhmQ,19591
|
|
684
|
+
crfm_helm-0.5.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
685
|
+
crfm_helm-0.5.2.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
|
|
686
|
+
crfm_helm-0.5.2.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
|
|
687
|
+
crfm_helm-0.5.2.dist-info/RECORD,,
|
|
@@ -39,90 +39,91 @@ class AdapterSpec:
|
|
|
39
39
|
Note that an `Instance` could produce many `Request`s (e.g., one for each `Reference`).
|
|
40
40
|
"""
|
|
41
41
|
|
|
42
|
-
# Method of adaptation
|
|
43
42
|
method: str = ""
|
|
43
|
+
"""The high-level strategy for converting instances into a prompt for the language model."""
|
|
44
44
|
|
|
45
|
-
# Prepend all prompts with this string.
|
|
46
|
-
# For example, it is recommended to prefix all prompts with [NLG] for UL2.
|
|
47
45
|
global_prefix: str = ""
|
|
46
|
+
"""The string that is prepended to the entire prompt."""
|
|
48
47
|
|
|
49
|
-
# Append all prompts with this string.
|
|
50
48
|
global_suffix: str = ""
|
|
49
|
+
"""The string that is appended to the entire prompt."""
|
|
51
50
|
|
|
52
|
-
# Prompt starts with instructions
|
|
53
51
|
instructions: str = ""
|
|
52
|
+
"""The description of the task that is included at the very beginning of the prompt."""
|
|
54
53
|
|
|
55
|
-
# What goes before the input
|
|
56
54
|
input_prefix: str = "Input: "
|
|
55
|
+
"""The string that is included before each input (e.g., 'Question:')."""
|
|
57
56
|
|
|
58
|
-
# What goes after the input
|
|
59
57
|
input_suffix: str = "\n"
|
|
58
|
+
"""The string that is included after each input (e.g., '\\n')."""
|
|
60
59
|
|
|
61
|
-
# What goes before the input (for multiple choice)
|
|
62
60
|
reference_prefix: str = "A. "
|
|
61
|
+
"""The string that is included before each reference (for multiple-choice questions)."""
|
|
63
62
|
|
|
64
|
-
# What goes before the input (for multiple choice)
|
|
65
63
|
reference_suffix: str = "\n"
|
|
64
|
+
"""The string that is included after each reference (for multiple-choice questions)."""
|
|
66
65
|
|
|
67
|
-
# What goes before the output
|
|
68
66
|
output_prefix: str = "Output: "
|
|
67
|
+
"""The string that is included before the correct answer/predicted output (e.g., 'Answer:')."""
|
|
69
68
|
|
|
70
|
-
# What goes after the output
|
|
71
69
|
output_suffix: str = "\n"
|
|
70
|
+
"""The string that is included after the correct answer/predicted output (e.g., '\\n')."""
|
|
72
71
|
|
|
73
|
-
# What goes between instruction and in-context example blocks in the constructed prompt
|
|
74
72
|
instance_prefix: str = "\n"
|
|
73
|
+
"""The string that is included before each instance (e.g., '\\n\\n')."""
|
|
75
74
|
|
|
76
|
-
# List of regular expression substitutions that we perform
|
|
77
75
|
substitutions: List[Substitution] = field(default_factory=list, hash=False)
|
|
76
|
+
"""A list of regular expression substitutions (e.g., replacing '\\n' with ';\\n')
|
|
77
|
+
to perform at the very end on the prompt."""
|
|
78
78
|
|
|
79
|
-
# Maximum number of (in-context) training instances to put into the prompt
|
|
80
79
|
max_train_instances: int = 5
|
|
80
|
+
"""Maximum number of training instances to include in the prompt (currently by randomly sampling)."""
|
|
81
81
|
|
|
82
|
-
# Maximum number of evaluation instances. For getting valid numbers, this
|
|
83
|
-
# should be the entire dataset; only reduce this for piloting.
|
|
84
82
|
max_eval_instances: Optional[int] = None
|
|
83
|
+
"""Maximum number of instances to evaluate on (over all splits - test, valid, etc.)."""
|
|
85
84
|
|
|
86
|
-
# Generate this many outputs (which could be realized by `num_completions`
|
|
87
|
-
# or `top_k_per_token`).
|
|
88
85
|
num_outputs: int = 5
|
|
86
|
+
"""Maximum number of possible outputs to generate by sampling multiple outputs."""
|
|
89
87
|
|
|
90
|
-
# Number of trials, where in each trial we choose an independent, random
|
|
91
|
-
# set of training instances. Used to compute error bars.
|
|
92
88
|
num_train_trials: int = 1
|
|
89
|
+
"""Number of trials, where in each trial we choose an independent, random set of training instances.
|
|
90
|
+
Used to compute variance."""
|
|
93
91
|
|
|
94
|
-
# Number of trials, where we query the model with the same requests, but different random seeds
|
|
95
92
|
num_trials: int = 1
|
|
93
|
+
"""Number of trials, where we query the model with the same requests, but different random seeds."""
|
|
96
94
|
|
|
97
|
-
# If true, randomly sample N training examples; if false, select N consecutive training examples
|
|
98
95
|
sample_train: bool = True
|
|
96
|
+
"""If true, randomly sample N training examples; if false, select N consecutive training examples"""
|
|
99
97
|
|
|
100
98
|
# Decoding parameters (inherited by `Request`)
|
|
101
99
|
|
|
102
|
-
# Model deployment to make the request to (need to fill in)
|
|
103
100
|
model_deployment: str = ""
|
|
101
|
+
"""Name of the language model deployment (<host_organization>/<model name>) to send requests to."""
|
|
104
102
|
|
|
105
|
-
# Model to make the request to
|
|
106
103
|
model: str = ""
|
|
104
|
+
"""Name of the language model (<creator_organization>/<model name>) to send requests to."""
|
|
107
105
|
|
|
108
|
-
# Temperature to use
|
|
109
106
|
temperature: float = 1
|
|
107
|
+
"""Temperature parameter used in generation."""
|
|
110
108
|
|
|
111
|
-
# Maximum number of tokens to generate
|
|
112
109
|
max_tokens: int = 100
|
|
110
|
+
"""Maximum number of tokens to generate."""
|
|
113
111
|
|
|
114
|
-
#
|
|
112
|
+
# Set hash=False to make `AdapterSpec` hashable
|
|
115
113
|
stop_sequences: List[str] = field(default_factory=list, hash=False)
|
|
114
|
+
"""List of stop sequences. Output generation will be stopped if any stop sequence is encountered."""
|
|
116
115
|
|
|
117
116
|
# Random string (used concretely to bypass cache / see diverse results)
|
|
118
117
|
random: Optional[str] = None
|
|
118
|
+
"""Random seed (string), which guarantees reproducibility."""
|
|
119
119
|
|
|
120
|
-
# If true, for instances with multiple correct reference, the gold answer should be considered
|
|
121
|
-
# to be all the correct references rather than any of the correct references.
|
|
122
120
|
multi_label: bool = False
|
|
121
|
+
"""If true, for instances with multiple correct reference, the gold answer should be considered to be all
|
|
122
|
+
of the correct references rather than any of the correct references."""
|
|
123
123
|
|
|
124
|
-
# Parameters for image generation
|
|
125
124
|
image_generation_parameters: Optional[ImageGenerationParameters] = None
|
|
125
|
+
"""Parameters for image generation."""
|
|
126
126
|
|
|
127
|
-
#
|
|
127
|
+
# Set hash=False to make `AdapterSpec` hashable
|
|
128
128
|
eval_splits: Optional[List[str]] = field(default=None, hash=False)
|
|
129
|
+
"""The splits from which evaluation instances will be drawn."""
|