crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
- helm/benchmark/annotation/call_center_annotator.py +247 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +68 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +32 -45
- helm/benchmark/annotation/medication_qa_annotator.py +31 -44
- helm/benchmark/annotation/model_as_judge.py +45 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
- helm/benchmark/annotation/xstest_annotator.py +110 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/safety_metrics.py +57 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/run.py +15 -0
- helm/benchmark/run_expander.py +56 -30
- helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
- helm/benchmark/run_specs/experimental_run_specs.py +52 -0
- helm/benchmark/run_specs/finance_run_specs.py +78 -1
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +92 -21
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +750 -750
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +55 -9
- helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
- helm/benchmark/static/schema_safety.yaml +247 -0
- helm/benchmark/static/schema_tables.yaml +124 -7
- helm/benchmark/static/schema_thai.yaml +21 -0
- helm/benchmark/static/schema_vhelm.yaml +96 -91
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +7 -19
- helm/clients/huggingface_client.py +38 -37
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +2 -3
- helm/clients/palmyra_client.py +25 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/test_client.py +4 -6
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/palmyra_vision_client.py +28 -13
- helm/common/images_utils.py +6 -0
- helm/common/mongo_key_value_store.py +2 -1
- helm/common/request.py +16 -0
- helm/config/model_deployments.yaml +315 -332
- helm/config/model_metadata.yaml +384 -110
- helm/config/tokenizer_configs.yaml +116 -11
- helm/proxy/example_queries.py +14 -21
- helm/proxy/services/server_service.py +1 -2
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/cohere_tokenizer.py +0 -75
- helm/tokenizers/huggingface_tokenizer.py +0 -1
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
- helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
|
@@ -7,15 +7,15 @@ helm/benchmark/data_preprocessor.py,sha256=aNdM-o2t4qkLIQHiQeWUFg03DjjJ8HTBIphYC
|
|
|
7
7
|
helm/benchmark/executor.py,sha256=simd7SdJ7TciUpoq3D0uz_XUSCZj5KIWCIP57FYm4js,4906
|
|
8
8
|
helm/benchmark/huggingface_registration.py,sha256=unEBO21V8K3-Ya0xLqjO9H1oq7RmU-f1MYV0tCIbXzY,4578
|
|
9
9
|
helm/benchmark/model_deployment_registry.py,sha256=BjL0ghHgO7_Z5jZZ7kuSOj9saegI3BivaL-b699C0rc,9527
|
|
10
|
-
helm/benchmark/model_metadata_registry.py,sha256=
|
|
10
|
+
helm/benchmark/model_metadata_registry.py,sha256=m39FqNaGdxP4r7W7Vmq6r-gOLjYtn_5WmRNsGzci6d8,8283
|
|
11
11
|
helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
|
|
12
|
-
helm/benchmark/run.py,sha256=
|
|
13
|
-
helm/benchmark/run_expander.py,sha256=
|
|
12
|
+
helm/benchmark/run.py,sha256=cPJh1Rwit8E_Kjf8Te2D75cd19ag4WgS2YrHHu2Fc8Q,13997
|
|
13
|
+
helm/benchmark/run_expander.py,sha256=YOTYbewbHLi0N7_fM_86Nke4U0wPwdeXLv47_CCVjQw,52659
|
|
14
14
|
helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
|
|
15
15
|
helm/benchmark/run_spec_factory.py,sha256=hp29n_Stb7RMwRm2jrP_qpyzxi8X8ojdqXTFN3KRSiY,6978
|
|
16
16
|
helm/benchmark/runner.py,sha256=zlHDJ2Ys5-HxtXcwpkXcrdfXy_i886fBcq1iNeLyC3Q,14669
|
|
17
17
|
helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
|
|
18
|
-
helm/benchmark/server.py,sha256=
|
|
18
|
+
helm/benchmark/server.py,sha256=kaGpUzBwzprmTDiMcy8-sfT8KfVEOb0wWytWODsAQ94,5925
|
|
19
19
|
helm/benchmark/slurm_jobs.py,sha256=eNCAoaWDfT0Wk32ZJRIGo-x8kgjhDPnPB4Xrvw_eLB0,3225
|
|
20
20
|
helm/benchmark/slurm_runner.py,sha256=Tozimrjr2R6mlKHcmrGgxTy9ga-ArIW6AoAWtxqzw-M,16567
|
|
21
21
|
helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5drG-4Y4UhIM,2219
|
|
@@ -23,7 +23,7 @@ helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCv
|
|
|
23
23
|
helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
|
|
24
24
|
helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
helm/benchmark/adaptation/adapter_spec.py,sha256=K5BwqTe2iimjswdw_SONlJo0xt-T-o5KH7VqxrPaov0,5072
|
|
26
|
-
helm/benchmark/adaptation/common_adapter_specs.py,sha256
|
|
26
|
+
helm/benchmark/adaptation/common_adapter_specs.py,sha256=Er8aMbDi8RTBtGWjcI08E2mRDl5AoBzUaBT1EY38Nlw,10515
|
|
27
27
|
helm/benchmark/adaptation/prompt.py,sha256=n0Ka3RGSWMr3CBnJrPNPy626x9TJE3k677wKbG8hO9A,2133
|
|
28
28
|
helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
|
|
29
29
|
helm/benchmark/adaptation/scenario_state.py,sha256=mWEhgzk18SVoMEuj2pSnc_r9JrGAHLdOlteHJKUMA5k,1961
|
|
@@ -35,12 +35,12 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=F7Aou6r9CZ1xEuAX
|
|
|
35
35
|
helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=BbcBEJjY8Cp58me9sUktd2p3dEVFL8ZJ7RFfus3hSYE,14997
|
|
36
36
|
helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=LhZHmciP8lAfu7T0p634GOPTHrJR7qRCRRIxPgVlW9E,14873
|
|
37
37
|
helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=VJ66MfIGQWJg0VXCV0MJEMwF9Jx1DeJ7RxsgYlOTx_4,1889
|
|
38
|
-
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=
|
|
38
|
+
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=S38Y_MjLRE86LS9RfB4qHmNy5x5n2KyYa4DtA63lees,4402
|
|
39
39
|
helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=aMe-y4iiyEnM7_bqIoddeZBsVLoDxMmjKY2eZKB6Y2Q,2156
|
|
40
40
|
helm/benchmark/adaptation/adapters/test_adapter.py,sha256=0-JrYnogZu4kENQG1eQMXHWnuSurCLRbkLpDuSnfRqs,745
|
|
41
|
-
helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=
|
|
42
|
-
helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=
|
|
43
|
-
helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=
|
|
41
|
+
helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=NyhVTvLznCVMB-DJeX2DRjWx91XmW3FBcrkm0RN-fJU,12766
|
|
42
|
+
helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=BCEhKRVEDKPHsLKhpnIv0krV37a8Eu78r8EtJxH_MXA,7980
|
|
43
|
+
helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=BoozcN0zPWwk6HKEPN0b61ieqwk5y8bwKvr9m8DR_2k,11874
|
|
44
44
|
helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=o7CGClyVWYOuJ4G56-whq5fTvCr7QIn51Mo6DTdvwg0,1881
|
|
46
46
|
helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=bvY8xT2ak_3WG4m2Z5bCM6FLImPIWG1qAn9H2ZNwNv0,6359
|
|
@@ -52,15 +52,22 @@ helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
52
52
|
helm/benchmark/annotation/air_bench_annotator.py,sha256=9W3zLO2f4OzxGdavkDI2dDUStxpExa7sgrI-ATGG7NY,3048
|
|
53
53
|
helm/benchmark/annotation/annotator.py,sha256=2UIXY71S5dRaZBLb1v4lcv8-O6pyJ9zTeSJl78AEWGI,1538
|
|
54
54
|
helm/benchmark/annotation/annotator_factory.py,sha256=3Soh0V3lbsIR_HGHLg-XTc3eKVRj7SL9lLT_AoqUVTs,2997
|
|
55
|
-
helm/benchmark/annotation/
|
|
56
|
-
helm/benchmark/annotation/
|
|
55
|
+
helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=kpnIrydou3THgEFealGZyGneVKxgK5wwQ4kiMbDzJH4,2974
|
|
56
|
+
helm/benchmark/annotation/call_center_annotator.py,sha256=3vHsgJD24PaR4rRTfLD3wvwvbslkQdDHLokggFxijhI,11233
|
|
57
|
+
helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
|
|
58
|
+
helm/benchmark/annotation/harm_bench_annotator.py,sha256=z8EX1F7chOf-sZ93aognaTMmOqQDgWEa4KO0LLSABjM,2853
|
|
59
|
+
helm/benchmark/annotation/live_qa_annotator.py,sha256=I8wfDt8-iLC_C77r7fBjn9jdoXatVc_pJ_2YEWv392M,3474
|
|
60
|
+
helm/benchmark/annotation/medication_qa_annotator.py,sha256=TWjB3BIbBR_jVvrp2kF0PJW2p1U4MoosrSJ-b4QTgXE,3223
|
|
61
|
+
helm/benchmark/annotation/model_as_judge.py,sha256=CffsM05JPZbtLY9xFi1qOuy1JY4Yp-qF_OWrd_YC0yE,1737
|
|
62
|
+
helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=ztqagaM2M0OPKSMCo112_regyr2rDE44zpb0_HESRZs,2699
|
|
57
63
|
helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
|
|
58
64
|
helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
|
|
59
|
-
helm/benchmark/annotation/
|
|
60
|
-
helm/benchmark/annotation/
|
|
61
|
-
helm/benchmark/annotation/
|
|
62
|
-
helm/benchmark/annotation/
|
|
63
|
-
helm/benchmark/annotation/
|
|
65
|
+
helm/benchmark/annotation/xstest_annotator.py,sha256=pW3Dgu77ZoS5hVoapn-FsK3KQOHGHiRLyaKpSqnMRLg,4149
|
|
66
|
+
helm/benchmark/annotation/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
+
helm/benchmark/annotation/image2struct/image_compiler_annotator.py,sha256=eJFm3iyBe_eEN5Yt0G2IpeA1xdKxRmyR4krsNd6eXoE,3524
|
|
68
|
+
helm/benchmark/annotation/image2struct/latex_compiler_annotator.py,sha256=drbxogMMGwGxgVFbhT7hxPGDh7uyhptlmEmeP1Gq2xM,2471
|
|
69
|
+
helm/benchmark/annotation/image2struct/lilypond_compiler_annotator.py,sha256=odIGciLX2oVq_O8_H15lWUZoSfVvY-jRb0ILjs7GCIg,4061
|
|
70
|
+
helm/benchmark/annotation/image2struct/webpage_compiler_annotator.py,sha256=w6RKv7Fz__j_abKXnsTn98kHPv9tWKipdLW3NVT55m8,6389
|
|
64
71
|
helm/benchmark/augmentations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
65
72
|
helm/benchmark/augmentations/cleva_perturbation.py,sha256=arUkY_luc274YEMZocOos9rpAZVbEFZphbMlobAxTy0,29208
|
|
66
73
|
helm/benchmark/augmentations/contraction_expansion_perturbation.py,sha256=yni1UR2fviN0Wig8MpOp0zzLn4H-gYocTjKTpxBwywg,4850
|
|
@@ -92,8 +99,11 @@ helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc
|
|
|
92
99
|
helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
|
|
93
100
|
helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
101
|
helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
|
|
102
|
+
helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
|
|
95
103
|
helm/benchmark/metrics/basic_metrics.py,sha256=7hk5PZL7d09uG1y7wHBhY_ox8hlXw-n7Yt_FDv_AIKw,20375
|
|
96
104
|
helm/benchmark/metrics/bbq_metrics.py,sha256=Dqccr7GdfKNs1S_1QSB75d8AY7moovEPAqvacGfrCAE,6157
|
|
105
|
+
helm/benchmark/metrics/bhasa_metrics.py,sha256=Nw5fdZrYedYUEVJXFFnGSdOBxJ4-99GELd699TBmcSg,6958
|
|
106
|
+
helm/benchmark/metrics/bhasa_metrics_specs.py,sha256=fwXd1fRoeizd4kVQfLZ9ny-PzHTe1ieFKsGesiPDef0,440
|
|
97
107
|
helm/benchmark/metrics/bias_metrics.py,sha256=GQ4CwOk1Sa9g-LcJCxcoQLD1vWY2Hvujck9l-9qsmf4,11418
|
|
98
108
|
helm/benchmark/metrics/bias_word_lists.py,sha256=mx5JjW3mHffXIqo4GcQN-zENUEttBqQnEjPTz3J3J_4,13909
|
|
99
109
|
helm/benchmark/metrics/classification_metrics.py,sha256=uB23jRFzkmtJgs1sTO5pPjdV_mOg35gWubjGS8pynLM,5654
|
|
@@ -101,7 +111,7 @@ helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIef
|
|
|
101
111
|
helm/benchmark/metrics/cleva_harms_metrics.py,sha256=c_x9MYg8WjM1yym1S374GKxH_lwP6wZOiXrknf0mJis,11077
|
|
102
112
|
helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
|
|
103
113
|
helm/benchmark/metrics/code_metrics.py,sha256=e0aqLcxBAdCc0qAqebzK40Ilv2Py6xZbosud5v169x8,5121
|
|
104
|
-
helm/benchmark/metrics/code_metrics_helper.py,sha256=
|
|
114
|
+
helm/benchmark/metrics/code_metrics_helper.py,sha256=UNai154RuhYRZM_YK-rveLct4Ui5iEBNPYmYdKq34Xs,22712
|
|
105
115
|
helm/benchmark/metrics/common_metric_specs.py,sha256=k_IW0A6BevAskS0_C6ZaP9XvIfrdLI974_NhC89rMoo,5846
|
|
106
116
|
helm/benchmark/metrics/copyright_metrics.py,sha256=X9j3YsfzWEoGpgPpIvCzm18-JggLAW5QFooifE1KqaM,7729
|
|
107
117
|
helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=TcyklpfcTMXrpJeaHQfxS9QQxe-gwmT-HD0g_DmIFLQ,3253
|
|
@@ -130,6 +140,7 @@ helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bz
|
|
|
130
140
|
helm/benchmark/metrics/ranking_metrics.py,sha256=5hDRapsxx_cmo-ag_80kOQnrgZn3lfVsLZVtWxuxH-s,17391
|
|
131
141
|
helm/benchmark/metrics/reference_metric.py,sha256=RlIM_PFTEkBo0_EEMq8d4_BSagNSBR_XyovMtjDeqqU,6026
|
|
132
142
|
helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
|
|
143
|
+
helm/benchmark/metrics/safety_metrics.py,sha256=SsVRJXduF4S6C3sOozkOS-0gwy-Ff0Pz9C69jnh3Y-A,2355
|
|
133
144
|
helm/benchmark/metrics/statistic.py,sha256=FuxNxMtAfiCkOxBS9KHlhEyxe61e0YXt2emvsufgPZQ,3424
|
|
134
145
|
helm/benchmark/metrics/summarization_critique_metrics.py,sha256=Lf7PDuce62HDzyofsyxaOvH0QvzcaS-vJvDWtIs8xKk,4694
|
|
135
146
|
helm/benchmark/metrics/summarization_metrics.py,sha256=laLMGRDy1wjcFvgSWXvzOZwBXshkmPr0S2Ofu79Z01Q,16461
|
|
@@ -178,7 +189,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
|
|
|
178
189
|
helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=__f7NVsVQatDFn_2Bfx7ObiQ68kAMvyyClApaTxqx80,649
|
|
179
190
|
helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
|
|
180
191
|
helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
|
-
helm/benchmark/metrics/summac/model_summac.py,sha256=
|
|
192
|
+
helm/benchmark/metrics/summac/model_summac.py,sha256=PJ2lPa-JQPnM86N0T2rPcAviTNHmSV721PTnbL1eGnk,17460
|
|
182
193
|
helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
|
|
183
194
|
helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
195
|
helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=WeNP4yiM4TVrD9Kid-uVRmWIVDqETnBsMycZmIBiTZ0,665
|
|
@@ -187,13 +198,13 @@ helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py,sha256=5igmDhWu7H8-
|
|
|
187
198
|
helm/benchmark/metrics/tokens/free_token_cost_estimator.py,sha256=G_6UK6Js_NZ_eqY0ZQnrC9QJVMERGhV1f6v7xq2lM-Y,461
|
|
188
199
|
helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py,sha256=9zjtuxMbvfPBYuxOYMFEmNP8ZKFDVywrZ08n6nrjbA4,1520
|
|
189
200
|
helm/benchmark/metrics/tokens/openai_token_cost_estimator.py,sha256=7jgjcgmbcVfLA_nTOrWXKIF8TEXng_KnE6cSgsSXWmE,1398
|
|
190
|
-
helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=
|
|
191
|
-
helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=
|
|
201
|
+
helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=eVnCYhRq2LT7F4BXsiIDb1bkmhvoHLgDAdMR73Xz5p8,1071
|
|
202
|
+
helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=h5ggZCGpgCQUjfqS0JS4Bxmx7NBaT4w43pXAgbCEnw4,2628
|
|
192
203
|
helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
|
|
193
204
|
helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
194
205
|
helm/benchmark/metrics/vision_language/emd_utils.py,sha256=KdZdcqu3eo016FdAjAm_83v92-wWuR90EPsTogfTcok,15196
|
|
195
|
-
helm/benchmark/metrics/vision_language/image_metrics.py,sha256=
|
|
196
|
-
helm/benchmark/metrics/vision_language/image_utils.py,sha256=
|
|
206
|
+
helm/benchmark/metrics/vision_language/image_metrics.py,sha256=3fh7vR4J2arFXIT6hLBNdR18PKxQBLPBbVrHWv0hBeA,23551
|
|
207
|
+
helm/benchmark/metrics/vision_language/image_utils.py,sha256=4E0NYh09O6-5sGhAPo6KZqYaZfBpCtuYbD3vLt-wQzk,3755
|
|
197
208
|
helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
198
209
|
helm/benchmark/presentation/contamination.py,sha256=PiIdcaD3-xfExjOmyL5q4Ao2ASa-OlScJAB9u1Zxe7o,2811
|
|
199
210
|
helm/benchmark/presentation/create_plots.py,sha256=2-ZOuEdRwqqF1biRmzWggMZjmODoxOQOBoz9GT7tVww,28737
|
|
@@ -204,31 +215,38 @@ helm/benchmark/presentation/summarize.py,sha256=2fJ9BYOJRxe9eBylLUK3qcZZwAwRtJF_
|
|
|
204
215
|
helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
|
|
205
216
|
helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
|
|
206
217
|
helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
|
|
207
|
-
helm/benchmark/presentation/test_run_entry.py,sha256=
|
|
218
|
+
helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
|
|
208
219
|
helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
|
|
209
220
|
helm/benchmark/presentation/test_summarize.py,sha256=UfSp33Q9xvuGnPYfFmLJdH5y7KWp9qbZprRMyx8LGP0,1618
|
|
210
221
|
helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
211
222
|
helm/benchmark/run_specs/air_bench_run_specs.py,sha256=VdXis1HN8_KLrMHDCVi0J7WdqjRjAGbZMhrsnpzC-Kg,1604
|
|
223
|
+
helm/benchmark/run_specs/bhasa_run_specs.py,sha256=2m5dXJKP0ojdACgvSREiV25SB9T6IL9JeYHYjhL7xX4,23480
|
|
224
|
+
helm/benchmark/run_specs/call_center_run_specs.py,sha256=GX5P2tTj4YS037EEZ8so_mX9LlPWyfJ-pF8ICoErpio,5324
|
|
212
225
|
helm/benchmark/run_specs/classic_run_specs.py,sha256=Cn0z-6QY-ehbLaHJMvCwjw11DFBQgUyqVCaXwTVFyJ8,58331
|
|
213
226
|
helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
|
|
214
|
-
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=
|
|
215
|
-
helm/benchmark/run_specs/experimental_run_specs.py,sha256=
|
|
216
|
-
helm/benchmark/run_specs/finance_run_specs.py,sha256=
|
|
227
|
+
helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
|
|
228
|
+
helm/benchmark/run_specs/experimental_run_specs.py,sha256=wduA6K3mpIRHmr8g3h0c5k7rUsKiPFOqJktdbbGxtoE,2950
|
|
229
|
+
helm/benchmark/run_specs/finance_run_specs.py,sha256=hCaB3uBSlTZbFztdsDqdxuAdYQM20S9m9rXYQITgL5M,4161
|
|
217
230
|
helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
|
|
218
231
|
helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
|
|
219
232
|
helm/benchmark/run_specs/lite_run_specs.py,sha256=ViCPJ86Aah8301GTEk6z4_MtP0g8iik33t4GudobhWQ,11113
|
|
233
|
+
helm/benchmark/run_specs/safety_run_specs.py,sha256=ZTvLbRBxHWMIKPapugNfXPStJRBHfiaiXUHgpWMBONY,5469
|
|
220
234
|
helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
|
|
221
235
|
helm/benchmark/run_specs/unitxt_run_specs.py,sha256=ejp_knrcIjf0J4WiKj9LTgDTcUr29-XFZYHYz0w_dkM,1518
|
|
222
|
-
helm/benchmark/run_specs/vlm_run_specs.py,sha256=
|
|
236
|
+
helm/benchmark/run_specs/vlm_run_specs.py,sha256=A-e3npwbqvUEHvC9iGta9N1zFCHfoP8C1_vWBVLf8ns,34134
|
|
223
237
|
helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
224
238
|
helm/benchmark/scenarios/air_bench_scenario.py,sha256=WUZvsUTqlsjNzQsd2baZZIgO30B4Zf3g0QjsyEaGmLc,1772
|
|
225
239
|
helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=Wyt7J5BAvAqC5JTqCW4fh7ex9-itX11P_9rLTocqvtk,4973
|
|
240
|
+
helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=Ic0ak_5vGHeNT5PFgOptl-Ns8nQuM5nKpiQlhB1H3X0,3158
|
|
226
241
|
helm/benchmark/scenarios/babi_qa_scenario.py,sha256=S1tPQY2x1I3hQL1JQ6wvUwvKyiSe7SqpRSW6N3_T0mo,5043
|
|
242
|
+
helm/benchmark/scenarios/banking77_scenario.py,sha256=pVA2LXB9uJ12GnjiEvjhRV-P8YNEjpFhyZr-J8MV2SA,1747
|
|
227
243
|
helm/benchmark/scenarios/bbq_scenario.py,sha256=lT1XKSM-PXYtENI-ryScC4yb1TtII7YoH8kt_S1dZQo,9579
|
|
244
|
+
helm/benchmark/scenarios/bhasa_scenario.py,sha256=N7SYVwUOLAD_WZtkIYoCnPuRb_nFbIege-5_j4yX6nQ,70915
|
|
228
245
|
helm/benchmark/scenarios/big_bench_scenario.py,sha256=bSk8Ia4u_6OqMjiyadpYQAWN-8GFWqvd3Ft3JiVGpi8,8081
|
|
229
246
|
helm/benchmark/scenarios/blimp_scenario.py,sha256=o1MDcHT14KFDET4K9otx8pDiIgXrhsD19pvO0mR2ADU,6260
|
|
230
247
|
helm/benchmark/scenarios/bold_scenario.py,sha256=NEfECMVzlVP_yo6sOuIzj6vZ5jd72_nvtEQ1lWrq85Q,4106
|
|
231
248
|
helm/benchmark/scenarios/boolq_scenario.py,sha256=rvSp5SwXMCVzBo5BFxfhj1Xv06_ksqKrtTQR7nPiS-o,8013
|
|
249
|
+
helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
|
|
232
250
|
helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
|
|
233
251
|
helm/benchmark/scenarios/civil_comments_scenario.py,sha256=VO5G-cQ9qctmBN0O76uSewnO_mFslMo5mbR2ZTrjuds,4851
|
|
234
252
|
helm/benchmark/scenarios/cleva_scenario.py,sha256=xhwZ616iz0CN3fYIfrXHcV1XlcRQjyPSzML8fq8D3l4,57939
|
|
@@ -245,7 +263,7 @@ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENO
|
|
|
245
263
|
helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
|
|
246
264
|
helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
|
|
247
265
|
helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=goGmHtN7MYnAQIXhffZZhuuuMWN0gHNOXyI9_injiZM,20119
|
|
248
|
-
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=
|
|
266
|
+
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=Qkwhg1s5f2_5rnCoX4BxjQGKKGVRp2StIwONvBjJVqo,2909
|
|
249
267
|
helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=AI8HX16_Lw9MKqrck62q8IFLUU-P5hxaOEHcmTS4rdA,2928
|
|
250
268
|
helm/benchmark/scenarios/dialogue_scenarios.py,sha256=-I7FY6q1b11zpFd1_oAgar5qlfaFcXsNCKGVln9etPI,5629
|
|
251
269
|
helm/benchmark/scenarios/disinformation_scenario.py,sha256=kQi0MVVoSDhx2vOTnUaCIttPXMf8zz7Eld2FD_77tnA,8504
|
|
@@ -253,10 +271,13 @@ helm/benchmark/scenarios/dyck_language_scenario.py,sha256=vMxND9wPJenrGlCLhSw5Ux
|
|
|
253
271
|
helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4cv7u2lmUFcigkAX_eMwIn49Pa3p-aHClkT-r-0roLU,6616
|
|
254
272
|
helm/benchmark/scenarios/entity_matching_scenario.py,sha256=YjBX61TlL3CDQ3X6D-JyR-qlOYGLdoRXJxl9AEeqxYs,7022
|
|
255
273
|
helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
|
|
256
|
-
helm/benchmark/scenarios/
|
|
274
|
+
helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
|
|
275
|
+
helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
|
|
276
|
+
helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
|
|
257
277
|
helm/benchmark/scenarios/grammar.py,sha256=Pb9vEP_0Ki87UdQCj1ym7QWJ24M4DRP6TXB5d3GnhLs,5597
|
|
258
278
|
helm/benchmark/scenarios/grammar_scenario.py,sha256=bl-Cm9caDs077zSu38mzaS9maZ2gM-QazgjOEMFvxYg,1454
|
|
259
279
|
helm/benchmark/scenarios/gsm_scenario.py,sha256=9fV2SEw3ocKNAD-TrDZZTpq4l7mbttQQWbO0YNz4e6k,2613
|
|
280
|
+
helm/benchmark/scenarios/harm_bench_scenario.py,sha256=wzzia3HlfwALgRLFLABv3blxBh1ras-YtHk4iQ_EX30,2454
|
|
260
281
|
helm/benchmark/scenarios/ice_scenario.py,sha256=vvk11cFPGUhg_CcGh3wEfVsGzrvMFgkByN-xcF-OOjI,16473
|
|
261
282
|
helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
|
|
262
283
|
helm/benchmark/scenarios/imdb_scenario.py,sha256=X1k76AweFECCpYCXy8HuvjRbXbfmDfwK3SES_t_wkUs,6174
|
|
@@ -290,19 +311,28 @@ helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=zVL1gb3eVz-LbK2hfdnRR9ItaM
|
|
|
290
311
|
helm/benchmark/scenarios/quac_scenario.py,sha256=SRAhMp6TAsmTRq6VRONLl3SEayFIe23He_mBhzkZ7qM,6628
|
|
291
312
|
helm/benchmark/scenarios/raft_scenario.py,sha256=_5QhHS3opxxML7Rek6F-q5NVOf0M2UgbC6OTnQZ4C1U,4452
|
|
292
313
|
helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=GkgJo_13MWQQQTZbhlknvTR6ZrYr7NEn1WdMZrPs4y4,2400
|
|
293
|
-
helm/benchmark/scenarios/scenario.py,sha256=
|
|
314
|
+
helm/benchmark/scenarios/scenario.py,sha256=1HC8EjiZ-5k5AJhxtwRreLe3hBbTyZJWrs-Aa3Uq43Q,8229
|
|
294
315
|
helm/benchmark/scenarios/self_instruct_scenario.py,sha256=jZ2MksT4N_4g_sp5egw7ycrsM-Ya786_RFmiYYdMvG8,2285
|
|
316
|
+
helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=grYOqccYBtB4m-_UUV20EOXsY6tkukwC6kwPOBAmdnY,1223
|
|
295
317
|
helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
|
|
296
318
|
helm/benchmark/scenarios/summarization_scenario.py,sha256=MlNMgsY369DC04nhMUdG2o9Ydi6yze1fGOjC0bK-UwQ,6847
|
|
297
319
|
helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=pzifpsJJbucmTjujNqQnwQa4Y7wpQjkS6QjNXOrgTAQ,3096
|
|
298
320
|
helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=1b3e3WpFMNBV3li17-0Ug6QCSKO4qRFaWDF23bYNsvQ,16326
|
|
299
321
|
helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=k8IGK6VABOr6wuha4HynP47peoAkmIViAVhScOtCANo,8345
|
|
300
322
|
helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
|
|
323
|
+
helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
|
|
324
|
+
helm/benchmark/scenarios/test_ewok_scenario.py,sha256=9piplj3i53_-xNSMkIN47JYEU3JB65WgEPT7qdyK4Ng,953
|
|
325
|
+
helm/benchmark/scenarios/test_financebench_scenario.py,sha256=EFZLJXXBoyjlTiMQFaQ6MiYkve1lfQDjQWjn4BjqgAQ,1184
|
|
301
326
|
helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
|
|
302
|
-
helm/benchmark/scenarios/
|
|
327
|
+
helm/benchmark/scenarios/test_gsm_scenario.py,sha256=I-Sl8Sg8kmFd7u0zZbwbNmeFV1mQLuOHoQ1cQDDwovs,1123
|
|
328
|
+
helm/benchmark/scenarios/test_legalbench_scenario.py,sha256=FqbgwBAhHWyTIUYSzI5FOnTDx0A3u1o2ANKa_6bfA4g,1212
|
|
329
|
+
helm/benchmark/scenarios/test_math_scenario.py,sha256=ieI8-c6yx-3U3iaEz2yiCGSwnQTBJE_06-dMKX7a8Vk,723
|
|
330
|
+
helm/benchmark/scenarios/test_med_qa_scenario.py,sha256=Ekp6r5eYPkCxV3FCzVvLemKxlhENhelqdO0Mdhg5yFo,1515
|
|
331
|
+
helm/benchmark/scenarios/test_mmlu_scenario.py,sha256=mxEsTydKUOt8OD1Ei82nPgUFV1Tlvu5Z6drEMToEURM,1593
|
|
332
|
+
helm/benchmark/scenarios/test_narrativeqa_scenario.py,sha256=Rac_OrUpd2ruT95YvSrmoVz2Jpycgq3Roiyogm_0aAc,6420
|
|
303
333
|
helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
|
|
304
334
|
helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsHzMZSzOe8j0FrUQmMyZM4,1736
|
|
305
|
-
helm/benchmark/scenarios/thai_exam_scenario.py,sha256=
|
|
335
|
+
helm/benchmark/scenarios/thai_exam_scenario.py,sha256=5Q-KL6fVrk2FKApVyY2ulreFduwBaUG0iJOsJ8M6El8,6008
|
|
306
336
|
helm/benchmark/scenarios/the_pile_scenario.py,sha256=RqU8yXQJ4FkmEc6rO9J3QMXenyUZrsEZlLAQUx4-Wnk,4995
|
|
307
337
|
helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=iqL-tuqUQZjF9-DKAnI6wV-oLGC_I3aFuofdtJYHt8E,6035
|
|
308
338
|
helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=CRlPxVfkg3HPZV-lUMyCUSFOiAqg5IIPt-dq3qR9LU4,2096
|
|
@@ -312,6 +342,7 @@ helm/benchmark/scenarios/vicuna_scenario.py,sha256=zLwLuEr6n9VQjVxQwgFIM-os23kJe
|
|
|
312
342
|
helm/benchmark/scenarios/wikifact_scenario.py,sha256=cOVKgDisBdjPcmVMCLhTekdgX3hpDJUT-aKbvRSaMoM,5791
|
|
313
343
|
helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=PAPkmZdC4aIBQ1k29dDvTFBEFaPV1ZR1Ifif4FHoZqs,3087
|
|
314
344
|
helm/benchmark/scenarios/wmt_14_scenario.py,sha256=NArkTZntYdYlegHo_-fkzeyCUOjosOONQKlquPbZRxY,4498
|
|
345
|
+
helm/benchmark/scenarios/xstest_scenario.py,sha256=wpagohfuFE1juuXjq0dleSIHr5Uk6hnClIv-wABbzEI,1285
|
|
315
346
|
helm/benchmark/scenarios/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
316
347
|
helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py,sha256=c8zcoGCOFqBGE4TAEx1uLsUmGXw_jIS8alI99ubGeDA,5477
|
|
317
348
|
helm/benchmark/scenarios/image_generation/cub200_scenario.py,sha256=7p3G4mJRc8QHR4Mw2GLsfAFuJcEe6OeZbezVhbyc55E,4103
|
|
@@ -332,99 +363,84 @@ helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,s
|
|
|
332
363
|
helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
|
|
333
364
|
helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
|
|
334
365
|
helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
335
|
-
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=
|
|
336
|
-
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=
|
|
337
|
-
helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=
|
|
338
|
-
helm/benchmark/scenarios/vision_language/
|
|
339
|
-
helm/benchmark/scenarios/vision_language/
|
|
340
|
-
helm/benchmark/scenarios/vision_language/
|
|
366
|
+
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
|
|
367
|
+
helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
|
|
368
|
+
helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=lfRHjhhXCo0YeDQe4_gfSHCzVKtqQVZ6DALLABcCmtI,4637
|
|
369
|
+
helm/benchmark/scenarios/vision_language/exams_v_scenario.py,sha256=pLD--gtL5q7jLSWQ8iwAdsiOrTJ_rBsLbwWMWKRhPbs,3853
|
|
370
|
+
helm/benchmark/scenarios/vision_language/fair_face_scenario.py,sha256=V6_1Kl2nWDRyHvwnKcSxkP0DChzKDBW0i_-t9oAxps0,4721
|
|
371
|
+
helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=CDutFh1PHLyeMdJ9HojzYKE1zJidL9ktcsfn9uHNLZY,2612
|
|
372
|
+
helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=k4E6JAN8a_KT1jjV2Ch3K5YhWKJ0f-9iCXLO-_2Xl8M,3535
|
|
373
|
+
helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=qiLLdiSzhnSyjmqCAvMxjhcZ7yBiX37L1cdsZvHL4ds,3845
|
|
341
374
|
helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py,sha256=7GK_jAOfCgRIGiN_GInDePwuT2wZqmWHp1rqdx18xQg,4994
|
|
342
|
-
helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=
|
|
343
|
-
helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=
|
|
344
|
-
helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=
|
|
345
|
-
helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=
|
|
346
|
-
helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=
|
|
375
|
+
helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=HnzA0L1Mm9rw9uyK-hnCGrxo33z_U_86TLnlELjDV6E,4738
|
|
376
|
+
helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=7ZHpRD7TdQQ-Mp5XQV5yyiLUE0k1KpgbLSYKLBJMxs0,4343
|
|
377
|
+
helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=cM7eTE4bpcIzLyEDye86Ud3rD4Id-0ju73EXjg0DYoI,4340
|
|
378
|
+
helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=7Aa3y0TWGZH3QrPDiqIMkj83LU2Klrzgcb46jv5uytY,5498
|
|
379
|
+
helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=deDMdg2-ORZPV623ngncDPlRn6z6cq_QbQtMu-z0Ydo,7665
|
|
347
380
|
helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HUO09uM2rBXOfCsxzwovmwtihq53xjuzDOtQO_S3J4I,4161
|
|
348
381
|
helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
|
|
349
382
|
helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
|
|
350
383
|
helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
|
|
351
|
-
helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=
|
|
352
|
-
helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=
|
|
353
|
-
helm/benchmark/scenarios/vision_language/
|
|
354
|
-
helm/benchmark/scenarios/vision_language/
|
|
355
|
-
helm/benchmark/scenarios/vision_language/
|
|
356
|
-
helm/benchmark/scenarios/vision_language/
|
|
357
|
-
helm/benchmark/scenarios/vision_language/
|
|
358
|
-
helm/benchmark/scenarios/vision_language/
|
|
359
|
-
helm/benchmark/scenarios/vision_language/
|
|
360
|
-
helm/benchmark/scenarios/vision_language/
|
|
361
|
-
helm/benchmark/scenarios/vision_language/
|
|
362
|
-
helm/benchmark/scenarios/vision_language/
|
|
363
|
-
helm/benchmark/scenarios/vision_language/
|
|
364
|
-
helm/benchmark/scenarios/vision_language/
|
|
365
|
-
helm/benchmark/scenarios/vision_language/
|
|
366
|
-
helm/benchmark/scenarios/vision_language/
|
|
367
|
-
helm/benchmark/scenarios/vision_language/
|
|
368
|
-
helm/benchmark/scenarios/vision_language/
|
|
369
|
-
helm/benchmark/
|
|
370
|
-
helm/benchmark/static/benchmarking.js,sha256=lqEmoAikBwycVBf1h-et3ZmHKW_DcwxzlwmDez2A1EU,54531
|
|
371
|
-
helm/benchmark/static/config.js,sha256=kIfkgr6gaMdFOAdqB35EvuBohq0DWYSQZbe_pTK09VM,103
|
|
384
|
+
helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=D3nNu3uU87eMDiMZZafuRTntXjwbqPaSDygUgQm45F8,9943
|
|
385
|
+
helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=gWrBG5U8uoU92JPGNm5kuzo1GekoJo1rKQaNhv6MYGA,3996
|
|
386
|
+
helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py,sha256=OJtiGhSN_KYgEz0VGXjCjQik_Xihtgiali70Z00XOzk,2083
|
|
387
|
+
helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=YNwuIMJBo7wwftx-T5tCYmGo2oy_794fZ330lkDyqb0,5171
|
|
388
|
+
helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=DxGZ7EL22SzxpAkuiA5twuGVTm96wG_RBg3dU3Vh_c4,4241
|
|
389
|
+
helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=wRa_OuOdyf-qcy9hml-Kj6YtVP5MDzeTbGcqva6LqdA,3707
|
|
390
|
+
helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=zCnkiSya-PHc3ywAhmw03bFdsvLCxAUwGfE6OviEXDQ,4153
|
|
391
|
+
helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=cC8_Vyqw2f4K4hJY-eo9ptj6ANfWgiFAK7b6OOTIPLI,5239
|
|
392
|
+
helm/benchmark/scenarios/vision_language/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
393
|
+
helm/benchmark/scenarios/vision_language/image2struct/chart2csv_scenario.py,sha256=qcs3o9dPsXoeaP0bu9UVZ6P0GPEcRLoaqABxysLN6VY,1802
|
|
394
|
+
helm/benchmark/scenarios/vision_language/image2struct/image2struct_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
|
|
395
|
+
helm/benchmark/scenarios/vision_language/image2struct/latex_scenario.py,sha256=SnZuHATg5i764MAdgaGwjIGdjCZNrOqP83Y5jE_fkHs,1153
|
|
396
|
+
helm/benchmark/scenarios/vision_language/image2struct/musicsheet_scenario.py,sha256=c08cquz2IALY7PlpOoEfAjupKZmn5GDVZ1H8Gbj4r8s,831
|
|
397
|
+
helm/benchmark/scenarios/vision_language/image2struct/utils_latex.py,sha256=jW3_c63a6u39PJGJw6lM9pIa3dnF8CQgZlPNZdH0sfs,15001
|
|
398
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage_scenario.py,sha256=DJQIa8NaKV-nhkXEBuY97MJ8a1O3x-Yr6hACVa-67Ns,11117
|
|
399
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
400
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/driver.py,sha256=WBFbb3N_eHIa7OFvHQS3Pmwbmkl6r9VyobxlIEKhty8,2823
|
|
401
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
|
|
402
|
+
helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
|
|
372
403
|
helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
|
|
373
|
-
helm/benchmark/static/
|
|
374
|
-
helm/benchmark/static/
|
|
375
|
-
helm/benchmark/static/
|
|
376
|
-
helm/benchmark/static/json-urls.js,sha256=AaULgfHw8OLfrQLJpBHfcC013uavQnlNNFS9vzb0qOg,1981
|
|
377
|
-
helm/benchmark/static/plot-captions.js,sha256=bTR8gYx-QqF_RJyKX-L-eQP7hSEtawfJSoADCvgjKag,3011
|
|
378
|
-
helm/benchmark/static/schema_air_bench.yaml,sha256=ePZAGL4X-yH4cAQvzS5uU44duCKwdDrMwDSvCC9y7-k,139384
|
|
404
|
+
helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
|
|
405
|
+
helm/benchmark/static/schema_bhasa.yaml,sha256=R3f48oqk9Va8rtSe9B93K_rCy_IfAhHZdTh4vNDdsOY,27444
|
|
406
|
+
helm/benchmark/static/schema_call_center.yaml,sha256=Mt7_rLG6IT701YrjiJdNb7HpoMVkFjabrawnBieUUhM,8049
|
|
379
407
|
helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
|
|
380
|
-
helm/benchmark/static/
|
|
381
|
-
helm/benchmark/static/
|
|
408
|
+
helm/benchmark/static/schema_cleva.yaml,sha256=TDh-zcCzzTTs7bu0IWlY5dXYaTFhxly8sJIBGQdBvug,25401
|
|
409
|
+
helm/benchmark/static/schema_decodingtrust.yaml,sha256=2VPxzcyKYea7mx-qmswyVRjPfVatjVH4Rs3OU82mgII,15670
|
|
410
|
+
helm/benchmark/static/schema_ewok.yaml,sha256=MluPnZSy22wZLFB2pR7ycBRgUSvIUsqvq4qM0Vk2ur4,12113
|
|
411
|
+
helm/benchmark/static/schema_finance.yaml,sha256=OgsYMSFK__8ZZS96ktsgVRfM40-BhbOY15j9OlV-rNE,7010
|
|
412
|
+
helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0yPsYgu_MB7uu5pwHE,19894
|
|
382
413
|
helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
|
|
383
414
|
helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
|
|
384
415
|
helm/benchmark/static/schema_medical.yaml,sha256=hDk4834FKn-5cMr6pHcu1P60sh6cXJ2J0Z1ADIj2MSc,8455
|
|
385
416
|
helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
|
|
386
|
-
helm/benchmark/static/
|
|
387
|
-
helm/benchmark/static/
|
|
417
|
+
helm/benchmark/static/schema_safety.yaml,sha256=LEGt9EuwjHZX-oLVrBQushbL4YUQmIYpHCjlauK_tGQ,8099
|
|
418
|
+
helm/benchmark/static/schema_tables.yaml,sha256=PSk00UHgbMZA8xnAVE6ka2a-py_4rX7VDdodjYBqe-4,10400
|
|
419
|
+
helm/benchmark/static/schema_thai.yaml,sha256=yJUrevvgTJ46TpyXfNecW_B9urh7LPwSbBi_mT4ZngA,8348
|
|
388
420
|
helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
|
|
389
|
-
helm/benchmark/static/schema_vhelm.yaml,sha256=
|
|
421
|
+
helm/benchmark/static/schema_vhelm.yaml,sha256=ryxslQJZun-HqM9ib4rp3_dBVufa01jgdo1bsHccYSk,29943
|
|
390
422
|
helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
|
|
391
|
-
helm/benchmark/static/utils.js,sha256=bgN0PT53Dregc-nLmEmAEmg2psufWpS8jTf74WoypHw,7681
|
|
392
|
-
helm/benchmark/static/images/crfm-logo.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
|
|
393
|
-
helm/benchmark/static/images/helm-logo-simple.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
394
|
-
helm/benchmark/static/images/helm-logo.png,sha256=GTqbrxJr0oQXbBRq-8v6afY5zB5x0M6PhEbKRIX9qIE,280667
|
|
395
|
-
helm/benchmark/static/images/language-model-helm.png,sha256=mG0-bkdziXeiF0wOGd67y2jnYmVKJYqhD2N5Q8VIF8Q,26563
|
|
396
|
-
helm/benchmark/static/images/scenarios-by-metrics.png,sha256=F7g9mvIYopm-n7sDGg-7I0XCyZvloKsi2wIq1i6da_Q,51331
|
|
397
|
-
helm/benchmark/static/images/taxonomy-scenarios.png,sha256=2MiuCLaxnuHvwsWWJHnZFc-rvoQIi_tNIjDatY7I-Dg,100766
|
|
398
|
-
helm/benchmark/static/images/organizations/ai21.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
|
|
399
|
-
helm/benchmark/static/images/organizations/anthropic.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
|
|
400
|
-
helm/benchmark/static/images/organizations/bigscience.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
|
|
401
|
-
helm/benchmark/static/images/organizations/cohere.png,sha256=7cr4LI8WK9yPryQboyWK_T5baSND-d-tVrlPNflLQMg,8757
|
|
402
|
-
helm/benchmark/static/images/organizations/eleutherai.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
|
|
403
|
-
helm/benchmark/static/images/organizations/google.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
|
|
404
|
-
helm/benchmark/static/images/organizations/meta.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
|
|
405
|
-
helm/benchmark/static/images/organizations/microsoft.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
|
|
406
|
-
helm/benchmark/static/images/organizations/nvidia.png,sha256=hvp1wZMwYxkfrVMvJs73PX71JwY5L8ZvxIH_fL4n6Po,27945
|
|
407
|
-
helm/benchmark/static/images/organizations/openai.png,sha256=P4ZT5ISIlt6Dl0mOp7juSM4Y7dfyRNPqdc0PJuwNoqg,16877
|
|
408
|
-
helm/benchmark/static/images/organizations/together.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
|
|
409
|
-
helm/benchmark/static/images/organizations/tsinghua-keg.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
|
|
410
|
-
helm/benchmark/static/images/organizations/yandex.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
|
|
411
423
|
helm/benchmark/static_build/config.js,sha256=ER8utDIqVZi9uge7Qrk1gmlT88TOOkFF9xYp3j10m8U,165
|
|
412
|
-
helm/benchmark/static_build/index.html,sha256=
|
|
424
|
+
helm/benchmark/static_build/index.html,sha256=YHWao7kJaMx9osFxRgfuCDxu-FwaBOWDhUcaAEVe7-0,1149
|
|
413
425
|
helm/benchmark/static_build/assets/01-694cb9b7.png,sha256=aUy5t0DYCg4r52HDOmeNi1S2CHsnv3mE7ySokJg3Ouo,8903
|
|
426
|
+
helm/benchmark/static_build/assets/accenture-6f97eeda.png,sha256=b5fu2p7L_mnwg-p5jjPk1sFRwJEBRtGwXsVyQU_Runk,9537
|
|
414
427
|
helm/benchmark/static_build/assets/ai21-0eb91ec3.png,sha256=Drkew6Vlwi2_4_S8hjagK2x8smOwLKTNiXIT3rDiurs,10208
|
|
415
428
|
helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
|
|
429
|
+
helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png,sha256=bfyazxJvVs5GTSSlnm6nOb2r_jzo3TJybqF04S5Dxhw,69372
|
|
416
430
|
helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png,sha256=fOEANHS8RymKaCzUWn9gQWebts2ghSmtW9Fdda_TjR8,7224
|
|
417
431
|
helm/benchmark/static_build/assets/anthropic-70d8bc39.png,sha256=cNi8OdIshIIb8PdodcX8mAj-khaUD0O6nhah-_6nYfs,8017
|
|
418
432
|
helm/benchmark/static_build/assets/bigscience-7f0400c0.png,sha256=fwQAwN1x2Fr_ztD_HZdcOkdFcyxuDjtS3B5-VuRNkuc,19036
|
|
419
433
|
helm/benchmark/static_build/assets/cohere-3550c6cb.png,sha256=NVDGy09xliCqZy2TKUAka-B90jVDB_VRCS9A2_sN7VU,4414
|
|
434
|
+
helm/benchmark/static_build/assets/cresta-9e22b983.png,sha256=niK5g8HYADkbhKM9gSVtYEdPegBS40zZXF4nNe9Fu4o,8131
|
|
420
435
|
helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
|
|
436
|
+
helm/benchmark/static_build/assets/cuhk-8c5631e9.png,sha256=jFYx6Xx-SGYANpsSnqrlaQytYuOBOsTHhpqPJZk3EwE,30385
|
|
421
437
|
helm/benchmark/static_build/assets/eleutherai-b9451114.png,sha256=uUURFF8YWY85mwGoKVEjArO5DUBCy4es5naCXsBzn6c,4526
|
|
422
438
|
helm/benchmark/static_build/assets/google-06d997ad.png,sha256=BtmXrVQZHr3WH5c8c23ent2FO8aPWeNwO8czl22lDCo,4914
|
|
423
439
|
helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
|
|
424
440
|
helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
|
|
425
441
|
helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
|
|
426
|
-
helm/benchmark/static_build/assets/index-
|
|
427
|
-
helm/benchmark/static_build/assets/index-
|
|
442
|
+
helm/benchmark/static_build/assets/index-05c76bb1.css,sha256=BcdrsQgUFadqYf5z-wdFNosV_c2MlxV8xktld2BFKBk,489017
|
|
443
|
+
helm/benchmark/static_build/assets/index-58f97dcd.js,sha256=XJY99lqQJAVIYis7oEhi6Hl4drYXcG2WDGUCAGX1YVg,91191
|
|
428
444
|
helm/benchmark/static_build/assets/meta-5580e9f1.png,sha256=VYDp8arkAe2eYRJhAOcIAsZY1qY0hqyOEQDgVMbX9M8,4646
|
|
429
445
|
helm/benchmark/static_build/assets/microsoft-f5ee5016.png,sha256=9e5QFl23yTbnAk8u7lZKaQOf4oPHbr_aiQda5n4MZqE,50850
|
|
430
446
|
helm/benchmark/static_build/assets/mistral-18e1be23.png,sha256=GOG-Ix7XlctGOUmvJfO2oVSBM7E5O562G88OnoxsjBw,14402
|
|
@@ -434,33 +450,29 @@ helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUI
|
|
|
434
450
|
helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
|
|
435
451
|
helm/benchmark/static_build/assets/react-d4a0b69b.js,sha256=rNTpl8Is3LkYXqJowRMc8vc4SXQwP94Ozy4DZZWwldU,275141
|
|
436
452
|
helm/benchmark/static_build/assets/recharts-6d337683.js,sha256=rDrVmtTCCSLY2hpcxSDxhlQ6CQmTTSQOESNeO3oVQgg,432466
|
|
453
|
+
helm/benchmark/static_build/assets/scb10x-204bd786.png,sha256=IEvXhlxgBA9NCH4RrGWJkMx0Yc7V9EK6o7vrAI5KZCE,4990
|
|
437
454
|
helm/benchmark/static_build/assets/tii-24de195c.png,sha256=JN4ZXAa0rbR2IlxPfd_mKtntFZcYpDcXocSiqrC2rNg,63389
|
|
438
455
|
helm/benchmark/static_build/assets/together-a665a35b.png,sha256=pmWjW4r7GnlKqFhKLPTiBeILiOighL3XzcSCsxWtB7U,48053
|
|
439
456
|
helm/benchmark/static_build/assets/tremor-54a99cc4.js,sha256=x_K5Bp7szI2zsvESrKqffUOHbm8ohjjvuoIeY_yD_CA,293015
|
|
440
457
|
helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png,sha256=l9SzlZCsLF18BY876wYJcVgiQbgvwte7uoILPDcVwHk,7776
|
|
441
458
|
helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png,sha256=zedhimhku2Q3QIvaRSYlUAQ0b5ia9pU4cFzKnABfr4c,118544
|
|
442
459
|
helm/benchmark/static_build/assets/vhelm-model-6d812526.png,sha256=bYElJoVkSaMJ_lFZj5qoSrIbygbNyBk35q89jtFRet8,168494
|
|
460
|
+
helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png,sha256=qGpsSjEu7HFlPAk_zXuUEdDqj0wkCfFHA1bCtu8Ugdw,8531
|
|
443
461
|
helm/benchmark/static_build/assets/yandex-38e09d70.png,sha256=OOCdcKubAP4x7h4VW7z5a-AHPWBiSDTjsIJea6ZiovA,27964
|
|
444
462
|
helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
445
|
-
helm/benchmark/window_services/ai21_window_service.py,sha256=1ZDLJv73bxoLj_MzEBu4TgH5xHw-hx0nI6KX6RU73kE,12593
|
|
446
|
-
helm/benchmark/window_services/cohere_window_service.py,sha256=5jm8o5ZYrbDUluA5LbMWLOOrOlIuHR7MhAJkOuzBagM,4750
|
|
447
463
|
helm/benchmark/window_services/default_window_service.py,sha256=F099qF-YeM7YPVtph0dRFPry5vP8_BiudHTy2CpuICQ,151
|
|
448
464
|
helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=EU3QevFOiQYBN2te54FsVRnGYZdgDxK6KqOWSQOa0q4,2125
|
|
449
465
|
helm/benchmark/window_services/ice_window_service.py,sha256=9NeBN_tmOvwrK1miUnX3wJA70BP5ifIIeHpNR2gVwls,1070
|
|
450
466
|
helm/benchmark/window_services/local_window_service.py,sha256=e9JHG72kFWlV6UKg_IhRCipOWQUrOD6ZjsT-_Mwewps,5232
|
|
451
467
|
helm/benchmark/window_services/no_decoding_window_service.py,sha256=s_i_cqIuU9p0GDRIBApaOHzjH7gHrBPTJ2X5NEcN33Y,1375
|
|
452
|
-
helm/benchmark/window_services/test_ai21_window_service.py,sha256=HkpNSaJAClZfaa-bQZ2BrRm1UB_u4sLAGSBlGQqRUD4,8221
|
|
453
468
|
helm/benchmark/window_services/test_anthropic_window_service.py,sha256=lnxLiW5BPaWN6m03L93qCFugsxnVBbLmYPCarlrO-So,4196
|
|
454
469
|
helm/benchmark/window_services/test_bloom_window_service.py,sha256=x7WBh0S223ABC9KvL2-y9G-cUxFUPm6oIkqvYO_4mt8,4288
|
|
455
|
-
helm/benchmark/window_services/test_cohere_window_service.py,sha256=rKXnw2E7MLAtkLgtrUvnZuQp99_agDO4qcpb3daik-E,3348
|
|
456
|
-
helm/benchmark/window_services/test_cohere_window_service_utils.py,sha256=sf25f9MeXzoqsbDzZ7d7le13hm8RkDe54nhLtKF2pqo,158150
|
|
457
470
|
helm/benchmark/window_services/test_flan_t5_window_service.py,sha256=IhQMWBq2d39O3uNKGwbaMWJkz8585Zc-J_yqvPJfwu4,695
|
|
458
471
|
helm/benchmark/window_services/test_gpt2_window_service.py,sha256=2UHKt4Wmh6XmSCdepjuMbZHFpb1oUcrKRSxcdOzBE1s,2671
|
|
459
472
|
helm/benchmark/window_services/test_gpt4_window_service.py,sha256=tV5WdpxYxewchEp1rnsIlEfdJFrHVFKYQ-_8NhGK2yo,1052
|
|
460
473
|
helm/benchmark/window_services/test_gptj_window_service.py,sha256=0lu4Os_3x3N-AbejG3LZ3-_ikxEHg1Lbmfq-Pzg_D9Y,2374
|
|
461
474
|
helm/benchmark/window_services/test_gptneox_window_service.py,sha256=8CaOW_ln9bxKA4--dVLfLdsASo6RrR7ouP6EcSruzdA,4210
|
|
462
|
-
helm/benchmark/window_services/
|
|
463
|
-
helm/benchmark/window_services/test_openai_window_service.py,sha256=W_QJKaMgzYU7qGFuSS6JeM_f50UX0SuHpkH-u2bEvI4,2312
|
|
475
|
+
helm/benchmark/window_services/test_openai_window_service.py,sha256=Mt-dDtjQmz25n7hwNVyy1T_rl0TMvcvJfuhWNe_AvSw,2314
|
|
464
476
|
helm/benchmark/window_services/test_opt_window_service.py,sha256=Gh1GzWnlgYIGwDNBw4EnHds3fXwMaSjzkfFXeLn47os,4215
|
|
465
477
|
helm/benchmark/window_services/test_palmyra_window_service.py,sha256=yy7D2C0ZzExCbptYNsEI9zuX2AEGsEUTj0a_vbqub4o,4212
|
|
466
478
|
helm/benchmark/window_services/test_t0pp_window_service.py,sha256=pvp55FyqjunkDpHVAhPup3h-iNkepQpxyr4nC87-5iY,3998
|
|
@@ -479,10 +491,10 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
|
|
|
479
491
|
helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
|
|
480
492
|
helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
|
|
481
493
|
helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
482
|
-
helm/clients/ai21_client.py,sha256=
|
|
494
|
+
helm/clients/ai21_client.py,sha256=PYyqpbnMK1l18Rv_qhE5KdHHqZHgHePaJtJOowTyG7I,8128
|
|
483
495
|
helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
|
|
484
496
|
helm/clients/aleph_alpha_client.py,sha256=koPqXF6uRD905atoiCaPg5yxr6B25J0g2OTWk8geebQ,4969
|
|
485
|
-
helm/clients/anthropic_client.py,sha256=
|
|
497
|
+
helm/clients/anthropic_client.py,sha256=s3eCwHh8mbhxLi8up1WtQWKkUsHJa-LO44prNd7XYFc,34059
|
|
486
498
|
helm/clients/auto_client.py,sha256=uK9EWQFWBt4DoV1oytm0dIeA3YpcfGi_H0rCRZSVE8c,11438
|
|
487
499
|
helm/clients/bedrock_client.py,sha256=BsH9UopsP6ZHf-K0Yzg1PYSMLDwY0yIUmPHDhJVMUi0,5293
|
|
488
500
|
helm/clients/bedrock_utils.py,sha256=okZ6Z8pviGOUNlrdF2QquAqFs8-QYgcqci95eij8giM,2574
|
|
@@ -494,20 +506,21 @@ helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,32
|
|
|
494
506
|
helm/clients/google_client.py,sha256=EOpPzK5_9yzWkMjK-4ILiixDF3aeOa8AbR2SPnEO-nw,2900
|
|
495
507
|
helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
|
|
496
508
|
helm/clients/http_model_client.py,sha256=DBgkVDZPmg99DCcO_1Xdf6nFQo2kyxLkgoQpwC-wkHI,2806
|
|
497
|
-
helm/clients/huggingface_client.py,sha256=
|
|
509
|
+
helm/clients/huggingface_client.py,sha256=k-8J4nnDbve8UtGsa0RytWhS9IpAy8hoJAUw4nRZTMI,15734
|
|
498
510
|
helm/clients/lit_gpt_client.py,sha256=Sjec16bNODosEhDoBkRc4t-LNS-nCUY_jVivWj5zvfU,6205
|
|
499
511
|
helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
|
|
500
512
|
helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
|
|
501
513
|
helm/clients/mistral_client.py,sha256=thOLMcEfrzWR00JUabIZ_PnW2o9YZsdSmNf9z3jbYKo,5982
|
|
502
514
|
helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
|
|
515
|
+
helm/clients/nvidia_nim_client.py,sha256=f3ZWoTnJmBIFeWsHeUDaTCbDZLK_kdlUWNO1hWumUOo,987
|
|
503
516
|
helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
|
|
504
|
-
helm/clients/openai_client.py,sha256=
|
|
505
|
-
helm/clients/palmyra_client.py,sha256=
|
|
506
|
-
helm/clients/perspective_api_client.py,sha256=
|
|
517
|
+
helm/clients/openai_client.py,sha256=faWpoZjKxQu3EoeYwMz0deesFlH9VTVIjJ2W74c3gxY,14117
|
|
518
|
+
helm/clients/palmyra_client.py,sha256=XBfrTE-mxiYhLF2EXqd87DckfuZ4mwVLoI_Qif_p5KA,7223
|
|
519
|
+
helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
|
|
507
520
|
helm/clients/reka_client.py,sha256=K8b9p7U6LLAy4PRjgYrUS06gF4G2xjhjRoMEO4XDe0o,8329
|
|
508
521
|
helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
|
|
509
522
|
helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
|
|
510
|
-
helm/clients/test_client.py,sha256=
|
|
523
|
+
helm/clients/test_client.py,sha256=6cLpQc2IMR5o7iBxZYPvoRtHJa5i0E7JHh1VKaCtfBw,3842
|
|
511
524
|
helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
|
|
512
525
|
helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
|
|
513
526
|
helm/clients/test_together_client.py,sha256=yYNrhU3kQjmHwhILuoP5QwUgbmkm2gg2NHiNycHjoeE,6145
|
|
@@ -576,9 +589,9 @@ helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
576
589
|
helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=hTywh5nM95BmPoDyKOSDWg9G3-QwLO3KZEJZVkmFroo,6478
|
|
577
590
|
helm/clients/vision_language/huggingface_vlm_client.py,sha256=H7AE8mm506PkEcUO8VaLVtptHTwVX58nZx1A_BWdKzA,4968
|
|
578
591
|
helm/clients/vision_language/idefics_client.py,sha256=hi1VCDBegHfBssmW0C62H3OX3U2ISVRhaSkd24gb1K4,7692
|
|
579
|
-
helm/clients/vision_language/open_flamingo_client.py,sha256=
|
|
592
|
+
helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
|
|
580
593
|
helm/clients/vision_language/paligemma_client.py,sha256=IU_T8r1RgpGkEAqabLKBbmoUOWV6c1a9_FXgiTy8exE,6835
|
|
581
|
-
helm/clients/vision_language/palmyra_vision_client.py,sha256=
|
|
594
|
+
helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
|
|
582
595
|
helm/clients/vision_language/qwen_vlm_client.py,sha256=6rCH4gJMDyQHyjAE_GDIrLsInH_bvd6to-4RMWbRLeM,7407
|
|
583
596
|
helm/clients/vision_language/open_flamingo/__init__.py,sha256=i1tGJj6ckeE6eS1EWV5tbQKYLmPCrdSI45mPchfv_Ic,88
|
|
584
597
|
helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -601,17 +614,17 @@ helm/common/general.py,sha256=nMfHNPXyAAorAMmgDClD8r8XXeJcvfF0QXTP-FgH5PQ,11690
|
|
|
601
614
|
helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
|
|
602
615
|
helm/common/hierarchical_logger.py,sha256=EnKLnfbQftca08EJfjGEQb4tcnCKbx-JtwLnoCnhMQs,2908
|
|
603
616
|
helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
|
|
604
|
-
helm/common/images_utils.py,sha256=
|
|
617
|
+
helm/common/images_utils.py,sha256=icE0tH9P3FT_qggfbi8vVwkmIjOAN5l3HcGDF9gmNnY,3345
|
|
605
618
|
helm/common/key_value_store.py,sha256=iHi1WQuWttLNJnuM48QNOAXHoneNbmbBmtXYPq-dyys,3147
|
|
606
619
|
helm/common/media_object.py,sha256=3VZqfb0py5dDKwWtnLp2kdl8svaike-Cn7Mjk-b0cvM,5130
|
|
607
620
|
helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
|
|
608
|
-
helm/common/mongo_key_value_store.py,sha256=
|
|
621
|
+
helm/common/mongo_key_value_store.py,sha256=Qky55n8jkbJb8oIw6UCLnCbJoUR3H3yBZV7J8wVu1Ns,3878
|
|
609
622
|
helm/common/multimodal_request_utils.py,sha256=GNZQQCcwsARyFCO-uoeeglyK2PEfC4MjClAKDeKqokk,1404
|
|
610
623
|
helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
|
|
611
624
|
helm/common/object_spec.py,sha256=_usgTDQULBF6_jy7C6m-9ZNVvNxbGoTE_CdGcSvBASU,4327
|
|
612
625
|
helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
|
|
613
626
|
helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
|
|
614
|
-
helm/common/request.py,sha256=
|
|
627
|
+
helm/common/request.py,sha256=Z_YUd77WQ15yeSN8YYdT48dI4ehUc869KuaDisAiyIA,8806
|
|
615
628
|
helm/common/test_cache.py,sha256=XqboYHQAkFWIHPsuIjuageRSLeN7QoATKF7wwxggPqE,7054
|
|
616
629
|
helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
|
|
617
630
|
helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
|
|
@@ -622,13 +635,13 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
|
|
|
622
635
|
helm/common/file_caches/local_file_cache.py,sha256=wBOAbbkGLiClaX4YdunokRfSQCKNkTYmMVx2KTLy4Lc,1921
|
|
623
636
|
helm/common/file_caches/test_local_file_cache.py,sha256=bOCWR9MglwQXV98xk8auyjgFxaOr85zRdxWwxMBQW9s,663
|
|
624
637
|
helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
625
|
-
helm/config/model_deployments.yaml,sha256=
|
|
626
|
-
helm/config/model_metadata.yaml,sha256=
|
|
627
|
-
helm/config/tokenizer_configs.yaml,sha256=
|
|
638
|
+
helm/config/model_deployments.yaml,sha256=_Yeji7Zz8XfyYGJzrTEFzIDL1hpVPcv_mPDvANKSGQ8,89215
|
|
639
|
+
helm/config/model_metadata.yaml,sha256=E2Rg5_4kR3RGtjz9XaSKg_B7nfz9KgtqGXWgXw7bLWI,158654
|
|
640
|
+
helm/config/tokenizer_configs.yaml,sha256=RD7lrDgoEW-foqJI0QxLo4XPHS7G8HyuaB3r4rwIK6Q,18761
|
|
628
641
|
helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
629
642
|
helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
|
|
630
643
|
helm/proxy/cli.py,sha256=l8F7UYqrIOoBD9ZCIxJFA4fhxlzhae0-2Nn8A7FMkzk,8244
|
|
631
|
-
helm/proxy/example_queries.py,sha256=
|
|
644
|
+
helm/proxy/example_queries.py,sha256=rVGmQ2ej4OS7m5Y3uI5dp9Mfdw6bv53c0o2QknsmYes,4379
|
|
632
645
|
helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
|
|
633
646
|
helm/proxy/retry.py,sha256=iLZmKATEJQa9jsSpOIx6YDRhmrA8G1Qm21cUxCuo2Ug,3490
|
|
634
647
|
helm/proxy/server.py,sha256=V05YdMy0lZqYfYkxLDqksGYe-8CIFa6Jg8aSb8YHM7I,10753
|
|
@@ -645,30 +658,29 @@ helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5V
|
|
|
645
658
|
helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
|
|
646
659
|
helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
647
660
|
helm/proxy/services/remote_service.py,sha256=emYN0qWOJLQ7q1n06V4TwlvXaqylQcUxmqDcGZXqPJ8,9097
|
|
648
|
-
helm/proxy/services/server_service.py,sha256=
|
|
661
|
+
helm/proxy/services/server_service.py,sha256=SPaiP4D4zYwaNKaULugNtDCYxz1HqgoUPcI7BU-eS64,11469
|
|
649
662
|
helm/proxy/services/service.py,sha256=Be-Z5F6AN4vMzsJr3BS6tJ9NHHy_dc_yn2Ex9cm0ChU,6193
|
|
650
663
|
helm/proxy/services/test_remote_service.py,sha256=NFnLjg3QNHoDKdK0DlcrtylwlKXx1vdzheNZRrLEv7c,6605
|
|
651
664
|
helm/proxy/services/test_service.py,sha256=FUZoI8pGiUg5adgB1wTJ869QOgFYjPtM6yf6FGMdE64,8968
|
|
652
665
|
helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
653
666
|
helm/proxy/token_counters/auto_token_counter.py,sha256=34PWvF96DurTrUtUxW4Td5VNV1_BhAebCkXQLl3xp4M,2046
|
|
654
|
-
helm/proxy/token_counters/test_auto_token_counter.py,sha256=
|
|
667
|
+
helm/proxy/token_counters/test_auto_token_counter.py,sha256=LO3H_NbVeoeaMmEuFNCmhoEWKjWVvxeW5U4yTKfE-84,8590
|
|
655
668
|
helm/proxy/token_counters/token_counter.py,sha256=TCij1Cp08RoFTLLLdjNPoaeDGHpA1A2hQsrRV775Kf4,425
|
|
656
669
|
helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
657
|
-
helm/tokenizers/ai21_tokenizer.py,sha256=
|
|
670
|
+
helm/tokenizers/ai21_tokenizer.py,sha256=CE-u39ZY5Y4XQHONpiPHKK7uvEmySYLBQi2n70OV004,2059
|
|
658
671
|
helm/tokenizers/aleph_alpha_tokenizer.py,sha256=UlWC_SjObBvexpZ3OfKZT2yjhbSsHlKjQe_oWuRrXno,3818
|
|
659
672
|
helm/tokenizers/anthropic_tokenizer.py,sha256=d-HO9OEFkhYzFZu0VkOsHjxbqqSUseCNX0KQqgb3s2Q,2114
|
|
660
673
|
helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
|
|
661
674
|
helm/tokenizers/caching_tokenizer.py,sha256=kSegrCFotRevSDgJsn0g52dWiSUCNa7_EZpRNrELeUE,8163
|
|
662
|
-
helm/tokenizers/cohere_tokenizer.py,sha256=
|
|
675
|
+
helm/tokenizers/cohere_tokenizer.py,sha256=6WwHIt7SsICmYR2QQpwDJ7pfNF8VWrFHFxF5Kynq6aY,2116
|
|
663
676
|
helm/tokenizers/http_model_tokenizer.py,sha256=wBTtDA2UdEYspffa1wqgkT3y3YHoyLXXoucnJ5PGjhs,3109
|
|
664
|
-
helm/tokenizers/huggingface_tokenizer.py,sha256=
|
|
665
|
-
helm/tokenizers/ice_tokenizer.py,sha256=4ZTIRpmt2cqwcxnmrDpCRhiJ0BI3ELE-GHoBuHWgrDA,1200
|
|
677
|
+
helm/tokenizers/huggingface_tokenizer.py,sha256=vmzcbgzMMlwx1x2n0syyp6KuN47nskgoP9yi1BNEGMQ,8696
|
|
666
678
|
helm/tokenizers/lit_gpt_tokenizer.py,sha256=LMrpaje64UmnDKoYjPG_RQeXVA4xQUwW5t48IJIeLaQ,1660
|
|
667
679
|
helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
|
|
680
|
+
helm/tokenizers/test_ai21_tokenizer.py,sha256=V8orjdKxmEV44VYoZ9Sq5E7CIq2caNnr6vjdk0T_w1A,1646
|
|
668
681
|
helm/tokenizers/test_anthropic_tokenizer.py,sha256=_wzXp9FVR2Ml0s2A79TTXbSPHyTRp28i9tiEyQ9S6Ko,3792
|
|
669
682
|
helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
|
|
670
683
|
helm/tokenizers/test_huggingface_tokenizer.py,sha256=8tFyZQb4DLg6MdKg13a66bLbp0yf4Ar1fGWM_sYeSjg,6309
|
|
671
|
-
helm/tokenizers/test_ice_tokenizer.py,sha256=-xi_f8TBSkAYr5CcA56HDq7rZ9HAGd99J7twNfkLzFU,2619
|
|
672
684
|
helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
|
|
673
685
|
helm/tokenizers/test_yalm_tokenizer.py,sha256=qWpKnUuAlePd6t-UJB_mAiBwtAacnC8caKXLJ_GdTkk,2477
|
|
674
686
|
helm/tokenizers/tiktoken_tokenizer.py,sha256=FU2g_FF0pVoyspYhHcz3SyCBGNbsTby-nWVrj0Cq4_c,1265
|
|
@@ -679,9 +691,9 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
|
|
|
679
691
|
helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=W9p5QNn1GSm-y85yVEQe_82zn5CVK_vR6jvhk7JTs_k,869
|
|
680
692
|
helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
|
|
681
693
|
helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
|
|
682
|
-
crfm_helm-0.5.
|
|
683
|
-
crfm_helm-0.5.
|
|
684
|
-
crfm_helm-0.5.
|
|
685
|
-
crfm_helm-0.5.
|
|
686
|
-
crfm_helm-0.5.
|
|
687
|
-
crfm_helm-0.5.
|
|
694
|
+
crfm_helm-0.5.3.dist-info/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
|
|
695
|
+
crfm_helm-0.5.3.dist-info/METADATA,sha256=JNa1JuzCQTPbczD-UfRLsa_f8OW7JT1zHQML-ilNh_c,19060
|
|
696
|
+
crfm_helm-0.5.3.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
|
697
|
+
crfm_helm-0.5.3.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
|
|
698
|
+
crfm_helm-0.5.3.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
|
|
699
|
+
crfm_helm-0.5.3.dist-info/RECORD,,
|