PyPI - crfm-helm - Versions diffs - 0.5.0__tar.gz → 0.5.2__tar.gz - Mend

crfm-helm 0.5.0__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (701) hide show

{crfm_helm-0.5.0/src/crfm_helm.egg-info → crfm_helm-0.5.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: crfm-helm
-Version: 0.5.0
+Version: 0.5.2
 Summary: Benchmark for language models
 Home-page: https://github.com/stanford-crfm/helm
 Author: Stanford CRFM
@@ -25,7 +25,7 @@ Requires-Dist: tqdm~=4.64
 Requires-Dist: zstandard~=0.18.0
 Requires-Dist: sqlitedict~=1.7
 Requires-Dist: bottle~=0.12.23
-Requires-Dist: datasets~=2.15
+Requires-Dist: datasets~=2.17
 Requires-Dist: pyarrow>=11.0.0
 Requires-Dist: pyarrow-hotfix~=0.6
 Requires-Dist: nltk~=3.7
@@ -34,7 +34,7 @@ Requires-Dist: rouge-score~=0.1.2
 Requires-Dist: scipy~=1.10
 Requires-Dist: uncertainty-calibration~=0.1.4
 Requires-Dist: scikit-learn~=1.1
-Requires-Dist: transformers~=4.37
+Requires-Dist: transformers~=4.40
 Requires-Dist: torch<3.0.0,>=1.13.1
 Requires-Dist: torchvision<3.0.0,>=0.14.1
 Requires-Dist: google-api-python-client~=2.64
@@ -77,6 +77,8 @@ Requires-Dist: evaluate~=0.4.1; extra == "unitxt"
 Provides-Extra: aleph-alpha
 Requires-Dist: aleph-alpha-client~=2.14.0; extra == "aleph-alpha"
 Requires-Dist: tokenizers>=0.13.3; extra == "aleph-alpha"
+Provides-Extra: openvino
+Requires-Dist: optimum[openvino]~=1.19; extra == "openvino"
 Provides-Extra: allenai
 Requires-Dist: ai2-olmo~=0.2; extra == "allenai"
 Provides-Extra: amazon
@@ -86,14 +88,18 @@ Requires-Dist: botocore~=1.31.57; extra == "amazon"
 Provides-Extra: anthropic
 Requires-Dist: anthropic~=0.17; extra == "anthropic"
 Requires-Dist: websocket-client~=1.3.2; extra == "anthropic"
+Provides-Extra: cohere
+Requires-Dist: cohere~=5.3; extra == "cohere"
 Provides-Extra: mistral
 Requires-Dist: mistralai~=0.0.11; extra == "mistral"
 Provides-Extra: openai
 Requires-Dist: openai~=1.0; extra == "openai"
-Requires-Dist: tiktoken~=0.3.3; extra == "openai"
+Requires-Dist: tiktoken~=0.7; extra == "openai"
 Requires-Dist: pydantic~=2.0; extra == "openai"
 Provides-Extra: google
-Requires-Dist: google-cloud-aiplatform~=1.44; extra == "google"
+Requires-Dist: google-cloud-aiplatform~=1.48; extra == "google"
+Provides-Extra: together
+Requires-Dist: together~=1.1; extra == "together"
 Provides-Extra: tsinghua
 Requires-Dist: icetk~=0.0.4; extra == "tsinghua"
 Provides-Extra: yandex
@@ -103,11 +109,17 @@ Requires-Dist: crfm-helm[aleph-alpha]; extra == "models"
 Requires-Dist: crfm-helm[allenai]; extra == "models"
 Requires-Dist: crfm-helm[amazon]; extra == "models"
 Requires-Dist: crfm-helm[anthropic]; extra == "models"
+Requires-Dist: crfm-helm[cohere]; extra == "models"
 Requires-Dist: crfm-helm[google]; extra == "models"
 Requires-Dist: crfm-helm[mistral]; extra == "models"
 Requires-Dist: crfm-helm[openai]; extra == "models"
+Requires-Dist: crfm-helm[reka]; extra == "models"
+Requires-Dist: crfm-helm[together]; extra == "models"
 Requires-Dist: crfm-helm[tsinghua]; extra == "models"
 Requires-Dist: crfm-helm[yandex]; extra == "models"
+Requires-Dist: crfm-helm[openvino]; extra == "models"
+Provides-Extra: reka
+Requires-Dist: reka-api~=2.0.0; extra == "reka"
 Provides-Extra: vlm
 Requires-Dist: crfm-helm[openai]; extra == "vlm"
 Requires-Dist: einops~=0.7.0; extra == "vlm"
@@ -117,8 +129,10 @@ Requires-Dist: torch~=2.1.2; extra == "vlm"
 Requires-Dist: transformers_stream_generator~=0.0.4; extra == "vlm"
 Requires-Dist: scipy~=1.10; extra == "vlm"
 Requires-Dist: torchvision<3.0.0,>=0.14.1; extra == "vlm"
+Requires-Dist: crfm-helm[reka]; extra == "vlm"
 Requires-Dist: crfm-helm[images]; extra == "vlm"
 Requires-Dist: crfm-helm[image2structure]; extra == "vlm"
+Requires-Dist: pycocoevalcap~=1.2; extra == "vlm"
 Provides-Extra: image2structure
 Requires-Dist: crfm-helm[images]; extra == "image2structure"
 Requires-Dist: latex~=0.7.0; extra == "image2structure"

{crfm_helm-0.5.0 → crfm_helm-0.5.2}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = crfm-helm
-version = 0.5.0
+version = 0.5.2
 author = Stanford CRFM
 author_email = contact-crfm@stanford.edu
 description = Benchmark for language models
@@ -35,7 +35,7 @@ install_requires =
 	sqlitedict~=1.7
 	bottle~=0.12.23
-	datasets~=2.15
+	datasets~=2.17
 	pyarrow>=11.0.0  # Pinned transitive dependency for datasets; workaround for #1026
 	pyarrow-hotfix~=0.6  # Hotfix for CVE-2023-47248
@@ -46,7 +46,7 @@ install_requires =
 	uncertainty-calibration~=0.1.4
 	scikit-learn~=1.1
-	transformers~=4.37  # For anthropic_client, vision_language.huggingface_vlm_client, huggingface_client, huggingface_tokenizer, test_openai_token_cost_estimator, model_summac (via summarization_metrics)
+	transformers~=4.40  # For anthropic_client, vision_language.huggingface_vlm_client, huggingface_client, huggingface_tokenizer, test_openai_token_cost_estimator, model_summac (via summarization_metrics)
 	torch>=1.13.1,<3.0.0  # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
 	torchvision>=0.14.1,<3.0.0  # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
@@ -92,6 +92,8 @@ unitxt =
 aleph-alpha =
 	aleph-alpha-client~=2.14.0
 	tokenizers>=0.13.3
+openvino =
+	optimum[openvino]~=1.19
 allenai =
 	ai2-olmo~=0.2
 amazon =
@@ -101,14 +103,18 @@ amazon =
 anthropic =
 	anthropic~=0.17
 	websocket-client~=1.3.2  # For legacy stanford-online-all-v4-s3
+cohere =
+	cohere~=5.3
 mistral =
 	mistralai~=0.0.11
 openai =
 	openai~=1.0
-	tiktoken~=0.3.3
+	tiktoken~=0.7
 	pydantic~=2.0  # For model_dump(mode="json") - openai only requires pydantic>=1.9.0
 google =
-	google-cloud-aiplatform~=1.44
+	google-cloud-aiplatform~=1.48
+together =
+	together~=1.1
 tsinghua =
 	icetk~=0.0.4
 yandex =
@@ -118,11 +124,17 @@ models =
 	crfm-helm[allenai]
 	crfm-helm[amazon]
 	crfm-helm[anthropic]
+	crfm-helm[cohere]
 	crfm-helm[google]
 	crfm-helm[mistral]
 	crfm-helm[openai]
+	crfm-helm[reka]
+	crfm-helm[together]
 	crfm-helm[tsinghua]
 	crfm-helm[yandex]
+	crfm-helm[openvino]
+reka =
+	reka-api~=2.0.0
 vlm =
 	crfm-helm[openai]
@@ -136,8 +148,12 @@ vlm =
 	scipy~=1.10
 	torchvision>=0.14.1,<3.0.0
+	crfm-helm[reka]
 	crfm-helm[images]
 	crfm-helm[image2structure]
+	pycocoevalcap~=1.2
 image2structure =
 	crfm-helm[images]

{crfm_helm-0.5.0 → crfm_helm-0.5.2/src/crfm_helm.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: crfm-helm
-Version: 0.5.0
+Version: 0.5.2
 Summary: Benchmark for language models
 Home-page: https://github.com/stanford-crfm/helm
 Author: Stanford CRFM
@@ -25,7 +25,7 @@ Requires-Dist: tqdm~=4.64
 Requires-Dist: zstandard~=0.18.0
 Requires-Dist: sqlitedict~=1.7
 Requires-Dist: bottle~=0.12.23
-Requires-Dist: datasets~=2.15
+Requires-Dist: datasets~=2.17
 Requires-Dist: pyarrow>=11.0.0
 Requires-Dist: pyarrow-hotfix~=0.6
 Requires-Dist: nltk~=3.7
@@ -34,7 +34,7 @@ Requires-Dist: rouge-score~=0.1.2
 Requires-Dist: scipy~=1.10
 Requires-Dist: uncertainty-calibration~=0.1.4
 Requires-Dist: scikit-learn~=1.1
-Requires-Dist: transformers~=4.37
+Requires-Dist: transformers~=4.40
 Requires-Dist: torch<3.0.0,>=1.13.1
 Requires-Dist: torchvision<3.0.0,>=0.14.1
 Requires-Dist: google-api-python-client~=2.64
@@ -77,6 +77,8 @@ Requires-Dist: evaluate~=0.4.1; extra == "unitxt"
 Provides-Extra: aleph-alpha
 Requires-Dist: aleph-alpha-client~=2.14.0; extra == "aleph-alpha"
 Requires-Dist: tokenizers>=0.13.3; extra == "aleph-alpha"
+Provides-Extra: openvino
+Requires-Dist: optimum[openvino]~=1.19; extra == "openvino"
 Provides-Extra: allenai
 Requires-Dist: ai2-olmo~=0.2; extra == "allenai"
 Provides-Extra: amazon
@@ -86,14 +88,18 @@ Requires-Dist: botocore~=1.31.57; extra == "amazon"
 Provides-Extra: anthropic
 Requires-Dist: anthropic~=0.17; extra == "anthropic"
 Requires-Dist: websocket-client~=1.3.2; extra == "anthropic"
+Provides-Extra: cohere
+Requires-Dist: cohere~=5.3; extra == "cohere"
 Provides-Extra: mistral
 Requires-Dist: mistralai~=0.0.11; extra == "mistral"
 Provides-Extra: openai
 Requires-Dist: openai~=1.0; extra == "openai"
-Requires-Dist: tiktoken~=0.3.3; extra == "openai"
+Requires-Dist: tiktoken~=0.7; extra == "openai"
 Requires-Dist: pydantic~=2.0; extra == "openai"
 Provides-Extra: google
-Requires-Dist: google-cloud-aiplatform~=1.44; extra == "google"
+Requires-Dist: google-cloud-aiplatform~=1.48; extra == "google"
+Provides-Extra: together
+Requires-Dist: together~=1.1; extra == "together"
 Provides-Extra: tsinghua
 Requires-Dist: icetk~=0.0.4; extra == "tsinghua"
 Provides-Extra: yandex
@@ -103,11 +109,17 @@ Requires-Dist: crfm-helm[aleph-alpha]; extra == "models"
 Requires-Dist: crfm-helm[allenai]; extra == "models"
 Requires-Dist: crfm-helm[amazon]; extra == "models"
 Requires-Dist: crfm-helm[anthropic]; extra == "models"
+Requires-Dist: crfm-helm[cohere]; extra == "models"
 Requires-Dist: crfm-helm[google]; extra == "models"
 Requires-Dist: crfm-helm[mistral]; extra == "models"
 Requires-Dist: crfm-helm[openai]; extra == "models"
+Requires-Dist: crfm-helm[reka]; extra == "models"
+Requires-Dist: crfm-helm[together]; extra == "models"
 Requires-Dist: crfm-helm[tsinghua]; extra == "models"
 Requires-Dist: crfm-helm[yandex]; extra == "models"
+Requires-Dist: crfm-helm[openvino]; extra == "models"
+Provides-Extra: reka
+Requires-Dist: reka-api~=2.0.0; extra == "reka"
 Provides-Extra: vlm
 Requires-Dist: crfm-helm[openai]; extra == "vlm"
 Requires-Dist: einops~=0.7.0; extra == "vlm"
@@ -117,8 +129,10 @@ Requires-Dist: torch~=2.1.2; extra == "vlm"
 Requires-Dist: transformers_stream_generator~=0.0.4; extra == "vlm"
 Requires-Dist: scipy~=1.10; extra == "vlm"
 Requires-Dist: torchvision<3.0.0,>=0.14.1; extra == "vlm"
+Requires-Dist: crfm-helm[reka]; extra == "vlm"
 Requires-Dist: crfm-helm[images]; extra == "vlm"
 Requires-Dist: crfm-helm[image2structure]; extra == "vlm"
+Requires-Dist: pycocoevalcap~=1.2; extra == "vlm"
 Provides-Extra: image2structure
 Requires-Dist: crfm-helm[images]; extra == "image2structure"
 Requires-Dist: latex~=0.7.0; extra == "image2structure"

{crfm_helm-0.5.0 → crfm_helm-0.5.2}/src/crfm_helm.egg-info/SOURCES.txt RENAMED Viewed

@@ -32,7 +32,6 @@ src/helm/benchmark/server.py
 src/helm/benchmark/slurm_jobs.py
 src/helm/benchmark/slurm_runner.py
 src/helm/benchmark/test_data_preprocessor.py
-src/helm/benchmark/test_model_deployment_definition.py
 src/helm/benchmark/test_run_expander.py
 src/helm/benchmark/tokenizer_config_registry.py
 src/helm/benchmark/adaptation/__init__.py
@@ -63,8 +62,11 @@ src/helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimod
 src/helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py
 src/helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py
 src/helm/benchmark/annotation/__init__.py
+src/helm/benchmark/annotation/air_bench_annotator.py
 src/helm/benchmark/annotation/annotator.py
 src/helm/benchmark/annotation/annotator_factory.py
+src/helm/benchmark/annotation/live_qa_annotator.py
+src/helm/benchmark/annotation/medication_qa_annotator.py
 src/helm/benchmark/annotation/test_annotator_factory.py
 src/helm/benchmark/annotation/test_dummy_annotator.py
 src/helm/benchmark/annotation/image2structure/__init__.py
@@ -102,6 +104,7 @@ src/helm/benchmark/efficiency_data/inference_denoised_runtimes.json
 src/helm/benchmark/efficiency_data/inference_idealized_runtimes.json
 src/helm/benchmark/efficiency_data/training_efficiency.json
 src/helm/benchmark/metrics/__init__.py
+src/helm/benchmark/metrics/air_bench_metrics.py
 src/helm/benchmark/metrics/basic_metrics.py
 src/helm/benchmark/metrics/bbq_metrics.py
 src/helm/benchmark/metrics/bias_metrics.py
@@ -123,16 +126,23 @@ src/helm/benchmark/metrics/dry_run_metrics.py
 src/helm/benchmark/metrics/efficiency_metrics.py
 src/helm/benchmark/metrics/evaluate_instances_metric.py
 src/helm/benchmark/metrics/evaluate_reference_metrics.py
+src/helm/benchmark/metrics/fin_qa_metrics.py
+src/helm/benchmark/metrics/fin_qa_metrics_helper.py
+src/helm/benchmark/metrics/gpt4v_originality_critique_metrics.py
 src/helm/benchmark/metrics/instruction_following_critique_metrics.py
 src/helm/benchmark/metrics/language_modeling_metrics.py
+src/helm/benchmark/metrics/live_qa_metrics.py
 src/helm/benchmark/metrics/machine_translation_metrics.py
+src/helm/benchmark/metrics/medication_qa_metrics.py
 src/helm/benchmark/metrics/metric.py
 src/helm/benchmark/metrics/metric_name.py
 src/helm/benchmark/metrics/metric_service.py
 src/helm/benchmark/metrics/numeracy_metrics.py
 src/helm/benchmark/metrics/paraphrase_generation_metrics.py
+src/helm/benchmark/metrics/prometheus_vision_critique_metrics.py
 src/helm/benchmark/metrics/ranking_metrics.py
 src/helm/benchmark/metrics/reference_metric.py
+src/helm/benchmark/metrics/reka_vibe_critique_metrics.py
 src/helm/benchmark/metrics/statistic.py
 src/helm/benchmark/metrics/summarization_critique_metrics.py
 src/helm/benchmark/metrics/summarization_metrics.py
@@ -208,11 +218,15 @@ src/helm/benchmark/presentation/table.py
 src/helm/benchmark/presentation/test_contamination.py
 src/helm/benchmark/presentation/test_create_plots.py
 src/helm/benchmark/presentation/test_run_entry.py
+src/helm/benchmark/presentation/test_schema.py
 src/helm/benchmark/presentation/test_summarize.py
 src/helm/benchmark/run_specs/__init__.py
+src/helm/benchmark/run_specs/air_bench_run_specs.py
 src/helm/benchmark/run_specs/classic_run_specs.py
 src/helm/benchmark/run_specs/cleva_run_specs.py
 src/helm/benchmark/run_specs/decodingtrust_run_specs.py
+src/helm/benchmark/run_specs/experimental_run_specs.py
+src/helm/benchmark/run_specs/finance_run_specs.py
 src/helm/benchmark/run_specs/heim_run_specs.py
 src/helm/benchmark/run_specs/instruction_following_run_specs.py
 src/helm/benchmark/run_specs/lite_run_specs.py
@@ -220,6 +234,7 @@ src/helm/benchmark/run_specs/simple_run_specs.py
 src/helm/benchmark/run_specs/unitxt_run_specs.py
 src/helm/benchmark/run_specs/vlm_run_specs.py
 src/helm/benchmark/scenarios/__init__.py
+src/helm/benchmark/scenarios/air_bench_scenario.py
 src/helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py
 src/helm/benchmark/scenarios/babi_qa_scenario.py
 src/helm/benchmark/scenarios/bbq_scenario.py
@@ -227,6 +242,7 @@ src/helm/benchmark/scenarios/big_bench_scenario.py
 src/helm/benchmark/scenarios/blimp_scenario.py
 src/helm/benchmark/scenarios/bold_scenario.py
 src/helm/benchmark/scenarios/boolq_scenario.py
+src/helm/benchmark/scenarios/ci_mcqa_scenario.py
 src/helm/benchmark/scenarios/civil_comments_scenario.py
 src/helm/benchmark/scenarios/cleva_scenario.py
 src/helm/benchmark/scenarios/code_scenario.py
@@ -250,6 +266,7 @@ src/helm/benchmark/scenarios/dyck_language_scenario.py
 src/helm/benchmark/scenarios/entity_data_imputation_scenario.py
 src/helm/benchmark/scenarios/entity_matching_scenario.py
 src/helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py
+src/helm/benchmark/scenarios/fin_qa_scenario.py
 src/helm/benchmark/scenarios/grammar.py
 src/helm/benchmark/scenarios/grammar_scenario.py
 src/helm/benchmark/scenarios/gsm_scenario.py
@@ -293,6 +310,7 @@ src/helm/benchmark/scenarios/summarization_scenario.py
 src/helm/benchmark/scenarios/synthetic_efficiency_scenario.py
 src/helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py
 src/helm/benchmark/scenarios/synthetic_reasoning_scenario.py
+src/helm/benchmark/scenarios/test_air_bench_scenario.py
 src/helm/benchmark/scenarios/test_grammar.py
 src/helm/benchmark/scenarios/test_math_scenario.py
 src/helm/benchmark/scenarios/test_scenario.py
@@ -327,16 +345,27 @@ src/helm/benchmark/scenarios/image_generation/relational_understanding_scenario.
 src/helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py
 src/helm/benchmark/scenarios/image_generation/winoground_scenario.py
 src/helm/benchmark/scenarios/vision_language/__init__.py
+src/helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py
 src/helm/benchmark/scenarios/vision_language/bingo_scenario.py
+src/helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py
+src/helm/benchmark/scenarios/vision_language/flickr30k_scenario.py
+src/helm/benchmark/scenarios/vision_language/gqa_scenario.py
 src/helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py
 src/helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py
+src/helm/benchmark/scenarios/vision_language/math_vista_scenario.py
 src/helm/benchmark/scenarios/vision_language/mementos_scenario.py
+src/helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py
 src/helm/benchmark/scenarios/vision_language/mme_scenario.py
 src/helm/benchmark/scenarios/vision_language/mmmu_scenario.py
+src/helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py
+src/helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py
 src/helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py
+src/helm/benchmark/scenarios/vision_language/originality_scenario.py
+src/helm/benchmark/scenarios/vision_language/pairs_scenario.py
 src/helm/benchmark/scenarios/vision_language/pope_scenario.py
 src/helm/benchmark/scenarios/vision_language/seed_bench_scenario.py
 src/helm/benchmark/scenarios/vision_language/unicorn_scenario.py
+src/helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py
 src/helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py
 src/helm/benchmark/scenarios/vision_language/vqa_scenario.py
 src/helm/benchmark/scenarios/vision_language/image2structure/__init__.py
@@ -359,12 +388,19 @@ src/helm/benchmark/static/index.html
 src/helm/benchmark/static/info-icon.png
 src/helm/benchmark/static/json-urls.js
 src/helm/benchmark/static/plot-captions.js
+src/helm/benchmark/static/schema_air_bench.yaml
 src/helm/benchmark/static/schema_classic.yaml
+src/helm/benchmark/static/schema_finance.yaml
+src/helm/benchmark/static/schema_image2structure.yaml
 src/helm/benchmark/static/schema_instruction_following.yaml
 src/helm/benchmark/static/schema_lite.yaml
+src/helm/benchmark/static/schema_medical.yaml
 src/helm/benchmark/static/schema_mmlu.yaml
+src/helm/benchmark/static/schema_tables.yaml
+src/helm/benchmark/static/schema_thai.yaml
 src/helm/benchmark/static/schema_unitxt.yaml
-src/helm/benchmark/static/schema_vlm.yaml
+src/helm/benchmark/static/schema_vhelm.yaml
+src/helm/benchmark/static/schema_vhelm_lite.yaml
 src/helm/benchmark/static/utils.js
 src/helm/benchmark/static/images/crfm-logo.png
 src/helm/benchmark/static/images/helm-logo-simple.png
@@ -389,6 +425,7 @@ src/helm/benchmark/static_build/config.js
 src/helm/benchmark/static_build/index.html
 src/helm/benchmark/static_build/assets/01-694cb9b7.png
 src/helm/benchmark/static_build/assets/ai21-0eb91ec3.png
+src/helm/benchmark/static_build/assets/air-overview-d2e6c49f.png
 src/helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png
 src/helm/benchmark/static_build/assets/anthropic-70d8bc39.png
 src/helm/benchmark/static_build/assets/bigscience-7f0400c0.png
@@ -399,13 +436,15 @@ src/helm/benchmark/static_build/assets/google-06d997ad.png
 src/helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png
 src/helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png
 src/helm/benchmark/static_build/assets/helmhero-28e90f4d.png
-src/helm/benchmark/static_build/assets/index-5088afcb.css
-src/helm/benchmark/static_build/assets/index-d839df55.js
+src/helm/benchmark/static_build/assets/index-30dbceba.js
+src/helm/benchmark/static_build/assets/index-66b02d40.css
 src/helm/benchmark/static_build/assets/meta-5580e9f1.png
 src/helm/benchmark/static_build/assets/microsoft-f5ee5016.png
 src/helm/benchmark/static_build/assets/mistral-18e1be23.png
 src/helm/benchmark/static_build/assets/nvidia-86fa75c1.png
 src/helm/benchmark/static_build/assets/openai-3f8653e4.png
+src/helm/benchmark/static_build/assets/overview-74aea3d8.png
+src/helm/benchmark/static_build/assets/process-flow-bd2eba96.png
 src/helm/benchmark/static_build/assets/react-d4a0b69b.js
 src/helm/benchmark/static_build/assets/recharts-6d337683.js
 src/helm/benchmark/static_build/assets/tii-24de195c.png
@@ -478,6 +517,7 @@ src/helm/clients/open_lm_client.py
 src/helm/clients/openai_client.py
 src/helm/clients/palmyra_client.py
 src/helm/clients/perspective_api_client.py
+src/helm/clients/reka_client.py
 src/helm/clients/simple_client.py
 src/helm/clients/test_auto_client.py
 src/helm/clients/test_client.py
@@ -488,6 +528,7 @@ src/helm/clients/together_client.py
 src/helm/clients/toxicity_classifier_client.py
 src/helm/clients/vertexai_client.py
 src/helm/clients/vllm_client.py
+src/helm/clients/yi_client.py
 src/helm/clients/clip_scorers/__init__.py
 src/helm/clients/clip_scorers/base_clip_scorer.py
 src/helm/clients/clip_scorers/clip_scorer.py
@@ -545,9 +586,12 @@ src/helm/clients/image_generation/mindalle/utils/config.py
 src/helm/clients/image_generation/mindalle/utils/sampling.py
 src/helm/clients/image_generation/mindalle/utils/utils.py
 src/helm/clients/vision_language/__init__.py
+src/helm/clients/vision_language/huggingface_vision2seq_client.py
 src/helm/clients/vision_language/huggingface_vlm_client.py
 src/helm/clients/vision_language/idefics_client.py
 src/helm/clients/vision_language/open_flamingo_client.py
+src/helm/clients/vision_language/paligemma_client.py
+src/helm/clients/vision_language/palmyra_vision_client.py
 src/helm/clients/vision_language/qwen_vlm_client.py
 src/helm/clients/vision_language/open_flamingo/__init__.py
 src/helm/clients/vision_language/open_flamingo/src/__init__.py
@@ -635,6 +679,7 @@ src/helm/tokenizers/ice_tokenizer.py
 src/helm/tokenizers/lit_gpt_tokenizer.py
 src/helm/tokenizers/simple_tokenizer.py
 src/helm/tokenizers/test_anthropic_tokenizer.py
+src/helm/tokenizers/test_cohere_tokenizer.py
 src/helm/tokenizers/test_huggingface_tokenizer.py
 src/helm/tokenizers/test_ice_tokenizer.py
 src/helm/tokenizers/test_simple_tokenizer.py

{crfm_helm-0.5.0 → crfm_helm-0.5.2}/src/crfm_helm.egg-info/requires.txt RENAMED Viewed

@@ -10,7 +10,7 @@ tqdm~=4.64
 zstandard~=0.18.0
 sqlitedict~=1.7
 bottle~=0.12.23
-datasets~=2.15
+datasets~=2.17
 pyarrow>=11.0.0
 pyarrow-hotfix~=0.6
 nltk~=3.7
@@ -19,7 +19,7 @@ rouge-score~=0.1.2
 scipy~=1.10
 uncertainty-calibration~=0.1.4
 scikit-learn~=1.1
-transformers~=4.37
+transformers~=4.40
 torch<3.0.0,>=1.13.1
 torchvision<3.0.0,>=0.14.1
 google-api-python-client~=2.64
@@ -62,6 +62,9 @@ jieba==0.42.1
 opencc==1.1.6
 langdetect==1.0.9
+[cohere]
+cohere~=5.3
 [decodingtrust]
 fairlearn~=0.9.0
@@ -73,7 +76,7 @@ mypy==1.5.1
 flake8==5.0.4
 [google]
-google-cloud-aiplatform~=1.44
+google-cloud-aiplatform~=1.48
 [heim]
 gdown~=4.4.0
@@ -134,20 +137,27 @@ crfm-helm[aleph-alpha]
 crfm-helm[allenai]
 crfm-helm[amazon]
 crfm-helm[anthropic]
+crfm-helm[cohere]
 crfm-helm[google]
 crfm-helm[mistral]
 crfm-helm[openai]
+crfm-helm[reka]
+crfm-helm[together]
 crfm-helm[tsinghua]
 crfm-helm[yandex]
+crfm-helm[openvino]
 [mongo]
 pymongo~=4.2
 [openai]
 openai~=1.0
-tiktoken~=0.3.3
+tiktoken~=0.7
 pydantic~=2.0
+[openvino]
+optimum[openvino]~=1.19
 [plots]
 colorcet~=3.0.1
 matplotlib~=3.6.0
@@ -156,6 +166,9 @@ seaborn~=0.11.0
 [proxy-server]
 gunicorn~=20.1.0
+[reka]
+reka-api~=2.0.0
 [scenarios]
 gdown~=4.4.0
 sympy~=1.11.1
@@ -167,6 +180,9 @@ simple-slurm~=0.2.6
 [summarization]
 summ-eval~=0.892
+[together]
+together~=1.1
 [tsinghua]
 icetk~=0.0.4
@@ -182,8 +198,10 @@ torch~=2.1.2
 transformers_stream_generator~=0.0.4
 scipy~=1.10
 torchvision<3.0.0,>=0.14.1
+crfm-helm[reka]
 crfm-helm[images]
 crfm-helm[image2structure]
+pycocoevalcap~=1.2
 [yandex]
 sentencepiece~=0.1.97

crfm-helm 0.5.0__tar.gz → 0.5.2__tar.gz

crfm-helm 0.5.0__tar.gz → 0.5.2__tar.gz