ai2-olmo-eval 0.7.2__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/PKG-INFO +1 -1
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/ai2_olmo_eval.egg-info/PKG-INFO +1 -1
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/metrics.py +112 -87
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/tasks.py +430 -2
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/version.py +2 -2
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/LICENSE +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/README.md +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/pyproject.toml +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/setup.cfg +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/ai2_olmo_eval.egg-info/SOURCES.txt +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/ai2_olmo_eval.egg-info/dependency_links.txt +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/ai2_olmo_eval.egg-info/requires.txt +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/ai2_olmo_eval.egg-info/top_level.txt +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/__init__.py +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/boolq/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/glue/sst2/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/nq_open/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/sciq/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/tokenizer.py +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json +0 -0
- {ai2_olmo_eval-0.7.2 → ai2_olmo_eval-0.8.0}/src/olmo_eval/util.py +0 -0
|
@@ -98,96 +98,121 @@ class ICLMetric(Metric):
|
|
|
98
98
|
batch["ctx_len"][idx] - 1 : batch["ctx_len"][idx] + batch["cont_len"][idx] - 1
|
|
99
99
|
]
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
/ batch["cont_byte_len"][idx]
|
|
135
|
-
* LOG_2_OF_E
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
log_likelihood_no_leading_space = torch.gather(
|
|
139
|
-
lm_cont_logits, 1, cont_tokens.unsqueeze(-1)
|
|
140
|
-
).sum()
|
|
141
|
-
celoss_no_leading_space = (
|
|
142
|
-
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
143
|
-
/ batch["cont_str_len_no_leading_space"][idx]
|
|
144
|
-
)
|
|
145
|
-
bpb_no_leading_space = (
|
|
146
|
-
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
147
|
-
/ batch["cont_byte_len_no_leading_space"][idx]
|
|
148
|
-
* LOG_2_OF_E
|
|
149
|
-
)
|
|
150
|
-
elif self.metric_type in ["len_norm", "ce_loss", "bpb"]:
|
|
151
|
-
log_likelihood = (
|
|
152
|
-
torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
153
|
-
/ batch["cont_str_len"][idx]
|
|
154
|
-
)
|
|
155
|
-
celoss = (
|
|
156
|
-
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
157
|
-
/ batch["cont_str_len"][idx]
|
|
158
|
-
)
|
|
159
|
-
bpb = (
|
|
160
|
-
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
161
|
-
/ batch["cont_byte_len"][idx]
|
|
162
|
-
* LOG_2_OF_E
|
|
163
|
-
)
|
|
101
|
+
if "choice_ids" in batch:
|
|
102
|
+
fast_mc = True
|
|
103
|
+
choice_ids = batch["choice_ids"][idx]
|
|
104
|
+
else:
|
|
105
|
+
fast_mc = False
|
|
106
|
+
choice_ids = cont_tokens
|
|
107
|
+
|
|
108
|
+
# For each choice token, calculate metrics and append as separate entries
|
|
109
|
+
for choice_idx, choice_token in enumerate(choice_ids):
|
|
110
|
+
if fast_mc:
|
|
111
|
+
_cont_id = choice_idx
|
|
112
|
+
_cont_tokens = choice_token.unsqueeze(-1)
|
|
113
|
+
else:
|
|
114
|
+
_cont_id = cont_id
|
|
115
|
+
_cont_tokens = cont_tokens
|
|
116
|
+
|
|
117
|
+
# Skip choices for Qs with fewer than the max choices (for questions w/ different numbers of choices)
|
|
118
|
+
is_empty_choice = (choice_token.unsqueeze(-1).unsqueeze(-1) == -1).all().item()
|
|
119
|
+
if is_empty_choice:
|
|
120
|
+
continue
|
|
121
|
+
|
|
122
|
+
log_likelihood: torch.Tensor
|
|
123
|
+
celoss: torch.Tensor
|
|
124
|
+
bpb: torch.Tensor
|
|
125
|
+
log_likelihood_no_leading_space: torch.Tensor
|
|
126
|
+
celoss_no_leading_space: torch.Tensor
|
|
127
|
+
bpb_no_leading_space: torch.Tensor
|
|
128
|
+
if self.metric_type == "pmi_dc":
|
|
129
|
+
assert dc_lm_logits is not None
|
|
130
|
+
# get domain conditional continuation logits: [cont_len, vocab]
|
|
131
|
+
dc_lm_cont_logits = dc_lm_logits[idx][
|
|
132
|
+
batch["dc_len"][idx] - 1 : batch["dc_len"][idx] + batch["cont_len"][idx] - 1
|
|
133
|
+
]
|
|
164
134
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
135
|
+
# gather log-probs at continuation token indices but divide by domain conditional prob
|
|
136
|
+
log_likelihood = (
|
|
137
|
+
torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
138
|
+
/ torch.gather(dc_lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
139
|
+
)
|
|
140
|
+
celoss = -log_likelihood
|
|
141
|
+
bpb = -log_likelihood # the normalization factors cancel out
|
|
142
|
+
|
|
143
|
+
log_likelihood_no_leading_space = log_likelihood
|
|
144
|
+
celoss_no_leading_space = celoss
|
|
145
|
+
bpb_no_leading_space = bpb
|
|
146
|
+
elif self.metric_type == "acc" or self.metric_type == "f1":
|
|
147
|
+
# gather log-probs at continuation token indices
|
|
148
|
+
log_likelihood = torch.gather(
|
|
149
|
+
lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)
|
|
150
|
+
).sum()
|
|
151
|
+
celoss = (
|
|
152
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
153
|
+
/ batch["cont_str_len"][idx]
|
|
154
|
+
)
|
|
155
|
+
bpb = (
|
|
156
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
157
|
+
/ batch["cont_byte_len"][idx]
|
|
158
|
+
* LOG_2_OF_E
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
log_likelihood_no_leading_space = torch.gather(
|
|
162
|
+
lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)
|
|
163
|
+
).sum()
|
|
164
|
+
celoss_no_leading_space = (
|
|
165
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
166
|
+
/ batch["cont_str_len_no_leading_space"][idx]
|
|
167
|
+
)
|
|
168
|
+
bpb_no_leading_space = (
|
|
169
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
170
|
+
/ batch["cont_byte_len_no_leading_space"][idx]
|
|
171
|
+
* LOG_2_OF_E
|
|
172
|
+
)
|
|
173
|
+
elif self.metric_type in ["len_norm", "ce_loss", "bpb"]:
|
|
174
|
+
log_likelihood = (
|
|
175
|
+
torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
176
|
+
/ batch["cont_str_len"][idx]
|
|
177
|
+
)
|
|
178
|
+
celoss = (
|
|
179
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
180
|
+
/ batch["cont_str_len"][idx]
|
|
181
|
+
)
|
|
182
|
+
bpb = (
|
|
183
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
184
|
+
/ batch["cont_byte_len"][idx]
|
|
185
|
+
* LOG_2_OF_E
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
log_likelihood_no_leading_space = (
|
|
189
|
+
torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
190
|
+
/ batch["cont_str_len_no_leading_space"][idx]
|
|
191
|
+
)
|
|
192
|
+
celoss_no_leading_space = (
|
|
193
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
194
|
+
/ batch["cont_str_len_no_leading_space"][idx]
|
|
195
|
+
)
|
|
196
|
+
bpb_no_leading_space = (
|
|
197
|
+
-torch.gather(lm_cont_logits, 1, _cont_tokens.unsqueeze(-1)).sum()
|
|
198
|
+
/ batch["cont_byte_len_no_leading_space"][idx]
|
|
199
|
+
* LOG_2_OF_E
|
|
200
|
+
)
|
|
201
|
+
else:
|
|
202
|
+
raise ValueError(self.metric_type)
|
|
203
|
+
|
|
204
|
+
self.labels.append((doc_id, _cont_id, int(batch["label_id"][idx])))
|
|
205
|
+
self.loglikelihoods.append((doc_id, _cont_id, float(log_likelihood)))
|
|
206
|
+
self.celosses.append((doc_id, _cont_id, float(celoss)))
|
|
207
|
+
self.bpbs.append((doc_id, _cont_id, float(bpb)))
|
|
208
|
+
|
|
209
|
+
self.loglikelihoods_no_leading_space.append(
|
|
210
|
+
(doc_id, _cont_id, float(log_likelihood_no_leading_space))
|
|
172
211
|
)
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
/ batch["cont_byte_len_no_leading_space"][idx]
|
|
176
|
-
* LOG_2_OF_E
|
|
212
|
+
self.celosses_no_leading_space.append(
|
|
213
|
+
(doc_id, _cont_id, float(celoss_no_leading_space))
|
|
177
214
|
)
|
|
178
|
-
|
|
179
|
-
raise ValueError(self.metric_type)
|
|
180
|
-
|
|
181
|
-
self.labels.append((doc_id, cont_id, int(batch["label_id"][idx])))
|
|
182
|
-
self.loglikelihoods.append((doc_id, cont_id, float(log_likelihood)))
|
|
183
|
-
self.celosses.append((doc_id, cont_id, float(celoss)))
|
|
184
|
-
self.bpbs.append((doc_id, cont_id, float(bpb)))
|
|
185
|
-
|
|
186
|
-
self.loglikelihoods_no_leading_space.append(
|
|
187
|
-
(doc_id, cont_id, float(log_likelihood_no_leading_space))
|
|
188
|
-
)
|
|
189
|
-
self.celosses_no_leading_space.append((doc_id, cont_id, float(celoss_no_leading_space)))
|
|
190
|
-
self.bpbs_no_leading_space.append((doc_id, cont_id, float(bpb_no_leading_space)))
|
|
215
|
+
self.bpbs_no_leading_space.append((doc_id, _cont_id, float(bpb_no_leading_space)))
|
|
191
216
|
|
|
192
217
|
def compute(self) -> Dict[str, torch.Tensor]:
|
|
193
218
|
# Task "suffix" -> tensor
|