ai2-olmo-eval 0.7.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/PKG-INFO +3 -2
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/PKG-INFO +3 -2
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/metrics.py +164 -9
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tasks.py +40 -6
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/version.py +1 -1
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/LICENSE +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/README.md +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/pyproject.toml +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/setup.cfg +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/SOURCES.txt +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/dependency_links.txt +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/requires.txt +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/top_level.txt +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/__init__.py +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/boolq/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/sciq/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tokenizer.py +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json +0 -0
- {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: ai2-olmo-eval
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: In-loop evaluation tasks for language modeling
|
|
5
5
|
Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
|
|
6
6
|
License: Apache License
|
|
@@ -234,6 +234,7 @@ Requires-Dist: boto3; extra == "dev"
|
|
|
234
234
|
Requires-Dist: google-cloud-storage; extra == "dev"
|
|
235
235
|
Provides-Extra: all
|
|
236
236
|
Requires-Dist: ai2-olmo-eval[dev]; extra == "all"
|
|
237
|
+
Dynamic: license-file
|
|
237
238
|
|
|
238
239
|
# OLMo-in-loop-evals
|
|
239
240
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: ai2-olmo-eval
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: In-loop evaluation tasks for language modeling
|
|
5
5
|
Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
|
|
6
6
|
License: Apache License
|
|
@@ -234,6 +234,7 @@ Requires-Dist: boto3; extra == "dev"
|
|
|
234
234
|
Requires-Dist: google-cloud-storage; extra == "dev"
|
|
235
235
|
Provides-Extra: all
|
|
236
236
|
Requires-Dist: ai2-olmo-eval[dev]; extra == "all"
|
|
237
|
+
Dynamic: license-file
|
|
237
238
|
|
|
238
239
|
# OLMo-in-loop-evals
|
|
239
240
|
|
|
@@ -37,12 +37,26 @@ class ICLMetric(Metric):
|
|
|
37
37
|
self.add_state("bpbs", default=[], dist_reduce_fx=dist_combine_lists)
|
|
38
38
|
self.add_state("labels", default=[], dist_reduce_fx=dist_combine_lists)
|
|
39
39
|
|
|
40
|
+
self.add_state(
|
|
41
|
+
"loglikelihoods_no_leading_space", default=[], dist_reduce_fx=dist_combine_lists
|
|
42
|
+
)
|
|
43
|
+
self.add_state("celosses_no_leading_space", default=[], dist_reduce_fx=dist_combine_lists)
|
|
44
|
+
self.add_state("bpbs_no_leading_space", default=[], dist_reduce_fx=dist_combine_lists)
|
|
45
|
+
|
|
40
46
|
def reset(self):
|
|
41
47
|
self.loglikelihoods: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
|
|
42
48
|
self.celosses: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
|
|
43
49
|
self.bpbs: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
|
|
44
50
|
self.labels: List[Tuple[Optional[int], Optional[int], Optional[int]]] = []
|
|
45
51
|
|
|
52
|
+
self.loglikelihoods_no_leading_space: List[
|
|
53
|
+
Tuple[Optional[int], Optional[int], Optional[float]]
|
|
54
|
+
] = []
|
|
55
|
+
self.celosses_no_leading_space: List[
|
|
56
|
+
Tuple[Optional[int], Optional[int], Optional[float]]
|
|
57
|
+
] = []
|
|
58
|
+
self.bpbs_no_leading_space: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
|
|
59
|
+
|
|
46
60
|
def update(
|
|
47
61
|
self,
|
|
48
62
|
batch: Dict[str, Any],
|
|
@@ -56,6 +70,11 @@ class ICLMetric(Metric):
|
|
|
56
70
|
self.loglikelihoods.append((None, None, None))
|
|
57
71
|
self.celosses.append((None, None, None))
|
|
58
72
|
self.bpbs.append((None, None, None))
|
|
73
|
+
|
|
74
|
+
self.loglikelihoods_no_leading_space.append((None, None, None))
|
|
75
|
+
self.celosses_no_leading_space.append((None, None, None))
|
|
76
|
+
self.bpbs_no_leading_space.append((None, None, None))
|
|
77
|
+
|
|
59
78
|
self.labels.append((None, None, None))
|
|
60
79
|
return
|
|
61
80
|
|
|
@@ -82,6 +101,9 @@ class ICLMetric(Metric):
|
|
|
82
101
|
log_likelihood: torch.Tensor
|
|
83
102
|
celoss: torch.Tensor
|
|
84
103
|
bpb: torch.Tensor
|
|
104
|
+
log_likelihood_no_leading_space: torch.Tensor
|
|
105
|
+
celoss_no_leading_space: torch.Tensor
|
|
106
|
+
bpb_no_leading_space: torch.Tensor
|
|
85
107
|
if self.metric_type == "pmi_dc":
|
|
86
108
|
assert dc_lm_logits is not None
|
|
87
109
|
# get domain conditional continuation logits: [cont_len, vocab]
|
|
@@ -96,6 +118,10 @@ class ICLMetric(Metric):
|
|
|
96
118
|
)
|
|
97
119
|
celoss = -log_likelihood
|
|
98
120
|
bpb = -log_likelihood # the normalization factors cancel out
|
|
121
|
+
|
|
122
|
+
log_likelihood_no_leading_space = log_likelihood
|
|
123
|
+
celoss_no_leading_space = celoss
|
|
124
|
+
bpb_no_leading_space = bpb
|
|
99
125
|
elif self.metric_type == "acc" or self.metric_type == "f1":
|
|
100
126
|
# gather log-probs at continuation token indices
|
|
101
127
|
log_likelihood = torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
@@ -108,6 +134,19 @@ class ICLMetric(Metric):
|
|
|
108
134
|
/ batch["cont_byte_len"][idx]
|
|
109
135
|
* LOG_2_OF_E
|
|
110
136
|
)
|
|
137
|
+
|
|
138
|
+
log_likelihood_no_leading_space = torch.gather(
|
|
139
|
+
lm_cont_logits, 1, cont_tokens.unsqueeze(-1)
|
|
140
|
+
).sum()
|
|
141
|
+
celoss_no_leading_space = (
|
|
142
|
+
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
143
|
+
/ batch["cont_str_len_no_leading_space"][idx]
|
|
144
|
+
)
|
|
145
|
+
bpb_no_leading_space = (
|
|
146
|
+
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
147
|
+
/ batch["cont_byte_len_no_leading_space"][idx]
|
|
148
|
+
* LOG_2_OF_E
|
|
149
|
+
)
|
|
111
150
|
elif self.metric_type in ["len_norm", "ce_loss", "bpb"]:
|
|
112
151
|
log_likelihood = (
|
|
113
152
|
torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
@@ -122,23 +161,46 @@ class ICLMetric(Metric):
|
|
|
122
161
|
/ batch["cont_byte_len"][idx]
|
|
123
162
|
* LOG_2_OF_E
|
|
124
163
|
)
|
|
164
|
+
|
|
165
|
+
log_likelihood_no_leading_space = (
|
|
166
|
+
torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
167
|
+
/ batch["cont_str_len_no_leading_space"][idx]
|
|
168
|
+
)
|
|
169
|
+
celoss_no_leading_space = (
|
|
170
|
+
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
171
|
+
/ batch["cont_str_len_no_leading_space"][idx]
|
|
172
|
+
)
|
|
173
|
+
bpb_no_leading_space = (
|
|
174
|
+
-torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
|
|
175
|
+
/ batch["cont_byte_len_no_leading_space"][idx]
|
|
176
|
+
* LOG_2_OF_E
|
|
177
|
+
)
|
|
125
178
|
else:
|
|
126
179
|
raise ValueError(self.metric_type)
|
|
127
180
|
|
|
128
|
-
self.loglikelihoods.append((doc_id, cont_id, float(log_likelihood)))
|
|
129
181
|
self.labels.append((doc_id, cont_id, int(batch["label_id"][idx])))
|
|
182
|
+
self.loglikelihoods.append((doc_id, cont_id, float(log_likelihood)))
|
|
130
183
|
self.celosses.append((doc_id, cont_id, float(celoss)))
|
|
131
184
|
self.bpbs.append((doc_id, cont_id, float(bpb)))
|
|
132
185
|
|
|
186
|
+
self.loglikelihoods_no_leading_space.append(
|
|
187
|
+
(doc_id, cont_id, float(log_likelihood_no_leading_space))
|
|
188
|
+
)
|
|
189
|
+
self.celosses_no_leading_space.append((doc_id, cont_id, float(celoss_no_leading_space)))
|
|
190
|
+
self.bpbs_no_leading_space.append((doc_id, cont_id, float(bpb_no_leading_space)))
|
|
191
|
+
|
|
133
192
|
def compute(self) -> Dict[str, torch.Tensor]:
|
|
134
193
|
# Task "suffix" -> tensor
|
|
135
194
|
|
|
136
195
|
# states should have been synced from all accelerators at this point
|
|
137
196
|
# account for duplicates here because of DistributedSampler compensating for drop_last=False
|
|
138
197
|
loglikelihood_dict: Dict[int, Dict[int, float]] = {}
|
|
198
|
+
loglikelihood_no_leading_space_dict: Dict[int, Dict[int, float]] = {}
|
|
139
199
|
label_dict: Dict[int, int] = {}
|
|
140
200
|
celoss_dict: Dict[int, Dict[int, float]] = {}
|
|
201
|
+
celoss_no_leading_space_dict: Dict[int, Dict[int, float]] = {}
|
|
141
202
|
bpb_dict: Dict[int, Dict[int, float]] = {}
|
|
203
|
+
bpb_no_leading_space_dict: Dict[int, Dict[int, float]] = {}
|
|
142
204
|
|
|
143
205
|
# collect labels
|
|
144
206
|
for doc_id, cont_id, label_id in self.labels:
|
|
@@ -159,6 +221,17 @@ class ICLMetric(Metric):
|
|
|
159
221
|
if cont_id not in loglikelihood_dict[doc_id]:
|
|
160
222
|
loglikelihood_dict[doc_id][cont_id] = loglikelihood
|
|
161
223
|
|
|
224
|
+
# collect loglikelihoods no leading space
|
|
225
|
+
for doc_id, cont_id, loglikelihood in self.loglikelihoods_no_leading_space:
|
|
226
|
+
if doc_id is None or cont_id is None or loglikelihood is None:
|
|
227
|
+
continue
|
|
228
|
+
|
|
229
|
+
if doc_id not in loglikelihood_no_leading_space_dict:
|
|
230
|
+
loglikelihood_no_leading_space_dict[doc_id] = {}
|
|
231
|
+
|
|
232
|
+
if cont_id not in loglikelihood_no_leading_space_dict[doc_id]:
|
|
233
|
+
loglikelihood_no_leading_space_dict[doc_id][cont_id] = loglikelihood
|
|
234
|
+
|
|
162
235
|
# collect celosses
|
|
163
236
|
for doc_id, cont_id, celoss_val in self.celosses:
|
|
164
237
|
if doc_id is None or cont_id is None or celoss_val is None:
|
|
@@ -170,6 +243,17 @@ class ICLMetric(Metric):
|
|
|
170
243
|
if cont_id not in celoss_dict[doc_id]:
|
|
171
244
|
celoss_dict[doc_id][cont_id] = celoss_val
|
|
172
245
|
|
|
246
|
+
# collect celosses no leading space
|
|
247
|
+
for doc_id, cont_id, celoss_val in self.celosses_no_leading_space:
|
|
248
|
+
if doc_id is None or cont_id is None or celoss_val is None:
|
|
249
|
+
continue
|
|
250
|
+
|
|
251
|
+
if doc_id not in celoss_no_leading_space_dict:
|
|
252
|
+
celoss_no_leading_space_dict[doc_id] = {}
|
|
253
|
+
|
|
254
|
+
if cont_id not in celoss_no_leading_space_dict[doc_id]:
|
|
255
|
+
celoss_no_leading_space_dict[doc_id][cont_id] = celoss_val
|
|
256
|
+
|
|
173
257
|
# collect bpbs
|
|
174
258
|
for doc_id, cont_id, bpb_val in self.bpbs:
|
|
175
259
|
if doc_id is None or cont_id is None or bpb_val is None:
|
|
@@ -181,13 +265,30 @@ class ICLMetric(Metric):
|
|
|
181
265
|
if cont_id not in bpb_dict[doc_id]:
|
|
182
266
|
bpb_dict[doc_id][cont_id] = bpb_val
|
|
183
267
|
|
|
268
|
+
# collect bpbs no leading space
|
|
269
|
+
for doc_id, cont_id, bpb_val in self.bpbs_no_leading_space:
|
|
270
|
+
if doc_id is None or cont_id is None or bpb_val is None:
|
|
271
|
+
continue
|
|
272
|
+
|
|
273
|
+
if doc_id not in bpb_no_leading_space_dict:
|
|
274
|
+
bpb_no_leading_space_dict[doc_id] = {}
|
|
275
|
+
|
|
276
|
+
if cont_id not in bpb_no_leading_space_dict[doc_id]:
|
|
277
|
+
bpb_no_leading_space_dict[doc_id][cont_id] = bpb_val
|
|
278
|
+
|
|
184
279
|
# compute acc
|
|
280
|
+
correct_no_leading_space = []
|
|
185
281
|
correct = []
|
|
186
282
|
celoss = []
|
|
283
|
+
celoss_no_leading_space = []
|
|
187
284
|
bpb = []
|
|
285
|
+
bpb_no_leading_space = []
|
|
188
286
|
soft_score = []
|
|
189
287
|
soft_log_score = []
|
|
288
|
+
soft_score_no_leading_space = []
|
|
289
|
+
soft_log_score_no_leading_space = []
|
|
190
290
|
preds: Optional[List[float]] = None
|
|
291
|
+
preds_no_leading_space: Optional[List[float]] = None
|
|
191
292
|
labels: Optional[List[int]] = None
|
|
192
293
|
if self.metric_type == "f1":
|
|
193
294
|
preds = []
|
|
@@ -197,15 +298,25 @@ class ICLMetric(Metric):
|
|
|
197
298
|
# each doc_id might have a different number of continuation
|
|
198
299
|
num_continuations = len(loglikelihood_dict[doc_id].keys())
|
|
199
300
|
loglikelihoods = torch.tensor([-float("inf")] * num_continuations)
|
|
301
|
+
loglikelihoods_no_leading_space = torch.tensor([-float("inf")] * num_continuations)
|
|
200
302
|
celosses = torch.tensor([float("inf")] * num_continuations)
|
|
303
|
+
celosses_no_leading_space = torch.tensor([float("inf")] * num_continuations)
|
|
201
304
|
bpbs = torch.tensor([float("inf")] * num_continuations)
|
|
305
|
+
bpbs_no_leading_space = torch.tensor([float("inf")] * num_continuations)
|
|
202
306
|
|
|
203
307
|
skip_document = False
|
|
204
308
|
for cont_id in loglikelihood_dict[doc_id]:
|
|
205
309
|
try:
|
|
206
310
|
loglikelihoods[cont_id] = loglikelihood_dict[doc_id][cont_id]
|
|
311
|
+
loglikelihoods_no_leading_space[cont_id] = loglikelihood_no_leading_space_dict[
|
|
312
|
+
doc_id
|
|
313
|
+
][cont_id]
|
|
207
314
|
celosses[cont_id] = celoss_dict[doc_id][cont_id]
|
|
315
|
+
celosses_no_leading_space[cont_id] = celoss_no_leading_space_dict[doc_id][
|
|
316
|
+
cont_id
|
|
317
|
+
]
|
|
208
318
|
bpbs[cont_id] = bpb_dict[doc_id][cont_id]
|
|
319
|
+
bpbs_no_leading_space[cont_id] = bpb_no_leading_space_dict[doc_id][cont_id]
|
|
209
320
|
except IndexError:
|
|
210
321
|
# We didn't process all of the continuations, so skip this document.
|
|
211
322
|
skip_document = True
|
|
@@ -216,39 +327,83 @@ class ICLMetric(Metric):
|
|
|
216
327
|
|
|
217
328
|
if self.metric_type == "ce_loss":
|
|
218
329
|
celoss.append(celosses[0]) # Only one answer is scored
|
|
330
|
+
celoss_no_leading_space.append(celosses_no_leading_space[0])
|
|
219
331
|
elif self.metric_type == "bpb":
|
|
220
332
|
bpb.append(bpbs[0]) # Only one answer is scored
|
|
333
|
+
bpb_no_leading_space.append(bpbs_no_leading_space[0])
|
|
221
334
|
elif self.metric_type == "f1":
|
|
222
335
|
assert preds is not None
|
|
336
|
+
assert preds_no_leading_space is not None
|
|
223
337
|
assert labels is not None
|
|
224
338
|
preds.append(torch.argmax(loglikelihoods).item())
|
|
339
|
+
preds_no_leading_space.append(torch.argmax(loglikelihoods_no_leading_space).item())
|
|
225
340
|
labels.append(label_dict[doc_id])
|
|
226
341
|
else:
|
|
227
342
|
correct.append(
|
|
228
343
|
1.0 if torch.argmax(loglikelihoods).item() == label_dict[doc_id] else 0.0
|
|
229
344
|
)
|
|
345
|
+
correct_no_leading_space.append(
|
|
346
|
+
1.0
|
|
347
|
+
if torch.argmax(loglikelihoods_no_leading_space).item() == label_dict[doc_id]
|
|
348
|
+
else 0.0
|
|
349
|
+
)
|
|
230
350
|
celoss.append(celosses[label_dict[doc_id]].item())
|
|
351
|
+
celoss_no_leading_space.append(celosses_no_leading_space[label_dict[doc_id]].item())
|
|
231
352
|
bpb.append(bpbs[label_dict[doc_id]].item())
|
|
353
|
+
bpb_no_leading_space.append(bpbs_no_leading_space[label_dict[doc_id]].item())
|
|
232
354
|
soft_score.append(torch.softmax(loglikelihoods, dim=0)[label_dict[doc_id]].item())
|
|
233
355
|
soft_log_score.append(
|
|
234
356
|
torch.log_softmax(loglikelihoods, dim=0)[label_dict[doc_id]].item()
|
|
235
357
|
)
|
|
358
|
+
soft_score_no_leading_space.append(
|
|
359
|
+
torch.softmax(loglikelihoods_no_leading_space, dim=0)[label_dict[doc_id]].item()
|
|
360
|
+
)
|
|
361
|
+
soft_log_score_no_leading_space.append(
|
|
362
|
+
torch.log_softmax(loglikelihoods_no_leading_space, dim=0)[
|
|
363
|
+
label_dict[doc_id]
|
|
364
|
+
].item()
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# v1 vs. v2 corresponds to whether we add a 1 to the num chars or num bytes when normalizing the answer length. See https://github.com/allenai/OLMo-in-loop-evals/pull/6
|
|
236
368
|
|
|
237
369
|
if self.metric_type == "f1":
|
|
238
370
|
assert preds is not None
|
|
239
371
|
assert labels is not None
|
|
240
372
|
# for NLI tasks, continuations are yes, no, neither, so idx=0 assigned to pos label
|
|
241
373
|
score = f1_score(labels, preds, pos_label=0)
|
|
242
|
-
|
|
374
|
+
score_no_leading_space = f1_score(labels, preds_no_leading_space, pos_label=0)
|
|
375
|
+
return {
|
|
376
|
+
"f1_v1": torch.tensor(score),
|
|
377
|
+
"f1_v2": torch.tensor(score_no_leading_space),
|
|
378
|
+
}
|
|
243
379
|
elif self.metric_type == "ce_loss":
|
|
244
|
-
return {
|
|
380
|
+
return {
|
|
381
|
+
"ce_loss_v1": torch.tensor(
|
|
382
|
+
sum(celoss_no_leading_space) / len(celoss_no_leading_space)
|
|
383
|
+
),
|
|
384
|
+
"ce_loss_v2": torch.tensor(sum(celoss) / len(celoss)),
|
|
385
|
+
}
|
|
245
386
|
elif self.metric_type == "bpb":
|
|
246
|
-
return {
|
|
387
|
+
return {
|
|
388
|
+
"bpb_v1": torch.tensor(sum(bpb_no_leading_space) / len(bpb_no_leading_space)),
|
|
389
|
+
"bpb_v2": torch.tensor(sum(bpb) / len(bpb)),
|
|
390
|
+
}
|
|
247
391
|
else:
|
|
248
392
|
return {
|
|
249
|
-
self.metric_type: torch.tensor(sum(correct) / len(correct)),
|
|
250
|
-
"
|
|
251
|
-
"
|
|
252
|
-
|
|
253
|
-
|
|
393
|
+
f"{self.metric_type}_v1": torch.tensor(sum(correct) / len(correct)),
|
|
394
|
+
f"{self.metric_type}_v2": torch.tensor(sum(correct) / len(correct)),
|
|
395
|
+
"ce_loss_v1": torch.tensor(
|
|
396
|
+
sum(celoss_no_leading_space) / len(celoss_no_leading_space)
|
|
397
|
+
),
|
|
398
|
+
"ce_loss_v2": torch.tensor(sum(celoss) / len(celoss)),
|
|
399
|
+
"bpb_v1": torch.tensor(sum(bpb_no_leading_space) / len(bpb_no_leading_space)),
|
|
400
|
+
"bpb_v2": torch.tensor(sum(bpb) / len(bpb)),
|
|
401
|
+
"soft_v1": torch.tensor(
|
|
402
|
+
sum(soft_score_no_leading_space) / len(soft_score_no_leading_space)
|
|
403
|
+
),
|
|
404
|
+
"soft_v2": torch.tensor(sum(soft_score) / len(soft_score)),
|
|
405
|
+
"soft_log_v1": torch.tensor(
|
|
406
|
+
sum(soft_log_score_no_leading_space) / len(soft_log_score_no_leading_space)
|
|
407
|
+
),
|
|
408
|
+
"soft_log_v2": torch.tensor(sum(soft_log_score) / len(soft_log_score)),
|
|
254
409
|
}
|
|
@@ -103,8 +103,15 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
|
|
|
103
103
|
)
|
|
104
104
|
|
|
105
105
|
for cont_id, continuation_str in enumerate(continuations):
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
# The original implementation did not count the first character (usually the leading space) as
|
|
107
|
+
# part of the continuation length (e.g., " A", " " is not counted). The OLMES standard does not
|
|
108
|
+
# do this, but we track both for backwards compatibility.
|
|
109
|
+
cont_str_len_no_leading_space = len(continuation_str) - 1
|
|
110
|
+
cont_byte_len_no_leading_space = len(continuation_str[1:].encode("utf-8"))
|
|
111
|
+
|
|
112
|
+
cont_str_len = len(continuation_str)
|
|
113
|
+
cont_byte_len = len(continuation_str.encode("utf-8"))
|
|
114
|
+
|
|
108
115
|
continuation = self.token_encode(continuation_str)
|
|
109
116
|
|
|
110
117
|
# query, remove last token from continuation, truncate from left is longer than model ctx length
|
|
@@ -131,6 +138,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
|
|
|
131
138
|
), # even if query has last token removed, LM will output same cont len
|
|
132
139
|
"cont_str_len": cont_str_len,
|
|
133
140
|
"cont_byte_len": cont_byte_len,
|
|
141
|
+
"cont_str_len_no_leading_space": cont_str_len_no_leading_space,
|
|
142
|
+
"cont_byte_len_no_leading_space": cont_byte_len_no_leading_space,
|
|
134
143
|
"query": query, # remove last token from continuation
|
|
135
144
|
"dc_query": dc_query,
|
|
136
145
|
"label_id": label_id,
|
|
@@ -209,6 +218,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
|
|
|
209
218
|
cont_lens = []
|
|
210
219
|
cont_str_lens = []
|
|
211
220
|
cont_byte_lens = []
|
|
221
|
+
cont_str_len_no_leading_space = []
|
|
222
|
+
cont_byte_len_no_leading_space = []
|
|
212
223
|
queries = []
|
|
213
224
|
dc_queries = []
|
|
214
225
|
label_ids = []
|
|
@@ -232,6 +243,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
|
|
|
232
243
|
cont_lens.append(sample["cont_len"])
|
|
233
244
|
cont_str_lens.append(sample["cont_str_len"])
|
|
234
245
|
cont_byte_lens.append(sample["cont_byte_len"])
|
|
246
|
+
cont_str_len_no_leading_space.append(sample["cont_str_len_no_leading_space"])
|
|
247
|
+
cont_byte_len_no_leading_space.append(sample["cont_byte_len_no_leading_space"])
|
|
235
248
|
|
|
236
249
|
queries.append(
|
|
237
250
|
torch.LongTensor(
|
|
@@ -261,6 +274,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
|
|
|
261
274
|
), # since query has last token removed from continuation
|
|
262
275
|
"cont_str_len": torch.LongTensor(cont_str_lens),
|
|
263
276
|
"cont_byte_len": torch.LongTensor(cont_byte_lens),
|
|
277
|
+
"cont_str_len_no_leading_space": torch.LongTensor(cont_str_len_no_leading_space),
|
|
278
|
+
"cont_byte_len_no_leading_space": torch.LongTensor(cont_byte_len_no_leading_space),
|
|
264
279
|
"input_ids": torch.stack(queries),
|
|
265
280
|
"dc_input_ids": torch.stack(dc_queries),
|
|
266
281
|
"label_id": torch.LongTensor(label_ids),
|
|
@@ -456,8 +471,15 @@ class WinoGrande(ICLMultiChoiceTaskDataset):
|
|
|
456
471
|
|
|
457
472
|
continuation_str = self.doc_to_continuations(doc)
|
|
458
473
|
label_id = self.doc_to_label(doc)
|
|
459
|
-
|
|
460
|
-
|
|
474
|
+
|
|
475
|
+
# The original implementation did not count the first character (usually the leading space) as
|
|
476
|
+
# part of the continuation length (e.g., " A", " " is not counted). The OLMES standard does not
|
|
477
|
+
# do this, but we track both for backwards compatibility.
|
|
478
|
+
cont_str_len_no_leading_space = len(continuation_str) - 1
|
|
479
|
+
cont_byte_len_no_leading_space = len(continuation_str[1:].encode("utf-8"))
|
|
480
|
+
|
|
481
|
+
cont_str_len = len(continuation_str)
|
|
482
|
+
cont_byte_len = len(continuation_str.encode("utf-8"))
|
|
461
483
|
|
|
462
484
|
# tokenize
|
|
463
485
|
continuation = self.token_encode(continuation_str)
|
|
@@ -488,6 +510,8 @@ class WinoGrande(ICLMultiChoiceTaskDataset):
|
|
|
488
510
|
), # even if query has last token removed, LM will output same cont len
|
|
489
511
|
"cont_str_len": cont_str_len,
|
|
490
512
|
"cont_byte_len": cont_byte_len,
|
|
513
|
+
"cont_str_len_no_leading_space": cont_str_len_no_leading_space,
|
|
514
|
+
"cont_byte_len_no_leading_space": cont_byte_len_no_leading_space,
|
|
491
515
|
"query": query, # remove last token from continuation
|
|
492
516
|
"dc_query": dc_query,
|
|
493
517
|
"label_id": label_id,
|
|
@@ -1524,8 +1548,16 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
|
|
|
1524
1548
|
f"Sample doc from ({self.dataset_path}, {ds_name}):"
|
|
1525
1549
|
+ f"\ndoc_text: {doc_text}\ncontinuation: {continuation_str}"
|
|
1526
1550
|
)
|
|
1527
|
-
|
|
1528
|
-
|
|
1551
|
+
|
|
1552
|
+
# The original implementation did not count the first character (usually the leading space) as
|
|
1553
|
+
# part of the continuation length (e.g., " A", " " is not counted). The OLMES standard does not
|
|
1554
|
+
# do this, but we track both for backwards compatibility.
|
|
1555
|
+
cont_str_len_no_leading_space = len(continuation_str) - 1
|
|
1556
|
+
cont_byte_len_no_leading_space = len(continuation_str[1:].encode("utf-8"))
|
|
1557
|
+
|
|
1558
|
+
cont_str_len = len(continuation_str)
|
|
1559
|
+
cont_byte_len = len(continuation_str.encode("utf-8"))
|
|
1560
|
+
|
|
1529
1561
|
continuation = self.token_encode(continuation_str)
|
|
1530
1562
|
|
|
1531
1563
|
# query, remove last token from continuation, truncate from left is longer than model ctx length
|
|
@@ -1552,6 +1584,8 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
|
|
|
1552
1584
|
), # even if query has last token removed, LM will output same cont len
|
|
1553
1585
|
"cont_str_len": cont_str_len,
|
|
1554
1586
|
"cont_byte_len": cont_byte_len,
|
|
1587
|
+
"cont_str_len_no_leading_space": cont_str_len_no_leading_space,
|
|
1588
|
+
"cont_byte_len_no_leading_space": cont_byte_len_no_leading_space,
|
|
1555
1589
|
"query": query, # remove last token from continuation
|
|
1556
1590
|
"dc_query": dc_query,
|
|
1557
1591
|
"label_id": label_id,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|