ai2-olmo-eval 0.8.0__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/PKG-INFO +1 -1
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/PKG-INFO +1 -1
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/SOURCES.txt +36 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/requests.jsonl.gz +0 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/config.json +1 -0
- ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tasks.py +84 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/version.py +1 -1
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/LICENSE +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/README.md +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/pyproject.toml +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/setup.cfg +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/dependency_links.txt +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/requires.txt +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/top_level.txt +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/__init__.py +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/boolq/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/sciq/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/metrics.py +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tokenizer.py +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json +0 -0
- {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/util.py +0 -0
|
@@ -661,6 +661,8 @@ src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json
|
|
|
661
661
|
src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz
|
|
662
662
|
src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json
|
|
663
663
|
src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz
|
|
664
|
+
src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/config.json
|
|
665
|
+
src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/requests.jsonl.gz
|
|
664
666
|
src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json
|
|
665
667
|
src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz
|
|
666
668
|
src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json
|
|
@@ -675,6 +677,40 @@ src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json
|
|
|
675
677
|
src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz
|
|
676
678
|
src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json
|
|
677
679
|
src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz
|
|
680
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/config.json
|
|
681
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/requests.jsonl.gz
|
|
682
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/config.json
|
|
683
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/requests.jsonl.gz
|
|
684
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/config.json
|
|
685
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/requests.jsonl.gz
|
|
686
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/config.json
|
|
687
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/requests.jsonl.gz
|
|
688
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/config.json
|
|
689
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/requests.jsonl.gz
|
|
690
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/config.json
|
|
691
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/requests.jsonl.gz
|
|
692
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/config.json
|
|
693
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/requests.jsonl.gz
|
|
694
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/config.json
|
|
695
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/requests.jsonl.gz
|
|
696
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/config.json
|
|
697
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/requests.jsonl.gz
|
|
698
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/config.json
|
|
699
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/requests.jsonl.gz
|
|
700
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/config.json
|
|
701
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/requests.jsonl.gz
|
|
702
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/config.json
|
|
703
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/requests.jsonl.gz
|
|
704
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/config.json
|
|
705
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/requests.jsonl.gz
|
|
706
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/config.json
|
|
707
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/requests.jsonl.gz
|
|
708
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/config.json
|
|
709
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/requests.jsonl.gz
|
|
710
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/config.json
|
|
711
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/requests.jsonl.gz
|
|
712
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/config.json
|
|
713
|
+
src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/requests.jsonl.gz
|
|
678
714
|
src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json
|
|
679
715
|
src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz
|
|
680
716
|
src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "minerva_math_500", "task_hash": "75c1b390d73949780c88a5ff49948b5d", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "minerva_math_500", "task_core": "minerva_math_500", "limit": null, "split": "test", "num_shots": 4, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {"use_cot": true, "cot_style": "minerva"}, "generation_kwargs": {"max_gen_toks": 1024, "temperature": 0.0, "do_sample": false, "stop_sequences": ["Problem:", "\n\n"]}, "metric_kwargs": {}, "native_id_field": "index", "fewshot_source": "Minerva:MATH:fixed", "dataset_path": "HuggingFaceH4/MATH-500", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "minerva_math_500:bpb::olmes"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.9896588325500488, "current_date": "2025-05-18 01:08:29 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:bash", "task_hash": "12bf5ff314ab6e3b192fdb28a364b610", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:bash", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "bash", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:bash"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 3.0172407627105713, "current_date": "2025-05-18 01:08:32 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:c", "task_hash": "a61c21b0fd7fa57512e11b2c624dec05", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:c", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "c", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:c"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.3485467433929443, "current_date": "2025-05-18 01:08:34 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:cpp", "task_hash": "51069b2a5f1bf7fe9d54b54a37128b1d", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:cpp", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "cpp", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:cpp"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.267606496810913, "current_date": "2025-05-18 01:08:36 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:csharp", "task_hash": "1bd53de5a3c6987e174dc031e5496975", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:csharp", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "csharp", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:csharp"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.9407422542572021, "current_date": "2025-05-18 01:08:38 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:go", "task_hash": "ad42237d305a14bf48d22fbd7275d533", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:go", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "go", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:go"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7393126487731934, "current_date": "2025-05-18 01:08:40 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:haskell", "task_hash": "fb523f2ace6fa704fe5ac33cf8d57c26", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:haskell", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "haskell", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:haskell"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.2595787048339844, "current_date": "2025-05-18 01:08:42 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:java", "task_hash": "09dca3d5dc08e5549be48c7c840d4a87", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:java", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "java", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:java"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.4217495918273926, "current_date": "2025-05-18 01:08:45 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:javascript", "task_hash": "e02e668d2bb8b66897858b7ce39eb8ea", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:javascript", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "javascript", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:javascript"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7279460430145264, "current_date": "2025-05-18 01:08:47 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:matlab", "task_hash": "8d2c28b2bc33eb546714fdb3a72a8f50", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:matlab", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "matlab", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:matlab"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.80657958984375, "current_date": "2025-05-18 01:08:49 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:php", "task_hash": "d6319dd39349460d65796302a83f7d31", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:php", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "php", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:php"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.0057003498077393, "current_date": "2025-05-18 01:08:51 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:python", "task_hash": "fee56e18d38a80c1118f60e81a72d442", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:python", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "python", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:python"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.9594206809997559, "current_date": "2025-05-18 01:08:53 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:r", "task_hash": "d81e15e102450362af2d7171e33a40d0", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:r", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "r", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:r"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.6297383308410645, "current_date": "2025-05-18 01:08:56 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:ruby", "task_hash": "295088b5bf617929bc5f6c50c3c8e178", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:ruby", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "ruby", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:ruby"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7374136447906494, "current_date": "2025-05-18 01:08:58 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:rust", "task_hash": "c4e090ab96af1f8b427bbf55e2f15a92", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:rust", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "rust", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:rust"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.1582589149475098, "current_date": "2025-05-18 01:09:00 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:scala", "task_hash": "69a440383704f4474586c6642ad58c22", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:scala", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "scala", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:scala"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.035874366760254, "current_date": "2025-05-18 01:09:02 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:swift", "task_hash": "20d99f047a4973c156ee030770a02d10", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:swift", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "swift", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:swift"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7264349460601807, "current_date": "2025-05-18 01:09:04 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"task_name": "mt_mbpp:typescript", "task_hash": "0124047c8167d9c7b97d38642efc1c5d", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:typescript", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "typescript", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:typescript"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.5021820068359375, "current_date": "2025-05-18 01:09:06 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
|
ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/requests.jsonl.gz
ADDED
|
Binary file
|
|
@@ -2680,6 +2680,90 @@ LABEL_TO_TASK_MAP_EXPANDED = {
|
|
|
2680
2680
|
"metric_type": "bpb",
|
|
2681
2681
|
},
|
|
2682
2682
|
),
|
|
2683
|
+
"minerva_math_500_gold_bpb_0shot": (
|
|
2684
|
+
OEEvalTask,
|
|
2685
|
+
{
|
|
2686
|
+
"dataset_path": "minerva_math_500",
|
|
2687
|
+
"dataset_name": "gold_bpb_0shot",
|
|
2688
|
+
"metric_type": "bpb",
|
|
2689
|
+
},
|
|
2690
|
+
),
|
|
2691
|
+
"mt_mbpp_haskell_gold_bpb_3shot": (
|
|
2692
|
+
OEEvalTask,
|
|
2693
|
+
{"dataset_path": "mt_mbpp_haskell", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2694
|
+
),
|
|
2695
|
+
"mt_mbpp_go_gold_bpb_3shot": (
|
|
2696
|
+
OEEvalTask,
|
|
2697
|
+
{"dataset_path": "mt_mbpp_go", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2698
|
+
),
|
|
2699
|
+
"mt_mbpp_python_gold_bpb_3shot": (
|
|
2700
|
+
OEEvalTask,
|
|
2701
|
+
{"dataset_path": "mt_mbpp_python", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2702
|
+
),
|
|
2703
|
+
"mt_mbpp_cpp_gold_bpb_3shot": (
|
|
2704
|
+
OEEvalTask,
|
|
2705
|
+
{"dataset_path": "mt_mbpp_cpp", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2706
|
+
),
|
|
2707
|
+
"mt_mbpp_javascript_gold_bpb_3shot": (
|
|
2708
|
+
OEEvalTask,
|
|
2709
|
+
{
|
|
2710
|
+
"dataset_path": "mt_mbpp_javascript",
|
|
2711
|
+
"dataset_name": "gold_bpb_3shot",
|
|
2712
|
+
"metric_type": "bpb",
|
|
2713
|
+
},
|
|
2714
|
+
),
|
|
2715
|
+
"mt_mbpp_swift_gold_bpb_3shot": (
|
|
2716
|
+
OEEvalTask,
|
|
2717
|
+
{"dataset_path": "mt_mbpp_swift", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2718
|
+
),
|
|
2719
|
+
"mt_mbpp_scala_gold_bpb_3shot": (
|
|
2720
|
+
OEEvalTask,
|
|
2721
|
+
{"dataset_path": "mt_mbpp_scala", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2722
|
+
),
|
|
2723
|
+
"mt_mbpp_bash_gold_bpb_3shot": (
|
|
2724
|
+
OEEvalTask,
|
|
2725
|
+
{"dataset_path": "mt_mbpp_bash", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2726
|
+
),
|
|
2727
|
+
"mt_mbpp_typescript_gold_bpb_3shot": (
|
|
2728
|
+
OEEvalTask,
|
|
2729
|
+
{
|
|
2730
|
+
"dataset_path": "mt_mbpp_typescript",
|
|
2731
|
+
"dataset_name": "gold_bpb_3shot",
|
|
2732
|
+
"metric_type": "bpb",
|
|
2733
|
+
},
|
|
2734
|
+
),
|
|
2735
|
+
"mt_mbpp_c_gold_bpb_3shot": (
|
|
2736
|
+
OEEvalTask,
|
|
2737
|
+
{"dataset_path": "mt_mbpp_c", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2738
|
+
),
|
|
2739
|
+
"mt_mbpp_php_gold_bpb_3shot": (
|
|
2740
|
+
OEEvalTask,
|
|
2741
|
+
{"dataset_path": "mt_mbpp_php", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2742
|
+
),
|
|
2743
|
+
"mt_mbpp_rust_gold_bpb_3shot": (
|
|
2744
|
+
OEEvalTask,
|
|
2745
|
+
{"dataset_path": "mt_mbpp_rust", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2746
|
+
),
|
|
2747
|
+
"mt_mbpp_csharp_gold_bpb_3shot": (
|
|
2748
|
+
OEEvalTask,
|
|
2749
|
+
{"dataset_path": "mt_mbpp_csharp", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2750
|
+
),
|
|
2751
|
+
"mt_mbpp_r_gold_bpb_3shot": (
|
|
2752
|
+
OEEvalTask,
|
|
2753
|
+
{"dataset_path": "mt_mbpp_r", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2754
|
+
),
|
|
2755
|
+
"mt_mbpp_ruby_gold_bpb_3shot": (
|
|
2756
|
+
OEEvalTask,
|
|
2757
|
+
{"dataset_path": "mt_mbpp_ruby", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2758
|
+
),
|
|
2759
|
+
"mt_mbpp_java_gold_bpb_3shot": (
|
|
2760
|
+
OEEvalTask,
|
|
2761
|
+
{"dataset_path": "mt_mbpp_java", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2762
|
+
),
|
|
2763
|
+
"mt_mbpp_matlab_gold_bpb_3shot": (
|
|
2764
|
+
OEEvalTask,
|
|
2765
|
+
{"dataset_path": "mt_mbpp_matlab", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
|
|
2766
|
+
),
|
|
2683
2767
|
}
|
|
2684
2768
|
|
|
2685
2769
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|