ai2-olmo-eval 0.8.0__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (763)
  1. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/PKG-INFO +1 -1
  2. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/PKG-INFO +1 -1
  3. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/SOURCES.txt +36 -0
  4. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/config.json +1 -0
  5. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/requests.jsonl.gz +0 -0
  6. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/config.json +1 -0
  7. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/requests.jsonl.gz +0 -0
  8. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/config.json +1 -0
  9. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/requests.jsonl.gz +0 -0
  10. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/config.json +1 -0
  11. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/requests.jsonl.gz +0 -0
  12. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/config.json +1 -0
  13. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/requests.jsonl.gz +0 -0
  14. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/config.json +1 -0
  15. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/requests.jsonl.gz +0 -0
  16. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/config.json +1 -0
  17. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/requests.jsonl.gz +0 -0
  18. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/config.json +1 -0
  19. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/requests.jsonl.gz +0 -0
  20. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/config.json +1 -0
  21. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/requests.jsonl.gz +0 -0
  22. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/config.json +1 -0
  23. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/requests.jsonl.gz +0 -0
  24. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/config.json +1 -0
  25. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/requests.jsonl.gz +0 -0
  26. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/config.json +1 -0
  27. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/requests.jsonl.gz +0 -0
  28. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/config.json +1 -0
  29. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/requests.jsonl.gz +0 -0
  30. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/config.json +1 -0
  31. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/requests.jsonl.gz +0 -0
  32. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/config.json +1 -0
  33. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/requests.jsonl.gz +0 -0
  34. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/config.json +1 -0
  35. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/requests.jsonl.gz +0 -0
  36. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/config.json +1 -0
  37. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/requests.jsonl.gz +0 -0
  38. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/config.json +1 -0
  39. ai2_olmo_eval-0.8.1/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/requests.jsonl.gz +0 -0
  40. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tasks.py +84 -0
  41. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/version.py +1 -1
  42. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/LICENSE +0 -0
  43. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/README.md +0 -0
  44. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/pyproject.toml +0 -0
  45. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/setup.cfg +0 -0
  46. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/dependency_links.txt +0 -0
  47. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/requires.txt +0 -0
  48. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/ai2_olmo_eval.egg-info/top_level.txt +0 -0
  49. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/__init__.py +0 -0
  50. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow +0 -0
  51. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json +0 -0
  52. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json +0 -0
  53. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow +0 -0
  54. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json +0 -0
  55. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json +0 -0
  56. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow +0 -0
  57. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json +0 -0
  58. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json +0 -0
  59. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow +0 -0
  60. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json +0 -0
  61. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/boolq/none/validation/state.json +0 -0
  62. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow +0 -0
  63. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json +0 -0
  64. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json +0 -0
  65. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow +0 -0
  66. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json +0 -0
  67. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json +0 -0
  68. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow +0 -0
  69. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json +0 -0
  70. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/state.json +0 -0
  71. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow +0 -0
  72. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json +0 -0
  73. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json +0 -0
  74. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow +0 -0
  75. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json +0 -0
  76. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json +0 -0
  77. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow +0 -0
  78. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json +0 -0
  79. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json +0 -0
  80. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow +0 -0
  81. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json +0 -0
  82. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json +0 -0
  83. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow +0 -0
  84. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json +0 -0
  85. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json +0 -0
  86. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow +0 -0
  87. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json +0 -0
  88. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json +0 -0
  89. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow +0 -0
  90. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json +0 -0
  91. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json +0 -0
  92. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow +0 -0
  93. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json +0 -0
  94. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json +0 -0
  95. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow +0 -0
  96. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json +0 -0
  97. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json +0 -0
  98. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow +0 -0
  99. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json +0 -0
  100. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json +0 -0
  101. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow +0 -0
  102. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json +0 -0
  103. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json +0 -0
  104. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow +0 -0
  105. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json +0 -0
  106. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json +0 -0
  107. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow +0 -0
  108. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json +0 -0
  109. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json +0 -0
  110. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow +0 -0
  111. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json +0 -0
  112. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json +0 -0
  113. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow +0 -0
  114. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json +0 -0
  115. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json +0 -0
  116. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow +0 -0
  117. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json +0 -0
  118. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json +0 -0
  119. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow +0 -0
  120. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json +0 -0
  121. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json +0 -0
  122. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow +0 -0
  123. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json +0 -0
  124. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json +0 -0
  125. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow +0 -0
  126. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json +0 -0
  127. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json +0 -0
  128. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow +0 -0
  129. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json +0 -0
  130. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json +0 -0
  131. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow +0 -0
  132. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json +0 -0
  133. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json +0 -0
  134. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow +0 -0
  135. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json +0 -0
  136. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json +0 -0
  137. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow +0 -0
  138. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json +0 -0
  139. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json +0 -0
  140. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow +0 -0
  141. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json +0 -0
  142. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json +0 -0
  143. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow +0 -0
  144. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json +0 -0
  145. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json +0 -0
  146. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow +0 -0
  147. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json +0 -0
  148. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json +0 -0
  149. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow +0 -0
  150. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json +0 -0
  151. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json +0 -0
  152. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow +0 -0
  153. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json +0 -0
  154. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json +0 -0
  155. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow +0 -0
  156. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json +0 -0
  157. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json +0 -0
  158. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow +0 -0
  159. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json +0 -0
  160. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json +0 -0
  161. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow +0 -0
  162. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json +0 -0
  163. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json +0 -0
  164. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow +0 -0
  165. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json +0 -0
  166. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json +0 -0
  167. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow +0 -0
  168. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json +0 -0
  169. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json +0 -0
  170. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow +0 -0
  171. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json +0 -0
  172. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json +0 -0
  173. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow +0 -0
  174. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json +0 -0
  175. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json +0 -0
  176. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow +0 -0
  177. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json +0 -0
  178. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json +0 -0
  179. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow +0 -0
  180. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json +0 -0
  181. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json +0 -0
  182. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow +0 -0
  183. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json +0 -0
  184. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json +0 -0
  185. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow +0 -0
  186. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json +0 -0
  187. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json +0 -0
  188. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow +0 -0
  189. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json +0 -0
  190. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json +0 -0
  191. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow +0 -0
  192. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json +0 -0
  193. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json +0 -0
  194. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow +0 -0
  195. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json +0 -0
  196. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json +0 -0
  197. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow +0 -0
  198. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json +0 -0
  199. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json +0 -0
  200. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow +0 -0
  201. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json +0 -0
  202. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json +0 -0
  203. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow +0 -0
  204. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json +0 -0
  205. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json +0 -0
  206. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow +0 -0
  207. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json +0 -0
  208. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json +0 -0
  209. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow +0 -0
  210. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json +0 -0
  211. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json +0 -0
  212. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow +0 -0
  213. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json +0 -0
  214. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json +0 -0
  215. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow +0 -0
  216. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json +0 -0
  217. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json +0 -0
  218. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow +0 -0
  219. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json +0 -0
  220. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json +0 -0
  221. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow +0 -0
  222. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json +0 -0
  223. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json +0 -0
  224. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow +0 -0
  225. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json +0 -0
  226. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json +0 -0
  227. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow +0 -0
  228. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json +0 -0
  229. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json +0 -0
  230. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow +0 -0
  231. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json +0 -0
  232. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json +0 -0
  233. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow +0 -0
  234. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json +0 -0
  235. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json +0 -0
  236. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow +0 -0
  237. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json +0 -0
  238. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json +0 -0
  239. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow +0 -0
  240. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json +0 -0
  241. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json +0 -0
  242. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow +0 -0
  243. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json +0 -0
  244. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json +0 -0
  245. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow +0 -0
  246. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json +0 -0
  247. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json +0 -0
  248. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow +0 -0
  249. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json +0 -0
  250. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json +0 -0
  251. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow +0 -0
  252. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json +0 -0
  253. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json +0 -0
  254. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow +0 -0
  255. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json +0 -0
  256. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json +0 -0
  257. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow +0 -0
  258. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json +0 -0
  259. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json +0 -0
  260. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow +0 -0
  261. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json +0 -0
  262. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json +0 -0
  263. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow +0 -0
  264. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json +0 -0
  265. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json +0 -0
  266. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow +0 -0
  267. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json +0 -0
  268. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json +0 -0
  269. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow +0 -0
  270. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json +0 -0
  271. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json +0 -0
  272. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow +0 -0
  273. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json +0 -0
  274. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json +0 -0
  275. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow +0 -0
  276. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json +0 -0
  277. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json +0 -0
  278. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow +0 -0
  279. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json +0 -0
  280. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json +0 -0
  281. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow +0 -0
  282. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json +0 -0
  283. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json +0 -0
  284. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow +0 -0
  285. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json +0 -0
  286. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json +0 -0
  287. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow +0 -0
  288. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json +0 -0
  289. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json +0 -0
  290. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow +0 -0
  291. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json +0 -0
  292. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json +0 -0
  293. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow +0 -0
  294. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json +0 -0
  295. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json +0 -0
  296. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow +0 -0
  297. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json +0 -0
  298. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json +0 -0
  299. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow +0 -0
  300. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json +0 -0
  301. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json +0 -0
  302. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow +0 -0
  303. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json +0 -0
  304. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json +0 -0
  305. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow +0 -0
  306. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json +0 -0
  307. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json +0 -0
  308. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow +0 -0
  309. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json +0 -0
  310. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json +0 -0
  311. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow +0 -0
  312. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json +0 -0
  313. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json +0 -0
  314. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow +0 -0
  315. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json +0 -0
  316. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json +0 -0
  317. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow +0 -0
  318. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json +0 -0
  319. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json +0 -0
  320. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow +0 -0
  321. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json +0 -0
  322. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json +0 -0
  323. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow +0 -0
  324. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json +0 -0
  325. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json +0 -0
  326. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow +0 -0
  327. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json +0 -0
  328. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json +0 -0
  329. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow +0 -0
  330. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json +0 -0
  331. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json +0 -0
  332. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow +0 -0
  333. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json +0 -0
  334. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json +0 -0
  335. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow +0 -0
  336. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json +0 -0
  337. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json +0 -0
  338. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow +0 -0
  339. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json +0 -0
  340. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json +0 -0
  341. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow +0 -0
  342. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json +0 -0
  343. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json +0 -0
  344. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow +0 -0
  345. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json +0 -0
  346. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json +0 -0
  347. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow +0 -0
  348. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json +0 -0
  349. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json +0 -0
  350. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow +0 -0
  351. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json +0 -0
  352. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json +0 -0
  353. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow +0 -0
  354. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json +0 -0
  355. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json +0 -0
  356. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow +0 -0
  357. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json +0 -0
  358. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json +0 -0
  359. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow +0 -0
  360. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json +0 -0
  361. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json +0 -0
  362. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow +0 -0
  363. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json +0 -0
  364. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json +0 -0
  365. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow +0 -0
  366. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json +0 -0
  367. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json +0 -0
  368. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow +0 -0
  369. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json +0 -0
  370. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json +0 -0
  371. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow +0 -0
  372. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json +0 -0
  373. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json +0 -0
  374. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow +0 -0
  375. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json +0 -0
  376. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json +0 -0
  377. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow +0 -0
  378. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json +0 -0
  379. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json +0 -0
  380. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow +0 -0
  381. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json +0 -0
  382. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json +0 -0
  383. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow +0 -0
  384. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json +0 -0
  385. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json +0 -0
  386. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow +0 -0
  387. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json +0 -0
  388. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json +0 -0
  389. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow +0 -0
  390. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json +0 -0
  391. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json +0 -0
  392. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow +0 -0
  393. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json +0 -0
  394. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json +0 -0
  395. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow +0 -0
  396. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json +0 -0
  397. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json +0 -0
  398. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow +0 -0
  399. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json +0 -0
  400. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json +0 -0
  401. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow +0 -0
  402. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json +0 -0
  403. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json +0 -0
  404. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow +0 -0
  405. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json +0 -0
  406. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json +0 -0
  407. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow +0 -0
  408. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json +0 -0
  409. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json +0 -0
  410. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow +0 -0
  411. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json +0 -0
  412. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json +0 -0
  413. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow +0 -0
  414. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json +0 -0
  415. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json +0 -0
  416. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow +0 -0
  417. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json +0 -0
  418. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json +0 -0
  419. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow +0 -0
  420. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json +0 -0
  421. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json +0 -0
  422. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow +0 -0
  423. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json +0 -0
  424. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json +0 -0
  425. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow +0 -0
  426. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json +0 -0
  427. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json +0 -0
  428. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow +0 -0
  429. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json +0 -0
  430. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json +0 -0
  431. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow +0 -0
  432. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json +0 -0
  433. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json +0 -0
  434. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow +0 -0
  435. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json +0 -0
  436. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json +0 -0
  437. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow +0 -0
  438. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json +0 -0
  439. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json +0 -0
  440. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow +0 -0
  441. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json +0 -0
  442. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json +0 -0
  443. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow +0 -0
  444. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json +0 -0
  445. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json +0 -0
  446. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow +0 -0
  447. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json +0 -0
  448. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json +0 -0
  449. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow +0 -0
  450. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json +0 -0
  451. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json +0 -0
  452. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow +0 -0
  453. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json +0 -0
  454. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json +0 -0
  455. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow +0 -0
  456. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json +0 -0
  457. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json +0 -0
  458. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow +0 -0
  459. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json +0 -0
  460. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json +0 -0
  461. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow +0 -0
  462. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json +0 -0
  463. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json +0 -0
  464. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow +0 -0
  465. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json +0 -0
  466. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json +0 -0
  467. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow +0 -0
  468. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json +0 -0
  469. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json +0 -0
  470. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow +0 -0
  471. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json +0 -0
  472. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json +0 -0
  473. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow +0 -0
  474. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json +0 -0
  475. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json +0 -0
  476. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow +0 -0
  477. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json +0 -0
  478. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json +0 -0
  479. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow +0 -0
  480. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json +0 -0
  481. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json +0 -0
  482. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow +0 -0
  483. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json +0 -0
  484. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json +0 -0
  485. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow +0 -0
  486. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json +0 -0
  487. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json +0 -0
  488. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow +0 -0
  489. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json +0 -0
  490. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json +0 -0
  491. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow +0 -0
  492. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json +0 -0
  493. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json +0 -0
  494. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow +0 -0
  495. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json +0 -0
  496. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json +0 -0
  497. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow +0 -0
  498. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json +0 -0
  499. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json +0 -0
  500. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow +0 -0
  501. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json +0 -0
  502. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json +0 -0
  503. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow +0 -0
  504. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json +0 -0
  505. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json +0 -0
  506. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow +0 -0
  507. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json +0 -0
  508. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json +0 -0
  509. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow +0 -0
  510. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json +0 -0
  511. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json +0 -0
  512. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow +0 -0
  513. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json +0 -0
  514. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json +0 -0
  515. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow +0 -0
  516. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json +0 -0
  517. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json +0 -0
  518. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow +0 -0
  519. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json +0 -0
  520. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json +0 -0
  521. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow +0 -0
  522. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json +0 -0
  523. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json +0 -0
  524. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow +0 -0
  525. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json +0 -0
  526. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json +0 -0
  527. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow +0 -0
  528. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json +0 -0
  529. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json +0 -0
  530. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow +0 -0
  531. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json +0 -0
  532. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json +0 -0
  533. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow +0 -0
  534. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json +0 -0
  535. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json +0 -0
  536. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow +0 -0
  537. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json +0 -0
  538. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json +0 -0
  539. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow +0 -0
  540. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json +0 -0
  541. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json +0 -0
  542. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow +0 -0
  543. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json +0 -0
  544. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json +0 -0
  545. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow +0 -0
  546. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json +0 -0
  547. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json +0 -0
  548. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow +0 -0
  549. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json +0 -0
  550. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json +0 -0
  551. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow +0 -0
  552. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json +0 -0
  553. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json +0 -0
  554. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow +0 -0
  555. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json +0 -0
  556. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json +0 -0
  557. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow +0 -0
  558. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json +0 -0
  559. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json +0 -0
  560. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow +0 -0
  561. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json +0 -0
  562. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json +0 -0
  563. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow +0 -0
  564. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json +0 -0
  565. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json +0 -0
  566. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow +0 -0
  567. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json +0 -0
  568. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json +0 -0
  569. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow +0 -0
  570. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json +0 -0
  571. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json +0 -0
  572. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow +0 -0
  573. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json +0 -0
  574. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json +0 -0
  575. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow +0 -0
  576. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json +0 -0
  577. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json +0 -0
  578. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow +0 -0
  579. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json +0 -0
  580. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json +0 -0
  581. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow +0 -0
  582. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json +0 -0
  583. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json +0 -0
  584. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow +0 -0
  585. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json +0 -0
  586. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json +0 -0
  587. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow +0 -0
  588. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json +0 -0
  589. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/state.json +0 -0
  590. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow +0 -0
  591. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json +0 -0
  592. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json +0 -0
  593. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow +0 -0
  594. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json +0 -0
  595. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json +0 -0
  596. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow +0 -0
  597. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json +0 -0
  598. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/sciq/none/validation/state.json +0 -0
  599. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow +0 -0
  600. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json +0 -0
  601. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json +0 -0
  602. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow +0 -0
  603. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json +0 -0
  604. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json +0 -0
  605. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow +0 -0
  606. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json +0 -0
  607. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json +0 -0
  608. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow +0 -0
  609. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json +0 -0
  610. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json +0 -0
  611. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow +0 -0
  612. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json +0 -0
  613. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json +0 -0
  614. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow +0 -0
  615. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json +0 -0
  616. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json +0 -0
  617. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/metrics.py +0 -0
  618. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json +0 -0
  619. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz +0 -0
  620. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json +0 -0
  621. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz +0 -0
  622. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json +0 -0
  623. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz +0 -0
  624. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json +0 -0
  625. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz +0 -0
  626. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json +0 -0
  627. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz +0 -0
  628. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json +0 -0
  629. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz +0 -0
  630. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json +0 -0
  631. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz +0 -0
  632. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json +0 -0
  633. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz +0 -0
  634. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json +0 -0
  635. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz +0 -0
  636. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json +0 -0
  637. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz +0 -0
  638. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json +0 -0
  639. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz +0 -0
  640. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json +0 -0
  641. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz +0 -0
  642. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json +0 -0
  643. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz +0 -0
  644. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json +0 -0
  645. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz +0 -0
  646. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/config.json +0 -0
  647. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/requests.jsonl.gz +0 -0
  648. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/config.json +0 -0
  649. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/requests.jsonl.gz +0 -0
  650. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/config.json +0 -0
  651. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/requests.jsonl.gz +0 -0
  652. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/config.json +0 -0
  653. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/requests.jsonl.gz +0 -0
  654. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/config.json +0 -0
  655. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/requests.jsonl.gz +0 -0
  656. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/config.json +0 -0
  657. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/requests.jsonl.gz +0 -0
  658. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json +0 -0
  659. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz +0 -0
  660. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json +0 -0
  661. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz +0 -0
  662. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json +0 -0
  663. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz +0 -0
  664. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json +0 -0
  665. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz +0 -0
  666. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json +0 -0
  667. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz +0 -0
  668. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json +0 -0
  669. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz +0 -0
  670. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json +0 -0
  671. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz +0 -0
  672. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json +0 -0
  673. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz +0 -0
  674. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json +0 -0
  675. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz +0 -0
  676. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json +0 -0
  677. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz +0 -0
  678. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json +0 -0
  679. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz +0 -0
  680. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json +0 -0
  681. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz +0 -0
  682. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json +0 -0
  683. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz +0 -0
  684. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json +0 -0
  685. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz +0 -0
  686. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json +0 -0
  687. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz +0 -0
  688. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json +0 -0
  689. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz +0 -0
  690. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json +0 -0
  691. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz +0 -0
  692. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json +0 -0
  693. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz +0 -0
  694. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json +0 -0
  695. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz +0 -0
  696. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json +0 -0
  697. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz +0 -0
  698. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json +0 -0
  699. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz +0 -0
  700. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json +0 -0
  701. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  702. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json +0 -0
  703. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz +0 -0
  704. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json +0 -0
  705. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz +0 -0
  706. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json +0 -0
  707. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  708. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json +0 -0
  709. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz +0 -0
  710. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json +0 -0
  711. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  712. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json +0 -0
  713. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz +0 -0
  714. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json +0 -0
  715. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz +0 -0
  716. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json +0 -0
  717. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz +0 -0
  718. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json +0 -0
  719. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz +0 -0
  720. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json +0 -0
  721. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz +0 -0
  722. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json +0 -0
  723. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz +0 -0
  724. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json +0 -0
  725. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz +0 -0
  726. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json +0 -0
  727. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz +0 -0
  728. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json +0 -0
  729. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz +0 -0
  730. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json +0 -0
  731. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz +0 -0
  732. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json +0 -0
  733. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz +0 -0
  734. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json +0 -0
  735. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz +0 -0
  736. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json +0 -0
  737. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz +0 -0
  738. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json +0 -0
  739. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz +0 -0
  740. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json +0 -0
  741. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz +0 -0
  742. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json +0 -0
  743. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz +0 -0
  744. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json +0 -0
  745. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz +0 -0
  746. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json +0 -0
  747. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz +0 -0
  748. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json +0 -0
  749. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz +0 -0
  750. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json +0 -0
  751. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz +0 -0
  752. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json +0 -0
  753. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz +0 -0
  754. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json +0 -0
  755. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz +0 -0
  756. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json +0 -0
  757. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz +0 -0
  758. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json +0 -0
  759. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz +0 -0
  760. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tokenizer.py +0 -0
  761. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json +0 -0
  762. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json +0 -0
  763. {ai2_olmo_eval-0.8.0 → ai2_olmo_eval-0.8.1}/src/olmo_eval/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -661,6 +661,8 @@ src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json
661
661
  src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz
662
662
  src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json
663
663
  src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz
664
+ src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/config.json
665
+ src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/requests.jsonl.gz
664
666
  src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json
665
667
  src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz
666
668
  src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json
@@ -675,6 +677,40 @@ src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json
675
677
  src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz
676
678
  src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json
677
679
  src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz
680
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/config.json
681
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/requests.jsonl.gz
682
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/config.json
683
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/requests.jsonl.gz
684
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/config.json
685
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/requests.jsonl.gz
686
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/config.json
687
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/requests.jsonl.gz
688
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/config.json
689
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/requests.jsonl.gz
690
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/config.json
691
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/requests.jsonl.gz
692
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/config.json
693
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/requests.jsonl.gz
694
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/config.json
695
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/requests.jsonl.gz
696
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/config.json
697
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/requests.jsonl.gz
698
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/config.json
699
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/requests.jsonl.gz
700
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/config.json
701
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/requests.jsonl.gz
702
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/config.json
703
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/requests.jsonl.gz
704
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/config.json
705
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/requests.jsonl.gz
706
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/config.json
707
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/requests.jsonl.gz
708
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/config.json
709
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/requests.jsonl.gz
710
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/config.json
711
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/requests.jsonl.gz
712
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/config.json
713
+ src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/requests.jsonl.gz
678
714
  src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json
679
715
  src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz
680
716
  src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json
@@ -0,0 +1 @@
1
+ {"task_name": "minerva_math_500", "task_hash": "75c1b390d73949780c88a5ff49948b5d", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "minerva_math_500", "task_core": "minerva_math_500", "limit": null, "split": "test", "num_shots": 4, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {"use_cot": true, "cot_style": "minerva"}, "generation_kwargs": {"max_gen_toks": 1024, "temperature": 0.0, "do_sample": false, "stop_sequences": ["Problem:", "\n\n"]}, "metric_kwargs": {}, "native_id_field": "index", "fewshot_source": "Minerva:MATH:fixed", "dataset_path": "HuggingFaceH4/MATH-500", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "minerva_math_500:bpb::olmes"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.9896588325500488, "current_date": "2025-05-18 01:08:29 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:bash", "task_hash": "12bf5ff314ab6e3b192fdb28a364b610", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:bash", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "bash", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:bash"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 3.0172407627105713, "current_date": "2025-05-18 01:08:32 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:c", "task_hash": "a61c21b0fd7fa57512e11b2c624dec05", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:c", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "c", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:c"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.3485467433929443, "current_date": "2025-05-18 01:08:34 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:cpp", "task_hash": "51069b2a5f1bf7fe9d54b54a37128b1d", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:cpp", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "cpp", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:cpp"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.267606496810913, "current_date": "2025-05-18 01:08:36 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:csharp", "task_hash": "1bd53de5a3c6987e174dc031e5496975", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:csharp", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "csharp", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:csharp"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.9407422542572021, "current_date": "2025-05-18 01:08:38 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:go", "task_hash": "ad42237d305a14bf48d22fbd7275d533", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:go", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "go", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:go"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7393126487731934, "current_date": "2025-05-18 01:08:40 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:haskell", "task_hash": "fb523f2ace6fa704fe5ac33cf8d57c26", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:haskell", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "haskell", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:haskell"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.2595787048339844, "current_date": "2025-05-18 01:08:42 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:java", "task_hash": "09dca3d5dc08e5549be48c7c840d4a87", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:java", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "java", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:java"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.4217495918273926, "current_date": "2025-05-18 01:08:45 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:javascript", "task_hash": "e02e668d2bb8b66897858b7ce39eb8ea", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:javascript", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "javascript", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:javascript"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7279460430145264, "current_date": "2025-05-18 01:08:47 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:matlab", "task_hash": "8d2c28b2bc33eb546714fdb3a72a8f50", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:matlab", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "matlab", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:matlab"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.80657958984375, "current_date": "2025-05-18 01:08:49 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:php", "task_hash": "d6319dd39349460d65796302a83f7d31", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:php", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "php", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:php"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.0057003498077393, "current_date": "2025-05-18 01:08:51 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:python", "task_hash": "fee56e18d38a80c1118f60e81a72d442", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:python", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "python", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:python"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.9594206809997559, "current_date": "2025-05-18 01:08:53 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:r", "task_hash": "d81e15e102450362af2d7171e33a40d0", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:r", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "r", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:r"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.6297383308410645, "current_date": "2025-05-18 01:08:56 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:ruby", "task_hash": "295088b5bf617929bc5f6c50c3c8e178", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:ruby", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "ruby", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:ruby"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7374136447906494, "current_date": "2025-05-18 01:08:58 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:rust", "task_hash": "c4e090ab96af1f8b427bbf55e2f15a92", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:rust", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "rust", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:rust"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.1582589149475098, "current_date": "2025-05-18 01:09:00 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:scala", "task_hash": "69a440383704f4474586c6642ad58c22", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:scala", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "scala", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:scala"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.035874366760254, "current_date": "2025-05-18 01:09:02 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:swift", "task_hash": "20d99f047a4973c156ee030770a02d10", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:swift", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "swift", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:swift"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.7264349460601807, "current_date": "2025-05-18 01:09:04 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -0,0 +1 @@
1
+ {"task_name": "mt_mbpp:typescript", "task_hash": "0124047c8167d9c7b97d38642efc1c5d", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mt_mbpp:typescript", "task_core": "mt_mbpp", "limit": 500, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {"max_gen_toks": 512, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n"]}, "metric_kwargs": {}, "native_id_field": "task_id", "fewshot_source": "multilingual_mbpp", "dataset_path": "allenai/multilingual_mbpp", "dataset_name": "typescript", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "mt_mbpp:typescript"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.5021820068359375, "current_date": "2025-05-18 01:09:06 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5Q99TVRRHS826AQSXY3H", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVDSK0DTYMJWXCKFBRFNA24V", "BEAKER_WORKLOAD_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ENVIRONMENT_ID": "01JVDSK0DTA4GTNDS6ANJS0352", "BEAKER_ASSIGNED_CPU_COUNT": "31.875", "BEAKER_ASSIGNED_GPU_COUNT": "1", "BEAKER_NODE_HOSTNAME": "triton-cs-aus-454.reviz.ai2.in"}}
@@ -2680,6 +2680,90 @@ LABEL_TO_TASK_MAP_EXPANDED = {
2680
2680
  "metric_type": "bpb",
2681
2681
  },
2682
2682
  ),
2683
+ "minerva_math_500_gold_bpb_0shot": (
2684
+ OEEvalTask,
2685
+ {
2686
+ "dataset_path": "minerva_math_500",
2687
+ "dataset_name": "gold_bpb_0shot",
2688
+ "metric_type": "bpb",
2689
+ },
2690
+ ),
2691
+ "mt_mbpp_haskell_gold_bpb_3shot": (
2692
+ OEEvalTask,
2693
+ {"dataset_path": "mt_mbpp_haskell", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2694
+ ),
2695
+ "mt_mbpp_go_gold_bpb_3shot": (
2696
+ OEEvalTask,
2697
+ {"dataset_path": "mt_mbpp_go", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2698
+ ),
2699
+ "mt_mbpp_python_gold_bpb_3shot": (
2700
+ OEEvalTask,
2701
+ {"dataset_path": "mt_mbpp_python", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2702
+ ),
2703
+ "mt_mbpp_cpp_gold_bpb_3shot": (
2704
+ OEEvalTask,
2705
+ {"dataset_path": "mt_mbpp_cpp", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2706
+ ),
2707
+ "mt_mbpp_javascript_gold_bpb_3shot": (
2708
+ OEEvalTask,
2709
+ {
2710
+ "dataset_path": "mt_mbpp_javascript",
2711
+ "dataset_name": "gold_bpb_3shot",
2712
+ "metric_type": "bpb",
2713
+ },
2714
+ ),
2715
+ "mt_mbpp_swift_gold_bpb_3shot": (
2716
+ OEEvalTask,
2717
+ {"dataset_path": "mt_mbpp_swift", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2718
+ ),
2719
+ "mt_mbpp_scala_gold_bpb_3shot": (
2720
+ OEEvalTask,
2721
+ {"dataset_path": "mt_mbpp_scala", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2722
+ ),
2723
+ "mt_mbpp_bash_gold_bpb_3shot": (
2724
+ OEEvalTask,
2725
+ {"dataset_path": "mt_mbpp_bash", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2726
+ ),
2727
+ "mt_mbpp_typescript_gold_bpb_3shot": (
2728
+ OEEvalTask,
2729
+ {
2730
+ "dataset_path": "mt_mbpp_typescript",
2731
+ "dataset_name": "gold_bpb_3shot",
2732
+ "metric_type": "bpb",
2733
+ },
2734
+ ),
2735
+ "mt_mbpp_c_gold_bpb_3shot": (
2736
+ OEEvalTask,
2737
+ {"dataset_path": "mt_mbpp_c", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2738
+ ),
2739
+ "mt_mbpp_php_gold_bpb_3shot": (
2740
+ OEEvalTask,
2741
+ {"dataset_path": "mt_mbpp_php", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2742
+ ),
2743
+ "mt_mbpp_rust_gold_bpb_3shot": (
2744
+ OEEvalTask,
2745
+ {"dataset_path": "mt_mbpp_rust", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2746
+ ),
2747
+ "mt_mbpp_csharp_gold_bpb_3shot": (
2748
+ OEEvalTask,
2749
+ {"dataset_path": "mt_mbpp_csharp", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2750
+ ),
2751
+ "mt_mbpp_r_gold_bpb_3shot": (
2752
+ OEEvalTask,
2753
+ {"dataset_path": "mt_mbpp_r", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2754
+ ),
2755
+ "mt_mbpp_ruby_gold_bpb_3shot": (
2756
+ OEEvalTask,
2757
+ {"dataset_path": "mt_mbpp_ruby", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2758
+ ),
2759
+ "mt_mbpp_java_gold_bpb_3shot": (
2760
+ OEEvalTask,
2761
+ {"dataset_path": "mt_mbpp_java", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2762
+ ),
2763
+ "mt_mbpp_matlab_gold_bpb_3shot": (
2764
+ OEEvalTask,
2765
+ {"dataset_path": "mt_mbpp_matlab", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2766
+ ),
2683
2767
  }
2684
2768
 
2685
2769
 
@@ -1,6 +1,6 @@
1
1
  _MAJOR = "0"
2
2
  _MINOR = "8"
3
- _PATCH = "0"
3
+ _PATCH = "1"
4
4
  _SUFFIX = ""
5
5
 
6
6
  VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
File without changes
File without changes
File without changes