ai2-olmo-eval 0.7.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (715) hide show
  1. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/PKG-INFO +3 -2
  2. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/PKG-INFO +3 -2
  3. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/metrics.py +164 -9
  4. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tasks.py +40 -6
  5. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/version.py +1 -1
  6. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/LICENSE +0 -0
  7. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/README.md +0 -0
  8. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/pyproject.toml +0 -0
  9. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/setup.cfg +0 -0
  10. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/SOURCES.txt +0 -0
  11. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/dependency_links.txt +0 -0
  12. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/requires.txt +0 -0
  13. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/ai2_olmo_eval.egg-info/top_level.txt +0 -0
  14. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/__init__.py +0 -0
  15. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow +0 -0
  16. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json +0 -0
  17. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json +0 -0
  18. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow +0 -0
  19. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json +0 -0
  20. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json +0 -0
  21. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow +0 -0
  22. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json +0 -0
  23. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json +0 -0
  24. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow +0 -0
  25. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json +0 -0
  26. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/boolq/none/validation/state.json +0 -0
  27. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow +0 -0
  28. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json +0 -0
  29. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json +0 -0
  30. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow +0 -0
  31. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json +0 -0
  32. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json +0 -0
  33. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow +0 -0
  34. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json +0 -0
  35. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/glue/sst2/validation/state.json +0 -0
  36. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow +0 -0
  37. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json +0 -0
  38. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json +0 -0
  39. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow +0 -0
  40. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json +0 -0
  41. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json +0 -0
  42. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow +0 -0
  43. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json +0 -0
  44. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json +0 -0
  45. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow +0 -0
  46. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json +0 -0
  47. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json +0 -0
  48. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow +0 -0
  49. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json +0 -0
  50. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json +0 -0
  51. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow +0 -0
  52. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json +0 -0
  53. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json +0 -0
  54. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow +0 -0
  55. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json +0 -0
  56. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json +0 -0
  57. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow +0 -0
  58. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json +0 -0
  59. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json +0 -0
  60. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow +0 -0
  61. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json +0 -0
  62. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json +0 -0
  63. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow +0 -0
  64. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json +0 -0
  65. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json +0 -0
  66. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow +0 -0
  67. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json +0 -0
  68. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json +0 -0
  69. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow +0 -0
  70. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json +0 -0
  71. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json +0 -0
  72. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow +0 -0
  73. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json +0 -0
  74. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json +0 -0
  75. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow +0 -0
  76. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json +0 -0
  77. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json +0 -0
  78. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow +0 -0
  79. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json +0 -0
  80. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json +0 -0
  81. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow +0 -0
  82. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json +0 -0
  83. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json +0 -0
  84. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow +0 -0
  85. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json +0 -0
  86. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json +0 -0
  87. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow +0 -0
  88. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json +0 -0
  89. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json +0 -0
  90. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow +0 -0
  91. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json +0 -0
  92. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json +0 -0
  93. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow +0 -0
  94. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json +0 -0
  95. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json +0 -0
  96. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow +0 -0
  97. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json +0 -0
  98. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json +0 -0
  99. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow +0 -0
  100. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json +0 -0
  101. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json +0 -0
  102. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow +0 -0
  103. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json +0 -0
  104. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json +0 -0
  105. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow +0 -0
  106. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json +0 -0
  107. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json +0 -0
  108. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow +0 -0
  109. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json +0 -0
  110. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json +0 -0
  111. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow +0 -0
  112. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json +0 -0
  113. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json +0 -0
  114. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow +0 -0
  115. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json +0 -0
  116. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json +0 -0
  117. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow +0 -0
  118. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json +0 -0
  119. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json +0 -0
  120. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow +0 -0
  121. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json +0 -0
  122. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json +0 -0
  123. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow +0 -0
  124. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json +0 -0
  125. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json +0 -0
  126. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow +0 -0
  127. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json +0 -0
  128. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json +0 -0
  129. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow +0 -0
  130. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json +0 -0
  131. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json +0 -0
  132. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow +0 -0
  133. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json +0 -0
  134. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json +0 -0
  135. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow +0 -0
  136. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json +0 -0
  137. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json +0 -0
  138. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow +0 -0
  139. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json +0 -0
  140. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json +0 -0
  141. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow +0 -0
  142. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json +0 -0
  143. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json +0 -0
  144. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow +0 -0
  145. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json +0 -0
  146. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json +0 -0
  147. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow +0 -0
  148. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json +0 -0
  149. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json +0 -0
  150. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow +0 -0
  151. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json +0 -0
  152. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json +0 -0
  153. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow +0 -0
  154. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json +0 -0
  155. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json +0 -0
  156. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow +0 -0
  157. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json +0 -0
  158. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json +0 -0
  159. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow +0 -0
  160. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json +0 -0
  161. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json +0 -0
  162. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow +0 -0
  163. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json +0 -0
  164. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json +0 -0
  165. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow +0 -0
  166. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json +0 -0
  167. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json +0 -0
  168. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow +0 -0
  169. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json +0 -0
  170. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json +0 -0
  171. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow +0 -0
  172. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json +0 -0
  173. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json +0 -0
  174. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow +0 -0
  175. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json +0 -0
  176. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json +0 -0
  177. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow +0 -0
  178. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json +0 -0
  179. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json +0 -0
  180. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow +0 -0
  181. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json +0 -0
  182. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json +0 -0
  183. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow +0 -0
  184. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json +0 -0
  185. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json +0 -0
  186. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow +0 -0
  187. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json +0 -0
  188. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json +0 -0
  189. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow +0 -0
  190. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json +0 -0
  191. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json +0 -0
  192. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow +0 -0
  193. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json +0 -0
  194. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json +0 -0
  195. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow +0 -0
  196. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json +0 -0
  197. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json +0 -0
  198. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow +0 -0
  199. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json +0 -0
  200. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json +0 -0
  201. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow +0 -0
  202. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json +0 -0
  203. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json +0 -0
  204. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow +0 -0
  205. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json +0 -0
  206. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json +0 -0
  207. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow +0 -0
  208. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json +0 -0
  209. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json +0 -0
  210. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow +0 -0
  211. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json +0 -0
  212. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json +0 -0
  213. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow +0 -0
  214. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json +0 -0
  215. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json +0 -0
  216. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow +0 -0
  217. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json +0 -0
  218. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json +0 -0
  219. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow +0 -0
  220. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json +0 -0
  221. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json +0 -0
  222. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow +0 -0
  223. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json +0 -0
  224. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json +0 -0
  225. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow +0 -0
  226. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json +0 -0
  227. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json +0 -0
  228. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow +0 -0
  229. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json +0 -0
  230. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json +0 -0
  231. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow +0 -0
  232. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json +0 -0
  233. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json +0 -0
  234. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow +0 -0
  235. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json +0 -0
  236. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json +0 -0
  237. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow +0 -0
  238. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json +0 -0
  239. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json +0 -0
  240. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow +0 -0
  241. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json +0 -0
  242. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json +0 -0
  243. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow +0 -0
  244. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json +0 -0
  245. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json +0 -0
  246. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow +0 -0
  247. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json +0 -0
  248. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json +0 -0
  249. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow +0 -0
  250. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json +0 -0
  251. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json +0 -0
  252. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow +0 -0
  253. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json +0 -0
  254. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json +0 -0
  255. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow +0 -0
  256. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json +0 -0
  257. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json +0 -0
  258. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow +0 -0
  259. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json +0 -0
  260. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json +0 -0
  261. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow +0 -0
  262. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json +0 -0
  263. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json +0 -0
  264. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow +0 -0
  265. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json +0 -0
  266. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json +0 -0
  267. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow +0 -0
  268. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json +0 -0
  269. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json +0 -0
  270. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow +0 -0
  271. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json +0 -0
  272. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json +0 -0
  273. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow +0 -0
  274. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json +0 -0
  275. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json +0 -0
  276. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow +0 -0
  277. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json +0 -0
  278. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json +0 -0
  279. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow +0 -0
  280. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json +0 -0
  281. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json +0 -0
  282. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow +0 -0
  283. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json +0 -0
  284. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json +0 -0
  285. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow +0 -0
  286. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json +0 -0
  287. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json +0 -0
  288. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow +0 -0
  289. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json +0 -0
  290. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json +0 -0
  291. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow +0 -0
  292. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json +0 -0
  293. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json +0 -0
  294. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow +0 -0
  295. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json +0 -0
  296. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json +0 -0
  297. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow +0 -0
  298. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json +0 -0
  299. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json +0 -0
  300. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow +0 -0
  301. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json +0 -0
  302. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json +0 -0
  303. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow +0 -0
  304. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json +0 -0
  305. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json +0 -0
  306. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow +0 -0
  307. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json +0 -0
  308. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json +0 -0
  309. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow +0 -0
  310. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json +0 -0
  311. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json +0 -0
  312. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow +0 -0
  313. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json +0 -0
  314. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json +0 -0
  315. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow +0 -0
  316. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json +0 -0
  317. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json +0 -0
  318. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow +0 -0
  319. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json +0 -0
  320. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json +0 -0
  321. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow +0 -0
  322. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json +0 -0
  323. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json +0 -0
  324. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow +0 -0
  325. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json +0 -0
  326. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json +0 -0
  327. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow +0 -0
  328. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json +0 -0
  329. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json +0 -0
  330. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow +0 -0
  331. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json +0 -0
  332. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json +0 -0
  333. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow +0 -0
  334. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json +0 -0
  335. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json +0 -0
  336. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow +0 -0
  337. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json +0 -0
  338. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json +0 -0
  339. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow +0 -0
  340. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json +0 -0
  341. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json +0 -0
  342. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow +0 -0
  343. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json +0 -0
  344. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json +0 -0
  345. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow +0 -0
  346. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json +0 -0
  347. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json +0 -0
  348. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow +0 -0
  349. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json +0 -0
  350. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json +0 -0
  351. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow +0 -0
  352. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json +0 -0
  353. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json +0 -0
  354. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow +0 -0
  355. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json +0 -0
  356. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json +0 -0
  357. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow +0 -0
  358. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json +0 -0
  359. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json +0 -0
  360. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow +0 -0
  361. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json +0 -0
  362. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json +0 -0
  363. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow +0 -0
  364. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json +0 -0
  365. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json +0 -0
  366. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow +0 -0
  367. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json +0 -0
  368. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json +0 -0
  369. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow +0 -0
  370. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json +0 -0
  371. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json +0 -0
  372. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow +0 -0
  373. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json +0 -0
  374. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json +0 -0
  375. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow +0 -0
  376. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json +0 -0
  377. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json +0 -0
  378. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow +0 -0
  379. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json +0 -0
  380. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json +0 -0
  381. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow +0 -0
  382. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json +0 -0
  383. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json +0 -0
  384. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow +0 -0
  385. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json +0 -0
  386. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json +0 -0
  387. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow +0 -0
  388. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json +0 -0
  389. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json +0 -0
  390. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow +0 -0
  391. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json +0 -0
  392. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json +0 -0
  393. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow +0 -0
  394. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json +0 -0
  395. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json +0 -0
  396. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow +0 -0
  397. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json +0 -0
  398. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json +0 -0
  399. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow +0 -0
  400. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json +0 -0
  401. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json +0 -0
  402. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow +0 -0
  403. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json +0 -0
  404. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json +0 -0
  405. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow +0 -0
  406. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json +0 -0
  407. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json +0 -0
  408. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow +0 -0
  409. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json +0 -0
  410. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json +0 -0
  411. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow +0 -0
  412. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json +0 -0
  413. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json +0 -0
  414. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow +0 -0
  415. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json +0 -0
  416. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json +0 -0
  417. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow +0 -0
  418. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json +0 -0
  419. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json +0 -0
  420. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow +0 -0
  421. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json +0 -0
  422. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json +0 -0
  423. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow +0 -0
  424. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json +0 -0
  425. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json +0 -0
  426. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow +0 -0
  427. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json +0 -0
  428. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json +0 -0
  429. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow +0 -0
  430. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json +0 -0
  431. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json +0 -0
  432. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow +0 -0
  433. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json +0 -0
  434. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json +0 -0
  435. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow +0 -0
  436. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json +0 -0
  437. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json +0 -0
  438. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow +0 -0
  439. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json +0 -0
  440. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json +0 -0
  441. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow +0 -0
  442. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json +0 -0
  443. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json +0 -0
  444. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow +0 -0
  445. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json +0 -0
  446. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json +0 -0
  447. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow +0 -0
  448. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json +0 -0
  449. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json +0 -0
  450. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow +0 -0
  451. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json +0 -0
  452. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json +0 -0
  453. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow +0 -0
  454. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json +0 -0
  455. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json +0 -0
  456. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow +0 -0
  457. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json +0 -0
  458. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json +0 -0
  459. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow +0 -0
  460. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json +0 -0
  461. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json +0 -0
  462. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow +0 -0
  463. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json +0 -0
  464. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json +0 -0
  465. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow +0 -0
  466. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json +0 -0
  467. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json +0 -0
  468. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow +0 -0
  469. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json +0 -0
  470. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json +0 -0
  471. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow +0 -0
  472. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json +0 -0
  473. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json +0 -0
  474. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow +0 -0
  475. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json +0 -0
  476. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json +0 -0
  477. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow +0 -0
  478. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json +0 -0
  479. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json +0 -0
  480. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow +0 -0
  481. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json +0 -0
  482. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json +0 -0
  483. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow +0 -0
  484. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json +0 -0
  485. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json +0 -0
  486. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow +0 -0
  487. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json +0 -0
  488. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json +0 -0
  489. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow +0 -0
  490. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json +0 -0
  491. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json +0 -0
  492. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow +0 -0
  493. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json +0 -0
  494. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json +0 -0
  495. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow +0 -0
  496. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json +0 -0
  497. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json +0 -0
  498. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow +0 -0
  499. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json +0 -0
  500. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json +0 -0
  501. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow +0 -0
  502. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json +0 -0
  503. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json +0 -0
  504. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow +0 -0
  505. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json +0 -0
  506. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json +0 -0
  507. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow +0 -0
  508. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json +0 -0
  509. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json +0 -0
  510. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow +0 -0
  511. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json +0 -0
  512. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json +0 -0
  513. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow +0 -0
  514. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json +0 -0
  515. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json +0 -0
  516. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow +0 -0
  517. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json +0 -0
  518. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json +0 -0
  519. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow +0 -0
  520. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json +0 -0
  521. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json +0 -0
  522. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow +0 -0
  523. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json +0 -0
  524. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json +0 -0
  525. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow +0 -0
  526. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json +0 -0
  527. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json +0 -0
  528. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow +0 -0
  529. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json +0 -0
  530. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json +0 -0
  531. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow +0 -0
  532. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json +0 -0
  533. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json +0 -0
  534. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow +0 -0
  535. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json +0 -0
  536. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json +0 -0
  537. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow +0 -0
  538. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json +0 -0
  539. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json +0 -0
  540. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow +0 -0
  541. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json +0 -0
  542. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json +0 -0
  543. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow +0 -0
  544. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json +0 -0
  545. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json +0 -0
  546. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow +0 -0
  547. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json +0 -0
  548. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json +0 -0
  549. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow +0 -0
  550. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json +0 -0
  551. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json +0 -0
  552. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow +0 -0
  553. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json +0 -0
  554. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/nq_open/none/validation/state.json +0 -0
  555. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow +0 -0
  556. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json +0 -0
  557. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json +0 -0
  558. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow +0 -0
  559. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json +0 -0
  560. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json +0 -0
  561. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow +0 -0
  562. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json +0 -0
  563. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/sciq/none/validation/state.json +0 -0
  564. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow +0 -0
  565. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json +0 -0
  566. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json +0 -0
  567. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow +0 -0
  568. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json +0 -0
  569. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json +0 -0
  570. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow +0 -0
  571. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json +0 -0
  572. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json +0 -0
  573. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow +0 -0
  574. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json +0 -0
  575. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json +0 -0
  576. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow +0 -0
  577. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json +0 -0
  578. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json +0 -0
  579. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow +0 -0
  580. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json +0 -0
  581. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json +0 -0
  582. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json +0 -0
  583. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz +0 -0
  584. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json +0 -0
  585. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz +0 -0
  586. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json +0 -0
  587. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz +0 -0
  588. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json +0 -0
  589. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz +0 -0
  590. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json +0 -0
  591. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz +0 -0
  592. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json +0 -0
  593. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz +0 -0
  594. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json +0 -0
  595. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz +0 -0
  596. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json +0 -0
  597. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz +0 -0
  598. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json +0 -0
  599. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz +0 -0
  600. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json +0 -0
  601. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz +0 -0
  602. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json +0 -0
  603. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz +0 -0
  604. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json +0 -0
  605. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz +0 -0
  606. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json +0 -0
  607. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz +0 -0
  608. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json +0 -0
  609. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz +0 -0
  610. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json +0 -0
  611. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz +0 -0
  612. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json +0 -0
  613. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz +0 -0
  614. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json +0 -0
  615. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz +0 -0
  616. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json +0 -0
  617. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz +0 -0
  618. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json +0 -0
  619. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz +0 -0
  620. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json +0 -0
  621. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz +0 -0
  622. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json +0 -0
  623. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz +0 -0
  624. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json +0 -0
  625. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz +0 -0
  626. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json +0 -0
  627. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz +0 -0
  628. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json +0 -0
  629. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz +0 -0
  630. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json +0 -0
  631. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz +0 -0
  632. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json +0 -0
  633. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz +0 -0
  634. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json +0 -0
  635. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz +0 -0
  636. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json +0 -0
  637. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz +0 -0
  638. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json +0 -0
  639. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz +0 -0
  640. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json +0 -0
  641. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz +0 -0
  642. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json +0 -0
  643. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz +0 -0
  644. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json +0 -0
  645. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz +0 -0
  646. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json +0 -0
  647. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz +0 -0
  648. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json +0 -0
  649. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz +0 -0
  650. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json +0 -0
  651. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz +0 -0
  652. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json +0 -0
  653. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  654. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json +0 -0
  655. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz +0 -0
  656. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json +0 -0
  657. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz +0 -0
  658. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json +0 -0
  659. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  660. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json +0 -0
  661. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz +0 -0
  662. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json +0 -0
  663. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  664. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json +0 -0
  665. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz +0 -0
  666. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json +0 -0
  667. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz +0 -0
  668. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json +0 -0
  669. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz +0 -0
  670. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json +0 -0
  671. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz +0 -0
  672. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json +0 -0
  673. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz +0 -0
  674. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json +0 -0
  675. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz +0 -0
  676. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json +0 -0
  677. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz +0 -0
  678. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json +0 -0
  679. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz +0 -0
  680. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json +0 -0
  681. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz +0 -0
  682. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json +0 -0
  683. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz +0 -0
  684. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json +0 -0
  685. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz +0 -0
  686. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json +0 -0
  687. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz +0 -0
  688. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json +0 -0
  689. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz +0 -0
  690. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json +0 -0
  691. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz +0 -0
  692. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json +0 -0
  693. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz +0 -0
  694. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json +0 -0
  695. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz +0 -0
  696. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json +0 -0
  697. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz +0 -0
  698. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json +0 -0
  699. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz +0 -0
  700. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json +0 -0
  701. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz +0 -0
  702. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json +0 -0
  703. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz +0 -0
  704. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json +0 -0
  705. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz +0 -0
  706. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json +0 -0
  707. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz +0 -0
  708. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json +0 -0
  709. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz +0 -0
  710. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json +0 -0
  711. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz +0 -0
  712. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tokenizer.py +0 -0
  713. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json +0 -0
  714. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json +0 -0
  715. {ai2_olmo_eval-0.7.0 → ai2_olmo_eval-0.7.1}/src/olmo_eval/util.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -234,6 +234,7 @@ Requires-Dist: boto3; extra == "dev"
234
234
  Requires-Dist: google-cloud-storage; extra == "dev"
235
235
  Provides-Extra: all
236
236
  Requires-Dist: ai2-olmo-eval[dev]; extra == "all"
237
+ Dynamic: license-file
237
238
 
238
239
  # OLMo-in-loop-evals
239
240
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -234,6 +234,7 @@ Requires-Dist: boto3; extra == "dev"
234
234
  Requires-Dist: google-cloud-storage; extra == "dev"
235
235
  Provides-Extra: all
236
236
  Requires-Dist: ai2-olmo-eval[dev]; extra == "all"
237
+ Dynamic: license-file
237
238
 
238
239
  # OLMo-in-loop-evals
239
240
 
@@ -37,12 +37,26 @@ class ICLMetric(Metric):
37
37
  self.add_state("bpbs", default=[], dist_reduce_fx=dist_combine_lists)
38
38
  self.add_state("labels", default=[], dist_reduce_fx=dist_combine_lists)
39
39
 
40
+ self.add_state(
41
+ "loglikelihoods_no_leading_space", default=[], dist_reduce_fx=dist_combine_lists
42
+ )
43
+ self.add_state("celosses_no_leading_space", default=[], dist_reduce_fx=dist_combine_lists)
44
+ self.add_state("bpbs_no_leading_space", default=[], dist_reduce_fx=dist_combine_lists)
45
+
40
46
  def reset(self):
41
47
  self.loglikelihoods: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
42
48
  self.celosses: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
43
49
  self.bpbs: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
44
50
  self.labels: List[Tuple[Optional[int], Optional[int], Optional[int]]] = []
45
51
 
52
+ self.loglikelihoods_no_leading_space: List[
53
+ Tuple[Optional[int], Optional[int], Optional[float]]
54
+ ] = []
55
+ self.celosses_no_leading_space: List[
56
+ Tuple[Optional[int], Optional[int], Optional[float]]
57
+ ] = []
58
+ self.bpbs_no_leading_space: List[Tuple[Optional[int], Optional[int], Optional[float]]] = []
59
+
46
60
  def update(
47
61
  self,
48
62
  batch: Dict[str, Any],
@@ -56,6 +70,11 @@ class ICLMetric(Metric):
56
70
  self.loglikelihoods.append((None, None, None))
57
71
  self.celosses.append((None, None, None))
58
72
  self.bpbs.append((None, None, None))
73
+
74
+ self.loglikelihoods_no_leading_space.append((None, None, None))
75
+ self.celosses_no_leading_space.append((None, None, None))
76
+ self.bpbs_no_leading_space.append((None, None, None))
77
+
59
78
  self.labels.append((None, None, None))
60
79
  return
61
80
 
@@ -82,6 +101,9 @@ class ICLMetric(Metric):
82
101
  log_likelihood: torch.Tensor
83
102
  celoss: torch.Tensor
84
103
  bpb: torch.Tensor
104
+ log_likelihood_no_leading_space: torch.Tensor
105
+ celoss_no_leading_space: torch.Tensor
106
+ bpb_no_leading_space: torch.Tensor
85
107
  if self.metric_type == "pmi_dc":
86
108
  assert dc_lm_logits is not None
87
109
  # get domain conditional continuation logits: [cont_len, vocab]
@@ -96,6 +118,10 @@ class ICLMetric(Metric):
96
118
  )
97
119
  celoss = -log_likelihood
98
120
  bpb = -log_likelihood # the normalization factors cancel out
121
+
122
+ log_likelihood_no_leading_space = log_likelihood
123
+ celoss_no_leading_space = celoss
124
+ bpb_no_leading_space = bpb
99
125
  elif self.metric_type == "acc" or self.metric_type == "f1":
100
126
  # gather log-probs at continuation token indices
101
127
  log_likelihood = torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
@@ -108,6 +134,19 @@ class ICLMetric(Metric):
108
134
  / batch["cont_byte_len"][idx]
109
135
  * LOG_2_OF_E
110
136
  )
137
+
138
+ log_likelihood_no_leading_space = torch.gather(
139
+ lm_cont_logits, 1, cont_tokens.unsqueeze(-1)
140
+ ).sum()
141
+ celoss_no_leading_space = (
142
+ -torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
143
+ / batch["cont_str_len_no_leading_space"][idx]
144
+ )
145
+ bpb_no_leading_space = (
146
+ -torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
147
+ / batch["cont_byte_len_no_leading_space"][idx]
148
+ * LOG_2_OF_E
149
+ )
111
150
  elif self.metric_type in ["len_norm", "ce_loss", "bpb"]:
112
151
  log_likelihood = (
113
152
  torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
@@ -122,23 +161,46 @@ class ICLMetric(Metric):
122
161
  / batch["cont_byte_len"][idx]
123
162
  * LOG_2_OF_E
124
163
  )
164
+
165
+ log_likelihood_no_leading_space = (
166
+ torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
167
+ / batch["cont_str_len_no_leading_space"][idx]
168
+ )
169
+ celoss_no_leading_space = (
170
+ -torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
171
+ / batch["cont_str_len_no_leading_space"][idx]
172
+ )
173
+ bpb_no_leading_space = (
174
+ -torch.gather(lm_cont_logits, 1, cont_tokens.unsqueeze(-1)).sum()
175
+ / batch["cont_byte_len_no_leading_space"][idx]
176
+ * LOG_2_OF_E
177
+ )
125
178
  else:
126
179
  raise ValueError(self.metric_type)
127
180
 
128
- self.loglikelihoods.append((doc_id, cont_id, float(log_likelihood)))
129
181
  self.labels.append((doc_id, cont_id, int(batch["label_id"][idx])))
182
+ self.loglikelihoods.append((doc_id, cont_id, float(log_likelihood)))
130
183
  self.celosses.append((doc_id, cont_id, float(celoss)))
131
184
  self.bpbs.append((doc_id, cont_id, float(bpb)))
132
185
 
186
+ self.loglikelihoods_no_leading_space.append(
187
+ (doc_id, cont_id, float(log_likelihood_no_leading_space))
188
+ )
189
+ self.celosses_no_leading_space.append((doc_id, cont_id, float(celoss_no_leading_space)))
190
+ self.bpbs_no_leading_space.append((doc_id, cont_id, float(bpb_no_leading_space)))
191
+
133
192
  def compute(self) -> Dict[str, torch.Tensor]:
134
193
  # Task "suffix" -> tensor
135
194
 
136
195
  # states should have been synced from all accelerators at this point
137
196
  # account for duplicates here because of DistributedSampler compensating for drop_last=False
138
197
  loglikelihood_dict: Dict[int, Dict[int, float]] = {}
198
+ loglikelihood_no_leading_space_dict: Dict[int, Dict[int, float]] = {}
139
199
  label_dict: Dict[int, int] = {}
140
200
  celoss_dict: Dict[int, Dict[int, float]] = {}
201
+ celoss_no_leading_space_dict: Dict[int, Dict[int, float]] = {}
141
202
  bpb_dict: Dict[int, Dict[int, float]] = {}
203
+ bpb_no_leading_space_dict: Dict[int, Dict[int, float]] = {}
142
204
 
143
205
  # collect labels
144
206
  for doc_id, cont_id, label_id in self.labels:
@@ -159,6 +221,17 @@ class ICLMetric(Metric):
159
221
  if cont_id not in loglikelihood_dict[doc_id]:
160
222
  loglikelihood_dict[doc_id][cont_id] = loglikelihood
161
223
 
224
+ # collect loglikelihoods no leading space
225
+ for doc_id, cont_id, loglikelihood in self.loglikelihoods_no_leading_space:
226
+ if doc_id is None or cont_id is None or loglikelihood is None:
227
+ continue
228
+
229
+ if doc_id not in loglikelihood_no_leading_space_dict:
230
+ loglikelihood_no_leading_space_dict[doc_id] = {}
231
+
232
+ if cont_id not in loglikelihood_no_leading_space_dict[doc_id]:
233
+ loglikelihood_no_leading_space_dict[doc_id][cont_id] = loglikelihood
234
+
162
235
  # collect celosses
163
236
  for doc_id, cont_id, celoss_val in self.celosses:
164
237
  if doc_id is None or cont_id is None or celoss_val is None:
@@ -170,6 +243,17 @@ class ICLMetric(Metric):
170
243
  if cont_id not in celoss_dict[doc_id]:
171
244
  celoss_dict[doc_id][cont_id] = celoss_val
172
245
 
246
+ # collect celosses no leading space
247
+ for doc_id, cont_id, celoss_val in self.celosses_no_leading_space:
248
+ if doc_id is None or cont_id is None or celoss_val is None:
249
+ continue
250
+
251
+ if doc_id not in celoss_no_leading_space_dict:
252
+ celoss_no_leading_space_dict[doc_id] = {}
253
+
254
+ if cont_id not in celoss_no_leading_space_dict[doc_id]:
255
+ celoss_no_leading_space_dict[doc_id][cont_id] = celoss_val
256
+
173
257
  # collect bpbs
174
258
  for doc_id, cont_id, bpb_val in self.bpbs:
175
259
  if doc_id is None or cont_id is None or bpb_val is None:
@@ -181,13 +265,30 @@ class ICLMetric(Metric):
181
265
  if cont_id not in bpb_dict[doc_id]:
182
266
  bpb_dict[doc_id][cont_id] = bpb_val
183
267
 
268
+ # collect bpbs no leading space
269
+ for doc_id, cont_id, bpb_val in self.bpbs_no_leading_space:
270
+ if doc_id is None or cont_id is None or bpb_val is None:
271
+ continue
272
+
273
+ if doc_id not in bpb_no_leading_space_dict:
274
+ bpb_no_leading_space_dict[doc_id] = {}
275
+
276
+ if cont_id not in bpb_no_leading_space_dict[doc_id]:
277
+ bpb_no_leading_space_dict[doc_id][cont_id] = bpb_val
278
+
184
279
  # compute acc
280
+ correct_no_leading_space = []
185
281
  correct = []
186
282
  celoss = []
283
+ celoss_no_leading_space = []
187
284
  bpb = []
285
+ bpb_no_leading_space = []
188
286
  soft_score = []
189
287
  soft_log_score = []
288
+ soft_score_no_leading_space = []
289
+ soft_log_score_no_leading_space = []
190
290
  preds: Optional[List[float]] = None
291
+ preds_no_leading_space: Optional[List[float]] = None
191
292
  labels: Optional[List[int]] = None
192
293
  if self.metric_type == "f1":
193
294
  preds = []
@@ -197,15 +298,25 @@ class ICLMetric(Metric):
197
298
  # each doc_id might have a different number of continuation
198
299
  num_continuations = len(loglikelihood_dict[doc_id].keys())
199
300
  loglikelihoods = torch.tensor([-float("inf")] * num_continuations)
301
+ loglikelihoods_no_leading_space = torch.tensor([-float("inf")] * num_continuations)
200
302
  celosses = torch.tensor([float("inf")] * num_continuations)
303
+ celosses_no_leading_space = torch.tensor([float("inf")] * num_continuations)
201
304
  bpbs = torch.tensor([float("inf")] * num_continuations)
305
+ bpbs_no_leading_space = torch.tensor([float("inf")] * num_continuations)
202
306
 
203
307
  skip_document = False
204
308
  for cont_id in loglikelihood_dict[doc_id]:
205
309
  try:
206
310
  loglikelihoods[cont_id] = loglikelihood_dict[doc_id][cont_id]
311
+ loglikelihoods_no_leading_space[cont_id] = loglikelihood_no_leading_space_dict[
312
+ doc_id
313
+ ][cont_id]
207
314
  celosses[cont_id] = celoss_dict[doc_id][cont_id]
315
+ celosses_no_leading_space[cont_id] = celoss_no_leading_space_dict[doc_id][
316
+ cont_id
317
+ ]
208
318
  bpbs[cont_id] = bpb_dict[doc_id][cont_id]
319
+ bpbs_no_leading_space[cont_id] = bpb_no_leading_space_dict[doc_id][cont_id]
209
320
  except IndexError:
210
321
  # We didn't process all of the continuations, so skip this document.
211
322
  skip_document = True
@@ -216,39 +327,83 @@ class ICLMetric(Metric):
216
327
 
217
328
  if self.metric_type == "ce_loss":
218
329
  celoss.append(celosses[0]) # Only one answer is scored
330
+ celoss_no_leading_space.append(celosses_no_leading_space[0])
219
331
  elif self.metric_type == "bpb":
220
332
  bpb.append(bpbs[0]) # Only one answer is scored
333
+ bpb_no_leading_space.append(bpbs_no_leading_space[0])
221
334
  elif self.metric_type == "f1":
222
335
  assert preds is not None
336
+ assert preds_no_leading_space is not None
223
337
  assert labels is not None
224
338
  preds.append(torch.argmax(loglikelihoods).item())
339
+ preds_no_leading_space.append(torch.argmax(loglikelihoods_no_leading_space).item())
225
340
  labels.append(label_dict[doc_id])
226
341
  else:
227
342
  correct.append(
228
343
  1.0 if torch.argmax(loglikelihoods).item() == label_dict[doc_id] else 0.0
229
344
  )
345
+ correct_no_leading_space.append(
346
+ 1.0
347
+ if torch.argmax(loglikelihoods_no_leading_space).item() == label_dict[doc_id]
348
+ else 0.0
349
+ )
230
350
  celoss.append(celosses[label_dict[doc_id]].item())
351
+ celoss_no_leading_space.append(celosses_no_leading_space[label_dict[doc_id]].item())
231
352
  bpb.append(bpbs[label_dict[doc_id]].item())
353
+ bpb_no_leading_space.append(bpbs_no_leading_space[label_dict[doc_id]].item())
232
354
  soft_score.append(torch.softmax(loglikelihoods, dim=0)[label_dict[doc_id]].item())
233
355
  soft_log_score.append(
234
356
  torch.log_softmax(loglikelihoods, dim=0)[label_dict[doc_id]].item()
235
357
  )
358
+ soft_score_no_leading_space.append(
359
+ torch.softmax(loglikelihoods_no_leading_space, dim=0)[label_dict[doc_id]].item()
360
+ )
361
+ soft_log_score_no_leading_space.append(
362
+ torch.log_softmax(loglikelihoods_no_leading_space, dim=0)[
363
+ label_dict[doc_id]
364
+ ].item()
365
+ )
366
+
367
+ # v1 vs. v2 corresponds to whether we add a 1 to the num chars or num bytes when normalizing the answer length. See https://github.com/allenai/OLMo-in-loop-evals/pull/6
236
368
 
237
369
  if self.metric_type == "f1":
238
370
  assert preds is not None
239
371
  assert labels is not None
240
372
  # for NLI tasks, continuations are yes, no, neither, so idx=0 assigned to pos label
241
373
  score = f1_score(labels, preds, pos_label=0)
242
- return {"f1": torch.tensor(score)}
374
+ score_no_leading_space = f1_score(labels, preds_no_leading_space, pos_label=0)
375
+ return {
376
+ "f1_v1": torch.tensor(score),
377
+ "f1_v2": torch.tensor(score_no_leading_space),
378
+ }
243
379
  elif self.metric_type == "ce_loss":
244
- return {"ce_loss": torch.tensor(sum(celoss) / len(celoss))}
380
+ return {
381
+ "ce_loss_v1": torch.tensor(
382
+ sum(celoss_no_leading_space) / len(celoss_no_leading_space)
383
+ ),
384
+ "ce_loss_v2": torch.tensor(sum(celoss) / len(celoss)),
385
+ }
245
386
  elif self.metric_type == "bpb":
246
- return {"bpb": torch.tensor(sum(bpb) / len(bpb))}
387
+ return {
388
+ "bpb_v1": torch.tensor(sum(bpb_no_leading_space) / len(bpb_no_leading_space)),
389
+ "bpb_v2": torch.tensor(sum(bpb) / len(bpb)),
390
+ }
247
391
  else:
248
392
  return {
249
- self.metric_type: torch.tensor(sum(correct) / len(correct)),
250
- "ce_loss": torch.tensor(sum(celoss) / len(celoss)),
251
- "bpb": torch.tensor(sum(bpb) / len(bpb)),
252
- "soft": torch.tensor(sum(soft_score) / len(soft_score)),
253
- "soft_log": torch.tensor(sum(soft_log_score) / len(soft_log_score)),
393
+ f"{self.metric_type}_v1": torch.tensor(sum(correct) / len(correct)),
394
+ f"{self.metric_type}_v2": torch.tensor(sum(correct) / len(correct)),
395
+ "ce_loss_v1": torch.tensor(
396
+ sum(celoss_no_leading_space) / len(celoss_no_leading_space)
397
+ ),
398
+ "ce_loss_v2": torch.tensor(sum(celoss) / len(celoss)),
399
+ "bpb_v1": torch.tensor(sum(bpb_no_leading_space) / len(bpb_no_leading_space)),
400
+ "bpb_v2": torch.tensor(sum(bpb) / len(bpb)),
401
+ "soft_v1": torch.tensor(
402
+ sum(soft_score_no_leading_space) / len(soft_score_no_leading_space)
403
+ ),
404
+ "soft_v2": torch.tensor(sum(soft_score) / len(soft_score)),
405
+ "soft_log_v1": torch.tensor(
406
+ sum(soft_log_score_no_leading_space) / len(soft_log_score_no_leading_space)
407
+ ),
408
+ "soft_log_v2": torch.tensor(sum(soft_log_score) / len(soft_log_score)),
254
409
  }
@@ -103,8 +103,15 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
103
103
  )
104
104
 
105
105
  for cont_id, continuation_str in enumerate(continuations):
106
- cont_str_len = len(continuation_str) - 1 # continuation contain leading blank
107
- cont_byte_len = len(continuation_str[1:].encode("utf-8"))
106
+ # The original implementation did not count the first character (usually the leading space) as
107
+ # part of the continuation length (e.g., " A", " " is not counted). The OLMES standard does not
108
+ # do this, but we track both for backwards compatibility.
109
+ cont_str_len_no_leading_space = len(continuation_str) - 1
110
+ cont_byte_len_no_leading_space = len(continuation_str[1:].encode("utf-8"))
111
+
112
+ cont_str_len = len(continuation_str)
113
+ cont_byte_len = len(continuation_str.encode("utf-8"))
114
+
108
115
  continuation = self.token_encode(continuation_str)
109
116
 
110
117
  # query, remove last token from continuation, truncate from left is longer than model ctx length
@@ -131,6 +138,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
131
138
  ), # even if query has last token removed, LM will output same cont len
132
139
  "cont_str_len": cont_str_len,
133
140
  "cont_byte_len": cont_byte_len,
141
+ "cont_str_len_no_leading_space": cont_str_len_no_leading_space,
142
+ "cont_byte_len_no_leading_space": cont_byte_len_no_leading_space,
134
143
  "query": query, # remove last token from continuation
135
144
  "dc_query": dc_query,
136
145
  "label_id": label_id,
@@ -209,6 +218,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
209
218
  cont_lens = []
210
219
  cont_str_lens = []
211
220
  cont_byte_lens = []
221
+ cont_str_len_no_leading_space = []
222
+ cont_byte_len_no_leading_space = []
212
223
  queries = []
213
224
  dc_queries = []
214
225
  label_ids = []
@@ -232,6 +243,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
232
243
  cont_lens.append(sample["cont_len"])
233
244
  cont_str_lens.append(sample["cont_str_len"])
234
245
  cont_byte_lens.append(sample["cont_byte_len"])
246
+ cont_str_len_no_leading_space.append(sample["cont_str_len_no_leading_space"])
247
+ cont_byte_len_no_leading_space.append(sample["cont_byte_len_no_leading_space"])
235
248
 
236
249
  queries.append(
237
250
  torch.LongTensor(
@@ -261,6 +274,8 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
261
274
  ), # since query has last token removed from continuation
262
275
  "cont_str_len": torch.LongTensor(cont_str_lens),
263
276
  "cont_byte_len": torch.LongTensor(cont_byte_lens),
277
+ "cont_str_len_no_leading_space": torch.LongTensor(cont_str_len_no_leading_space),
278
+ "cont_byte_len_no_leading_space": torch.LongTensor(cont_byte_len_no_leading_space),
264
279
  "input_ids": torch.stack(queries),
265
280
  "dc_input_ids": torch.stack(dc_queries),
266
281
  "label_id": torch.LongTensor(label_ids),
@@ -456,8 +471,15 @@ class WinoGrande(ICLMultiChoiceTaskDataset):
456
471
 
457
472
  continuation_str = self.doc_to_continuations(doc)
458
473
  label_id = self.doc_to_label(doc)
459
- cont_str_len = len(continuation_str) - 1 # continuations contain leading blank space
460
- cont_byte_len = len(continuation_str[1:].encode("utf-8"))
474
+
475
+ # The original implementation did not count the first character (usually the leading space) as
476
+ # part of the continuation length (e.g., " A", " " is not counted). The OLMES standard does not
477
+ # do this, but we track both for backwards compatibility.
478
+ cont_str_len_no_leading_space = len(continuation_str) - 1
479
+ cont_byte_len_no_leading_space = len(continuation_str[1:].encode("utf-8"))
480
+
481
+ cont_str_len = len(continuation_str)
482
+ cont_byte_len = len(continuation_str.encode("utf-8"))
461
483
 
462
484
  # tokenize
463
485
  continuation = self.token_encode(continuation_str)
@@ -488,6 +510,8 @@ class WinoGrande(ICLMultiChoiceTaskDataset):
488
510
  ), # even if query has last token removed, LM will output same cont len
489
511
  "cont_str_len": cont_str_len,
490
512
  "cont_byte_len": cont_byte_len,
513
+ "cont_str_len_no_leading_space": cont_str_len_no_leading_space,
514
+ "cont_byte_len_no_leading_space": cont_byte_len_no_leading_space,
491
515
  "query": query, # remove last token from continuation
492
516
  "dc_query": dc_query,
493
517
  "label_id": label_id,
@@ -1524,8 +1548,16 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
1524
1548
  f"Sample doc from ({self.dataset_path}, {ds_name}):"
1525
1549
  + f"\ndoc_text: {doc_text}\ncontinuation: {continuation_str}"
1526
1550
  )
1527
- cont_str_len = len(continuation_str) - 1 # continuation contain leading blank
1528
- cont_byte_len = len(continuation_str[1:].encode("utf-8"))
1551
+
1552
+ # The original implementation did not count the first character (usually the leading space) as
1553
+ # part of the continuation length (e.g., " A", " " is not counted). The OLMES standard does not
1554
+ # do this, but we track both for backwards compatibility.
1555
+ cont_str_len_no_leading_space = len(continuation_str) - 1
1556
+ cont_byte_len_no_leading_space = len(continuation_str[1:].encode("utf-8"))
1557
+
1558
+ cont_str_len = len(continuation_str)
1559
+ cont_byte_len = len(continuation_str.encode("utf-8"))
1560
+
1529
1561
  continuation = self.token_encode(continuation_str)
1530
1562
 
1531
1563
  # query, remove last token from continuation, truncate from left is longer than model ctx length
@@ -1552,6 +1584,8 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
1552
1584
  ), # even if query has last token removed, LM will output same cont len
1553
1585
  "cont_str_len": cont_str_len,
1554
1586
  "cont_byte_len": cont_byte_len,
1587
+ "cont_str_len_no_leading_space": cont_str_len_no_leading_space,
1588
+ "cont_byte_len_no_leading_space": cont_byte_len_no_leading_space,
1555
1589
  "query": query, # remove last token from continuation
1556
1590
  "dc_query": dc_query,
1557
1591
  "label_id": label_id,
@@ -1,6 +1,6 @@
1
1
  _MAJOR = "0"
2
2
  _MINOR = "7"
3
- _PATCH = "0"
3
+ _PATCH = "1"
4
4
  _SUFFIX = ""
5
5
 
6
6
  VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
File without changes
File without changes
File without changes