ai2-olmo-eval 0.8.4__tar.gz → 0.8.6__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (768)
  1. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/PKG-INFO +22 -5
  2. ai2_olmo_eval-0.8.6/README.md +27 -0
  3. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/pyproject.toml +3 -4
  4. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/ai2_olmo_eval.egg-info/PKG-INFO +22 -5
  5. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/ai2_olmo_eval.egg-info/requires.txt +3 -4
  6. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/metrics.py +22 -3
  7. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/version.py +1 -1
  8. ai2_olmo_eval-0.8.4/README.md +0 -9
  9. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/LICENSE +0 -0
  10. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/setup.cfg +0 -0
  11. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/ai2_olmo_eval.egg-info/SOURCES.txt +0 -0
  12. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/ai2_olmo_eval.egg-info/dependency_links.txt +0 -0
  13. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/ai2_olmo_eval.egg-info/top_level.txt +0 -0
  14. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/__init__.py +0 -0
  15. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow +0 -0
  16. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json +0 -0
  17. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json +0 -0
  18. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow +0 -0
  19. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json +0 -0
  20. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json +0 -0
  21. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow +0 -0
  22. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json +0 -0
  23. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json +0 -0
  24. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow +0 -0
  25. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json +0 -0
  26. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/boolq/none/validation/state.json +0 -0
  27. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow +0 -0
  28. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json +0 -0
  29. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json +0 -0
  30. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow +0 -0
  31. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json +0 -0
  32. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/rte/validation/state.json +0 -0
  33. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow +0 -0
  34. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json +0 -0
  35. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/glue/sst2/validation/state.json +0 -0
  36. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow +0 -0
  37. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json +0 -0
  38. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json +0 -0
  39. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow +0 -0
  40. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json +0 -0
  41. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json +0 -0
  42. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow +0 -0
  43. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json +0 -0
  44. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json +0 -0
  45. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow +0 -0
  46. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json +0 -0
  47. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json +0 -0
  48. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow +0 -0
  49. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json +0 -0
  50. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json +0 -0
  51. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow +0 -0
  52. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json +0 -0
  53. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json +0 -0
  54. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow +0 -0
  55. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json +0 -0
  56. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json +0 -0
  57. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow +0 -0
  58. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json +0 -0
  59. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json +0 -0
  60. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow +0 -0
  61. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json +0 -0
  62. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json +0 -0
  63. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow +0 -0
  64. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json +0 -0
  65. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json +0 -0
  66. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow +0 -0
  67. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json +0 -0
  68. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json +0 -0
  69. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow +0 -0
  70. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json +0 -0
  71. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json +0 -0
  72. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow +0 -0
  73. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json +0 -0
  74. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json +0 -0
  75. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow +0 -0
  76. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json +0 -0
  77. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json +0 -0
  78. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow +0 -0
  79. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json +0 -0
  80. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json +0 -0
  81. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow +0 -0
  82. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json +0 -0
  83. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json +0 -0
  84. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow +0 -0
  85. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json +0 -0
  86. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json +0 -0
  87. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow +0 -0
  88. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json +0 -0
  89. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json +0 -0
  90. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow +0 -0
  91. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json +0 -0
  92. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json +0 -0
  93. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow +0 -0
  94. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json +0 -0
  95. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json +0 -0
  96. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow +0 -0
  97. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json +0 -0
  98. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json +0 -0
  99. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow +0 -0
  100. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json +0 -0
  101. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json +0 -0
  102. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow +0 -0
  103. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json +0 -0
  104. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json +0 -0
  105. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow +0 -0
  106. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json +0 -0
  107. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json +0 -0
  108. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow +0 -0
  109. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json +0 -0
  110. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json +0 -0
  111. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow +0 -0
  112. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json +0 -0
  113. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json +0 -0
  114. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow +0 -0
  115. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json +0 -0
  116. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json +0 -0
  117. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow +0 -0
  118. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json +0 -0
  119. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json +0 -0
  120. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow +0 -0
  121. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json +0 -0
  122. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json +0 -0
  123. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow +0 -0
  124. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json +0 -0
  125. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json +0 -0
  126. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow +0 -0
  127. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json +0 -0
  128. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json +0 -0
  129. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow +0 -0
  130. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json +0 -0
  131. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json +0 -0
  132. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow +0 -0
  133. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json +0 -0
  134. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json +0 -0
  135. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow +0 -0
  136. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json +0 -0
  137. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json +0 -0
  138. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow +0 -0
  139. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json +0 -0
  140. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json +0 -0
  141. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow +0 -0
  142. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json +0 -0
  143. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json +0 -0
  144. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow +0 -0
  145. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json +0 -0
  146. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json +0 -0
  147. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow +0 -0
  148. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json +0 -0
  149. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json +0 -0
  150. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow +0 -0
  151. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json +0 -0
  152. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json +0 -0
  153. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow +0 -0
  154. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json +0 -0
  155. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json +0 -0
  156. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow +0 -0
  157. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json +0 -0
  158. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json +0 -0
  159. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow +0 -0
  160. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json +0 -0
  161. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json +0 -0
  162. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow +0 -0
  163. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json +0 -0
  164. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json +0 -0
  165. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow +0 -0
  166. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json +0 -0
  167. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json +0 -0
  168. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow +0 -0
  169. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json +0 -0
  170. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json +0 -0
  171. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow +0 -0
  172. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json +0 -0
  173. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json +0 -0
  174. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow +0 -0
  175. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json +0 -0
  176. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json +0 -0
  177. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow +0 -0
  178. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json +0 -0
  179. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json +0 -0
  180. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow +0 -0
  181. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json +0 -0
  182. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json +0 -0
  183. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow +0 -0
  184. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json +0 -0
  185. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json +0 -0
  186. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow +0 -0
  187. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json +0 -0
  188. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json +0 -0
  189. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow +0 -0
  190. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json +0 -0
  191. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json +0 -0
  192. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow +0 -0
  193. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json +0 -0
  194. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json +0 -0
  195. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow +0 -0
  196. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json +0 -0
  197. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json +0 -0
  198. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow +0 -0
  199. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json +0 -0
  200. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json +0 -0
  201. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow +0 -0
  202. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json +0 -0
  203. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json +0 -0
  204. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow +0 -0
  205. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json +0 -0
  206. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json +0 -0
  207. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow +0 -0
  208. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json +0 -0
  209. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json +0 -0
  210. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow +0 -0
  211. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json +0 -0
  212. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json +0 -0
  213. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow +0 -0
  214. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json +0 -0
  215. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json +0 -0
  216. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow +0 -0
  217. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json +0 -0
  218. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json +0 -0
  219. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow +0 -0
  220. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json +0 -0
  221. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json +0 -0
  222. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow +0 -0
  223. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json +0 -0
  224. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json +0 -0
  225. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow +0 -0
  226. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json +0 -0
  227. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json +0 -0
  228. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow +0 -0
  229. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json +0 -0
  230. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json +0 -0
  231. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow +0 -0
  232. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json +0 -0
  233. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json +0 -0
  234. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow +0 -0
  235. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json +0 -0
  236. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json +0 -0
  237. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow +0 -0
  238. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json +0 -0
  239. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json +0 -0
  240. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow +0 -0
  241. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json +0 -0
  242. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json +0 -0
  243. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow +0 -0
  244. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json +0 -0
  245. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json +0 -0
  246. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow +0 -0
  247. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json +0 -0
  248. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json +0 -0
  249. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow +0 -0
  250. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json +0 -0
  251. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json +0 -0
  252. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow +0 -0
  253. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json +0 -0
  254. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json +0 -0
  255. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow +0 -0
  256. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json +0 -0
  257. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json +0 -0
  258. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow +0 -0
  259. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json +0 -0
  260. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json +0 -0
  261. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow +0 -0
  262. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json +0 -0
  263. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json +0 -0
  264. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow +0 -0
  265. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json +0 -0
  266. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json +0 -0
  267. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow +0 -0
  268. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json +0 -0
  269. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json +0 -0
  270. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow +0 -0
  271. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json +0 -0
  272. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json +0 -0
  273. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow +0 -0
  274. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json +0 -0
  275. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json +0 -0
  276. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow +0 -0
  277. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json +0 -0
  278. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json +0 -0
  279. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow +0 -0
  280. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json +0 -0
  281. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json +0 -0
  282. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow +0 -0
  283. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json +0 -0
  284. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json +0 -0
  285. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow +0 -0
  286. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json +0 -0
  287. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json +0 -0
  288. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow +0 -0
  289. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json +0 -0
  290. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json +0 -0
  291. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow +0 -0
  292. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json +0 -0
  293. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json +0 -0
  294. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow +0 -0
  295. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json +0 -0
  296. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json +0 -0
  297. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow +0 -0
  298. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json +0 -0
  299. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json +0 -0
  300. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow +0 -0
  301. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json +0 -0
  302. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json +0 -0
  303. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow +0 -0
  304. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json +0 -0
  305. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json +0 -0
  306. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow +0 -0
  307. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json +0 -0
  308. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json +0 -0
  309. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow +0 -0
  310. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json +0 -0
  311. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json +0 -0
  312. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow +0 -0
  313. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json +0 -0
  314. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json +0 -0
  315. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow +0 -0
  316. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json +0 -0
  317. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json +0 -0
  318. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow +0 -0
  319. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json +0 -0
  320. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json +0 -0
  321. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow +0 -0
  322. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json +0 -0
  323. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json +0 -0
  324. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow +0 -0
  325. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json +0 -0
  326. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json +0 -0
  327. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow +0 -0
  328. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json +0 -0
  329. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json +0 -0
  330. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow +0 -0
  331. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json +0 -0
  332. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json +0 -0
  333. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow +0 -0
  334. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json +0 -0
  335. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json +0 -0
  336. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow +0 -0
  337. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json +0 -0
  338. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json +0 -0
  339. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow +0 -0
  340. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json +0 -0
  341. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json +0 -0
  342. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow +0 -0
  343. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json +0 -0
  344. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json +0 -0
  345. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow +0 -0
  346. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json +0 -0
  347. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json +0 -0
  348. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow +0 -0
  349. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json +0 -0
  350. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json +0 -0
  351. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow +0 -0
  352. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json +0 -0
  353. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json +0 -0
  354. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow +0 -0
  355. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json +0 -0
  356. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json +0 -0
  357. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow +0 -0
  358. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json +0 -0
  359. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json +0 -0
  360. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow +0 -0
  361. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json +0 -0
  362. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json +0 -0
  363. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow +0 -0
  364. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json +0 -0
  365. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json +0 -0
  366. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow +0 -0
  367. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json +0 -0
  368. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json +0 -0
  369. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow +0 -0
  370. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json +0 -0
  371. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json +0 -0
  372. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow +0 -0
  373. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json +0 -0
  374. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json +0 -0
  375. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow +0 -0
  376. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json +0 -0
  377. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json +0 -0
  378. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow +0 -0
  379. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json +0 -0
  380. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json +0 -0
  381. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow +0 -0
  382. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json +0 -0
  383. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json +0 -0
  384. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow +0 -0
  385. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json +0 -0
  386. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json +0 -0
  387. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow +0 -0
  388. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json +0 -0
  389. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json +0 -0
  390. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow +0 -0
  391. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json +0 -0
  392. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json +0 -0
  393. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow +0 -0
  394. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json +0 -0
  395. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json +0 -0
  396. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow +0 -0
  397. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json +0 -0
  398. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json +0 -0
  399. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow +0 -0
  400. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json +0 -0
  401. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json +0 -0
  402. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow +0 -0
  403. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json +0 -0
  404. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json +0 -0
  405. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow +0 -0
  406. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json +0 -0
  407. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json +0 -0
  408. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow +0 -0
  409. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json +0 -0
  410. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json +0 -0
  411. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow +0 -0
  412. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json +0 -0
  413. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json +0 -0
  414. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow +0 -0
  415. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json +0 -0
  416. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json +0 -0
  417. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow +0 -0
  418. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json +0 -0
  419. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json +0 -0
  420. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow +0 -0
  421. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json +0 -0
  422. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json +0 -0
  423. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow +0 -0
  424. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json +0 -0
  425. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json +0 -0
  426. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow +0 -0
  427. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json +0 -0
  428. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json +0 -0
  429. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow +0 -0
  430. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json +0 -0
  431. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json +0 -0
  432. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow +0 -0
  433. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json +0 -0
  434. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json +0 -0
  435. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow +0 -0
  436. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json +0 -0
  437. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json +0 -0
  438. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow +0 -0
  439. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json +0 -0
  440. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json +0 -0
  441. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow +0 -0
  442. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json +0 -0
  443. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json +0 -0
  444. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow +0 -0
  445. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json +0 -0
  446. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json +0 -0
  447. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow +0 -0
  448. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json +0 -0
  449. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json +0 -0
  450. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow +0 -0
  451. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json +0 -0
  452. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json +0 -0
  453. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow +0 -0
  454. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json +0 -0
  455. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json +0 -0
  456. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow +0 -0
  457. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json +0 -0
  458. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json +0 -0
  459. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow +0 -0
  460. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json +0 -0
  461. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json +0 -0
  462. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow +0 -0
  463. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json +0 -0
  464. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json +0 -0
  465. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow +0 -0
  466. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json +0 -0
  467. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json +0 -0
  468. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow +0 -0
  469. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json +0 -0
  470. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json +0 -0
  471. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow +0 -0
  472. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json +0 -0
  473. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json +0 -0
  474. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow +0 -0
  475. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json +0 -0
  476. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json +0 -0
  477. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow +0 -0
  478. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json +0 -0
  479. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json +0 -0
  480. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow +0 -0
  481. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json +0 -0
  482. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json +0 -0
  483. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow +0 -0
  484. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json +0 -0
  485. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json +0 -0
  486. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow +0 -0
  487. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json +0 -0
  488. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json +0 -0
  489. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow +0 -0
  490. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json +0 -0
  491. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json +0 -0
  492. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow +0 -0
  493. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json +0 -0
  494. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json +0 -0
  495. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow +0 -0
  496. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json +0 -0
  497. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json +0 -0
  498. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow +0 -0
  499. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json +0 -0
  500. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json +0 -0
  501. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow +0 -0
  502. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json +0 -0
  503. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json +0 -0
  504. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow +0 -0
  505. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json +0 -0
  506. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json +0 -0
  507. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow +0 -0
  508. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json +0 -0
  509. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json +0 -0
  510. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow +0 -0
  511. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json +0 -0
  512. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json +0 -0
  513. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow +0 -0
  514. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json +0 -0
  515. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json +0 -0
  516. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow +0 -0
  517. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json +0 -0
  518. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json +0 -0
  519. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow +0 -0
  520. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json +0 -0
  521. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json +0 -0
  522. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow +0 -0
  523. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json +0 -0
  524. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json +0 -0
  525. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow +0 -0
  526. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json +0 -0
  527. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json +0 -0
  528. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow +0 -0
  529. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json +0 -0
  530. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json +0 -0
  531. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow +0 -0
  532. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json +0 -0
  533. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json +0 -0
  534. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow +0 -0
  535. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json +0 -0
  536. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json +0 -0
  537. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow +0 -0
  538. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json +0 -0
  539. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json +0 -0
  540. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow +0 -0
  541. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json +0 -0
  542. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json +0 -0
  543. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow +0 -0
  544. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json +0 -0
  545. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json +0 -0
  546. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow +0 -0
  547. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json +0 -0
  548. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json +0 -0
  549. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow +0 -0
  550. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json +0 -0
  551. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json +0 -0
  552. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow +0 -0
  553. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json +0 -0
  554. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/nq_open/none/validation/state.json +0 -0
  555. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow +0 -0
  556. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json +0 -0
  557. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json +0 -0
  558. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow +0 -0
  559. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json +0 -0
  560. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json +0 -0
  561. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow +0 -0
  562. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json +0 -0
  563. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/sciq/none/validation/state.json +0 -0
  564. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow +0 -0
  565. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json +0 -0
  566. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json +0 -0
  567. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow +0 -0
  568. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json +0 -0
  569. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json +0 -0
  570. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow +0 -0
  571. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json +0 -0
  572. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json +0 -0
  573. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow +0 -0
  574. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json +0 -0
  575. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json +0 -0
  576. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow +0 -0
  577. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json +0 -0
  578. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json +0 -0
  579. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow +0 -0
  580. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json +0 -0
  581. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json +0 -0
  582. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json +0 -0
  583. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz +0 -0
  584. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json +0 -0
  585. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz +0 -0
  586. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json +0 -0
  587. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz +0 -0
  588. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json +0 -0
  589. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz +0 -0
  590. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json +0 -0
  591. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz +0 -0
  592. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json +0 -0
  593. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz +0 -0
  594. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json +0 -0
  595. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz +0 -0
  596. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json +0 -0
  597. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz +0 -0
  598. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json +0 -0
  599. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz +0 -0
  600. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json +0 -0
  601. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz +0 -0
  602. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json +0 -0
  603. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz +0 -0
  604. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json +0 -0
  605. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz +0 -0
  606. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json +0 -0
  607. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz +0 -0
  608. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json +0 -0
  609. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz +0 -0
  610. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/config.json +0 -0
  611. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/requests.jsonl.gz +0 -0
  612. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/config.json +0 -0
  613. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/requests.jsonl.gz +0 -0
  614. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/config.json +0 -0
  615. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/requests.jsonl.gz +0 -0
  616. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/config.json +0 -0
  617. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/requests.jsonl.gz +0 -0
  618. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/config.json +0 -0
  619. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/requests.jsonl.gz +0 -0
  620. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/config.json +0 -0
  621. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/requests.jsonl.gz +0 -0
  622. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json +0 -0
  623. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz +0 -0
  624. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json +0 -0
  625. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz +0 -0
  626. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json +0 -0
  627. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz +0 -0
  628. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json +0 -0
  629. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz +0 -0
  630. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json +0 -0
  631. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz +0 -0
  632. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json +0 -0
  633. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz +0 -0
  634. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_3shot/config.json +0 -0
  635. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_3shot/requests.jsonl.gz +0 -0
  636. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json +0 -0
  637. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz +0 -0
  638. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_3shot/config.json +0 -0
  639. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_3shot/requests.jsonl.gz +0 -0
  640. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json +0 -0
  641. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz +0 -0
  642. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json +0 -0
  643. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz +0 -0
  644. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json +0 -0
  645. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz +0 -0
  646. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json +0 -0
  647. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz +0 -0
  648. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json +0 -0
  649. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz +0 -0
  650. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json +0 -0
  651. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz +0 -0
  652. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json +0 -0
  653. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz +0 -0
  654. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json +0 -0
  655. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz +0 -0
  656. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json +0 -0
  657. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz +0 -0
  658. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json +0 -0
  659. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz +0 -0
  660. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json +0 -0
  661. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz +0 -0
  662. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json +0 -0
  663. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz +0 -0
  664. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json +0 -0
  665. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz +0 -0
  666. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json +0 -0
  667. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz +0 -0
  668. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/config.json +0 -0
  669. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_500/gold_bpb_0shot/requests.jsonl.gz +0 -0
  670. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json +0 -0
  671. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  672. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json +0 -0
  673. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz +0 -0
  674. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json +0 -0
  675. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz +0 -0
  676. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json +0 -0
  677. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  678. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json +0 -0
  679. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz +0 -0
  680. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json +0 -0
  681. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz +0 -0
  682. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json +0 -0
  683. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz +0 -0
  684. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/config.json +0 -0
  685. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_bash/gold_bpb_3shot/requests.jsonl.gz +0 -0
  686. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/config.json +0 -0
  687. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_c/gold_bpb_3shot/requests.jsonl.gz +0 -0
  688. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/config.json +0 -0
  689. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_cpp/gold_bpb_3shot/requests.jsonl.gz +0 -0
  690. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/config.json +0 -0
  691. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_csharp/gold_bpb_3shot/requests.jsonl.gz +0 -0
  692. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/config.json +0 -0
  693. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_go/gold_bpb_3shot/requests.jsonl.gz +0 -0
  694. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/config.json +0 -0
  695. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_haskell/gold_bpb_3shot/requests.jsonl.gz +0 -0
  696. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/config.json +0 -0
  697. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_java/gold_bpb_3shot/requests.jsonl.gz +0 -0
  698. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/config.json +0 -0
  699. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_javascript/gold_bpb_3shot/requests.jsonl.gz +0 -0
  700. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/config.json +0 -0
  701. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_matlab/gold_bpb_3shot/requests.jsonl.gz +0 -0
  702. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/config.json +0 -0
  703. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_php/gold_bpb_3shot/requests.jsonl.gz +0 -0
  704. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/config.json +0 -0
  705. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_python/gold_bpb_3shot/requests.jsonl.gz +0 -0
  706. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/config.json +0 -0
  707. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_r/gold_bpb_3shot/requests.jsonl.gz +0 -0
  708. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/config.json +0 -0
  709. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_ruby/gold_bpb_3shot/requests.jsonl.gz +0 -0
  710. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/config.json +0 -0
  711. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_rust/gold_bpb_3shot/requests.jsonl.gz +0 -0
  712. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/config.json +0 -0
  713. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_scala/gold_bpb_3shot/requests.jsonl.gz +0 -0
  714. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/config.json +0 -0
  715. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_swift/gold_bpb_3shot/requests.jsonl.gz +0 -0
  716. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/config.json +0 -0
  717. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/mt_mbpp_typescript/gold_bpb_3shot/requests.jsonl.gz +0 -0
  718. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json +0 -0
  719. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz +0 -0
  720. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json +0 -0
  721. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz +0 -0
  722. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json +0 -0
  723. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz +0 -0
  724. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json +0 -0
  725. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz +0 -0
  726. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json +0 -0
  727. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz +0 -0
  728. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json +0 -0
  729. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz +0 -0
  730. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json +0 -0
  731. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz +0 -0
  732. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json +0 -0
  733. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz +0 -0
  734. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json +0 -0
  735. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz +0 -0
  736. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json +0 -0
  737. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz +0 -0
  738. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json +0 -0
  739. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz +0 -0
  740. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json +0 -0
  741. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz +0 -0
  742. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json +0 -0
  743. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz +0 -0
  744. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json +0 -0
  745. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz +0 -0
  746. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json +0 -0
  747. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz +0 -0
  748. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json +0 -0
  749. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz +0 -0
  750. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json +0 -0
  751. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz +0 -0
  752. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json +0 -0
  753. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz +0 -0
  754. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json +0 -0
  755. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz +0 -0
  756. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json +0 -0
  757. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz +0 -0
  758. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json +0 -0
  759. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz +0 -0
  760. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json +0 -0
  761. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz +0 -0
  762. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json +0 -0
  763. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz +0 -0
  764. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/tasks.py +0 -0
  765. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/tokenizer.py +0 -0
  766. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json +0 -0
  767. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json +0 -0
  768. {ai2_olmo_eval-0.8.4 → ai2_olmo_eval-0.8.6}/src/olmo_eval/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.8.4
3
+ Version: 0.8.6
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -210,16 +210,15 @@ Project-URL: Changelog, https://github.com/allenai/OLMo-in-loop-evals/blob/main/
210
210
  Requires-Python: >=3.9
211
211
  Description-Content-Type: text/markdown
212
212
  License-File: LICENSE
213
- Requires-Dist: numpy<2.0
214
213
  Requires-Dist: torch
215
214
  Requires-Dist: torchmetrics
216
- Requires-Dist: datasets
217
- Requires-Dist: tokenizers
218
- Requires-Dist: scikit-learn
215
+ Requires-Dist: datasets<4,>=3.6.0
219
216
  Requires-Dist: cached-path
220
217
  Requires-Dist: requests
221
218
  Requires-Dist: packaging
222
219
  Requires-Dist: importlib_resources
220
+ Requires-Dist: tokenizers<0.20,>=0.19.1
221
+ Requires-Dist: pyarrow<20,>=19.0
223
222
  Provides-Extra: dev
224
223
  Requires-Dist: ruff; extra == "dev"
225
224
  Requires-Dist: mypy<1.4,>=1.0; extra == "dev"
@@ -245,3 +244,21 @@ Code for in-loop evaluation tasks used by the OLMo training team.
245
244
  ```
246
245
  pip install ai2-olmo-eval
247
246
  ```
247
+
248
+ ## Release process
249
+
250
+ ### Steps
251
+
252
+ 1. Update the version in `src/olmo_eval/version.py`.
253
+ 2. Run the release script:
254
+
255
+ ```bash
256
+ ./src/scripts/release.sh
257
+ ```
258
+
259
+ This will commit the changes to the CHANGELOG and `version.py` files and then create a new tag in git
260
+ which will trigger a workflow on GitHub Actions that handles the rest.
261
+
262
+ ### Fixing a failed release
263
+
264
+ If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete the tag on GitHub. Once you've pushed a fix you can simply repeat the steps above.
@@ -0,0 +1,27 @@
1
+ # OLMo-in-loop-evals
2
+
3
+ Code for in-loop evaluation tasks used by the OLMo training team.
4
+
5
+ ## Installation
6
+
7
+ ```
8
+ pip install ai2-olmo-eval
9
+ ```
10
+
11
+ ## Release process
12
+
13
+ ### Steps
14
+
15
+ 1. Update the version in `src/olmo_eval/version.py`.
16
+ 2. Run the release script:
17
+
18
+ ```bash
19
+ ./src/scripts/release.sh
20
+ ```
21
+
22
+ This will commit the changes to the CHANGELOG and `version.py` files and then create a new tag in git
23
+ which will trigger a workflow on GitHub Actions that handles the rest.
24
+
25
+ ### Fixing a failed release
26
+
27
+ If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete the tag on GitHub. Once you've pushed a fix you can simply repeat the steps above.
@@ -13,16 +13,15 @@ authors = [
13
13
  requires-python = ">=3.9"
14
14
  license = { file = "LICENSE" }
15
15
  dependencies = [
16
- "numpy<2.0",
17
16
  "torch",
18
17
  "torchmetrics",
19
- "datasets",
20
- "tokenizers",
21
- "scikit-learn",
18
+ "datasets>=3.6.0,<4",
22
19
  "cached-path",
23
20
  "requests",
24
21
  "packaging",
25
22
  "importlib_resources",
23
+ "tokenizers>=0.19.1,<0.20",
24
+ "pyarrow>=19.0,<20", # datasets brings pyarrow, but version 21 breaks things
26
25
  ]
27
26
 
28
27
  [project.urls]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.8.4
3
+ Version: 0.8.6
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -210,16 +210,15 @@ Project-URL: Changelog, https://github.com/allenai/OLMo-in-loop-evals/blob/main/
210
210
  Requires-Python: >=3.9
211
211
  Description-Content-Type: text/markdown
212
212
  License-File: LICENSE
213
- Requires-Dist: numpy<2.0
214
213
  Requires-Dist: torch
215
214
  Requires-Dist: torchmetrics
216
- Requires-Dist: datasets
217
- Requires-Dist: tokenizers
218
- Requires-Dist: scikit-learn
215
+ Requires-Dist: datasets<4,>=3.6.0
219
216
  Requires-Dist: cached-path
220
217
  Requires-Dist: requests
221
218
  Requires-Dist: packaging
222
219
  Requires-Dist: importlib_resources
220
+ Requires-Dist: tokenizers<0.20,>=0.19.1
221
+ Requires-Dist: pyarrow<20,>=19.0
223
222
  Provides-Extra: dev
224
223
  Requires-Dist: ruff; extra == "dev"
225
224
  Requires-Dist: mypy<1.4,>=1.0; extra == "dev"
@@ -245,3 +244,21 @@ Code for in-loop evaluation tasks used by the OLMo training team.
245
244
  ```
246
245
  pip install ai2-olmo-eval
247
246
  ```
247
+
248
+ ## Release process
249
+
250
+ ### Steps
251
+
252
+ 1. Update the version in `src/olmo_eval/version.py`.
253
+ 2. Run the release script:
254
+
255
+ ```bash
256
+ ./src/scripts/release.sh
257
+ ```
258
+
259
+ This will commit the changes to the CHANGELOG and `version.py` files and then create a new tag in git
260
+ which will trigger a workflow on GitHub Actions that handles the rest.
261
+
262
+ ### Fixing a failed release
263
+
264
+ If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete the tag on GitHub. Once you've pushed a fix you can simply repeat the steps above.
@@ -1,13 +1,12 @@
1
- numpy<2.0
2
1
  torch
3
2
  torchmetrics
4
- datasets
5
- tokenizers
6
- scikit-learn
3
+ datasets<4,>=3.6.0
7
4
  cached-path
8
5
  requests
9
6
  packaging
10
7
  importlib_resources
8
+ tokenizers<0.20,>=0.19.1
9
+ pyarrow<20,>=19.0
11
10
 
12
11
  [all]
13
12
  ai2-olmo-eval[dev]
@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Tuple, TypeVar
3
3
 
4
4
  import torch
5
5
  import torch.nn.functional as F
6
- from sklearn.metrics import f1_score
7
6
  from torchmetrics import Metric
8
7
 
9
8
  from .util import all_gather_object
@@ -395,8 +394,10 @@ class ICLMetric(Metric):
395
394
  assert preds is not None
396
395
  assert labels is not None
397
396
  # for NLI tasks, continuations are yes, no, neither, so idx=0 assigned to pos label
398
- score = f1_score(labels, preds, pos_label=0)
399
- score_no_leading_space = f1_score(labels, preds_no_leading_space, pos_label=0)
397
+ score = self.custom_f1_score(labels, preds, pos_label=0)
398
+ score_no_leading_space = self.custom_f1_score(
399
+ labels, preds_no_leading_space, pos_label=0
400
+ )
400
401
  return {
401
402
  "f1_v1": torch.tensor(score),
402
403
  "f1_v2": torch.tensor(score_no_leading_space),
@@ -432,3 +433,21 @@ class ICLMetric(Metric):
432
433
  ),
433
434
  "soft_log_v2": torch.tensor(sum(soft_log_score) / len(soft_log_score)),
434
435
  }
436
+
437
+ def custom_f1_score(self, y_true, y_pred, pos_label=1):
438
+ y_true = list(y_true)
439
+ y_pred = list(y_pred)
440
+ tp = sum((yt == pos_label) and (yp == pos_label) for yt, yp in zip(y_true, y_pred))
441
+ fp = sum((yt != pos_label) and (yp == pos_label) for yt, yp in zip(y_true, y_pred))
442
+ fn = sum((yt == pos_label) and (yp != pos_label) for yt, yp in zip(y_true, y_pred))
443
+
444
+ if tp + fp == 0 or tp + fn == 0:
445
+ return 0.0
446
+
447
+ precision = tp / (tp + fp)
448
+ recall = tp / (tp + fn)
449
+
450
+ if precision + recall == 0:
451
+ return 0.0
452
+
453
+ return 2 * precision * recall / (precision + recall)
@@ -1,6 +1,6 @@
1
1
  _MAJOR = "0"
2
2
  _MINOR = "8"
3
- _PATCH = "4"
3
+ _PATCH = "6"
4
4
  _SUFFIX = ""
5
5
 
6
6
  VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
@@ -1,9 +0,0 @@
1
- # OLMo-in-loop-evals
2
-
3
- Code for in-loop evaluation tasks used by the OLMo training team.
4
-
5
- ## Installation
6
-
7
- ```
8
- pip install ai2-olmo-eval
9
- ```
File without changes
File without changes