dslighting 1.7.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352):
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,201 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm.auto import tqdm
7
+
8
+ from mlebench.utils import read_csv
9
+
10
# Schema of the raw train.csv shipped with the competition data.
train_columns = (
    "site_id patient_id image_id laterality view age cancer biopsy "
    "invasive BIRADS implant density machine_id difficult_negative_case"
).split()

# Schema of the public test.csv produced by this script (prediction_id last,
# since it is built from patient_id + laterality rather than read from train).
test_columns = (
    "site_id patient_id image_id laterality view age implant "
    "machine_id prediction_id"
).split()

# Schema shared by answers.csv and sample_submission.csv.
submission_columns = ["prediction_id", "cancer"]


# Development switch: when True, `prepare` works on a 5k-row sample.
DEV = False
45
+
46
+
47
def _process_split(
    source_df: pd.DataFrame,
    train_pids: set,
    test_pids: set,
    public_path: Path,
    private_path: Path,
    raw_images_path: Path,
):
    """
    Processes a data split, creating all required files and directories.

    Writes to ``public_path``: train.csv, test.csv (labels dropped),
    sample_submission.csv, and per-patient DICOM folders under
    train_images/ and test_images/. Writes to ``private_path``:
    answers.csv with one row per prediction_id.

    Args:
        source_df: The DataFrame to split (e.g., the full dataset or a training subset).
        train_pids: A set of patient IDs for the training set.
        test_pids: A set of patient IDs for the test set.
        public_path: The public output directory.
        private_path: The private output directory.
        raw_images_path: Path to the original raw images.
    """
    public_path.mkdir(exist_ok=True, parents=True)
    private_path.mkdir(exist_ok=True, parents=True)

    # Create train and test dataframes from the source data
    new_train = source_df[source_df["patient_id"].isin(train_pids)].copy()
    # don't index the `prediction_id` (last col) since it's not in train and we need to build it
    # index also the `cancer` column, which we'll drop later for the without_labels version
    answers = source_df[source_df["patient_id"].isin(test_pids)][
        test_columns[:-1] + ["cancer"]
    ].copy()

    new_train.to_csv(public_path / "train.csv", index=False)

    # prediction_id is per patient+laterality, matching the competition format
    answers["prediction_id"] = answers["patient_id"].astype(str) + "_" + answers["laterality"]
    new_test_without_labels = answers.drop(columns=["cancer"])
    new_test_without_labels.to_csv(public_path / "test.csv", index=False)

    # merge multiple prediction_ids for the same patient into one for sample_submission and private test
    answers = answers[submission_columns].copy()
    # just take the first label for each prediction id -- the rest will be identical duplicates
    # (assumes all images of one patient+laterality share one cancer label -- TODO confirm)
    answers = answers.groupby("prediction_id").first().reset_index()
    answers.to_csv(private_path / "answers.csv", index=False)

    # Baseline submission: constant prediction at the train-set prevalence.
    sample_submission = answers.copy()
    sample_submission["cancer"] = new_train.cancer.mean()  # mean cancer rate in train set
    sample_submission.to_csv(public_path / "sample_submission.csv", index=False)

    assert len(sample_submission) == len(
        answers
    ), "sample_submission and test.csv should have the same number of rows"
    assert len(new_test_without_labels) + len(new_train) == len(
        source_df
    ), "The sum of the rows in new_test_without_labels and new_train should be equal to the number of rows in the source_df"
    # because of the merging
    # NOTE(review): this holds only if at least one test patient has multiple
    # images per prediction_id -- true for the full dataset, fragile for tiny samples
    assert len(answers) != len(
        new_test_without_labels
    ), "new_test and new_test_without_labels should have different number of rows"

    assert (
        answers.columns.tolist() == submission_columns
    ), f"answers should have columns {submission_columns}"
    assert (
        sample_submission.columns.tolist() == submission_columns
    ), f"sample_submission should have columns {submission_columns}"

    assert (
        new_train.columns.tolist() == source_df.columns.tolist()
    ), f"new_train should have columns {source_df.columns.tolist()}, got {new_train.columns.tolist()}"
    assert (
        new_test_without_labels.columns.tolist() == test_columns
    ), f"new_test_without_labels should have columns {test_columns}, got {new_test_without_labels.columns.tolist()}"

    assert set(new_test_without_labels["patient_id"]).isdisjoint(
        set(new_train["patient_id"])
    ), "new_test_without_labels and new_train should have disjoint patient_ids"

    # finally, split the images
    # raw layout is assumed to be raw_images_path/<patient_id>/<image_id>.dcm
    (public_path / "train_images").mkdir(exist_ok=True)
    for patient_id in tqdm(train_pids, desc=f"Copying train images to {public_path.name}"):
        patient_id_str = str(patient_id)
        patient_dir = public_path / "train_images" / patient_id_str
        patient_dir.mkdir(exist_ok=True)
        image_ids = new_train[new_train["patient_id"] == patient_id]["image_id"].to_list()
        for image_id in image_ids:
            shutil.copy(raw_images_path / patient_id_str / f"{image_id}.dcm", patient_dir)

    (public_path / "test_images").mkdir(exist_ok=True)
    for patient_id in tqdm(test_pids, desc=f"Copying test images to {public_path.name}"):
        patient_id_str = str(patient_id)
        patient_dir = public_path / "test_images" / patient_id_str
        patient_dir.mkdir(exist_ok=True)
        image_ids = new_test_without_labels[new_test_without_labels["patient_id"] == patient_id][
            "image_id"
        ].to_list()
        for image_id in image_ids:
            shutil.copy(raw_images_path / patient_id_str / f"{image_id}.dcm", patient_dir)

    # final checks
    assert len(list((public_path / "train_images").rglob("*.dcm"))) == len(
        new_train
    ), "Number of images in train_images should be equal to the number of rows in new_train"
    assert len(list((public_path / "test_images").rglob("*.dcm"))) == len(
        new_test_without_labels
    ), "Number of images in test_images should be equal to the number of rows in new_test_without_labels"
150
+
151
+
152
def prepare(raw: Path, public: Path, private: Path):
    """Build the competition split plus a parallel validation split.

    Step 1 reproduces the original public/private competition data in
    ``public``/``private``; its outputs must not change. Step 2 re-splits
    the competition training patients with the same methodology into
    sibling ``public_val``/``private_val`` directories.
    """
    full_df = read_csv(raw / "train.csv")
    if DEV:
        # Development shortcut: work on 5k samples instead of ~54k.
        full_df = full_df.sample(5000, random_state=42)

    # Kaggle says "You can expect roughly 8,000 patients" in the test set,
    # versus 11,913 patients in train (ratio ~0.4). We split on patient_id
    # but use only 0.1 to avoid removing too many samples from train.
    patient_ids = full_df["patient_id"].unique()
    main_train_pids, main_test_pids = train_test_split(
        patient_ids, test_size=0.1, random_state=42
    )

    # --- 1. Create the original public/private split ---
    _process_split(
        source_df=full_df,
        train_pids=set(main_train_pids),
        test_pids=set(main_test_pids),
        public_path=public,
        private_path=private,
        raw_images_path=raw / "train_images",
    )

    # --- 2. Create the new validation split ---
    # Parallel directories next to the originals.
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # Re-split the *original training patients* with identical test_size and
    # random_state to replicate the first split's methodology.
    val_train_pids, val_test_pids = train_test_split(
        main_train_pids, test_size=0.1, random_state=42
    )

    # The second split draws only from the original training data.
    train_only_df = full_df[full_df["patient_id"].isin(main_train_pids)].copy()

    _process_split(
        source_df=train_only_df,
        train_pids=set(val_train_pids),
        test_pids=set(val_test_pids),
        public_path=public_val,
        private_path=private_val,
        raw_images_path=raw / "train_images",
    )
@@ -0,0 +1,13 @@
1
+ import pandas as pd
2
+ from sklearn.metrics import roc_auc_score
3
+
4
+ from mlebench.competitions.utils import prepare_for_auroc_metric
5
+ from mlebench.grade_helpers import InvalidSubmissionError
6
+
7
+
8
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission with ROC AUC on the ``MGMT_value`` column.

    Rows are paired on the ``BraTS21ID`` identifier by the shared
    ``prepare_for_auroc_metric`` helper before scoring.
    """
    inputs = prepare_for_auroc_metric(
        answers=answers,
        submission=submission,
        id_col="BraTS21ID",
        target_col="MGMT_value",
    )
    return roc_auc_score(y_true=inputs["y_true"], y_score=inputs["y_score"])
@@ -0,0 +1,47 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ from mlebench.utils import read_csv
8
+
9
+
10
def prepare(raw: Path, public: Path, private: Path):
    """Create public/private competition splits from the raw training data.

    Splits the labelled training cases 90/10 into new train/test sets, copies
    the per-case image folders into ``public``, writes the held-out labels to
    ``private``, and emits a constant-probability sample submission.

    Args:
        raw: Directory holding ``train_labels.csv`` and ``train/<case id>``.
        public: Destination for participant-visible files.
        private: Destination for grading files.
    """
    # Create train and test splits from the original train set.
    old_train = read_csv(raw / "train_labels.csv", dtype={"BraTS21ID": str, "MGMT_value": int})
    new_train, new_test = train_test_split(old_train, test_size=0.1, random_state=0)

    # Ensure destination roots exist before creating subfolders inside them
    # (the original assumed they were pre-created and would fail otherwise).
    public.mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    # Copy over the image folders for the new train split.
    (public / "train").mkdir(exist_ok=True)
    for file_id in new_train["BraTS21ID"]:
        (public / "train" / file_id).mkdir(exist_ok=True)
        shutil.copytree(
            src=raw / "train" / file_id,
            dst=public / "train" / file_id,
            dirs_exist_ok=True,
        )
    assert len(list(public.glob("train/*"))) == len(
        new_train
    ), "Public train should have the same number of images as the train set"

    # Copy over the image folders for the new test split (labels withheld).
    (public / "test").mkdir(exist_ok=True)
    for file_id in new_test["BraTS21ID"]:
        (public / "test" / file_id).mkdir(exist_ok=True)
        shutil.copytree(
            src=raw / "train" / file_id,
            dst=public / "test" / file_id,
            dirs_exist_ok=True,
        )
    # Fixed copy-pasted assertion message: this guards the *test* split.
    assert len(list(public.glob("test/*"))) == len(
        new_test
    ), "Public test should have the same number of images as the test set"

    # Sample submission: constant 0.5 probability for every test case.
    submission_df = new_test.copy()
    submission_df["MGMT_value"] = 0.5

    # Write the CSV outputs.
    new_train.to_csv(public / "train_labels.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)
    submission_df.to_csv(public / "sample_submission.csv", index=False)
@@ -0,0 +1,97 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ from mlebench.utils import read_csv
8
+
9
+
10
+ def _create_split_files(
11
+ train_df: pd.DataFrame,
12
+ test_df: pd.DataFrame,
13
+ public_dest: Path,
14
+ private_dest: Path,
15
+ raw_data_path: Path,
16
+ ):
17
+ """
18
+ Helper function to populate public and private directories for a given split.
19
+
20
+ This function handles copying images, creating label files, and generating
21
+ a sample submission, ensuring a consistent structure across different data splits.
22
+ """
23
+ public_dest.mkdir(exist_ok=True)
24
+ private_dest.mkdir(exist_ok=True)
25
+
26
+ # Copy over images for the training set
27
+ (public_dest / "train").mkdir(exist_ok=True)
28
+ for file_id in train_df["BraTS21ID"]:
29
+ (public_dest / "train" / file_id).mkdir(exist_ok=True)
30
+ shutil.copytree(
31
+ src=raw_data_path / "train" / file_id,
32
+ dst=public_dest / "train" / file_id,
33
+ dirs_exist_ok=True,
34
+ )
35
+ assert len(list(public_dest.glob("train/*"))) == len(
36
+ train_df
37
+ ), "Public train should have the same number of images as the train set"
38
+
39
+ # Copy over images for the test set (without labels)
40
+ (public_dest / "test").mkdir(exist_ok=True)
41
+ for file_id in test_df["BraTS21ID"]:
42
+ (public_dest / "test" / file_id).mkdir(exist_ok=True)
43
+ shutil.copytree(
44
+ src=raw_data_path / "train" / file_id,
45
+ dst=public_dest / "test" / file_id,
46
+ dirs_exist_ok=True,
47
+ )
48
+ assert len(list(public_dest.glob("test/*"))) == len(
49
+ test_df
50
+ ), "Public test should have the same number of images as the test set"
51
+
52
+ # Create a sample submission file for the public directory
53
+ submission_df = test_df.copy()
54
+ submission_df["MGMT_value"] = 0.5
55
+
56
+ # Copy over the final CSV files to their destinations
57
+ train_df.to_csv(public_dest / "train_labels.csv", index=False)
58
+ test_df.to_csv(private_dest / "test.csv", index=False)
59
+ submission_df.to_csv(public_dest / "sample_submission.csv", index=False)
60
+
61
+
62
def prepare(raw: Path, public: Path, private: Path):
    """Build the original public/private split plus a parallel validation split.

    First reproduces the primary competition split in ``public``/``private``
    (byte-identical to the original script's output), then carves a second
    train/validation split out of the first split's training portion and
    writes it to sibling ``public_val``/``private_val`` directories.
    """
    # Full labelled pool from the raw dump.
    labeled = read_csv(raw / "train_labels.csv", dtype={"BraTS21ID": str, "MGMT_value": int})

    # --- Primary split: must match the original public/private outputs. ---
    base_train, base_test = train_test_split(labeled, test_size=0.1, random_state=0)
    _create_split_files(
        train_df=base_train,
        test_df=base_test,
        public_dest=public,
        private_dest=private,
        raw_data_path=raw,
    )

    # --- Validation split: re-split the primary training portion. ---
    # Same test_size and random_state so the methodology is replicated,
    # and the same helper so the directory layout is identical.
    val_public = public.parent / "public_val"
    val_private = private.parent / "private_val"
    inner_train, inner_test = train_test_split(base_train, test_size=0.1, random_state=0)
    _create_split_files(
        train_df=inner_train,
        test_df=inner_test,
        public_dest=val_public,
        private_dest=val_private,
        raw_data_path=raw,
    )
@@ -0,0 +1,10 @@
1
+ import pandas as pd
2
+ from sklearn.metrics import roc_auc_score
3
+
4
+ from mlebench.competitions.utils import prepare_for_auroc_metric
5
+
6
+
7
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Compute AUC-ROC between submitted probabilities and ground truth.

    The helper is called with keyword arguments so the submission/answers
    pairing cannot silently swap — the sibling graders in this package call
    ``prepare_for_auroc_metric`` with keywords (and in the opposite order),
    so a positional call here risked transposing the two frames.
    """
    roc_auc_inputs = prepare_for_auroc_metric(
        submission=submission, answers=answers, id_col="ID", target_col="TARGET"
    )
    return roc_auc_score(y_true=roc_auc_inputs["y_true"], y_score=roc_auc_inputs["y_score"])
@@ -0,0 +1,41 @@
1
+ from pathlib import Path
2
+
3
+ import pandas as pd
4
+
5
+ from mlebench.utils import read_csv
6
+
7
+
8
def prepare(raw: Path, public: Path, private: Path) -> None:
    """Materialize Santander Customer Satisfaction CSV splits.

    Reads the train/test features and held-out answers from ``raw``, then
    writes participant-visible files to ``public`` and grading files to
    ``private``. All validation now happens *before* any file is written,
    so a failed run cannot leave partial/inconsistent output behind (the
    original asserted after writing; ``assert`` is also stripped under
    ``python -O``, so explicit ``ValueError``s are raised instead).
    """
    public.mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    train = read_csv(raw / "train.csv")
    test_public = read_csv(raw / "test.csv")
    answers = read_csv(raw / "test_answer.csv")

    if (raw / "sample_submission.csv").exists():
        sample_submission = read_csv(raw / "sample_submission.csv")
    else:
        # No official sample: derive one from the answers with a constant score.
        sample_submission = answers.copy()
        sample_submission["TARGET"] = 0.0

    # validate="one_to_one" guards against duplicated IDs on either side.
    merged_private = test_public.merge(answers, on="ID", how="left", validate="one_to_one")
    if merged_private["TARGET"].isna().any():
        missing_ids = merged_private.loc[merged_private["TARGET"].isna(), "ID"].tolist()[:5]
        raise ValueError(f"Missing TARGET labels for IDs (showing up to 5): {missing_ids}")

    # Invariant checks run before any output is written.
    if not (len(test_public) == len(answers) == len(merged_private)):
        raise ValueError("Test features, answers, and private test must have the same length.")
    if set(train.columns) - {"TARGET"} != set(test_public.columns):
        raise ValueError("Train and public test feature columns should match aside from TARGET.")
    if sample_submission.columns.tolist() != ["ID", "TARGET"]:
        raise ValueError("Sample submission must be ID,TARGET.")

    train.to_csv(public / "train.csv", index=False)
    test_public.to_csv(public / "test.csv", index=False)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    merged_private.to_csv(private / "test.csv", index=False)
    answers.to_csv(private / "gold_submission.csv", index=False)
@@ -0,0 +1,56 @@
1
+ """
2
+ Grading function for ScienceBench task 1
3
+ """
4
+
5
+ import pandas as pd
6
+ from sklearn.metrics import roc_auc_score
7
+
8
+
9
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> "float | None":
    """
    Grade submission using ROC-AUC (as configured in `config.yaml`).

    Args:
        submission: DataFrame with predictions
        answers: DataFrame with ground truth

    Returns:
        Mean ROC-AUC over the shared target columns (higher is better), or
        ``None`` when grading is impossible: either frame missing, no common
        target columns, or every target column single-class. (Annotation
        widened to ``float | None`` — the original claimed ``float`` but
        returned ``None`` on several paths.)
    """
    if submission is None or answers is None:
        return None

    # Use "smiles" as the join key when both frames carry it.
    id_col = "smiles" if "smiles" in submission.columns and "smiles" in answers.columns else None

    # Align rows on identifier if available; otherwise assume same order.
    if id_col:
        merged = pd.merge(
            answers,
            submission,
            on=id_col,
            suffixes=("_true", "_pred"),
            how="inner",
        )
    else:
        # Reset both indexes so concat pairs rows by *position*. Without this,
        # pd.concat(axis=1) aligns on index labels and silently mispairs (or
        # NaN-pads) rows when the two frames carry different indexes.
        merged = pd.concat(
            [
                answers.reset_index(drop=True).add_suffix("_true"),
                submission.reset_index(drop=True).add_suffix("_pred"),
            ],
            axis=1,
        )

    # Score all common target columns (exclude identifier column).
    target_cols = [c for c in answers.columns if c != id_col and c in submission.columns]
    if not target_cols:
        return None

    aucs: list[float] = []
    for col in target_cols:
        y_true = merged[f"{col}_true"]
        y_pred = merged[f"{col}_pred"]
        # ROC-AUC is undefined if only one class is present.
        if y_true.nunique(dropna=True) < 2:
            continue
        aucs.append(float(roc_auc_score(y_true, y_pred)))

    if not aucs:
        return None
    return float(sum(aucs) / len(aucs))
@@ -0,0 +1,75 @@
1
+ """
2
+ Data preparation for ScienceBench Task 1: clintox_nn
3
+ Dataset: clintox
4
+ """
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from pathlib import Path
9
+ import shutil
10
+
11
+
12
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare the clintox task data.

    Copies the clintox train/test CSVs into ``public``, builds a constant
    0.5-probability sample submission, and writes the gold answers (or a
    placeholder) into ``private``.

    Args:
        raw: Path to raw data directory holding the clintox CSVs
        public: Path to public directory (visible to participants)
        private: Path to private directory (used for grading)
    """
    print("=" * 60)
    print("Preparing ScienceBench Task 1: clintox_nn")
    print("=" * 60)
    print(f"Raw directory: {raw}")
    print(f"Public directory: {public}")
    print(f"Private directory: {private}")

    # Ensure destinations exist before copying anything into them
    # (the original assumed they were pre-created).
    public.mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    # Prefer the ``raw`` directory we were actually handed (the original
    # ignored it); fall back to the historical hard-coded placeholder path
    # only when ``raw`` does not contain the dataset.
    source_dir = raw
    if not (source_dir / "clintox_train.csv").exists():
        source_dir = Path("/path/to/ScienceAgent-bench/benchmark/datasets/clintox")

    if not source_dir.exists():
        raise FileNotFoundError(f"Source dataset not found: {source_dir}")

    train_file = source_dir / "clintox_train.csv"
    test_file = source_dir / "clintox_test.csv"

    if not train_file.exists() or not test_file.exists():
        raise FileNotFoundError(f"Required data files not found in {source_dir}")

    print("\nCopying data files to public directory...")
    shutil.copy2(train_file, public / "clintox_train.csv")
    shutil.copy2(test_file, public / "clintox_test.csv")
    print(" ✓ Copied: clintox_train.csv")
    print(" ✓ Copied: clintox_test.csv")

    # Read test file to get structure
    test_df = pd.read_csv(test_file)

    # Create sample_submission with expected format:
    # smiles, FDA_APPROVED, CT_TOX (probability placeholders).
    sample_submission = pd.DataFrame({
        "smiles": test_df["smiles"],
        "FDA_APPROVED": 0.5,
        "CT_TOX": 0.5,
    })
    sample_submission.to_csv(public / "sample_submission.csv", index=False)
    print(f"\n✓ Created sample_submission.csv with {len(sample_submission)} rows")

    # Load gold results for the grading answer when available.
    gold_file = Path("/path/to/ScienceAgent-bench/benchmark/eval_programs/gold_results/clintox_gold.csv")
    if gold_file.exists():
        gold_df = pd.read_csv(gold_file)
        gold_df.to_csv(private / "answer.csv", index=False)
        print(f"✓ Created answer.csv with {len(gold_df)} rows from gold results")
    else:
        # If gold file doesn't exist, fall back to a placeholder answer.
        print(f"⚠ Warning: Gold results not found at {gold_file}")
        answer = sample_submission.copy()
        answer.to_csv(private / "answer.csv", index=False)
        print("✓ Created placeholder answer.csv")

    print("\nData preparation completed!")
    print(f" Public files: {sorted([f.name for f in public.glob('*')])}")
    print(f" Private files: {sorted([f.name for f in private.glob('*')])}")
@@ -0,0 +1,37 @@
1
+ """Grading function for ScienceBench task 15 (admet_ai)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pandas as pd
6
+ from sklearn.metrics import roc_auc_score
7
+
8
REQUIRED_COLUMNS = {"Drug", "Y"}
AUC_THRESHOLD = 0.84


def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Binary pass/fail grade for ScienceBench task 15 (admet_ai).

    A submission earns 1.0 only when it is non-empty, carries the required
    columns, lists drugs in exactly the answer order, and its predictions
    reach a ROC AUC of at least ``AUC_THRESHOLD``; every other outcome,
    including an uncomputable AUC, yields 0.0.
    """
    if submission.empty:
        print("Submission is empty.")
        return 0.0

    missing = REQUIRED_COLUMNS - set(submission.columns)
    if missing:
        print(f"Submission missing required columns: {missing}")
        return 0.0

    if not REQUIRED_COLUMNS.issubset(answers.columns):
        print("Answer file is missing required columns.")
        return 0.0

    # Row order must match exactly — predictions are compared positionally.
    if submission["Drug"].tolist() != answers["Drug"].tolist():
        print("Drug ordering mismatch.")
        return 0.0

    try:
        auc = roc_auc_score(answers["Y"].values, submission["Y"].values)
    except ValueError as exc:
        # e.g. a single class in the answers makes ROC AUC undefined.
        print(f"Unable to compute ROC AUC: {exc}")
        return 0.0

    print(f"ROC AUC: {auc}")
    return 1.0 if auc >= AUC_THRESHOLD else 0.0