dslighting 1.7.1__py3-none-any.whl → 1.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/METADATA +3 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,139 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm.auto import tqdm
7
+
8
+ from mlebench.utils import read_csv
9
+
10
+
11
def verify_directory_sync(df: pd.DataFrame, expected_dir: Path, unexpected_dir: Path):
    """
    Checks that the dataframe contents match the directory structure.

    Each (case, day) row must have its `<case>/<case>_<day>` directory present
    under `expected_dir` and absent under `unexpected_dir`; otherwise an
    AssertionError is raised.
    """
    progress = tqdm(
        df.iterrows(), desc=f"Verifying directory sync for {expected_dir.name}", total=len(df)
    )
    for _, row in progress:
        case_day = f"{row['case']}_{row['day']}"
        present = expected_dir / row["case"] / case_day
        absent = unexpected_dir / row["case"] / case_day
        assert present.exists(), f"Directory {present} does not exist but is listed in the dataframe."
        assert not absent.exists(), f"Directory {absent} exists but is not listed in the dataframe."
26
+
27
+
28
def prepare(raw: Path, public: Path, private: Path):
    """
    Split the raw competition data into public train/test and private test sets.

    Reads `raw/train.csv`, splits by case (and partially by day), copies the
    image directories into `public/train` and `public/test` accordingly, and
    writes `train.csv`, `test.csv` and `sample_submission.csv` under `public`
    plus the labelled `test.csv` under `private`.

    Args:
        raw: Directory containing the original competition download
            (`train.csv` and the `train/` image tree).
        public: Output directory for participant-visible files.
        private: Output directory for the held-out grading files.

    Raises:
        AssertionError: if the produced directories/CSVs fail the internal
            consistency checks.
    """
    old_train = read_csv(raw / "train.csv")

    # ----------------------- Splitting
    # Extract case and day from 'id' (ids look like "<case>_<day>_..._<slice>")
    old_train["case"] = old_train["id"].apply(lambda x: x.split("_")[0])
    old_train["day"] = old_train["id"].apply(lambda x: x.split("_")[1])
    old_train["slice"] = old_train["id"].apply(lambda x: x.split("_")[-1])

    # Split cases into train and test (fixed seed so the split is reproducible)
    unique_cases = old_train["case"].unique()
    train_cases, test_cases = train_test_split(unique_cases, test_size=0.1, random_state=42)

    # Initially assign entire cases to train or test set
    old_train["set"] = old_train["case"].apply(lambda x: "test" if x in test_cases else "train")

    # Then mark some days from train to be test, to match competition test description
    days_df = old_train[old_train["set"] == "train"].groupby("case")["day"].apply(set).reset_index()
    for _, row in days_df.iterrows():
        # if there's more than 4 days, we will move any days past the 4th to the test set
        days = row["day"]
        if len(days) > 4:
            # sort numerically on the digits after the "day" prefix, not lexically
            days = sorted(days, key=lambda x: int(x[len("day") :]))
            days_to_move = days[4:]
            # change their set to "test"
            old_train.loc[
                old_train["case"].eq(row["case"]) & old_train["day"].isin(days_to_move), "set"
            ] = "test"

    # ----------------------- Move the files to the correct new locations
    old_train_dir = raw / "train"
    new_train_dir = public / "train"
    new_test_dir = public / "test"

    # Create new directories if they don't exist
    new_train_dir.mkdir(parents=True, exist_ok=True)
    new_test_dir.mkdir(parents=True, exist_ok=True)

    # Move directories based on the set assignment
    for case in tqdm(unique_cases, desc="Splitting by case"):
        original_path = old_train_dir / case
        if case in train_cases:
            new_path = new_train_dir / case
        else:
            new_path = new_test_dir / case
        # new_path.mkdir(parents=True, exist_ok=True)
        shutil.copytree(original_path, new_path, dirs_exist_ok=True)

    # Move specific days from public/train/ to public/test/ for marked case-days
    for _, row in tqdm(
        old_train.iterrows(), desc="Handling additional day-based splits", total=len(old_train)
    ):
        if row["set"] == "test":
            source_day_path = new_train_dir / row["case"] / f"{row['case']}_{row['day']}"
            target_day_path = new_test_dir / row["case"] / f"{row['case']}_{row['day']}"
            # only the day-level splits still live under train; whole-case test
            # directories were already copied to test above, so skip those
            if source_day_path.exists():
                target_day_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(source_day_path.as_posix(), target_day_path.as_posix())

    # ------------------------ Saving splits
    new_train = old_train[old_train["set"] == "train"].copy()
    new_test = old_train[old_train["set"] == "test"].copy()
    # some asserts before we drop columns
    verify_directory_sync(new_train, expected_dir=new_train_dir, unexpected_dir=new_test_dir)
    verify_directory_sync(new_test, expected_dir=new_test_dir, unexpected_dir=new_train_dir)

    # get image height and image width for the test set, since this is needed for the metric
    for _, row in tqdm(
        new_test.iterrows(), desc="Getting image dimensions for test set", total=len(new_test)
    ):
        case, day, day_slice = row["case"], row["day"], row["slice"]
        # scan filenames embed the dimensions: slice_<slice>_<width>_<height>_*.png
        # — presumably; the exact filename layout is inferred from the glob below
        image_paths = list(
            (old_train_dir / case / f"{case}_{day}" / "scans").glob(f"slice_{day_slice}_*.png")
        )
        assert len(image_paths) == 1, f"Expected 1 image, found {len(image_paths)}"
        image_path = image_paths[0]
        width, height = (int(length) for length in image_path.stem.split("_")[2:4])
        new_test.loc[row.name, "image_width"] = width
        new_test.loc[row.name, "image_height"] = height

    # don't need these anymore, and weren't part of the original data
    new_train.drop(columns=["set", "case", "day", "slice"], inplace=True)
    new_test.drop(columns=["set", "case", "day", "slice"], inplace=True)

    # create sample submission
    sample_submission = new_test.copy()
    sample_submission["segmentation"] = "1 1 5 2"
    # these are just metadata for the private test set necessary for the metric
    sample_submission.drop(columns=["image_height", "image_width"], inplace=True)
    # rename 'segmentation' to 'predicted' to match kaggle.com
    sample_submission.rename(columns={"segmentation": "predicted"}, inplace=True)
    sample_submission.to_csv(public / "sample_submission.csv", index=False, na_rep="")

    # create private files
    # rename 'segmentation' to 'predicted' to match sample_submission format
    new_test.rename(columns={"segmentation": "predicted"}, inplace=True)
    new_test.to_csv(private / "test.csv", index=False, na_rep="")

    # create public files
    new_train.to_csv(public / "train.csv", index=False, na_rep="")
    # including this because we are converting this from code to csv competition
    # and we need to point the model to the ids it needs to produce labels for
    new_test_without_labels = new_test.drop(columns=["predicted", "image_width", "image_height"])
    new_test_without_labels.to_csv(public / "test.csv", index=False, na_rep="")

    # ------------------------ checks

    assert new_test_without_labels.shape[1] == 2, "Public test should have 2 columns."
    assert new_train.shape[1] == 3, "Public train should have 3 columns."
    assert len(new_train) + len(new_test) == len(
        old_train
    ), "Train and test should sum up to the original data."
@@ -0,0 +1,193 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm.auto import tqdm
7
+
8
+ from mlebench.utils import read_csv
9
+
10
+
11
def verify_directory_sync(df: pd.DataFrame, expected_dir: Path, unexpected_dir: Path):
    """Assert the dataframe and the on-disk layout agree.

    For every (case, day) row in ``df``, the corresponding ``case/case_day``
    directory must exist under ``expected_dir`` and must NOT exist under
    ``unexpected_dir``.
    """
    progress = tqdm(
        df.iterrows(), desc=f"Verifying directory sync for {expected_dir.name}", total=len(df)
    )
    for _, entry in progress:
        # Relative path shared by both checks: <case>/<case>_<day>
        rel = Path(entry["case"]) / f"{entry['case']}_{entry['day']}"
        case_day_path = expected_dir / rel
        assert (
            case_day_path.exists()
        ), f"Directory {case_day_path} does not exist but is listed in the dataframe."
        non_existent_path = unexpected_dir / rel
        assert (
            not non_existent_path.exists()
        ), f"Directory {non_existent_path} exists but is not listed in the dataframe."
26
+
27
+
28
def _create_split(
    input_df: pd.DataFrame,
    raw_images_dir: Path,
    output_public_path: Path,
    output_private_path: Path,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Helper function to perform a train/test split on a dataframe, move image files accordingly,
    and save the resulting CSVs and submission files.

    Args:
        input_df: The dataframe to be split.
        raw_images_dir: The source directory of all raw image data.
        output_public_path: The destination directory for public files (e.g., public/ or public_val/).
        output_private_path: The destination directory for private files (e.g., private/ or private_val/).

    Returns:
        A tuple containing the created training and testing dataframes.
    """
    # ----------------------- Splitting
    # Extract case and day from 'id'.
    # NOTE(review): assumes ids look like "<case>_<day>_slice_<nnnn>" so that
    # split("_")[0] is the case, [1] the day, and [-1] the slice number — TODO confirm.
    df_to_split = input_df.copy()
    df_to_split["case"] = df_to_split["id"].apply(lambda x: x.split("_")[0])
    df_to_split["day"] = df_to_split["id"].apply(lambda x: x.split("_")[1])
    df_to_split["slice"] = df_to_split["id"].apply(lambda x: x.split("_")[-1])

    # Split cases into train and test (case-level split avoids leaking a
    # patient's scans across the train/test boundary).
    unique_cases = df_to_split["case"].unique()
    train_cases, test_cases = train_test_split(unique_cases, test_size=0.1, random_state=42)

    # Initially assign entire cases to train or test set
    df_to_split["set"] = df_to_split["case"].apply(lambda x: "test" if x in test_cases else "train")

    # Then mark some days from train to be test, to match competition test description
    days_df = df_to_split[df_to_split["set"] == "train"].groupby("case")["day"].apply(set).reset_index()
    for _, row in days_df.iterrows():
        # If there's more than 4 days, we will move any days past the 4th to the test set.
        days = row["day"]
        if len(days) > 4:
            # Sort numerically by the digits after the "day" prefix (e.g. "day20" -> 20).
            days = sorted(days, key=lambda x: int(x[len("day") :]))
            days_to_move = days[4:]
            # change their set to "test"
            df_to_split.loc[
                df_to_split["case"].eq(row["case"]) & df_to_split["day"].isin(days_to_move), "set"
            ] = "test"

    # ----------------------- Move the files to the correct new locations
    new_train_dir = output_public_path / "train"
    new_test_dir = output_public_path / "test"

    # Create new directories if they don't exist
    new_train_dir.mkdir(parents=True, exist_ok=True)
    new_test_dir.mkdir(parents=True, exist_ok=True)
    output_private_path.mkdir(parents=True, exist_ok=True)

    # Copy each whole case into train/ or test/ based on the case-level split.
    # The raw data is copied (not moved) so this helper can be re-run for the
    # validation split against the same raw directory.
    for case in tqdm(unique_cases, desc=f"Splitting by case for {output_public_path.name}"):
        original_path = raw_images_dir / case
        if case in train_cases:
            new_path = new_train_dir / case
        else:
            new_path = new_test_dir / case
        shutil.copytree(original_path, new_path, dirs_exist_ok=True)

    # Move specific days from public/train/ to public/test/ for marked case-days.
    # The exists() guard skips rows whose day directory was already moved by a
    # previous row of the same case/day.
    for _, row in tqdm(
        df_to_split.iterrows(),
        desc=f"Handling day-based splits for {output_public_path.name}",
        total=len(df_to_split),
    ):
        if row["set"] == "test":
            source_day_path = new_train_dir / row["case"] / f"{row['case']}_{row['day']}"
            target_day_path = new_test_dir / row["case"] / f"{row['case']}_{row['day']}"
            if source_day_path.exists():
                target_day_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(source_day_path.as_posix(), target_day_path.as_posix())

    # ------------------------ Saving splits
    new_train = df_to_split[df_to_split["set"] == "train"].copy()
    new_test = df_to_split[df_to_split["set"] == "test"].copy()
    # some asserts before we drop columns
    verify_directory_sync(new_train, expected_dir=new_train_dir, unexpected_dir=new_test_dir)
    verify_directory_sync(new_test, expected_dir=new_test_dir, unexpected_dir=new_train_dir)

    # get image height and image width for the test set, since this is needed for the metric
    # NOTE(review): dimensions are parsed from the scan filename, assumed to be
    # "slice_<nnnn>_<width>_<height>_*.png" — TODO confirm against raw data.
    for _, row in tqdm(
        new_test.iterrows(),
        desc=f"Getting image dimensions for {output_public_path.name} test set",
        total=len(new_test),
    ):
        case, day, day_slice = row["case"], row["day"], row["slice"]
        image_paths = list(
            (raw_images_dir / case / f"{case}_{day}" / "scans").glob(f"slice_{day_slice}_*.png")
        )
        assert len(image_paths) == 1, f"Expected 1 image, found {len(image_paths)}"
        image_path = image_paths[0]
        width, height = (int(length) for length in image_path.stem.split("_")[2:4])
        # Assigning into new columns via .loc creates them on first use; pandas
        # will store them as floats since the columns start out as NaN.
        new_test.loc[row.name, "image_width"] = width
        new_test.loc[row.name, "image_height"] = height

    # Don't need these anymore, and they weren't part of the original data.
    new_train.drop(columns=["set", "case", "day", "slice"], inplace=True)
    new_test.drop(columns=["set", "case", "day", "slice"], inplace=True)

    # create sample submission (a constant placeholder RLE mask for every row)
    sample_submission = new_test.copy()
    sample_submission["segmentation"] = "1 1 5 2"
    # these are just metadata for the private test set necessary for the metric
    sample_submission.drop(columns=["image_height", "image_width"], inplace=True)
    # rename 'segmentation' to 'predicted' to match kaggle.com
    sample_submission.rename(columns={"segmentation": "predicted"}, inplace=True)
    sample_submission.to_csv(output_public_path / "sample_submission.csv", index=False, na_rep="")

    # create private files
    # rename 'segmentation' to 'predicted' to match sample_submission format
    new_test.rename(columns={"segmentation": "predicted"}, inplace=True)
    new_test.to_csv(output_private_path / "test.csv", index=False, na_rep="")

    # create public files
    new_train.to_csv(output_public_path / "train.csv", index=False, na_rep="")
    # Including this because we are converting this from code to csv competition
    # and we need to point the model to the ids it needs to produce labels for.
    new_test_without_labels = new_test.drop(columns=["predicted", "image_width", "image_height"])
    new_test_without_labels.to_csv(output_public_path / "test.csv", index=False, na_rep="")

    # ------------------------ checks
    assert new_test_without_labels.shape[1] == 2, "Public test should have 2 columns."
    assert new_train.shape[1] == 3, "Public train should have 3 columns."
    assert len(new_train) + len(new_test) == len(
        input_df
    ), "Train and test should sum up to the original data."

    return new_train, new_test
156
+
157
+
158
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepares the raw data by creating two sets of splits:
    1. A main train/test split for the final competition (`public`/`private`).
    2. A validation split from the main training data (`public_val`/`private_val`).
    """
    labels_df = read_csv(raw / "train.csv")
    images_root = raw / "train"

    # First split: the primary competition assets under public/ and private/.
    # The output of this step is identical to the original single-split script.
    print("--- Generating main train/test split for 'public' and 'private' directories ---")
    remaining_train, _ = _create_split(
        input_df=labels_df,
        raw_images_dir=images_root,
        output_public_path=public,
        output_private_path=private,
    )

    # Second split: carve a validation set out of the training data produced by
    # the first split. Results go into sibling *_val directories so the main
    # competition files are untouched.
    print("\n--- Generating validation split for 'public_val' and 'private_val' directories ---")
    _create_split(
        input_df=remaining_train,  # train set of the first split becomes the new input
        raw_images_dir=images_root,  # image sources are the same raw directory
        output_public_path=public.parent / "public_val",
        output_private_path=private.parent / "private_val",
    )

    print("\nData preparation complete.")
@@ -0,0 +1,52 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.metrics import mean_absolute_error
4
+
5
+ from mlebench.grade_helpers import InvalidSubmissionError
6
+
7
+
8
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
    """Validate a submission against the answers and return aligned arrays.

    Args:
        submission: Participant predictions; must contain 'id' and 'pressure'.
        answers: Ground truth with matching 'id' and a 'pressure' column.

    Returns:
        A tuple ``(y_true, y_pred)`` of float arrays aligned row-by-row
        after sorting both frames by 'id'.

    Raises:
        InvalidSubmissionError: If the submission has the wrong length,
            is missing a required column, has mismatched ids, or contains
            non-numeric / NaN predictions.
    """
    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Submission length ({len(submission)}) != answers length ({len(answers)})"
        )

    if "id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an 'id' column")

    if "pressure" not in submission.columns:
        raise InvalidSubmissionError("Submission must have a 'pressure' column")

    # Sort both frames by id so rows line up positionally.
    submission = submission.sort_values("id").reset_index(drop=True)
    answers = answers.sort_values("id").reset_index(drop=True)

    # After sorting, ids must match element-wise.
    if not (submission["id"] == answers["id"]).all():
        raise InvalidSubmissionError("Submission and answers id mismatch")

    # Extract predictions and true values
    y_pred = submission["pressure"].values
    y_true = answers["pressure"].values

    # Coerce predictions to float; chain the original exception so the
    # underlying parse failure stays visible in tracebacks.
    try:
        y_pred = y_pred.astype(float)
    except (ValueError, TypeError) as err:
        raise InvalidSubmissionError("Predictions must be numeric") from err

    if np.any(np.isnan(y_pred)):
        raise InvalidSubmissionError("Predictions cannot contain NaN values")

    return y_true, y_pred
43
+
44
+
45
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission with mean absolute error (lower is better)."""
    # prepare_for_metric returns (y_true, y_pred) in the order MAE expects.
    return mean_absolute_error(*prepare_for_metric(submission, answers))
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+ import pandas as pd
3
+
4
+
5
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare ventilator-pressure-prediction dataset.

    Copies the raw train/test/sample-submission files into `public`
    (visible to agents) and the held-out answers into `private`
    (used for grading).

    Args:
        raw: Directory containing the raw competition files.
        public: Output directory for agent-visible files.
        private: Output directory for grading files.
    """
    # Read data
    train = pd.read_csv(raw / "train.csv")
    test = pd.read_csv(raw / "test.csv")
    sample_submission = pd.read_csv(raw / "sample_submission.csv")
    test_answer = pd.read_csv(raw / "test_answer.csv")

    # Validate inputs BEFORE writing anything, so a malformed raw dataset
    # cannot leave partially-written outputs behind.
    assert len(test_answer) == len(sample_submission), \
        f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
    assert "id" in test_answer.columns, "Test answer must have 'id' column"
    assert "pressure" in test_answer.columns, "Test answer must have 'pressure' column"

    # Ensure output directories exist.
    public.mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    # Public files (visible to agents)
    train.to_csv(public / "train.csv", index=False)
    test.to_csv(public / "test.csv", index=False)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    # Private files (for grading)
    test_answer.to_csv(private / "test.csv", index=False)
@@ -0,0 +1,142 @@
1
+ from pathlib import Path
2
+
3
+ import pandas as pd
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ from mlebench.utils import read_csv
7
+
8
+
9
+ def _save_split_data(
10
+ train_df: pd.DataFrame,
11
+ test_df: pd.DataFrame,
12
+ public_dir: Path,
13
+ private_dir: Path,
14
+ ):
15
+ """
16
+ Saves a train/test split to the specified public and private directories,
17
+ maintaining a consistent file structure and naming convention.
18
+
19
+ Args:
20
+ train_df (pd.DataFrame): The training dataframe.
21
+ test_df (pd.DataFrame): The testing dataframe with labels.
22
+ public_dir (Path): The directory to save public-facing files.
23
+ private_dir (Path): The directory to save private/ground-truth files.
24
+ """
25
+ # Ensure directories exist
26
+ public_dir.mkdir(exist_ok=True, parents=True)
27
+ private_dir.mkdir(exist_ok=True, parents=True)
28
+
29
+ # Make copies to avoid side-effects from ID resetting on the original dataframes
30
+ train_df = train_df.copy()
31
+ test_df = test_df.copy()
32
+
33
+ # Reset the 'id' column of train and test, starting at 1
34
+ train_df["id"] = range(1, len(train_df) + 1)
35
+ test_df["id"] = range(1, len(test_df) + 1)
36
+
37
+ assert set(train_df["breath_id"]).isdisjoint(
38
+ set(test_df["breath_id"])
39
+ ), "Test set contains breath_ids that are in the train set"
40
+
41
+ # Create public test
42
+ test_without_labels = test_df.drop(columns=["pressure"])
43
+
44
+ # Create sample submission
45
+ sample_submission = test_without_labels.copy()[["id"]]
46
+ sample_submission["pressure"] = 0
47
+
48
+ # Write CSVs with identical filenames for both original and validation splits
49
+ train_df.to_csv(public_dir / "train.csv", index=False, float_format="%.10g")
50
+ test_without_labels.to_csv(public_dir / "test.csv", index=False, float_format="%.10g")
51
+ sample_submission.to_csv(
52
+ public_dir / "sample_submission.csv", index=False, float_format="%.10g"
53
+ )
54
+ test_df.to_csv(private_dir / "test.csv", index=False, float_format="%.10g")
55
+
56
+ # Checks
57
+ assert (
58
+ sample_submission.shape[0] == test_without_labels.shape[0]
59
+ ), "Sample submission and new_test should have the same number of rows"
60
+ assert sample_submission.shape[1] == 2, "Sample submission should have 2 columns"
61
+ assert (
62
+ test_without_labels.shape[1] == 7
63
+ ), f"Expected 7 columns in test_without_labels, but got {test_without_labels.shape[1]}"
64
+ assert (
65
+ train_df.shape[1] == 8
66
+ ), f"Expected 8 columns in new_train, but got {train_df.shape[1]}"
67
+
68
+
69
def prepare(raw: Path, public: Path, private: Path):
    """Split the raw ventilator data into main and validation competition sets.

    Produces the primary split under ``public``/``private`` and a secondary
    validation split (carved out of the primary training set) under sibling
    ``public_val``/``private_val`` directories.
    """
    # Create train, test from train split
    dtypes = {
        "id": "int32",
        "breath_id": "int32",
        "R": "int8",
        "C": "int8",
        "time_step": "float64",
        "u_in": "float64",
        "u_out": "int8",
        "pressure": "float64",
    }

    old_train = read_csv(raw / "train.csv", dtype=dtypes)

    # Group by 'breath_id' and maintain the groups as lists of indices
    groups = [df.index.tolist() for _, df in old_train.groupby("breath_id")]

    # Split the groups into train and test sets such that train and test sets
    # do not contain the same 'breath_id's
    train_groups, test_groups = train_test_split(groups, test_size=0.1, random_state=0)

    # Flatten the list of indices to get indices for train and test sets
    train_idx = [idx for sublist in train_groups for idx in sublist]
    test_idx = [idx for sublist in test_groups for idx in sublist]

    # Create train and test DataFrames using the indices
    new_train = old_train.loc[train_idx]
    new_test = old_train.loc[test_idx]

    # --- Original Output Generation ---
    # This part remains unchanged in its output. The original script's file
    # creation logic is now encapsulated and called here to produce the
    # final competition assets.
    _save_split_data(new_train, new_test, public, private)

    # Check that original total size is preserved
    assert len(old_train) == len(new_train) + len(
        new_test
    ), "New train and test should sum up to the old train size"

    # --- New Validation Set Generation ---

    # Define paths for the new validation split
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # To make the validation set size match the original test set size,
    # we use the number of groups from the original test split as the test_size.
    # (Passing an int to train_test_split selects that many groups.)
    val_test_group_size = len(test_groups)

    # Split the train_groups again to create a new, smaller training set and a validation set.
    # We use the same random_state for reproducibility.
    train_val_groups, test_val_groups = train_test_split(
        train_groups, test_size=val_test_group_size, random_state=0
    )

    # Flatten the list of indices for the new split
    train_val_idx = [idx for sublist in train_val_groups for idx in sublist]
    test_val_idx = [idx for sublist in test_val_groups for idx in sublist]

    # Create the new train_val and test_val DataFrames from the original data
    train_val = old_train.loc[train_val_idx]
    test_val = old_train.loc[test_val_idx]

    # Save the new validation split using the same helper function to ensure
    # identical file structure and naming in the new `_val` directories.
    _save_split_data(train_val, test_val, public_val, private_val)

    # Check that the validation split correctly partitioned the new_train set
    assert len(new_train) == len(train_val) + len(
        test_val
    ), "train_val and test_val should sum up to the new_train size"
@@ -0,0 +1,52 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.metrics import mean_absolute_error
4
+
5
+ from mlebench.grade_helpers import InvalidSubmissionError
6
+
7
+
8
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
    """Validate a submission against the answers and return aligned arrays.

    Args:
        submission: Participant predictions; must contain 'id' and 'pressure'.
        answers: Ground truth with matching 'id' and a 'pressure' column.

    Returns:
        A tuple ``(y_true, y_pred)`` of float arrays aligned row-by-row
        after sorting both frames by 'id'.

    Raises:
        InvalidSubmissionError: If the submission has the wrong length,
            is missing a required column, has mismatched ids, or contains
            non-numeric / NaN predictions.
    """
    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Submission length ({len(submission)}) != answers length ({len(answers)})"
        )

    if "id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an 'id' column")

    if "pressure" not in submission.columns:
        raise InvalidSubmissionError("Submission must have a 'pressure' column")

    # Sort both frames by id so rows line up positionally.
    submission = submission.sort_values("id").reset_index(drop=True)
    answers = answers.sort_values("id").reset_index(drop=True)

    # After sorting, ids must match element-wise.
    if not (submission["id"] == answers["id"]).all():
        raise InvalidSubmissionError("Submission and answers id mismatch")

    # Extract predictions and true values
    y_pred = submission["pressure"].values
    y_true = answers["pressure"].values

    # Coerce predictions to float; chain the original exception so the
    # underlying parse failure stays visible in tracebacks.
    try:
        y_pred = y_pred.astype(float)
    except (ValueError, TypeError) as err:
        raise InvalidSubmissionError("Predictions must be numeric") from err

    if np.any(np.isnan(y_pred)):
        raise InvalidSubmissionError("Predictions cannot contain NaN values")

    return y_true, y_pred
43
+
44
+
45
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission with mean absolute error (lower is better)."""
    # prepare_for_metric returns (y_true, y_pred) in the order MAE expects.
    return mean_absolute_error(*prepare_for_metric(submission, answers))
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+ import pandas as pd
3
+
4
+
5
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare ventilator-pressure-prediction dataset.

    Copies the raw train/test/sample-submission files into `public`
    (visible to agents) and the held-out answers into `private`
    (used for grading).

    Args:
        raw: Directory containing the raw competition files.
        public: Output directory for agent-visible files.
        private: Output directory for grading files.
    """
    # Read data
    train = pd.read_csv(raw / "train.csv")
    test = pd.read_csv(raw / "test.csv")
    sample_submission = pd.read_csv(raw / "sample_submission.csv")
    test_answer = pd.read_csv(raw / "test_answer.csv")

    # Validate inputs BEFORE writing anything, so a malformed raw dataset
    # cannot leave partially-written outputs behind.
    assert len(test_answer) == len(sample_submission), \
        f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
    assert "id" in test_answer.columns, "Test answer must have 'id' column"
    assert "pressure" in test_answer.columns, "Test answer must have 'pressure' column"

    # Ensure output directories exist.
    public.mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    # Public files (visible to agents)
    train.to_csv(public / "train.csv", index=False)
    test.to_csv(public / "test.csv", index=False)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    # Private files (for grading)
    test_answer.to_csv(private / "test.csv", index=False)