dslighting 1.7.1__py3-none-any.whl → 1.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/METADATA +3 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,130 @@
1
+ import shutil
2
+ from pathlib import Path
3
+ from typing import Dict
4
+
5
+ import pandas as pd
6
+ from sklearn.model_selection import train_test_split
7
+ from tqdm.auto import tqdm
8
+
9
+ from mlebench.utils import extract, read_csv
10
+
11
+
12
def _process_and_save_split(
    source_df: pd.DataFrame,
    public_path: Path,
    private_path: Path,
    raw_images_path: Path,
    test_size: float,
    random_state: int,
) -> pd.DataFrame:
    """
    Split ``source_df`` into train/held-out parts, write the standard CSV
    artifacts, and copy the corresponding image files into place.

    Writes ``train.csv`` and ``sample_submission.csv`` under ``public_path``,
    ``answers.csv`` under ``private_path``, and copies each image referenced by
    the split into ``public_path/train_images`` or ``public_path/test_images``.

    Args:
        source_df: The DataFrame to split; must carry 'image' and 'labels' columns.
        public_path: Output directory for participant-visible files.
        private_path: Output directory for the grading answer key.
        raw_images_path: Directory holding the source image files.
        test_size: Fraction of rows assigned to the held-out (answers) part.
        random_state: Seed passed to the splitter for reproducibility.

    Returns:
        The training portion of the split.
    """
    # Make sure both output roots exist before writing anything.
    for out_dir in (public_path, private_path):
        out_dir.mkdir(exist_ok=True, parents=True)

    new_train, answers = train_test_split(
        source_df, test_size=test_size, random_state=random_state
    )

    # The sample submission predicts the constant label "healthy" for every
    # held-out image.
    submission_df = answers.copy()
    submission_df["labels"] = "healthy"

    # Sanity checks before anything hits disk.
    assert len(answers) == len(submission_df), "Answers and submission should have the same length"
    overlap = set(new_train["image"]) & set(answers["image"])
    assert not overlap, "new_train and answers should not share any image"
    assert {"image", "labels"} <= set(new_train.columns), "Train DataFrame must have 'image' and 'labels' columns"
    assert {"image", "labels"} <= set(submission_df.columns), "Sample submission DataFrame must have 'image' and 'labels' columns"
    assert len(new_train) + len(answers) == len(source_df), "The combined length of new_train and answers should equal the length of the source dataframe"

    # Standard filenames expected by the grader / participants.
    answers.to_csv(private_path / "answers.csv", index=False)
    new_train.to_csv(public_path / "train.csv", index=False)
    submission_df.to_csv(public_path / "sample_submission.csv", index=False)

    (public_path / "test_images").mkdir(exist_ok=True)
    (public_path / "train_images").mkdir(exist_ok=True)

    # Copy images for each side of the split into its own subdirectory.
    copy_jobs = (
        (new_train, "train_images", "Copying Train Images"),
        (answers, "test_images", "Copying Test Images"),
    )
    for frame, subdir, action in copy_jobs:
        for file_id in tqdm(frame["image"], desc=f"{action} to {public_path.name}"):
            shutil.copyfile(
                src=raw_images_path / str(file_id),
                dst=public_path / subdir / str(file_id),
            )

    # Verify the copies landed (counts assume .jpg filenames in the 'image'
    # column — presumably true for this competition; confirm against raw data).
    train_copied = len(list(public_path.glob("train_images/*.jpg")))
    assert train_copied == len(new_train), f"Public train images in {public_path.name} should have the same number of images as the train DataFrame"
    test_copied = len(list(public_path.glob("test_images/*.jpg")))
    assert test_copied == len(answers), f"Public test images in {public_path.name} should have the same number of images as the answers DataFrame"

    return new_train
92
+
93
+
94
def prepare(raw: Path, public: Path, private: Path):
    """
    Build the competition split plus a parallel validation split.

    The main split goes to ``public``/``private``; a second split, carved out
    of the main split's training rows, goes to sibling ``public_val``/
    ``private_val`` directories.
    """
    # Validation split lives next to the main output directories.
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    full_train = read_csv(raw / "train.csv")
    images_dir = raw / "train_images"

    # Step 1: the original competition split (80/20). Outputs under `public`
    # and `private` match the original script exactly; the return value is the
    # 80% training portion.
    competition_train_df = _process_and_save_split(
        source_df=full_train,
        public_path=public,
        private_path=private,
        raw_images_path=images_dir,
        test_size=0.2,
        random_state=0,
    )

    # Step 2: re-split the training portion for validation. Taking 25% of the
    # 80% pool yields a validation set the same size as the original test set
    # (0.25 * 0.8 = 0.2 of the full data).
    _process_and_save_split(
        source_df=competition_train_df,
        public_path=public_val,
        private_path=private_val,
        raw_images_path=images_dir,
        test_size=0.25,
        random_state=0,
    )
@@ -0,0 +1,39 @@
1
+ import pandas as pd
2
+ from sklearn.metrics import f1_score
3
+
4
+ from mlebench.grade_helpers import InvalidSubmissionError
5
+
6
+
7
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
    """
    Validate a submission against the answers and align them for scoring.

    Both frames must carry "file" and "species" columns; rows are matched by
    sorting on "file" before the labels are extracted.

    Returns:
        Tuple (y_true, y_pred): species labels from answers and submission.

    Raises:
        InvalidSubmissionError: if the submission is missing a column, has a
            different length, or its file names do not match the answers.
    """
    # Answer checks — answers are produced by us, so plain asserts suffice.
    assert "file" in answers.columns, "Answers must have 'file' column"
    assert "species" in answers.columns, "Answers must have 'species' column"

    # Submission checks — user-supplied, so raise a gradeable error instead.
    for required in ("file", "species"):
        if required not in submission.columns:
            raise InvalidSubmissionError(f"Submission must have '{required}' column")
    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Submission length {len(submission)} != answers length {len(answers)}"
        )

    # Align rows by file name so labels line up positionally.
    submission_sorted = submission.sort_values("file")
    answers_sorted = answers.sort_values("file")
    if (submission_sorted["file"].values != answers_sorted["file"].values).any():
        raise InvalidSubmissionError("Submission and answers have mismatched 'file' columns")

    return answers_sorted["species"].tolist(), submission_sorted["species"].tolist()
35
+
36
+
37
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score the submission with micro-averaged F1 over species labels."""
    labels_true, labels_pred = prepare_for_metric(submission, answers)
    return f1_score(y_true=labels_true, y_pred=labels_pred, average="micro")
@@ -0,0 +1,91 @@
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+
5
+ import pandas as pd
6
+ from sklearn.model_selection import train_test_split
7
+ from tqdm import tqdm
8
+
9
+ from mlebench.utils import extract, read_csv
10
+
11
+
12
def create_dataframe_from_directory(directory: str) -> pd.DataFrame:
    """
    Build a DataFrame describing a directory of labelled images.

    Args:
        directory (str): Path to a directory whose subdirectories are class
            labels, each containing `.png` image files.

    Returns:
        pd.DataFrame: Two columns: 'file' (the image file name) and 'species'
        (the name of the subdirectory the image was found in).
    """
    records = []
    for species in sorted(os.listdir(directory)):  # sorted for determinism
        species_dir = os.path.join(directory, species)
        if not os.path.isdir(species_dir):
            continue
        records.extend(
            {"file": image_name, "species": species}
            for image_name in sorted(os.listdir(species_dir))  # sorted for determinism
            if image_name.endswith(".png")
        )
    return pd.DataFrame(records)
31
+
32
+
33
def prepare(raw: Path, public: Path, private: Path):
    """
    Splits the data in raw into public and private datasets with appropriate test/train splits.

    Args:
        raw: Directory containing the original `train/<species>/<file>.png` images.
        public: Output directory visible to agents (train/ and test/ images,
            sample_submission.csv).
        private: Output directory used only for grading (answers.csv).
    """

    # Directory containing the training images
    train_dir = raw / "train"
    old_train = create_dataframe_from_directory(train_dir)
    # Ratio chosen to reproduce the original competition's test-set size.
    test_ratio = 0.14  # 794/(4750+794) = 0.14
    train_df, test_df = train_test_split(old_train, test_size=test_ratio, random_state=0)

    # Create a sample submission file (constant placeholder label for every row)
    submission_df = test_df.copy()
    submission_df["species"] = "Sugar beet"

    # Checks on the split before anything is written to disk
    assert len(test_df) == len(submission_df), "Answers and submission should have the same length"
    assert not set(train_df["file"]).intersection(
        set(test_df["file"])
    ), "new_train and answers should not share any image"
    assert (
        "file" in train_df.columns and "species" in train_df.columns
    ), "Train DataFrame must have 'file' and 'species' columns"
    assert (
        "file" in submission_df.columns and "species" in submission_df.columns
    ), "Sample submission DataFrame must have 'file' and 'species' columns"
    assert len(train_df) + len(test_df) == len(
        old_train
    ), "The combined length of new_train and answers should equal the length of old_train"

    # Write CSVs: answers go to private (grading), sample submission to public
    test_df.to_csv(private / "answers.csv", index=False)
    submission_df.to_csv(public / "sample_submission.csv", index=False)

    # Copy files (assumes `public` itself already exists)
    (public / "test").mkdir(exist_ok=True)
    (public / "train").mkdir(exist_ok=True)

    # Create nested folder structure for train (one subfolder per species label)
    for species in train_df["species"].unique():
        (public / "train" / species).mkdir(parents=True, exist_ok=True)

    # Train images keep their species subfolder (labels stay visible)
    for _, row in tqdm(train_df.iterrows(), desc="Copying Train Images", total=len(train_df)):
        src_path = train_dir / row["species"] / row["file"]
        dst_path = public / "train" / row["species"] / row["file"]
        shutil.copyfile(src=src_path, dst=dst_path)

    # Test images are flattened into test/ so their labels are hidden
    for _, row in tqdm(test_df.iterrows(), desc="Copying Test Images", total=len(test_df)):
        src_path = train_dir / row["species"] / row["file"]
        dst_path = public / "test" / row["file"]
        shutil.copyfile(src=src_path, dst=dst_path)

    # Checks on the copied files
    assert len(list(public.glob("train/**/*.png"))) == len(
        train_df
    ), f"Public train images should have the same number of images as the train DataFrame: number of files {len(list(public.glob('train/**/*.png')))} != len(train_df)={len(train_df)}"
    assert len(list(public.glob("test/*.png"))) == len(
        test_df
    ), f"Public test images should have the same number of images as the answers DataFrame: number of files {len(list(public.glob('test/*.png')))} != len(test_df)={len(test_df)}"
@@ -0,0 +1,158 @@
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+
5
+ import pandas as pd
6
+ from sklearn.model_selection import train_test_split
7
+ from tqdm import tqdm
8
+
9
+ from mlebench.utils import extract, read_csv
10
+
11
+
12
def create_dataframe_from_directory(directory: str) -> pd.DataFrame:
    """
    Build a DataFrame describing a directory of labelled images.

    Args:
        directory (str): Path to a directory whose subdirectories are class
            labels, each containing `.png` image files.

    Returns:
        pd.DataFrame: Two columns: 'file' (the image file name) and 'species'
        (the name of the subdirectory the image was found in).
    """
    records = []
    for species in sorted(os.listdir(directory)):  # sorted for determinism
        species_dir = os.path.join(directory, species)
        if not os.path.isdir(species_dir):
            continue
        records.extend(
            {"file": image_name, "species": species}
            for image_name in sorted(os.listdir(species_dir))  # sorted for determinism
            if image_name.endswith(".png")
        )
    return pd.DataFrame(records)
31
+
32
+
33
def _process_split(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    public_path: Path,
    private_path: Path,
    source_images_dir: Path,
):
    """
    Helper function to process a train/test split and write all necessary files and folders.
    This function creates the public and private directories, generates CSVs, and copies image files.

    Args:
        train_df: Rows ('file', 'species') that become the visible training set.
        test_df: Rows that become the hidden test set / grading answers.
        public_path: Destination for agent-visible files (train/, test/,
            sample_submission.csv). Created if missing.
        private_path: Destination for the grading key (answers.csv). Created if missing.
        source_images_dir: Directory holding the original `<species>/<file>.png` images.
    """
    # Ensure destination directories exist
    public_path.mkdir(exist_ok=True)
    private_path.mkdir(exist_ok=True)

    # Create a sample submission file (constant placeholder label for every row)
    submission_df = test_df.copy()
    submission_df["species"] = "Sugar beet"

    # Checks on the inputs before anything is written to disk
    assert len(test_df) == len(submission_df), "Answers and submission should have the same length"
    assert not set(train_df["file"]).intersection(
        set(test_df["file"])
    ), "new_train and answers should not share any image"
    assert (
        "file" in train_df.columns and "species" in train_df.columns
    ), "Train DataFrame must have 'file' and 'species' columns"
    assert (
        "file" in submission_df.columns and "species" in submission_df.columns
    ), "Sample submission DataFrame must have 'file' and 'species' columns"

    # Write CSVs: answers go to private (grading), sample submission to public
    test_df.to_csv(private_path / "answers.csv", index=False)
    submission_df.to_csv(public_path / "sample_submission.csv", index=False)

    # Prepare image directories
    public_test_images_path = public_path / "test"
    public_train_images_path = public_path / "train"
    public_test_images_path.mkdir(exist_ok=True)
    public_train_images_path.mkdir(exist_ok=True)

    # Create nested folder structure for train (one subfolder per species label)
    for species in train_df["species"].unique():
        (public_train_images_path / species).mkdir(parents=True, exist_ok=True)

    # Use public path name for progress bar description
    desc_prefix = public_path.name.capitalize()

    # Train images keep their species subfolder (labels stay visible)
    for _, row in tqdm(
        train_df.iterrows(), desc=f"Copying {desc_prefix} Train Images", total=len(train_df)
    ):
        src_path = source_images_dir / row["species"] / row["file"]
        dst_path = public_train_images_path / row["species"] / row["file"]
        shutil.copyfile(src=src_path, dst=dst_path)

    # Test images are flattened into test/ so their labels are hidden
    for _, row in tqdm(
        test_df.iterrows(), desc=f"Copying {desc_prefix} Test Images", total=len(test_df)
    ):
        src_path = source_images_dir / row["species"] / row["file"]
        dst_path = public_test_images_path / row["file"]
        shutil.copyfile(src=src_path, dst=dst_path)

    # Final checks on copied files
    assert len(list(public_train_images_path.glob("**/*.png"))) == len(
        train_df
    ), f"Public train images should have the same number of images as the train DataFrame: number of files {len(list(public_train_images_path.glob('**/*.png')))} != len(train_df)={len(train_df)}"
    assert len(list(public_test_images_path.glob("*.png"))) == len(
        test_df
    ), f"Public test images should have the same number of images as the answers DataFrame: number of files {len(list(public_test_images_path.glob('*.png')))} != len(test_df)={len(test_df)}"
102
+
103
+
104
def prepare(raw: Path, public: Path, private: Path):
    """
    Splits the data in raw into public and private datasets with appropriate test/train splits.
    It then creates a secondary validation split (public_val, private_val) from the main training data.
    """
    # All source images live under raw/train/<species>/<file>.png
    images_dir = raw / "train"
    full_df = create_dataframe_from_directory(images_dir)
    test_ratio = 0.14  # 794/(4750+794) = 0.14

    # --- 1. Original split: main train and test sets ---
    main_train, main_test = train_test_split(full_df, test_size=test_ratio, random_state=0)
    assert len(main_train) + len(main_test) == len(
        full_df
    ), "The combined length of new_train and answers should equal the length of old_train"

    # Write the unmodified competition output to public/ and private/.
    _process_split(
        train_df=main_train,
        test_df=main_test,
        public_path=public,
        private_path=private,
        source_images_dir=images_dir,
    )

    # --- 2. Validation split: carve a second test set out of the training data ---
    # Ratio chosen so the validation set is approximately as large as the
    # original test set. Same random_state for reproducibility.
    val_ratio = len(main_test) / len(main_train)
    inner_train, inner_test = train_test_split(main_train, test_size=val_ratio, random_state=0)
    assert (
        len(inner_train) + len(inner_test) == len(main_train)
    ), "The combined length of train_val_df and test_val_df should equal the length of train_df"

    # Reusing the same helper guarantees an identical file/directory layout
    # under the parallel *_val directories.
    _process_split(
        train_df=inner_train,
        test_df=inner_test,
        public_path=public.parent / "public_val",
        private_path=private.parent / "private_val",
        source_images_dir=images_dir,
    )
@@ -0,0 +1,52 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.metrics import mean_squared_error
4
+
5
+ from mlebench.grade_helpers import InvalidSubmissionError
6
+
7
+
8
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
    """
    Validate a submission and align it with the answers for RMSE scoring.

    Returns:
        Tuple (y_true, y_pred) of numpy arrays sorted by 'id'.

    Raises:
        InvalidSubmissionError: on length mismatch, missing columns,
            mismatched ids, non-numeric predictions, or NaNs.
    """
    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Submission length ({len(submission)}) != answers length ({len(answers)})"
        )

    if "id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an 'id' column")

    if "MedHouseVal" not in submission.columns:
        raise InvalidSubmissionError("Submission must have a 'MedHouseVal' column")

    # Align rows positionally by sorting both frames on id.
    submission = submission.sort_values("id").reset_index(drop=True)
    answers = answers.sort_values("id").reset_index(drop=True)

    if (submission["id"] != answers["id"]).any():
        raise InvalidSubmissionError("Submission and answers id mismatch")

    # Pull out the target column from each frame.
    y_pred = submission["MedHouseVal"].values
    y_true = answers["MedHouseVal"].values

    # Predictions must coerce cleanly to float and contain no NaNs.
    try:
        y_pred = y_pred.astype(float)
    except (ValueError, TypeError):
        raise InvalidSubmissionError("Predictions must be numeric")

    if np.isnan(y_pred).any():
        raise InvalidSubmissionError("Predictions cannot contain NaN values")

    return y_true, y_pred
43
+
44
+
45
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Root-mean-squared error of the submission against the answers."""
    true_vals, pred_vals = prepare_for_metric(submission, answers)
    # RMSE = sqrt of the mean squared error
    return np.sqrt(mean_squared_error(true_vals, pred_vals))
@@ -0,0 +1,25 @@
1
+ from pathlib import Path
2
+ import pandas as pd
3
+
4
+
5
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare playground-series-s3e1 dataset.

    Copies train/test/sample-submission CSVs into `public` for agents and
    the held-out answers into `private` for grading.
    """
    # Load every CSV from the raw directory.
    train = pd.read_csv(raw / "train.csv")
    test = pd.read_csv(raw / "test.csv")
    sample_submission = pd.read_csv(raw / "sample_submission.csv")
    test_answer = pd.read_csv(raw / "test_answer.csv")

    # Public files (visible to agents)
    for frame, name in ((train, "train"), (test, "test"), (sample_submission, "sample_submission")):
        frame.to_csv(public / f"{name}.csv", index=False)

    # Private files (for grading)
    test_answer.to_csv(private / "test.csv", index=False)

    # Validation checks
    assert len(test_answer) == len(sample_submission), \
        f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
@@ -0,0 +1,55 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.metrics import mean_squared_log_error
4
+
5
+ from mlebench.grade_helpers import InvalidSubmissionError
6
+
7
+
8
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
    """
    Validate a submission and align it with the answers for RMSLE scoring.

    Returns:
        Tuple (y_true, y_pred) of numpy arrays sorted by 'id'.

    Raises:
        InvalidSubmissionError: on length mismatch, missing columns,
            mismatched ids, non-numeric/NaN/negative predictions.
    """
    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Submission length ({len(submission)}) != answers length ({len(answers)})"
        )

    if "id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an 'id' column")

    if "cost" not in submission.columns:
        raise InvalidSubmissionError("Submission must have a 'cost' column")

    # Align rows positionally by sorting both frames on id.
    submission = submission.sort_values("id").reset_index(drop=True)
    answers = answers.sort_values("id").reset_index(drop=True)

    if (submission["id"] != answers["id"]).any():
        raise InvalidSubmissionError("Submission and answers id mismatch")

    # Pull out the target column from each frame.
    y_pred = submission["cost"].values
    y_true = answers["cost"].values

    # Predictions must coerce cleanly to float; RMSLE also forbids
    # NaNs and negative values (log of a negative is undefined).
    try:
        y_pred = y_pred.astype(float)
    except (ValueError, TypeError):
        raise InvalidSubmissionError("Predictions must be numeric")

    if np.isnan(y_pred).any():
        raise InvalidSubmissionError("Predictions cannot contain NaN values")

    if (y_pred < 0).any():
        raise InvalidSubmissionError("Predictions cannot be negative")

    return y_true, y_pred
46
+
47
+
48
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Root-mean-squared log error of the submission against the answers."""
    true_vals, pred_vals = prepare_for_metric(submission, answers)
    # RMSLE = sqrt of the mean squared log error
    return np.sqrt(mean_squared_log_error(true_vals, pred_vals))
@@ -0,0 +1,25 @@
1
+ from pathlib import Path
2
+ import pandas as pd
3
+
4
+
5
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare playground-series-s3e11 dataset.

    Copies train/test/sample-submission CSVs into `public` for agents and
    the held-out answers into `private` for grading.
    """
    # Load every CSV from the raw directory.
    train = pd.read_csv(raw / "train.csv")
    test = pd.read_csv(raw / "test.csv")
    sample_submission = pd.read_csv(raw / "sample_submission.csv")
    test_answer = pd.read_csv(raw / "test_answer.csv")

    # Public files (visible to agents)
    for frame, name in ((train, "train"), (test, "test"), (sample_submission, "sample_submission")):
        frame.to_csv(public / f"{name}.csv", index=False)

    # Private files (for grading)
    test_answer.to_csv(private / "test.csv", index=False)

    # Validation checks
    assert len(test_answer) == len(sample_submission), \
        f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
@@ -0,0 +1,39 @@
1
+ import pandas as pd
2
+ from sklearn.metrics import roc_auc_score
3
+
4
+ from mlebench.grade_helpers import InvalidSubmissionError
5
+
6
+
7
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> dict:
    """
    Validate the submission and assemble keyword arguments for roc_auc_score.

    Returns:
        dict with 'y_true' and 'y_score' arrays (columns EC1, EC2), both
        sorted by 'id' so rows correspond.

    Raises:
        InvalidSubmissionError: on missing columns, length mismatch, or
            mismatched ids.
    """
    if "id" not in submission.columns or "id" not in answers.columns:
        raise InvalidSubmissionError(
            "Both submission and answers DataFrames must contain an 'id' column."
        )

    if "EC1" not in submission.columns or "EC2" not in submission.columns:
        raise InvalidSubmissionError("Submission DataFrame must contain 'EC1' and 'EC2' columns.")

    # Answers are produced by us, so a plain assert is enough here.
    assert (
        "EC1" in answers.columns and "EC2" in answers.columns
    ), "Answers DataFrame must contain 'EC1' and 'EC2' columns."

    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission must be the same length as the answers.")

    # Align rows positionally by sorting both frames on id.
    answers_by_id = answers.sort_values("id")
    submission_by_id = submission.sort_values("id")

    if (submission_by_id["id"].values != answers_by_id["id"].values).any():
        raise InvalidSubmissionError("Submission and answers have mismatched 'id' columns")

    target_cols = ["EC1", "EC2"]
    return {
        "y_true": answers_by_id[target_cols].to_numpy(),
        "y_score": submission_by_id[target_cols].to_numpy(),
    }
34
+
35
+
36
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Macro-averaged ROC AUC (per-class metric averaged), as outlined in the comp description."""
    inputs = prepare_for_metric(submission, answers)
    return roc_auc_score(inputs["y_true"], inputs["y_score"], average="macro")
@@ -0,0 +1,36 @@
1
+ from pathlib import Path
2
+
3
+ from sklearn.model_selection import train_test_split
4
+
5
+ from mlebench.utils import read_csv
6
+
7
+
8
def prepare(raw: Path, public: Path, private: Path):
    """
    Split the raw train set into a public train/test pair and a private
    answer key, plus a constant-probability sample submission.
    """
    # 90/10 split of the raw training data.
    full_train = read_csv(raw / "train.csv")
    new_train, new_test = train_test_split(full_train, test_size=0.1, random_state=0)

    # Sample submission: constant 0.5 probability for both graded targets,
    # with every non-submission column dropped.
    sample_submission = new_test.copy()
    sample_submission["EC1"] = 0.5
    sample_submission["EC2"] = 0.5
    keep = ["id", "EC1", "EC2"]
    sample_submission.drop(sample_submission.columns.difference(keep), axis=1, inplace=True)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    # Private answer key keeps all target columns.
    new_test.to_csv(private / "test.csv", index=False)

    # Public train keeps everything; public test has every target stripped.
    new_train.to_csv(public / "train.csv", index=False)
    new_test.drop(["EC1", "EC2", "EC3", "EC4", "EC5", "EC6"], axis=1, inplace=True)
    new_test.to_csv(public / "test.csv", index=False)

    # Checks
    assert new_test.shape[1] == 32, "Public test set should have 32 columns"
    assert new_train.shape[1] == 38, "Public train set should have 38 columns"
    assert len(new_train) + len(new_test) == len(
        full_train
    ), "Length of new_train and new_test should equal length of old_train"