dslighting 1.7.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,113 @@
1
+ import csv
2
+ import zipfile
3
+ from pathlib import Path
4
+
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ from mlebench.utils import compress, extract, read_csv
8
+
9
+
10
def prepare(raw: Path, public: Path, private: Path):
    """
    Build the public/private data for the Russian text-normalization task.

    Ten percent of the sentences in the original ``ru_train.csv`` are held out
    as the new test set. The split is made on ``sentence_id`` so that no
    sentence appears in both train and test, and the ids of each side are then
    renumbered from 0.

    Args:
        raw: Directory holding the original ``ru_*.csv.zip`` archives; they are
            passed to ``extract`` with ``raw`` itself as the destination.
        public: Directory that receives the zipped train set, test set and
            sample submission.
        private: Directory that receives ``answers.csv`` and
            ``sample_submission.csv``.
    """
    # Extract
    extract(raw / "ru_test_2.csv.zip", raw)  # We only use the 2nd stage test set
    extract(raw / "ru_train.csv.zip", raw)
    extract(raw / "ru_sample_submission_2.csv.zip", raw)

    # Create train and test splits from train set
    old_train = read_csv(raw / "ru_train.csv")
    # We split so that we don't share any sentence_ids between train and test
    # This gives us len(new_train) = 9515325 and len(answers) = 1059191
    unique_sentence_ids = old_train["sentence_id"].unique()
    train_sentence_ids, test_sentence_ids = train_test_split(
        unique_sentence_ids, test_size=0.1, random_state=0
    )
    # Take explicit copies: the "sentence_id" column of both frames is
    # reassigned below, and writing into a bare selection of old_train is the
    # pattern pandas reports with SettingWithCopyWarning.
    new_train = old_train[old_train["sentence_id"].isin(train_sentence_ids)].copy()
    answers = old_train[old_train["sentence_id"].isin(test_sentence_ids)].copy()
    assert set(new_train["sentence_id"]).isdisjoint(
        set(answers["sentence_id"])
    ), "sentence_id is not disjoint between train and test sets"

    # "sentence_id" counts need to be reset for new_train and answers
    new_train_id_mapping = {
        old_id: new_id for new_id, old_id in enumerate(new_train["sentence_id"].unique())
    }
    new_train["sentence_id"] = new_train["sentence_id"].map(new_train_id_mapping)
    answers_id_mapping = {
        old_id: new_id for new_id, old_id in enumerate(answers["sentence_id"].unique())
    }
    answers["sentence_id"] = answers["sentence_id"].map(answers_id_mapping)

    # Create new test set (the labels "after" and "class" are withheld)
    new_test = answers.drop(["after", "class"], axis=1).copy()

    # Reformat answers to match sample submission format
    answers = answers[["sentence_id", "token_id", "after"]].copy()
    answers["id"] = answers["sentence_id"].astype(str) + "_" + answers["token_id"].astype(str)
    answers = answers[["id", "after"]]

    # Create sample submission: it simply echoes the "before" token as "after"
    sample_submission = new_test[["sentence_id", "token_id", "before"]].copy()
    sample_submission["id"] = (
        sample_submission["sentence_id"].astype(str)
        + "_"
        + sample_submission["token_id"].astype(str)
    )
    sample_submission["after"] = sample_submission["before"]
    sample_submission = sample_submission[["id", "after"]]

    # Checks
    assert new_train.columns.tolist() == [
        "sentence_id",
        "token_id",
        "class",
        "before",
        "after",
    ], f"new_train.columns.tolist() == {new_train.columns.tolist()}"
    assert new_test.columns.tolist() == [
        "sentence_id",
        "token_id",
        "before",
    ], f"new_test.columns.tolist() == {new_test.columns.tolist()}"
    assert sample_submission.columns.tolist() == [
        "id",
        "after",
    ], f"sample_submission.columns.tolist() == {sample_submission.columns.tolist()}"
    assert answers.columns.tolist() == [
        "id",
        "after",
    ], f"answers.columns.tolist() == {answers.columns.tolist()}"
    assert len(new_test) + len(new_train) == len(
        old_train
    ), f"New train and test sets do not sum to old train set, got {len(new_test) + len(new_train)} and {len(old_train)}"

    # Write CSVs; every file uses the same quoting options
    csv_options = {"index": False, "quotechar": '"', "quoting": csv.QUOTE_NONNUMERIC}
    answers.to_csv(private / "answers.csv", **csv_options)
    sample_submission.to_csv(private / "sample_submission.csv", **csv_options)
    new_train.to_csv(public / "ru_train.csv", **csv_options)
    new_test.to_csv(public / "ru_test_2.csv", **csv_options)
    sample_submission.to_csv(public / "ru_sample_submission_2.csv", **csv_options)

    # Zip up the public CSVs, then remove the unzipped files. The CSVs are only
    # deleted once every archive has been written.
    public_csv_names = ("ru_train.csv", "ru_test_2.csv", "ru_sample_submission_2.csv")
    for csv_name in public_csv_names:
        with zipfile.ZipFile(public / f"{csv_name}.zip", "w") as zipf:
            zipf.write(public / csv_name, arcname=csv_name)
    for csv_name in public_csv_names:
        (public / csv_name).unlink()
@@ -0,0 +1,165 @@
1
+ import csv
2
+ import zipfile
3
+ from pathlib import Path
4
+
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ from mlebench.utils import compress, extract, read_csv
8
+
9
+
10
def _process_split(input_df, public_path, private_path, test_size, random_state):
    """
    Split ``input_df`` by sentence into train/test parts and write them to disk.

    The split is made over the unique ``sentence_id`` values, so a sentence never
    ends up on both sides. Sentence ids are then renumbered from 0 within each
    part, in order of first appearance.

    Files written (the function does not create the directories itself):
        private_path: ``answers.csv`` and ``sample_submission.csv``, both with
            columns ``id`` (``"<sentence_id>_<token_id>"``) and ``after``. In the
            sample submission ``after`` is simply a copy of ``before``.
        public_path: ``ru_train.csv.zip``, ``ru_test_2.csv.zip`` and
            ``ru_sample_submission_2.csv.zip``. The intermediate CSV files are
            deleted once they have been zipped.

    Args:
        input_df: The DataFrame to be split. The checks below expect the columns
            ``sentence_id``, ``token_id``, ``class``, ``before``, ``after``.
        public_path: The directory to save public artifacts (train data, test features).
        private_path: The directory to save private artifacts (test labels).
        test_size: The proportion of sentences to allocate to the test split.
        random_state: The seed used by the random number generator.

    Returns:
        The newly created training DataFrame (with renumbered ``sentence_id``),
        which can be used as the input of a subsequent split.

    Raises:
        AssertionError: If one of the sanity checks on the produced frames fails.
    """
    # Split over unique sentence_ids so that train and test never share a sentence
    unique_sentence_ids = input_df["sentence_id"].unique()
    train_sentence_ids, test_sentence_ids = train_test_split(
        unique_sentence_ids, test_size=test_size, random_state=random_state
    )

    # Take explicit copies: the sentence_id column is reassigned below, and doing
    # that on a filtered slice of input_df triggers pandas' chained-assignment warning.
    new_train = input_df[input_df["sentence_id"].isin(train_sentence_ids)].copy()
    answers = input_df[input_df["sentence_id"].isin(test_sentence_ids)].copy()
    assert set(new_train["sentence_id"]).isdisjoint(
        set(answers["sentence_id"])
    ), "sentence_id is not disjoint between train and test sets"

    # "sentence_id" counts need to be reset for new_train and answers
    new_train_id_mapping = {
        old_id: new_id for new_id, old_id in enumerate(new_train["sentence_id"].unique())
    }
    new_train["sentence_id"] = new_train["sentence_id"].map(new_train_id_mapping)
    answers_id_mapping = {
        old_id: new_id for new_id, old_id in enumerate(answers["sentence_id"].unique())
    }
    answers["sentence_id"] = answers["sentence_id"].map(answers_id_mapping)

    # Create new test set: the features only, without the labels
    new_test = answers.drop(["after", "class"], axis=1).copy()

    # Reformat answers to match sample submission format
    answers = answers[["sentence_id", "token_id", "after"]].copy()
    answers["id"] = answers["sentence_id"].astype(str) + "_" + answers["token_id"].astype(str)
    answers = answers[["id", "after"]]

    # Create sample submission: predicts that every token is left unchanged
    sample_submission = new_test[["sentence_id", "token_id", "before"]].copy()
    sample_submission["id"] = (
        sample_submission["sentence_id"].astype(str)
        + "_"
        + sample_submission["token_id"].astype(str)
    )
    sample_submission["after"] = sample_submission["before"]
    sample_submission = sample_submission[["id", "after"]]

    # Checks
    assert new_train.columns.tolist() == [
        "sentence_id",
        "token_id",
        "class",
        "before",
        "after",
    ], f"new_train.columns.tolist() == {new_train.columns.tolist()}"
    assert new_test.columns.tolist() == [
        "sentence_id",
        "token_id",
        "before",
    ], f"new_test.columns.tolist() == {new_test.columns.tolist()}"
    assert sample_submission.columns.tolist() == [
        "id",
        "after",
    ], f"sample_submission.columns.tolist() == {sample_submission.columns.tolist()}"
    assert answers.columns.tolist() == [
        "id",
        "after",
    ], f"answers.columns.tolist() == {answers.columns.tolist()}"
    assert len(new_test) + len(new_train) == len(
        input_df
    ), f"New train and test sets do not sum to old train set, got {len(new_test) + len(new_train)} and {len(input_df)}"

    # Write CSVs (same quoting options for every file)
    csv_options = {"index": False, "quotechar": '"', "quoting": csv.QUOTE_NONNUMERIC}
    answers.to_csv(private_path / "answers.csv", **csv_options)
    sample_submission.to_csv(private_path / "sample_submission.csv", **csv_options)
    new_train.to_csv(public_path / "ru_train.csv", **csv_options)
    new_test.to_csv(public_path / "ru_test_2.csv", **csv_options)
    sample_submission.to_csv(public_path / "ru_sample_submission_2.csv", **csv_options)

    # Zip up the public files, then remove the plain CSVs so only the archives remain
    public_csv_names = ("ru_train.csv", "ru_test_2.csv", "ru_sample_submission_2.csv")
    for csv_name in public_csv_names:
        with zipfile.ZipFile(public_path / f"{csv_name}.zip", "w") as zipf:
            zipf.write(public_path / csv_name, arcname=csv_name)
    for csv_name in public_csv_names:
        (public_path / csv_name).unlink()

    return new_train
126
+
127
+
128
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare the original train/test split plus an additional train/validation split.

    The raw training data is split once into ``public``/``private``. The training
    part returned by that split is then split a second time, with the same
    proportion and seed, into the sibling directories ``public_val`` and
    ``private_val``.
    """
    # The validation outputs live next to the regular output directories
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # Make sure every output directory exists before anything is written
    for out_dir in (public, private, public_val, private_val):
        out_dir.mkdir(parents=True, exist_ok=True)

    # Unpack the raw archives in place (we only use the 2nd stage test set)
    for archive_name in (
        "ru_test_2.csv.zip",
        "ru_train.csv.zip",
        "ru_sample_submission_2.csv.zip",
    ):
        extract(raw / archive_name, raw)

    raw_train = read_csv(raw / "ru_train.csv")

    # Both splits share the same proportion and the same random seed
    split_options = {"test_size": 0.1, "random_state": 0}

    # First split: produces the original public/private outputs, which must not change.
    train_after_test_split = _process_split(
        input_df=raw_train,
        public_path=public,
        private_path=private,
        **split_options,
    )

    # Second split: re-split the training part of the first split with the exact
    # same logic; the results go to the *_val directories.
    _process_split(
        input_df=train_after_test_split,
        public_path=public_val,
        private_path=private_val,
        **split_options,
    )
@@ -0,0 +1,144 @@
1
+ import numpy as np
2
+ from numpy import ndarray
3
+ from pandas import DataFrame
4
+
5
+ from mlebench.competitions.utils import rle_decode
6
+ from mlebench.grade_helpers import InvalidSubmissionError
7
+
8
+ IMG_HEIGHT = 101
9
+ IMG_WIDTH = 101
10
+
11
+
12
def grade(submission: DataFrame, answers: DataFrame) -> float:
    """
    Score a submission of run-length-encoded masks against the answers.

    Both frames must have exactly the columns ``id`` and ``rle_mask``. Missing
    values are treated as empty strings. Each mask is decoded with ``rle_decode``
    to an ``IMG_HEIGHT`` x ``IMG_WIDTH`` image and scored with ``iou_metric``;
    the result is the mean score over all rows.

    Args:
        submission: The submitted predictions.
        answers: The ground-truth masks.

    Returns:
        The mean of the per-image ``iou_metric`` scores.

    Raises:
        InvalidSubmissionError: If the submission has the wrong columns, the wrong
            number of rows, non-string values, ids that differ from the answers,
            or a mask for which ``rle_decode`` raises an ``AssertionError``.
        AssertionError: If the answers themselves are malformed.
    """

    def _all_strings(frame: DataFrame) -> bool:
        # Element-wise type check without DataFrame.applymap, which is deprecated
        # since pandas 2.1.
        return all(isinstance(value, str) for value in frame.to_numpy().ravel())

    expected_columns = ["id", "rle_mask"]

    assert set(answers.columns) == set(
        expected_columns
    ), f"Expected answers to have columns {expected_columns}, but got {answers.columns} instead!"

    if set(submission.columns) != set(expected_columns):
        raise InvalidSubmissionError(
            f"Expected submission to have columns {expected_columns}, but got {submission.columns} instead!"
        )

    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Expected submission to have {len(answers)} rows, but got {len(submission)} instead!"
        )

    # An empty mask is read back from CSV as NaN; normalize it to an empty string
    transformed_submission = submission.fillna("")
    transformed_answers = answers.fillna("")

    assert _all_strings(
        transformed_answers
    ), "Expected all items in `answers` to be strings, but found non-string items!"

    if not _all_strings(transformed_submission):
        raise InvalidSubmissionError(
            "Expected all items in `submission` to be strings, but found non-string items!"
        )

    # Sort both frames by id so that rows line up one to one
    sorted_submission = transformed_submission.sort_values(by="id")
    sorted_answers = transformed_answers.sort_values(by="id")

    if (sorted_submission["id"].values != sorted_answers["id"].values).any():
        raise InvalidSubmissionError(
            "Expected submission to have the same IDs as answers, but got a different set of IDs!"
        )

    y_preds_rle = sorted_submission["rle_mask"].values
    y_trues_rle = sorted_answers["rle_mask"].values

    try:
        y_preds = np.array(
            [rle_decode(rle, height=IMG_HEIGHT, width=IMG_WIDTH) for rle in y_preds_rle]
        )
    except AssertionError as e:
        # Keep the original decoding error attached as the cause for debugging
        raise InvalidSubmissionError(f"Error decoding RLE masks: {e}") from e

    y_trues = np.array([rle_decode(rle, height=IMG_HEIGHT, width=IMG_WIDTH) for rle in y_trues_rle])

    scores = [iou_metric(y_true, y_pred) for y_true, y_pred in zip(y_trues, y_preds)]
    score = np.mean(scores)

    return score
65
+
66
+
67
def iou_metric(y_true_in: ndarray, y_pred_in: ndarray) -> float:
    """
    Score one predicted binary mask against its ground-truth mask.

    The score is the precision averaged over the IoU thresholds 0.5, 0.55, ...,
    0.95. Each mask is treated as a single object, so at every threshold the
    precision is 1 when the IoU of the two masks exceeds the threshold and 0
    otherwise. Any non-zero pixel counts as foreground.

    Adapted from https://www.kaggle.com/code/phoenigs/u-net-dropout-augmentation-stratification.

    Args:
        y_true_in: Ground-truth mask.
        y_pred_in: Predicted mask, with the same number of pixels as ``y_true_in``.

    Returns:
        1.0 if both masks are empty, 0.0 if only the ground truth is empty,
        otherwise the mean precision over the thresholds.
    """

    if np.sum(y_true_in) == 0 and np.sum(y_pred_in) == 0:
        return 1.0

    if np.sum(y_true_in) == 0 and np.sum(y_pred_in) > 0:
        return 0.0

    # Binarize so that the histogram bins below are exactly {background, foreground}
    labels = (np.asarray(y_true_in) != 0).astype(np.uint8)
    y_pred = (np.asarray(y_pred_in) != 0).astype(np.uint8)

    true_objects = 2
    pred_objects = 2

    # The bin range is pinned to [0, 1]. With automatic ranges a constant array
    # (e.g. an all-zero prediction) gets the edges [-0.5, 0, 0.5], which puts the
    # background pixels into the foreground bin and gives an empty prediction a
    # non-zero IoU.
    value_range = (0, 1)

    intersection = np.histogram2d(
        labels.flatten(),
        y_pred.flatten(),
        bins=(true_objects, pred_objects),
        range=[value_range, value_range],
    )[0]

    # Compute areas (needed for finding the union between all objects)
    area_true = np.histogram(labels, bins=true_objects, range=value_range)[0]
    area_pred = np.histogram(y_pred, bins=pred_objects, range=value_range)[0]
    area_true = np.expand_dims(area_true, -1)
    area_pred = np.expand_dims(area_pred, 0)

    # Compute union
    union = area_true + area_pred - intersection

    # Exclude background from the analysis
    intersection = intersection[1:, 1:]
    union = union[1:, 1:]
    union[union == 0] = 1e-9  # avoid division by zero

    # Compute the intersection over union
    iou = intersection / union

    # Precision helper function (rows of `iou` are true objects, columns are predictions)
    def precision_at(threshold, iou):
        matches = iou > threshold
        true_positives = np.sum(matches, axis=1) == 1  # Correct objects
        false_positives = np.sum(matches, axis=0) == 0  # Extra (unmatched predicted) objects
        false_negatives = np.sum(matches, axis=1) == 0  # Missed (unmatched true) objects
        tp, fp, fn = np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)
        return tp, fp, fn

    # Loop over IoU thresholds
    prec = []

    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)

        if (tp + fp + fn) > 0:
            p = tp / (tp + fp + fn)
        else:
            p = 0

        prec.append(p)

    return np.mean(prec)
130
+
131
+
132
def iou_metric_batch(y_true_in, y_pred_in):
    """
    Average ``iou_metric`` over a batch of masks.

    The first axis of ``y_true_in`` gives the batch size; ``y_pred_in`` is
    indexed with the same positions.

    Adapted from https://www.kaggle.com/code/phoenigs/u-net-dropout-augmentation-stratification.
    """

    per_sample_scores = [
        iou_metric(y_true_in[idx], y_pred_in[idx]) for idx in range(y_true_in.shape[0])
    ]

    return np.mean(per_sample_scores)
@@ -0,0 +1,158 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ from mlebench.utils import extract, read_csv
7
+
8
+
9
def prepare(raw: Path, public: Path, private: Path) -> None:
    """
    Build the public and private competition data from the raw archive.

    The labelled training data in ``raw/competition_data`` is split 75/25 (fixed
    seed) into a new train and a new test set:

    * ``public/train.csv`` plus ``public/train/images`` and ``public/train/masks``
      hold the new training set.
    * ``public/test/images`` holds the test images, without masks.
    * ``private/test.csv`` holds the test labels.
    * ``public/sample_submission.csv`` has one row per test id with the
      placeholder mask ``"1 1"``.
    * ``public/depths.csv`` is ``raw/depths.csv`` restricted to the ids of the new
      training set.

    Args:
        raw: Directory containing ``competition_data.zip`` and ``depths.csv``.
        public: Output directory for the files visible to participants.
        private: Output directory for the files used for grading.

    Raises:
        AssertionError: If one of the consistency checks on the result fails.
    """

    def _all_strings(frame) -> bool:
        # Element-wise type check without DataFrame.applymap, which is deprecated
        # since pandas 2.1.
        return all(isinstance(value, str) for value in frame.to_numpy().ravel())

    extract(raw / "competition_data.zip", raw)

    old_train = read_csv(raw / "competition_data" / "train.csv")
    old_train = old_train.fillna("")

    # Original ratio is Train set - 4,000 samples; Test set - ~18,000 samples (82% ratio)
    # We use a 0.25 ratio to get the number of test samples into the order of a thousand
    new_train, new_test = train_test_split(old_train, test_size=0.25, random_state=0)

    assert len(new_train) + len(new_test) == len(
        old_train
    ), "Some samples were lost when creating the new train and test sets!"

    new_train_ids = set(new_train["id"])
    new_test_ids = set(new_test["id"])

    assert new_train_ids.isdisjoint(
        new_test_ids
    ), "`id` is not disjoint between train and test sets"

    new_train.sort_values(by="id").to_csv(public / "train.csv", index=False)
    new_test.sort_values(by="id").to_csv(private / "test.csv", index=False)

    old_train_imgs = set((raw / "competition_data" / "train" / "images").glob("*.png"))

    assert len(old_train_imgs) == len(
        old_train
    ), "The number of images in the old train set doesn't match the number of training images!"

    # The file name without its extension is the sample id
    new_train_imgs = set(img for img in old_train_imgs if img.stem in new_train_ids)
    new_test_imgs = set(img for img in old_train_imgs if img.stem in new_test_ids)

    assert new_train_imgs.isdisjoint(
        new_test_imgs
    ), "Images are not disjoint between train and test sets"

    assert len(new_train_imgs) + len(new_test_imgs) == len(
        old_train_imgs
    ), "Some images were lost when creating the new train and test sets!"

    (public / "train" / "images").mkdir(parents=True, exist_ok=True)
    (public / "train" / "masks").mkdir(parents=True, exist_ok=True)

    # Training samples are published with both the image and its mask
    for fpath in new_train_imgs:
        shutil.copyfile(
            src=fpath,
            dst=public / "train" / "images" / fpath.name,
        )

        shutil.copyfile(
            src=raw / "competition_data" / "train" / "masks" / fpath.name,
            dst=public / "train" / "masks" / fpath.name,
        )

    (public / "test" / "images").mkdir(parents=True, exist_ok=True)

    # Test samples are published as images only; their masks stay private
    for fpath in new_test_imgs:
        shutil.copyfile(
            src=fpath,
            dst=public / "test" / "images" / fpath.name,
        )

    sample_submission = new_test.drop(columns=["rle_mask"]).copy()
    sample_submission["rle_mask"] = "1 1"
    sample_submission.sort_values(by="id").to_csv(public / "sample_submission.csv", index=False)

    # Only the depths of the new training ids are published
    old_depths = read_csv(raw / "depths.csv")
    new_depths_mask = old_depths["id"].isin(new_train_ids)
    new_depths = old_depths[new_depths_mask]
    new_depths.sort_values(by="id").to_csv(public / "depths.csv", index=False)

    # Sanity checks

    assert (public / "train.csv").exists(), "`train.csv` doesn't exist!"
    assert (public / "sample_submission.csv").exists(), "`sample_submission.csv` doesn't exist!"
    assert (public / "depths.csv").exists(), "`depths.csv` doesn't exist!"
    assert (public / "train").exists(), "`train` directory doesn't exist!"
    assert (public / "test").exists(), "`test` directory doesn't exist!"
    assert (private / "test.csv").exists(), "`test.csv` doesn't exist!"

    actual_new_train_imgs = set(img.stem for img in (public / "train" / "images").glob("*.png"))
    actual_new_train_masks = set(img.stem for img in (public / "train" / "masks").glob("*.png"))

    assert len(actual_new_train_imgs) == len(
        new_train
    ), "The number of images in the train set doesn't match the number of training images!"

    assert len(actual_new_train_masks) == len(
        new_train
    ), "The number of masks in the train set doesn't match the number of training masks!"

    for new_train_id in new_train["id"]:
        assert (
            public / "train" / "images" / f"{new_train_id}.png"
        ).exists(), f"Expected `{new_train_id}.png` to exist in train images, but it doesn't!"

        assert (
            public / "train" / "masks" / f"{new_train_id}.png"
        ).exists(), f"Expected `{new_train_id}.png` to exist in train masks, but it doesn't!"

    actual_new_test_imgs = set(img.stem for img in (public / "test" / "images").glob("*.png"))

    assert not (
        public / "test" / "masks"
    ).exists(), "Expected `public / test / masks` to not exist, but it does!"

    assert len(actual_new_test_imgs) == len(
        new_test
    ), "The number of images in the test set doesn't match the number of test images!"

    for new_test_id in new_test["id"]:
        assert (
            public / "test" / "images" / f"{new_test_id}.png"
        ).exists(), f"Expected `{new_test_id}.png` to exist in test images, but it doesn't!"

        # Test masks must never be published; the message now matches the check
        assert not (
            public / "test" / "masks" / f"{new_test_id}.png"
        ).exists(), f"Expected `{new_test_id}.png` to not exist in test masks, but it does!"

    assert actual_new_train_imgs.isdisjoint(
        actual_new_test_imgs
    ), "Expected no overlap in images between the new train and test sets, but there is!"

    # Re-read the written files to verify what actually landed on disk
    actual_sample_submission = read_csv(public / "sample_submission.csv")
    actual_new_test = read_csv(private / "test.csv")

    assert len(actual_sample_submission) == len(
        actual_new_test
    ), "The number of samples in the sample submission doesn't match the number of samples in the test set!"

    assert set(actual_sample_submission["id"]) == set(
        actual_new_test["id"]
    ), "The ids in the sample submission don't match the ids in the test set!"

    assert len(actual_new_test_imgs) == len(
        actual_new_test
    ), "The number of images in the test set doesn't match the number of test images!"

    assert (
        set(actual_new_test["id"]) == actual_new_test_imgs
    ), "The ids in the test set don't match the test images!"

    assert _all_strings(new_train), "Not all elements in the DataFrame are strings!"

    assert _all_strings(new_test), "Not all elements in the DataFrame are strings!"