dslighting 1.7.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,213 @@
1
+ import json
2
+ import random
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ from sklearn.model_selection import train_test_split
8
+ from tqdm import tqdm
9
+
10
+ from mlebench.utils import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
+ def _organize_data_by_category(metadata: dict) -> dict:
16
+ """Organizes image and annotation data by category ID."""
17
+ annotations_images_by_category = {}
18
+ for annotation, image in list(zip(metadata["annotations"], metadata["images"])):
19
+ assert annotation["image_id"] == image["image_id"]
20
+ category_id = annotation["category_id"]
21
+ if category_id not in annotations_images_by_category:
22
+ annotations_images_by_category[category_id] = []
23
+ annotations_images_by_category[category_id].append(
24
+ {
25
+ "annotation": annotation,
26
+ "image": image,
27
+ }
28
+ )
29
+ return annotations_images_by_category
30
+
31
+
32
def _split_data(data_by_category: dict, test_size: float, random_state: int) -> tuple[dict, dict]:
    """Split each category's samples into train and test portions.

    Splitting is done per category (stratified by construction) so that every
    category is guaranteed to appear in both the train and the test set.

    Args:
        data_by_category: Mapping of category_id -> list of samples.
        test_size: Fraction of each category routed to the test set.
        random_state: Seed forwarded to ``train_test_split`` for determinism.

    Returns:
        Tuple of (train_by_category, test_by_category) dicts keyed by category.

    Raises:
        AssertionError: If any category ends up empty on either side.
    """
    train_by_category: dict = {}
    test_by_category: dict = {}
    for cat_id, samples in data_by_category.items():
        train_part, test_part = train_test_split(
            samples, test_size=test_size, random_state=random_state
        )
        # Every category must land on both sides of the split.
        assert len(train_part) > 0 and len(test_part) > 0
        train_by_category[cat_id] = train_part
        test_by_category[cat_id] = test_part
    return train_by_category, test_by_category
45
+
46
+
47
def _process_train_set(
    train_data: dict, base_metadata: dict, raw_path: Path, output_public_path: Path
) -> None:
    """Processes and writes the training set data, images, and metadata.

    Copies each training image from `raw_path/train_images` into
    `output_public_path/train_images`, renaming files to a deterministic
    "<category>/<index>" scheme, and writes a matching
    `train_metadata.json` whose annotation/image entries carry the new IDs
    and file names.

    Args:
        train_data: Mapping of category_id -> list of
            {"annotation": ..., "image": ...} dicts (as produced upstream).
        base_metadata: Original metadata dict; copied, then its
            "annotations"/"images" lists are replaced with the renamed entries.
        raw_path: Root of the raw competition data (source images live under
            `raw_path/train_images`).
        output_public_path: Destination root for the public train split.

    Raises:
        AssertionError: If the number of copied .jpg files does not match the
            written metadata, or annotations/images counts disagree.
    """
    logger.info(f"Processing train set for output: {output_public_path}")
    # Shallow copy is enough here: the two mutated keys are replaced outright.
    new_train_metadata = base_metadata.copy()
    new_train_metadata.update({"annotations": [], "images": []})
    train_sample_count = sum(len(v) for v in train_data.values())

    output_train_images_path = output_public_path / "train_images"

    with tqdm(
        desc=f"Creating train dataset in {output_public_path.name}",
        total=train_sample_count,
    ) as pbar:
        for category_id, annotations_images in train_data.items():
            # Two-level directory layout: e.g. category 12345 -> "123/45".
            category_subdir = f"{category_id // 100:03d}/{category_id % 100:02d}"
            (output_train_images_path / category_subdir).mkdir(exist_ok=True, parents=True)
            for idx, annotation_image in enumerate(annotations_images):
                # New ID is 1-based within the category, e.g. "12345__001".
                new_image_id = f"{category_id:05d}__{(idx + 1):03d}"
                new_file_name = f"{category_subdir}/{new_image_id}.jpg"

                # Copy the records before mutating so the caller's data is untouched.
                new_annotation = annotation_image["annotation"].copy()
                new_annotation["image_id"] = new_image_id
                new_train_metadata["annotations"].append(new_annotation)

                new_image = annotation_image["image"].copy()
                new_image["image_id"] = new_image_id
                new_image["file_name"] = new_file_name
                new_train_metadata["images"].append(new_image)

                # Source path uses the ORIGINAL file name; destination the new one.
                src_path = raw_path / "train_images" / annotation_image["image"]["file_name"]
                dst_path = output_train_images_path / new_file_name
                shutil.copyfile(src=src_path, dst=dst_path)

                pbar.update(1)

    with open(output_public_path / "train_metadata.json", "w") as f:
        json.dump(new_train_metadata, f, indent=4, sort_keys=True)

    # Sanity checks: copied files and metadata entries must match 1:1.
    assert len(list(output_train_images_path.glob("**/*.jpg"))) == len(
        new_train_metadata["images"]
    )
    assert len(new_train_metadata["annotations"]) == len(new_train_metadata["images"])
91
+
92
+
93
def _process_test_set(
    test_data: dict, raw_path: Path, output_public_path: Path, output_private_path: Path
) -> None:
    """Processes and writes the test set data, images, metadata, and private answers.

    Flattens the per-category test samples, shuffles them with a fixed seed,
    copies each image into `output_public_path/test_images` under an
    anonymized sequential name, then writes:
      - `test_metadata.json` (public): image entries ONLY — the annotations
        (labels) are deliberately withheld from the public side;
      - `answers.csv` (private): the ground-truth category per image ID;
      - `sample_submission.csv` (public): a dummy submission template.

    Args:
        test_data: Mapping of category_id -> list of
            {"annotation": ..., "image": ...} dicts.
        raw_path: Root of the raw competition data (source images live under
            `raw_path/train_images`, since the test split is carved from train).
        output_public_path: Destination root for competitor-visible files.
        output_private_path: Destination root for the grading answers.

    Raises:
        AssertionError: If copied files, metadata entries, answers, and the
            sample submission do not all line up 1:1.
    """
    logger.info(
        f"Processing test set for outputs: {output_public_path} and {output_private_path}"
    )
    new_test_metadata = {"annotations": [], "images": []}
    test_annotations_images = [item for sublist in test_data.values() for item in sublist]
    # Fixed-seed shuffle so the test ordering (and thus IDs) is reproducible
    # and does not leak category grouping.
    random.Random(0).shuffle(test_annotations_images)

    output_test_images_path = output_public_path / "test_images"

    for idx, annotation_image in tqdm(
        enumerate(test_annotations_images),
        desc=f"Creating test dataset in {output_public_path.name}",
        total=len(test_annotations_images),
    ):
        # Anonymized sequential ID; files are bucketed 1000 per subdirectory.
        new_image_id = str(idx)
        new_file_name = f"{idx // 1000:03d}/test-{idx:06d}.jpg"

        # Copy records before mutating so the caller's data is untouched.
        new_annotation = annotation_image["annotation"].copy()
        new_annotation["image_id"] = new_image_id
        new_test_metadata["annotations"].append(new_annotation)

        new_image = annotation_image["image"].copy()
        new_image["image_id"] = new_image_id
        new_image["file_name"] = new_file_name
        new_test_metadata["images"].append(new_image)

        # Source lives under train_images: this test split is carved out of
        # the raw training data.
        src_path = raw_path / "train_images" / annotation_image["image"]["file_name"]
        dst_path = output_test_images_path / new_file_name
        dst_path.parent.mkdir(exist_ok=True, parents=True)
        shutil.copyfile(src=src_path, dst=dst_path)

    with open(output_public_path / "test_metadata.json", "w") as f:
        # Only the "images" list is published; labels stay private.
        json.dump(new_test_metadata["images"], f, indent=4, sort_keys=True)

    # Private ground truth: one (Id, Predicted) row per test image.
    answers_rows = [
        {"Id": image["image_id"], "Predicted": annotation["category_id"]}
        for image, annotation in zip(new_test_metadata["images"], new_test_metadata["annotations"])
    ]
    answers_df = pd.DataFrame(answers_rows)
    answers_df.to_csv(output_private_path / "answers.csv", index=False)

    # Public submission template with a constant placeholder prediction.
    sample_rows = [{"Id": image["image_id"], "Predicted": 42} for image in new_test_metadata["images"]]
    sample_df = pd.DataFrame(sample_rows)
    sample_df.to_csv(output_public_path / "sample_submission.csv", index=False)

    # Sanity checks: images on disk, metadata, answers, and the sample
    # submission must all be the same length.
    assert len(list(output_test_images_path.glob("**/*.jpg"))) == len(new_test_metadata["images"])
    assert len(new_test_metadata["annotations"]) == len(new_test_metadata["images"])
    assert len(answers_df) == len(new_test_metadata["images"])
    assert len(sample_df) == len(answers_df)
146
+
147
+
148
def prepare(raw: Path, public: Path, private: Path):
    """
    Splits the raw data into public/private competition datasets plus a
    mirrored validation split.

    `train_metadata.json` is the "table of contents" for the raw data
    (details at https://www.kaggle.com/competitions/herbarium-2022-fgvc9/data):
    its `images` and `annotations` lists hold the N samples to split, while the
    remaining keys are dataset-wide metadata that pass through untouched.

    Splitting notes:
    - Splits are stratified per category, so every category appears in both
      train and test.
    - `train_images` is laid out as `{category_id[:3]}/{category_id[3:]}/{image_id}.jpg`;
      `test_images` as `{image_idx[:3]}/test-{image_idx}.jpg` to hide the category.
    - Image indices are re-assigned (per-category for train, flat-and-shuffled
      for test) so indices never give labels away.
    - A second, identically-parameterized split of the first split's train set
      produces `public_val`/`private_val` for validation.
    """
    # Load the raw manifest and bucket samples by category.
    with open(raw / "train_metadata.json") as f:
        old_train_metadata = json.load(f)
    by_category = _organize_data_by_category(old_train_metadata)

    # --- 1. Main split: outputs identical to the original script. ---
    logger.info("--- Creating original train/test split for 'public' and 'private' directories ---")
    main_train, main_test = _split_data(by_category, test_size=0.2, random_state=0)

    _process_train_set(main_train, old_train_metadata, raw, public)
    _process_test_set(main_test, raw, public, private)
    logger.info("Finished creating original 'public' and 'private' datasets.")

    # --- 2. Validation split: carve a validation set out of the main train set. ---
    logger.info("--- Creating validation split for 'public_val' and 'private_val' directories ---")
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"
    for destination in (public_val, private_val):
        destination.mkdir(exist_ok=True, parents=True)

    # Re-split the main train portion with the exact same parameters so the
    # validation datasets mirror the main split's methodology.
    val_train, val_test = _split_data(main_train, test_size=0.2, random_state=0)

    _process_train_set(val_train, old_train_metadata, raw, public_val)
    _process_test_set(val_test, raw, public_val, private_val)
    logger.info("Finished creating validation 'public_val' and 'private_val' datasets.")
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.metrics import roc_auc_score
4
+
5
+ from mlebench.competitions.utils import prepare_for_auroc_metric
6
+
7
+
8
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission against the answers with ROC AUC.

    Both frames are aligned on the "id" column and scored on the
    "label" column by the shared AUROC preparation helper.
    """
    inputs = prepare_for_auroc_metric(
        submission=submission, answers=answers, id_col="id", target_col="label"
    )
    y_true = inputs["y_true"]
    y_score = inputs["y_score"]
    return roc_auc_score(y_true=y_true, y_score=y_score)
@@ -0,0 +1,59 @@
1
+ import json
2
+ import shutil
3
+ from pathlib import Path
4
+
5
+ import pandas as pd
6
+ from sklearn.model_selection import train_test_split
7
+ from tqdm import tqdm
8
+
9
+
10
def prepare(raw: Path, public: Path, private: Path):
    """Re-split the labeled raw training data into new public train/test sets.

    The new test set is sized to match the original (unlabeled) competition
    test pool; its labels are written privately as the answer key.
    """
    labels = pd.read_csv(raw / "train_labels.csv")

    # Size the new test split to mirror the original test pool's proportion.
    n_original_test = len(list((raw / "test").glob("*.tif")))
    test_fraction = n_original_test / (len(labels) + n_original_test)

    new_train_ids, new_test_ids = train_test_split(
        labels["id"], test_size=test_fraction, random_state=0
    )
    train_df = labels[labels["id"].isin(new_train_ids)]
    test_df = labels[labels["id"].isin(new_test_ids)]

    assert set(train_df["id"]).isdisjoint(
        set(test_df["id"])
    ), "Train should not contain id's of test images"
    assert len(new_train_ids) + len(new_test_ids) == len(
        labels
    ), "The combined length of new_train_ids and new_test_ids should equal the length of old_train"

    (public / "train").mkdir(exist_ok=True)
    (public / "test").mkdir(exist_ok=True)

    # Both splits are copied from the labeled raw `train` pool; only the
    # destination subdirectory differs.
    for split_name, split_ids in (("train", new_train_ids), ("test", new_test_ids)):
        for file_id in tqdm(split_ids, desc=f"Copying {split_name} images"):
            shutil.copyfile(
                src=raw / "train" / f"{file_id}.tif",
                dst=public / split_name / f"{file_id}.tif",
            )

    # Sample submission mirrors the test rows with a constant placeholder label.
    sample_submission = test_df.copy()
    sample_submission["label"] = 0

    train_df.to_csv(public / "train_labels.csv", index=False)
    test_df.to_csv(private / "answers.csv", index=False)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    # Verify every id got exactly one copied image.
    assert len(list(public.glob("train/*.tif"))) == len(
        new_train_ids
    ), "Number of train images should be equal to the number of unique id's in the train set"
    assert len(list(public.glob("test/*.tif"))) == len(
        new_test_ids
    ), "Number of test images should be equal to the number of unique id's in the test set"
@@ -0,0 +1,131 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm import tqdm
7
+
8
+
9
def _create_split(
    df_to_split: pd.DataFrame,
    image_source_dir: Path,
    test_split_size: float,
    public_dest: Path,
    private_dest: Path,
    random_state: int,
) -> pd.DataFrame:
    """
    Split a labels dataframe, copy the matching images, and write all outputs.

    Args:
        df_to_split: DataFrame containing image IDs and labels to be split.
        image_source_dir: Directory holding the source `.tif` images.
        test_split_size: Fraction of rows assigned to the test split.
        public_dest: Destination for public files (train set, test images, sample submission).
        private_dest: Destination for private files (test-set answers).
        random_state: Seed for a reproducible train/test split.

    Returns:
        The training portion of the split as a pandas DataFrame.
    """
    # Lay out the destination tree up front.
    for destination in (public_dest, private_dest):
        destination.mkdir(exist_ok=True, parents=True)
    for subdir in ("train", "test"):
        (public_dest / subdir).mkdir(exist_ok=True)

    train_ids, test_ids = train_test_split(
        df_to_split["id"], test_size=test_split_size, random_state=random_state
    )
    train_df = df_to_split[df_to_split["id"].isin(train_ids)]
    test_df = df_to_split[df_to_split["id"].isin(test_ids)]

    assert set(train_df["id"]).isdisjoint(
        set(test_df["id"])
    ), "Train should not contain id's of test images"
    assert len(train_ids) + len(test_ids) == len(
        df_to_split
    ), "The combined length of train_ids and test_ids should equal the length of the source df"

    # Copy each split's images into its own subdirectory.
    for split_name, split_ids in (("train", train_ids), ("test", test_ids)):
        for file_id in tqdm(split_ids, desc=f"Copying {split_name} images to {public_dest}"):
            shutil.copyfile(
                src=image_source_dir / f"{file_id}.tif",
                dst=public_dest / split_name / f"{file_id}.tif",
            )

    # Sample submission mirrors the test rows with a constant placeholder label.
    sample_submission = test_df.copy()
    sample_submission["label"] = 0

    train_df.to_csv(public_dest / "train_labels.csv", index=False)
    test_df.to_csv(private_dest / "answers.csv", index=False)
    sample_submission.to_csv(public_dest / "sample_submission.csv", index=False)

    # Verify every id got exactly one copied image.
    assert len(list(public_dest.glob("train/*.tif"))) == len(
        train_ids
    ), "Number of train images should be equal to the number of unique id's in the train set"
    assert len(list(public_dest.glob("test/*.tif"))) == len(
        test_ids
    ), "Number of test images should be equal to the number of unique id's in the test set"

    return train_df
80
+
81
+
82
def prepare(raw: Path, public: Path, private: Path):
    """
    Prepare competition data via two consecutive splits.

    1. Main split of the raw labeled data into the primary train/test sets,
       written to `public/` and `private/` (identical to the original script).
    2. Secondary split of the primary train set into a smaller train set plus
       a validation set, written to `public_val/` and `private_val/` with the
       same directory structure.
    """
    # Shared inputs for both splits.
    labels = pd.read_csv(raw / "train_labels.csv")
    source_images = raw / "train"
    seed = 0

    # --- 1. Main split ---
    # Match the proportion of the original (unlabeled) test pool.
    pool_size = len(list((raw / "test").glob("*.tif")))
    main_ratio = pool_size / (len(labels) + pool_size)

    # `_create_split` returns the larger (train) portion of the split.
    primary_train_df = _create_split(
        df_to_split=labels,
        image_source_dir=source_images,
        test_split_size=main_ratio,
        public_dest=public,
        private_dest=private,
        random_state=seed,
    )

    # --- 2. Validation split ---
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # The second split operates on the (smaller) primary train set, so rescale
    # the ratio to keep the validation test set the same absolute size as the
    # main test set; guard against the degenerate all-test case.
    remaining_fraction = 1 - main_ratio
    if remaining_fraction <= 0:
        val_ratio = 0
    else:
        val_ratio = main_ratio / remaining_fraction

    _create_split(
        df_to_split=primary_train_df,
        image_source_dir=source_images,
        test_split_size=val_ratio,
        public_dest=public_val,
        private_dest=private_val,
        random_state=seed,
    )
@@ -0,0 +1,9 @@
1
# Column identifying each EEG sample in submissions and answer files.
ID_COL = "eeg_id"
# Per-class expert-vote columns scored by the competition metric.
TARGET_COLS = [
    "seizure_vote",
    "lpd_vote",
    "gpd_vote",
    "lrda_vote",
    "grda_vote",
    "other_vote",
]
@@ -0,0 +1,43 @@
1
+ import pandas as pd
2
+
3
+ from mlebench.grade_helpers import InvalidSubmissionError
4
+
5
+ from . import kullback_leibler_divergence as kl_divergence
6
+ from .constants import ID_COL, TARGET_COLS
7
+
8
+
9
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> tuple:
    """Validate the submission and align both frames for scoring.

    Returns:
        A ``(submission, answers)`` tuple, both sorted by the id column and
        re-indexed, with the answers' vote columns normalized to probabilities.

    Raises:
        InvalidSubmissionError: if the submission is malformed.
    """
    # Participant-facing validation first; order of checks is preserved.
    if ID_COL not in submission.columns:
        raise InvalidSubmissionError(f"Submission must contain {ID_COL} column")
    if any(col not in submission.columns for col in TARGET_COLS):
        raise InvalidSubmissionError(f"Submission must contain all target columns: {TARGET_COLS}")
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission and answers must have the same length")
    row_sums = submission[TARGET_COLS].sum(axis=1)
    if not row_sums.apply(lambda total: round(total, 5) == 1).all():
        raise InvalidSubmissionError("Submission probabilities must add to 1 for each row")
    if set(answers[ID_COL]) != set(submission[ID_COL]):
        raise InvalidSubmissionError("Submission and answers must have the same IDs")

    # Host-side sanity checks on the answer key.
    assert ID_COL in answers.columns, f"Answers must contain {ID_COL} column"
    assert all(
        col in answers.columns for col in TARGET_COLS
    ), f"Answers must contain all target columns: {TARGET_COLS}"

    # Align rows by id so the metric can compare frames position-wise.
    submission = submission.sort_values(ID_COL).reset_index(drop=True)
    answers = answers.sort_values(ID_COL).reset_index(drop=True)

    answers = answers.copy()[[ID_COL] + TARGET_COLS]
    # Normalize raw expert vote counts into per-row probabilities (vote / sum(votes)).
    # https://www.kaggle.com/competitions/hms-harmful-brain-activity-classification/discussion/468705#2606605
    answers[TARGET_COLS] = answers[TARGET_COLS].div(answers[TARGET_COLS].sum(axis=1), axis=0)

    return submission, answers
36
+
37
+
38
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Return the Kullback-Leibler divergence score for a submission."""
    # Validation, row alignment, and answer normalization happen in one place.
    prepared_submission, prepared_answers = prepare_for_metric(submission, answers)
    return kl_divergence.score(prepared_answers, prepared_submission, ID_COL)
@@ -0,0 +1,96 @@
1
+ """
2
+ This script exists to reduce code duplication across metrics.
3
+ Source: https://www.kaggle.com/code/metric/kaggle-metric-utilities
4
+ Linked from: https://www.kaggle.com/code/metric/kullback-leibler-divergence
5
+ Linked from: https://www.kaggle.com/competitions/hms-harmful-brain-activity-classification
6
+ """
7
+
8
+ from typing import Union
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import pandas.api.types
13
+
14
+
15
class ParticipantVisibleError(Exception):
    """Error whose message is safe to surface to competition participants."""

    pass
17
+
18
+
19
class HostVisibleError(Exception):
    """Error whose message must only be shown to the competition host."""

    pass
21
+
22
+
23
def treat_as_participant_error(
    error_message: str, solution: Union[pd.DataFrame, np.ndarray]
) -> bool:
    """Many metrics can raise more errors than can be handled manually. This function attempts
    to identify errors that can be treated as ParticipantVisibleError without leaking any competition data.

    If the solution is purely numeric, and there are no numbers in the error message,
    then the error message is sufficiently unlikely to leak usable data and can be shown to participants.

    We expect this filter to reject many safe messages. It's intended only to reduce the number of errors we need to manage manually.

    Args:
        error_message: The message of the exception raised by the metric.
        solution: The ground-truth data the metric was scored against.

    Returns:
        True if the message can safely be shown to participants, False otherwise.
    """
    # This check treats bools as numeric
    if isinstance(solution, pd.DataFrame):
        solution_is_all_numeric = all(
            [pandas.api.types.is_numeric_dtype(x) for x in solution.dtypes.values]
        )
        solution_has_bools = any(
            [pandas.api.types.is_bool_dtype(x) for x in solution.dtypes.values]
        )
    elif isinstance(solution, np.ndarray):
        solution_is_all_numeric = pandas.api.types.is_numeric_dtype(solution)
        solution_has_bools = pandas.api.types.is_bool_dtype(solution)
    else:
        # Bug fix: the original left `solution_is_all_numeric`/`solution_has_bools`
        # unbound for any other solution type, raising UnboundLocalError below.
        # Be conservative and hide the message for unsupported solution types.
        return False

    if not solution_is_all_numeric:
        return False

    # Any digit in the message could encode solution values; hide it.
    for char in error_message:
        if char.isnumeric():
            return False
    # For boolean solutions, the literals "true"/"false" could leak values too.
    if solution_has_bools:
        if "true" in error_message.lower() or "false" in error_message.lower():
            return False
    return True
56
+
57
+
58
def safe_call_score(metric_function, solution, submission, **metric_func_kwargs):
    """
    Call score. If that raises an error and that already been specifically handled, just raise it.
    Otherwise make a conservative attempt to identify potential participant visible errors.

    Args:
        metric_function: Callable invoked as ``metric_function(solution, submission, **metric_func_kwargs)``.
        solution: Ground-truth data; also passed to the error-message filter.
        submission: Participant predictions.
        **metric_func_kwargs: Extra keyword arguments forwarded to the metric.

    Returns:
        Whatever ``metric_function`` returns.

    Raises:
        ParticipantVisibleError: re-raised by class name, or when the filter
            judges an unrecognized error's message safe to show participants.
        HostVisibleError: re-raised by class name.
        Exception: the original error, unchanged, in all other cases.
    """
    try:
        score_result = metric_function(solution, submission, **metric_func_kwargs)
    except Exception as err:
        error_message = str(err)
        # Match by class *name* (not identity) so errors defined in a separately
        # imported copy of this module are still recognized.
        if err.__class__.__name__ == "ParticipantVisibleError":
            raise ParticipantVisibleError(error_message)
        elif err.__class__.__name__ == "HostVisibleError":
            raise HostVisibleError(error_message)
        else:
            # Unrecognized error: surface it only if the conservative
            # data-leak filter approves; otherwise propagate unchanged.
            if treat_as_participant_error(error_message, solution):
                raise ParticipantVisibleError(error_message)
            else:
                raise err
    return score_result
77
+
78
+
79
def verify_valid_probabilities(df: pd.DataFrame, df_name: str):
    """Verify that the dataframe contains valid probabilities.

    The dataframe must be limited to the target columns; do not pass in any ID columns.

    Args:
        df: DataFrame whose every column holds one class's probability values.
        df_name: Human-readable name used in error messages (e.g. "submission").

    Raises:
        ParticipantVisibleError: if any value is non-numeric, negative, greater
            than one, or if any row does not sum to one (within tolerance).
    """
    if not pandas.api.types.is_numeric_dtype(df.values):
        raise ParticipantVisibleError(f"All target values in {df_name} must be numeric")

    if df.min().min() < 0:
        raise ParticipantVisibleError(f"All target values in {df_name} must be at least zero")

    if df.max().max() > 1:
        raise ParticipantVisibleError(f"All target values in {df_name} must be no greater than one")

    # Each row must form a probability distribution; allclose tolerates float error.
    if not np.allclose(df.sum(axis=1), 1):
        raise ParticipantVisibleError(
            f"Target values in {df_name} do not add to one within all rows"
        )
+ )