dslighting 1.7.1-py3-none-any.whl → 1.7.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,42 @@
+ from pathlib import Path
+
+ from mlebench.utils import extract, read_csv
+
+ from .classes import CLASSES
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     # extract only what is needed
+     extract(raw / "train.csv.zip", raw)
+     extract(raw / "test.csv.zip", raw)
+     extract(raw / "test_labels.csv.zip", raw)
+     extract(raw / "sample_submission.csv.zip", raw)
+
+     # the test set is provided, so we dont have to split the train set nor form the sample submission
+     train_with_labels = read_csv(raw / "train.csv")
+     test_without_labels = read_csv(raw / "test.csv")
+     answers = read_csv(raw / "test_labels.csv")
+     sample_submission = read_csv(raw / "sample_submission.csv")
+     sample_submission[CLASSES] = 0.5
+
+     # save to public
+     train_with_labels.to_csv(public / "train.csv", index=False)
+     test_without_labels.to_csv(public / "test.csv", index=False)
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # save to private
+     answers.to_csv(private / "test.csv", index=False)
+
+     assert len(answers) == len(
+         sample_submission
+     ), "Private test set and sample submission should be of the same length"
+
+     assert sorted(answers["id"]) == sorted(
+         test_without_labels["id"]
+     ), "Private and Public test IDs should match"
+     assert sorted(sample_submission["id"]) == sorted(
+         test_without_labels["id"]
+     ), "Public test and sample submission IDs should match"
+     assert (
+         len(set(train_with_labels["id"]) & set(test_without_labels["id"])) == 0
+     ), "Train and test IDs should not overlap"
@@ -0,0 +1,88 @@
+ from pathlib import Path
+
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import extract, read_csv
+
+ from .classes import CLASSES
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     # extract only what is needed
+     extract(raw / "train.csv.zip", raw)
+     extract(raw / "test.csv.zip", raw)
+     extract(raw / "test_labels.csv.zip", raw)
+     extract(raw / "sample_submission.csv.zip", raw)
+
+     # the test set is provided, so we dont have to split the train set nor form the sample submission
+     train_with_labels = read_csv(raw / "train.csv")
+     test_without_labels = read_csv(raw / "test.csv")
+     answers = read_csv(raw / "test_labels.csv")
+     sample_submission = read_csv(raw / "sample_submission.csv")
+     sample_submission[CLASSES] = 0.5
+
+     # save to public
+     train_with_labels.to_csv(public / "train.csv", index=False)
+     test_without_labels.to_csv(public / "test.csv", index=False)
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # save to private
+     answers.to_csv(private / "test.csv", index=False)
+
+     assert len(answers) == len(
+         sample_submission
+     ), "Private test set and sample submission should be of the same length"
+
+     assert sorted(answers["id"]) == sorted(
+         test_without_labels["id"]
+     ), "Private and Public test IDs should match"
+     assert sorted(sample_submission["id"]) == sorted(
+         test_without_labels["id"]
+     ), "Public test and sample submission IDs should match"
+     assert (
+         len(set(train_with_labels["id"]) & set(test_without_labels["id"])) == 0
+     ), "Train and test IDs should not overlap"
+
+     # ==================================================================
+     # === New code for creating the validation set starts here ===
+     # The code above this line is untouched to ensure original outputs
+     # remain identical.
+     # ==================================================================
+
+     # 1. Define new paths and create the directories for the validation set.
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+     public_val.mkdir(exist_ok=True)
+     private_val.mkdir(exist_ok=True)
+
+     # 2. Split the original training data to create a new, smaller training set
+     # and a new validation set. The size of the validation set will be the same
+     # as the size of the original test set.
+     validation_set_size = len(test_without_labels)
+     train_val, test_val_with_labels = train_test_split(
+         train_with_labels,
+         test_size=validation_set_size,
+         random_state=42,  # Use a fixed random state for reproducibility
+     )
+
+     # 3. Prepare the validation set files, mirroring the original test set structure.
+     # The public part (input features, without labels)
+     test_val_without_labels = test_val_with_labels[["id", "comment_text"]].copy()
+
+     # The private part (ground truth labels for the validation set)
+     answers_val = test_val_with_labels[["id"] + CLASSES].copy()
+
+     # 4. Create a sample submission file for the new validation set,
+     # mirroring the original sample submission format.
+     sample_submission_val = test_val_without_labels[["id"]].copy()
+     sample_submission_val[CLASSES] = 0.5
+
+     # 5. Save the new sets to the 'public_val' and 'private_val' directories,
+     # using the same filenames as in the original 'public' and 'private' dirs.
+     # Save to public_val
+     train_val.to_csv(public_val / "train.csv", index=False)
+     test_val_without_labels.to_csv(public_val / "test.csv", index=False)
+     sample_submission_val.to_csv(public_val / "sample_submission.csv", index=False)
+
+     # Save to private_val
+     answers_val.to_csv(private_val / "test.csv", index=False)
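For illustration (not package code): a small sanity check of the mirrored layout this variant writes, assuming prepare() has already run and that public/, private/, public_val/ and private_val/ share the hypothetical parent directory below.

from pathlib import Path

base = Path("data")  # hypothetical parent of the four output directories
for split in ("public", "public_val"):
    for name in ("train.csv", "test.csv", "sample_submission.csv"):
        assert (base / split / name).exists(), f"missing {split}/{name}"
for split in ("private", "private_val"):
    assert (base / split / "test.csv").exists(), f"missing {split}/test.csv"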
@@ -0,0 +1,153 @@
+ from typing import Tuple
+
+ from pandas import DataFrame
+ from sklearn.metrics import roc_auc_score
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def _filter_for_subgroup(
+     answers: DataFrame, submission: DataFrame, identity_column: str, target_value: int = 1
+ ) -> Tuple[DataFrame, DataFrame]:
+     """
+     Restricts the data to only the examples that mention the specific identity subgroup.
+     A low value in this metric means the model does a poor job of distinguishing between toxic and non-toxic comments that mention the identity.
+     """
+
+     filtered_answers = answers[answers[identity_column] == target_value]
+     filtered_submission = submission[submission["id"].isin(filtered_answers["id"])]
+     return filtered_answers, filtered_submission
+
+
+ def _filter_for_bpsn(
+     answers: DataFrame, submission: DataFrame, identity_column: str, target_value: int = 1
+ ) -> Tuple[DataFrame, DataFrame]:
+     """
+     Filter for BPSN (Background Positive, Subgroup Negative) AUC
+     Restricts the data to only the non-toxic examples that mention the identity and the toxic examples that do not.
+     A low value in this metric means that the model confuses non-toxic examples that mention the identity with toxic examples that do not, likely meaning that the model predicts higher toxicity scores than it should for non-toxic examples mentioning the identity.
+     """
+     filtered_answers = answers[
+         ((answers["toxicity"] == 0) & (answers[identity_column] == target_value))
+         | ((answers["toxicity"] == 1) & (answers[identity_column] != target_value))
+     ]
+     filtered_submission = submission[submission["id"].isin(filtered_answers["id"])]
+     return filtered_answers, filtered_submission
+
+
+ def _filter_for_bnsp(
+     answers: DataFrame, submission: DataFrame, identity_column: str, target_value: int = 1
+ ) -> Tuple[DataFrame, DataFrame]:
+     """
+     Filter for BNSP (Background Negative, Subgroup Positive) AUC
+     Restricts the data to only the toxic examples that mention the identity and the non-toxic examples that do not.
+     A low value in this metric means that the model confuses toxic examples that mention the identity with non-toxic examples that do not, likely meaning that the model predicts lower toxicity scores than it should for toxic examples mentioning the identity.
+     """
+     filtered_answers = answers[
+         ((answers["toxicity"] == 1) & (answers[identity_column] == target_value))
+         | ((answers["toxicity"] == 0) & (answers[identity_column] != target_value))
+     ]
+     filtered_submission = submission[submission["id"].isin(filtered_answers["id"])]
+     return filtered_answers, filtered_submission
+
+
+ def _generalized_mean(scores: list, p: int, identity_columns: list[str]) -> float:
+     return (sum([s**p for s in scores]) / len(identity_columns)) ** (1 / p)
+
+
+ def prepare_for_metric(
+     submission: DataFrame, answers: DataFrame, identity_columns: list[str]
+ ) -> Tuple[DataFrame, DataFrame]:
+
+     # Ensure that the submission DataFrame contains the necessary columns
+     if "id" not in submission.columns:
+         raise InvalidSubmissionError("Submission DataFrame must have an 'id' column.")
+     if (
+         "prediction" not in submission.columns and "toxicity" not in submission.columns
+     ):  # Allow for the submission to be the CSV of answers, which has a "toxicity" column instead of a "prediction" column
+         raise InvalidSubmissionError("Submission DataFrame must have a 'prediction' column.")
+
+     # Ensure that the answers DataFrame contains the necessary columns
+     assert "id" in answers.columns, "Answers DataFrame must have an 'id' column."
+     assert "toxicity" in answers.columns, "Answers DataFrame must have a 'toxicity' column."
+
+     submission = submission.sort_values("id")
+     answers = answers.sort_values("id")
+
+     if (submission["id"].values != answers["id"].values).any():
+         raise InvalidSubmissionError("IDs in the submission and answers DataFrames do not match.")
+
+     # Ensure that all identity columns are present in the answers DataFrame
+     for identity_column in identity_columns:
+         assert (
+             identity_column in answers.columns
+         ), f"Answers DataFrame must have an '{identity_column}' column."
+
+     submission.rename(columns={"prediction": "toxicity"}, inplace=True)
+
+     # Convert continuous toxicity scores to binary toxicity labels
+     submission["toxicity"] = submission["toxicity"].apply(lambda x: 1 if x >= 0.5 else 0)
+     answers["toxicity"] = answers["toxicity"].apply(lambda x: 1 if x >= 0.5 else 0)
+
+     # Convert continuous identity columns to binary identity columns
+     for identity_column in identity_columns:
+         answers[identity_column] = answers[identity_column].apply(lambda x: 1 if x >= 0.5 else 0)
+
+     return submission, answers
+
+
+ def grade(
+     submission: DataFrame,
+     answers: DataFrame,
+     identity_columns: list[str] = [
+         "male",
+         "female",
+         "homosexual_gay_or_lesbian",
+         "christian",
+         "jewish",
+         "muslim",
+         "black",
+         "white",
+         "psychiatric_or_mental_illness",
+     ],
+     p: int = -5,
+     w: float = 0.25,
+ ) -> float:
+     """
+     Computes the custom metric for Jigsaw Unintended Bias in Toxicity Classification.
+     See https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview/evaluation for details on this metric.
+     """
+
+     submission, answers = prepare_for_metric(submission, answers, identity_columns)
+
+     overall_auc = roc_auc_score(answers["toxicity"], submission["toxicity"])
+
+     # To measure unintended bias, we again calculate the ROC-AUC, this time on three specific subsets of the test set for each identity, each capturing a different aspect of unintended bias.
+     bias_aucs = []
+     for identity_column in identity_columns:
+         subgroup_answers, subgroup_submission = _filter_for_subgroup(
+             answers, submission, identity_column
+         )
+         bpsn_answers, bpsn_submission = _filter_for_bpsn(answers, submission, identity_column)
+         bnsp_answers, bnsp_submission = _filter_for_bnsp(answers, submission, identity_column)
+
+         subgroup_aucs = roc_auc_score(subgroup_answers["toxicity"], subgroup_submission["toxicity"])
+         bpsn_aucs = roc_auc_score(bpsn_answers["toxicity"], bpsn_submission["toxicity"])
+         bnsp_aucs = roc_auc_score(bnsp_answers["toxicity"], bnsp_submission["toxicity"])
+
+         bias_aucs.append([subgroup_aucs, bpsn_aucs, bnsp_aucs])
+
+     # Calculate generalized mean for each identity column
+     generalized_means = [
+         _generalized_mean([bias_auc[i] for bias_auc in bias_aucs], p, identity_columns)
+         for i in range(3)  # there are 3 bias metrics
+     ]
+
+     # Combine the overall AUC with the generalized mean of the Bias AUCs
+     weighted_overall_auc = w * overall_auc
+     weighted_generalized_means = sum(
+         [w * generalized_means[i] for i in range(len(generalized_means))]
+     )
+     final_score = weighted_overall_auc + weighted_generalized_means
+
+     return final_score
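For illustration (not package code): a worked example of how grade() combines its terms. The final score is w times the overall AUC plus w times the power mean (p = -5) of each of the three bias AUC families across the identity columns; the AUC values below are made up.

w, p = 0.25, -5
overall_auc = 0.93
# One [subgroup AUC, BPSN AUC, BNSP AUC] triple per identity column (9 columns by default).
bias_aucs = [[0.88, 0.90, 0.91]] * 9


def power_mean(scores, p, n):
    # Same formula as _generalized_mean above.
    return (sum(s ** p for s in scores) / n) ** (1 / p)


generalized_means = [
    power_mean([row[i] for row in bias_aucs], p, len(bias_aucs)) for i in range(3)
]
final_score = w * overall_auc + sum(w * gm for gm in generalized_means)
# With identical triples the power means collapse to 0.88, 0.90 and 0.91,
# so final_score = 0.25 * (0.93 + 0.88 + 0.90 + 0.91) ≈ 0.905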
@@ -0,0 +1,36 @@
+ import shutil
+ from pathlib import Path
+
+ from mlebench.utils import read_csv
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     # Create test and test without labels
+     test = read_csv(raw / "test_private_expanded.csv")
+     test_without_labels = test[["id", "comment_text"]]
+
+     # Copy over files
+     shutil.copy(raw / "test_private_expanded.csv", private / "test.csv")
+     shutil.copy(raw / "train.csv", public / "train.csv")
+     test_without_labels.to_csv(public / "test.csv", index=False)
+
+     # Create a sample submission file
+     submission_df = test_without_labels.copy()
+     submission_df = submission_df.drop(columns=["comment_text"])
+     submission_df["prediction"] = 0.0
+     submission_df.to_csv(public / "sample_submission.csv", index=False)
+
+     # Checks
+     assert test.shape == (
+         len(test),
+         45,
+     ), "test.csv should have 45 columns as per raw data"
+     assert test_without_labels.shape == (
+         len(test_without_labels),
+         2,
+     ), "test.csv should have 2 columns: id, comment_text"
+
+     assert submission_df.shape == (
+         len(test),
+         2,
+     ), "sample_submission.csv should have 2 columns: id, prediction"
@@ -0,0 +1,117 @@
+ import shutil
+ from pathlib import Path
+
+ # New import required for the data splitting
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import read_csv
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Prepares the data for the competition.
+
+     This script performs two main operations:
+     1. Processes the original raw data into `public` and `private` directories
+        for the official test set, ensuring the output is identical to the
+        original script's output.
+     2. Creates a new validation split from the original training data. It splits
+        the original `train.csv` into a new, smaller training set (`train_val`)
+        and a validation set (`test_val`). The results are saved in parallel
+        `public_val` and `private_val` directories with a mirrored file structure.
+     """
+     # --- Part 1: Original Data Preparation (UNCHANGED OUTPUT) ---
+     # This block ensures the 'public' and 'private' directories are populated
+     # exactly as they were in the original script.
+
+     # Ensure original output directories exist
+     public.mkdir(parents=True, exist_ok=True)
+     private.mkdir(parents=True, exist_ok=True)
+
+     # Create test and test without labels
+     test = read_csv(raw / "test_private_expanded.csv")
+     test_without_labels = test[["id", "comment_text"]]
+
+     # Copy over files
+     shutil.copy(raw / "test_private_expanded.csv", private / "test.csv")
+     shutil.copy(raw / "train.csv", public / "train.csv")
+     test_without_labels.to_csv(public / "test.csv", index=False)
+
+     # Create a sample submission file
+     submission_df = test_without_labels.copy()
+     submission_df = submission_df.drop(columns=["comment_text"])
+     submission_df["prediction"] = 0.0
+     submission_df.to_csv(public / "sample_submission.csv", index=False)
+
+     # Checks
+     assert test.shape == (
+         len(test),
+         45,
+     ), "test.csv should have 45 columns as per raw data"
+     assert test_without_labels.shape == (
+         len(test_without_labels),
+         2,
+     ), "test.csv should have 2 columns: id, comment_text"
+
+     assert submission_df.shape == (
+         len(test),
+         2,
+     ), "sample_submission.csv should have 2 columns: id, prediction"
+
+     # --- Part 2: New Validation Set Creation ---
+     # This block creates a new split from the original training data to form
+     # a new, smaller training set and a validation set. Outputs are saved
+     # to 'public_val' and 'private_val' directories.
+
+     # Define and create the new parallel directories for the validation split
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+     public_val.mkdir(parents=True, exist_ok=True)
+     private_val.mkdir(parents=True, exist_ok=True)
+
+     # Load the full original training data, which will be split
+     full_train_df = read_csv(raw / "train.csv")
+
+     # The size of the new validation set ('test_val') should be the same
+     # as the size of the original test set to replicate the split ratio.
+     test_set_size = len(test)
+
+     # Split the full training data into a new training and validation set.
+     # A fixed random_state is used to ensure the split is deterministic.
+     train_val, test_val = train_test_split(
+         full_train_df, test_size=test_set_size, random_state=42
+     )
+
+     # Create the public version of the validation set (without labels)
+     test_val_without_labels = test_val[["id", "comment_text"]]
+
+     # Save the new split data, mirroring the original file structure and names.
+     # private_val contains the ground truth for the new validation set
+     test_val.to_csv(private_val / "test.csv", index=False)
+
+     # public_val contains the new smaller training set and the unlabeled validation set
+     train_val.to_csv(public_val / "train.csv", index=False)
+     test_val_without_labels.to_csv(public_val / "test.csv", index=False)
+
+     # Create a sample submission file for the new validation set
+     submission_df_val = test_val_without_labels.copy()
+     submission_df_val = submission_df_val.drop(columns=["comment_text"])
+     submission_df_val["prediction"] = 0.0
+     submission_df_val.to_csv(public_val / "sample_submission.csv", index=False)
+
+     # Add checks for the new validation split dataframes
+     assert test_val.shape == (
+         len(test_val),
+         full_train_df.shape[1],
+     ), f"private_val/test.csv should have {full_train_df.shape[1]} columns"
+     assert test_val_without_labels.shape == (
+         len(test_val_without_labels),
+         2,
+     ), "public_val/test.csv should have 2 columns: id, comment_text"
+     assert submission_df_val.shape == (
+         len(test_val),
+         2,
+     ), "public_val/sample_submission.csv should have 2 columns: id, prediction"
+     assert len(train_val) + len(test_val) == len(
+         full_train_df
+     ), "Split should conserve all data from original train set"
@@ -0,0 +1,58 @@
+ import pandas as pd
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+ from .kuzushiji_metric import kuzushiji_f1
+
+
+ def prepare_for_metric(
+     submission: pd.DataFrame, answers: pd.DataFrame
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
+     """
+     No changes needed, since kuzushiji_metric.kuzushiji_f1 expects the submission and answers
+     exactly as they are provided. In this function we just check for invalid submissions.
+     """
+     if len(submission) != len(answers):
+         raise InvalidSubmissionError(
+             f"Submission has {len(submission)} rows, but expected {len(answers)} rows."
+         )
+     if not {"image_id", "labels"}.issubset(submission.columns):
+         raise InvalidSubmissionError("Submission must have columns 'image_id' and 'labels'.")
+
+     submission = submission.sort_values("image_id").reset_index(drop=True)
+     answers = answers.sort_values("image_id").reset_index(drop=True)
+
+     if not all(submission["image_id"] == answers["image_id"]):
+         raise InvalidSubmissionError("Submission image_id codes don't match solution.")
+
+     # Check that labels have the correct format
+     for pred in submission["labels"]:
+         # Skip empty predictions
+         if pd.isna(pred):
+             continue
+
+         # Pred is a string of the form "label1 X1 Y1 label2 X2 Y2 ..."
+         # where label must be a str and X, Y are floats
+         preds = pred.split(" ")
+         if len(preds) % 3 != 0:
+             raise InvalidSubmissionError(f"Malformed prediction string: {pred}")
+         # Check that the X and Y values can be converted to floats
+         if not all(isinstance(float(preds[i]), float) for i in range(1, len(preds), 3)) or not all(
+             isinstance(float(preds[i]), float) for i in range(2, len(preds), 3)
+         ):
+             raise InvalidSubmissionError(f"X and Y values must be floats: {pred}")
+
+     return submission, answers
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     """
+     Submissions are evaluated on a modified version of the F1 Score. To score a true positive,
+     you must provide center point coordinates that are within the ground truth bounding box and
+     a matching label. The ground truth bounding boxes are defined in the format {label X Y Width Height},
+     so if the ground truth label is U+003F 1 1 10 10 then a prediction of U+003F 3 3 would pass.
+
+     See https://www.kaggle.com/competitions/kuzushiji-recognition/overview for more details.
+     """
+     submission, answers = prepare_for_metric(submission, answers)
+     return float(kuzushiji_f1(sub=submission, solution=answers))
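For illustration (not package code): the submission layout that prepare_for_metric() accepts. Each labels cell is a space-separated run of "label X Y" triplets, and an empty (NaN) cell means no characters were predicted; the values below are made up.

import pandas as pd

submission = pd.DataFrame(
    {
        "image_id": ["image_0001", "image_0002"],
        # One "label X Y" triplet per predicted character; None/NaN for a page with no predictions.
        "labels": ["U+003F 3 3 U+306F 120 480", None],
    }
)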
@@ -0,0 +1,118 @@
+ # ADAPTED FROM: https://gist.github.com/SohierDane/a90ef46d79808fe3afc70c80bae45972
+ """
+ Python equivalent of the Kuzushiji competition metric (https://www.kaggle.com/c/kuzushiji-recognition/)
+ Kaggle's backend uses a C# implementation of the same metric. This version is
+ provided for convenience only; in the event of any discrepancies the C# implementation
+ is the master version.
+
+ Tested on Python 3.6 with numpy 1.16.4 and pandas 0.24.2.
+
+ Update 2024/06/05: Also tested on Python 3.12 with numpy 1.26.4 and pandas 2.2.2.
+ """
+
+
+ import multiprocessing
+
+ import numpy as np
+ import pandas as pd
+
+
+ def score_page(preds, truth):
+     """
+     Scores a single page.
+     Args:
+         preds: prediction string of labels and center points.
+         truth: ground truth string of labels and bounding boxes.
+     Returns:
+         True/false positive and false negative counts for the page
+     """
+     tp = 0
+     fp = 0
+     fn = 0
+
+     truth_indices = {"label": 0, "X": 1, "Y": 2, "Width": 3, "Height": 4}
+     preds_indices = {"label": 0, "X": 1, "Y": 2}
+
+     if pd.isna(truth) and pd.isna(preds):
+         return {"tp": tp, "fp": fp, "fn": fn}
+
+     if pd.isna(truth):
+         fp += len(preds.split(" ")) // len(preds_indices)
+         return {"tp": tp, "fp": fp, "fn": fn}
+
+     if pd.isna(preds):
+         fn += len(truth.split(" ")) // len(truth_indices)
+         return {"tp": tp, "fp": fp, "fn": fn}
+
+     truth = truth.split(" ")
+     if len(truth) % len(truth_indices) != 0:
+         raise ValueError("Malformed solution string")
+     truth_label = np.array(truth[truth_indices["label"] :: len(truth_indices)])
+     truth_xmin = np.array(truth[truth_indices["X"] :: len(truth_indices)]).astype(float)
+     truth_ymin = np.array(truth[truth_indices["Y"] :: len(truth_indices)]).astype(float)
+     truth_xmax = truth_xmin + np.array(truth[truth_indices["Width"] :: len(truth_indices)]).astype(
+         float
+     )
+     truth_ymax = truth_ymin + np.array(truth[truth_indices["Height"] :: len(truth_indices)]).astype(
+         float
+     )
+
+     preds = preds.split(" ")
+     if len(preds) % len(preds_indices) != 0:
+         raise ValueError("Malformed prediction string")
+     preds_label = np.array(preds[preds_indices["label"] :: len(preds_indices)])
+     preds_x = np.array(preds[preds_indices["X"] :: len(preds_indices)]).astype(float)
+     preds_y = np.array(preds[preds_indices["Y"] :: len(preds_indices)]).astype(float)
+     preds_unused = np.ones(len(preds_label)).astype(bool)
+
+     for xmin, xmax, ymin, ymax, label in zip(
+         truth_xmin, truth_xmax, truth_ymin, truth_ymax, truth_label
+     ):
+         # Matching = point inside box & character same & prediction not already used
+         matching = (
+             (xmin < preds_x)
+             & (xmax > preds_x)
+             & (ymin < preds_y)
+             & (ymax > preds_y)
+             & (preds_label == label)
+             & preds_unused
+         )
+         if matching.sum() == 0:
+             fn += 1
+         else:
+             tp += 1
+             preds_unused[np.argmax(matching)] = False
+     fp += preds_unused.sum()
+     return {"tp": tp, "fp": fp, "fn": fn}
+
+
+ def kuzushiji_f1(sub, solution):
+     """
+     Calculates the competition metric.
+     Args:
+         sub: submissions, as a Pandas dataframe
+         solution: solution, as a Pandas dataframe
+     Returns:
+         f1 score
+     """
+     if not all(sub["image_id"].values == solution["image_id"].values):
+         raise ValueError("Submission image id codes don't match solution")
+
+     pool = multiprocessing.Pool()
+     results = pool.starmap(score_page, zip(sub["labels"].values, solution["labels"].values))
+     pool.close()
+     pool.join()
+
+     tp = sum([x["tp"] for x in results])
+     fp = sum([x["fp"] for x in results])
+     fn = sum([x["fn"] for x in results])
+
+     if (tp + fp) == 0 or (tp + fn) == 0:
+         return 0
+     precision = tp / (tp + fp)
+     recall = tp / (tp + fn)
+     if precision > 0 and recall > 0:
+         f1 = (2 * precision * recall) / (precision + recall)
+     else:
+         f1 = 0
+     return f1
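For illustration (not package code): a quick check of the matching rule, reusing the example from the grade.py docstring above (a U+003F box at X=1, Y=1, Width=10, Height=10 matched by a center-point prediction at (3, 3)). The import path is an assumption; adjust it to wherever kuzushiji_metric.py lives.

from kuzushiji_metric import score_page  # hypothetical import path

truth = "U+003F 1 1 10 10"  # label X Y Width Height
preds = "U+003F 3 3"  # label X Y (a center point inside the box above)

counts = score_page(preds, truth)
assert counts["tp"] == 1 and counts["fp"] == 0 and counts["fn"] == 0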