dslighting 1.7.1__py3-none-any.whl → 1.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/METADATA +3 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,62 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from tqdm.auto import tqdm
4
+
5
+ from mlebench.competitions.utils import rles_to_masks
6
+ from mlebench.grade_helpers import InvalidSubmissionError
7
+ from mlebench.metrics import dice_coefficient
8
+
9
+
10
def prepare_for_metric(
    submission: pd.DataFrame, answers: pd.DataFrame
) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """
    Validate a submission against the answers and decode both into masks.

    Both frames are sorted by "id" so that their rows line up, missing RLE
    cells are replaced by empty strings, and the RLE strings are decoded with
    `rles_to_masks` using the per-image height and width stored in `answers`.
    The frames passed in by the caller are not modified.

    Args:
        submission: Must contain the columns "id" and "predicted".
        answers: Must contain the columns "id", "encoding", "width_pixels"
            and "height_pixels".

    Returns:
        A `(predicted_masks, true_masks)` pair of lists, both ordered by "id".

    Raises:
        InvalidSubmissionError: If the submission lacks a required column, has
            a different number of rows than the answers, or covers a different
            set of ids.
        AssertionError: If the answers lack a required column.
    """
    id_col = "id"

    if id_col not in submission.columns:
        raise InvalidSubmissionError(f"Submission is missing the '{id_col}' column")
    if "predicted" not in submission.columns:
        raise InvalidSubmissionError("Submission is missing the 'predicted' column")

    # The answers are produced by our own preparation step, so a malformed
    # answers frame is an internal error rather than an invalid submission.
    assert id_col in answers.columns, f"Answers is missing the '{id_col}' column"
    assert "encoding" in answers.columns, "Answers is missing the 'encoding' column"
    assert "width_pixels" in answers.columns, "Answers is missing the 'width_pixels' column"
    assert "height_pixels" in answers.columns, "Answers is missing the 'height_pixels' column"

    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Submission has {len(submission)} rows while answers has {len(answers)} rows"
        )

    if set(submission[id_col]) != set(answers[id_col]):
        raise InvalidSubmissionError(f"Submission and answers have different {id_col} values")

    # Sorting returns new frames, so the column assignments below never touch
    # the caller's data, and row i of each frame refers to the same image.
    submission = submission.sort_values(id_col).reset_index(drop=True)
    answers = answers.sort_values(id_col).reset_index(drop=True)

    # pandas reads empty cells as nan, we mark them as empty RLE strings.
    # This has to be done for the ground truth as well as for the predictions.
    submission["predicted"] = submission["predicted"].fillna("")
    answers["encoding"] = answers["encoding"].fillna("")

    image_heights = answers["height_pixels"].tolist()
    image_widths = answers["width_pixels"].tolist()

    predicted_masks = rles_to_masks(submission["predicted"].to_list(), image_heights, image_widths)
    true_masks = rles_to_masks(answers["encoding"].to_list(), image_heights, image_widths)

    return predicted_masks, true_masks
47
+
48
+
49
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """
    Score a submission as the mean Dice coefficient over all images.

    The submission and answers are first turned into aligned lists of masks by
    `prepare_for_metric`; each predicted/true pair is then scored with
    `dice_coefficient` (called with `both_empty_value=1.0`, presumably the
    score assigned when both masks are empty) and the scores are averaged.
    """
    predicted_masks, true_masks = prepare_for_metric(submission, answers)

    # Wrap the paired masks in a progress bar; the total is passed explicitly
    # because `zip` has no length.
    mask_pairs = tqdm(zip(predicted_masks, true_masks), total=len(predicted_masks))

    per_image_scores = []
    for predicted_mask, true_mask in mask_pairs:
        score = dice_coefficient(predicted_mask, true_mask, both_empty_value=1.0)
        per_image_scores.append(score)

    return np.mean(per_image_scores)
@@ -0,0 +1,108 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ from sklearn.model_selection import train_test_split
5
+ from tqdm.auto import tqdm
6
+
7
+ from mlebench.utils import read_csv
8
+
9
+
10
def prepare(raw: Path, public: Path, private: Path):
    """
    Build the public/private split for this competition from the raw data.

    The raw training table is split into 12 train and 3 test rows
    (`random_state=0`). Written outputs:

    - public: `train.csv`, `HuBMAP-20-dataset_information.csv`,
      `sample_submission.csv` (also copied into `test/`), and the per-image
      tiff/json files of each split under `train/` and `test/`.
    - private: `test.csv` (the test rows plus `width_pixels` and
      `height_pixels`, for grading) and `gold_submission.csv`.

    Args:
        raw: directory holding the raw competition files.
        public: output directory for the files visible to participants.
        private: output directory for the held-out answers.

    Raises:
        AssertionError: if any of the sanity checks at the end fails.
    """
    old_train = read_csv(raw / "train.csv")
    old_dataset_info = read_csv(raw / "HuBMAP-20-dataset_information.csv")

    new_train, new_test = train_test_split(old_train, train_size=12, test_size=3, random_state=0)
    # dataset info doesn't have an id column, so add one so that we can filter out old test.
    # regex=False: ".tiff" is a literal suffix. Interpreted as a regex the "." would match
    # any character, and whether `str.replace` uses regex by default depends on the
    # pandas version, so be explicit.
    old_dataset_info["id"] = old_dataset_info["image_file"].str.replace(
        ".tiff", "", regex=False
    )
    dataset_info = old_dataset_info[old_dataset_info["id"].isin(old_train["id"])]
    # put height and width in new_test, for grading
    new_test = new_test.merge(dataset_info[["id", "width_pixels", "height_pixels"]], on="id")
    dataset_info = dataset_info.drop(columns=["id"], inplace=False)

    dataset_info.to_csv(public / "HuBMAP-20-dataset_information.csv", index=False)
    new_train.to_csv(public / "train.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)

    sample_submission = new_test[["id"]].copy()
    sample_submission["predicted"] = ""
    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    # basically the same as new_test but with a different column name
    gold_submission = sample_submission.copy()
    gold_submission["predicted"] = new_test["encoding"]
    gold_submission.to_csv(private / "gold_submission.csv", index=False)

    # each image id owns three files: the tiff and two json files
    per_image_suffixes = (".tiff", ".json", "-anatomical-structure.json")
    for split_name, split_df in (("train", new_train), ("test", new_test)):
        split_dir = public / split_name
        split_dir.mkdir(parents=True, exist_ok=True)
        for image_id in tqdm(split_df["id"], desc=f"Copying {split_name} images"):
            for suffix in per_image_suffixes:
                # the new test images are also taken from the raw train folder
                shutil.copy(
                    raw / "train" / f"{image_id}{suffix}",
                    split_dir / f"{image_id}{suffix}",
                )

    # for some reason sample_submission.csv is also in test/
    shutil.copy(public / "sample_submission.csv", public / "test" / "sample_submission.csv")

    # Checks
    assert len(new_train) + len(new_test) == len(
        old_train
    ), "Length of new_train and new_test should equal length of old_train"

    assert new_train.columns.to_list() == [
        "id",
        "encoding",
    ], "Public train set should have 2 columns, called 'id' and 'encoding'"
    assert new_test.columns.to_list() == [
        "id",
        "encoding",
        "width_pixels",
        "height_pixels",
    ], (
        "Private test set should have 4 columns, called 'id', 'encoding', "
        "'width_pixels' and 'height_pixels'"
    )

    assert len(sample_submission) == len(new_test), "Sample submission length should match test set"
    assert sample_submission.columns.to_list() == [
        "id",
        "predicted",
    ], "Sample submissions should have two columns, 'id' and 'predicted'"

    assert len(gold_submission) == len(new_test), "Gold submission length should match test set"
    assert gold_submission.columns.to_list() == [
        "id",
        "predicted",
    ], "Gold submissions should have two columns, 'id' and 'predicted'"

    assert gold_submission["predicted"].equals(
        new_test["encoding"]
    ), "Gold submission should match private test set"

    # assert no overlap in ids between train and test
    assert set(new_train["id"]).isdisjoint(
        set(new_test["id"])
    ), "Train and test ids should not overlap"

    # check that the images are copied correctly
    assert len(list((public / "train").glob("*.tiff"))) == len(
        new_train
    ), "Missing train tiff files"
    assert len(list((public / "train").glob("*-anatomical-structure.json"))) == len(
        new_train
    ), "Missing train structure json files"
    assert (
        len(list((public / "train").glob("*.json"))) == len(new_train) * 2
    ), "Missing train json files"

    assert len(list((public / "test").glob("*.tiff"))) == len(new_test), "Missing test tiff files"
    assert len(list((public / "test").glob("*-anatomical-structure.json"))) == len(
        new_test
    ), "Missing test structure json files"
    # same two-json-files-per-image check as for the train folder
    assert (
        len(list((public / "test").glob("*.json"))) == len(new_test) * 2
    ), "Missing test json files"
@@ -0,0 +1,153 @@
1
+ import shutil
2
+ from pathlib import Path
3
+ import pandas as pd
4
+
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm.auto import tqdm
7
+
8
+ from mlebench.utils import read_csv
9
+
10
+
11
def _create_split_files(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    full_dataset_info: pd.DataFrame,
    raw_path: Path,
    public_path: Path,
    private_path: Path,
):
    """
    Write every file that makes up one train/test split.

    Used for both the main split and the validation split so that the two
    sets of output directories get the same layout: CSV tables, a sample
    submission, a gold submission, and the per-image files copied from
    `raw_path / "train"`. Ends with sanity checks on what was written.

    Args:
        train_df: rows of the training side of the split.
        test_df: rows of the test side of the split.
        full_dataset_info: dataset information table, including an "id" column.
        raw_path: directory holding the raw competition files.
        public_path: output directory for the public files.
        private_path: output directory for the private files.

    Raises:
        AssertionError: if any of the sanity checks fails.
    """
    train_dir = public_path / "train"
    test_dir = public_path / "test"
    for out_dir in (public_path, private_path, train_dir, test_dir):
        out_dir.mkdir(parents=True, exist_ok=True)

    # Tabular files; the dataset info is written without its "id" column
    public_info = full_dataset_info.drop(columns=["id"])
    public_info.to_csv(public_path / "HuBMAP-20-dataset_information.csv", index=False)
    train_df.to_csv(public_path / "train.csv", index=False)

    # The private test table additionally carries image width and height, for grading
    size_columns = full_dataset_info[["id", "width_pixels", "height_pixels"]]
    private_test_df = test_df.merge(size_columns, on="id")
    private_test_df.to_csv(private_path / "test.csv", index=False)

    sample_csv = public_path / "sample_submission.csv"
    sample_submission = private_test_df[["id"]].copy()
    sample_submission["predicted"] = ""
    sample_submission.to_csv(sample_csv, index=False)
    # the sample submission is also placed inside test/
    shutil.copy(sample_csv, test_dir / "sample_submission.csv")

    # the gold submission is the private test table under the submission column name
    gold_submission = sample_submission.copy()
    gold_submission["predicted"] = private_test_df["encoding"]
    gold_submission.to_csv(private_path / "gold_submission.csv", index=False)

    # Image files: three files per image id, always read from the raw train folder
    raw_images = raw_path / "train"
    per_image_suffixes = (".tiff", ".json", "-anatomical-structure.json")
    copy_jobs = (
        (train_df["id"], "train", train_dir),
        (private_test_df["id"], "test", test_dir),
    )
    for image_ids, split_name, dest_dir in copy_jobs:
        progress = tqdm(image_ids, desc=f"Copying {split_name} images to {public_path.name}")
        for image_id in progress:
            for suffix in per_image_suffixes:
                filename = f"{image_id}{suffix}"
                shutil.copy(raw_images / filename, dest_dir / filename)

    def n_files(folder: Path, pattern: str) -> int:
        # number of entries in `folder` matching the glob `pattern`
        return len(list(folder.glob(pattern)))

    n_train = len(train_df)
    n_test = len(private_test_df)
    submission_columns = ["id", "predicted"]

    # Checks
    assert train_df.columns.to_list() == [
        "id",
        "encoding",
    ], f"Public train set in {public_path.name} should have 2 columns, called 'id' and 'encoding'"
    assert private_test_df.columns.to_list() == [
        "id",
        "encoding",
        "width_pixels",
        "height_pixels",
    ], f"Private test set in {private_path.name} should have 4 columns"

    assert len(sample_submission) == n_test, "Sample submission length should match test set"
    assert (
        sample_submission.columns.to_list() == submission_columns
    ), "Sample submissions should have two columns, 'id' and 'predicted'"

    assert len(gold_submission) == n_test, "Gold submission length should match test set"
    assert (
        gold_submission.columns.to_list() == submission_columns
    ), "Gold submissions should have two columns, 'id' and 'predicted'"

    assert gold_submission["predicted"].equals(
        private_test_df["encoding"]
    ), "Gold submission should match private test set"

    assert set(train_df["id"]).isdisjoint(
        set(private_test_df["id"])
    ), "Train and test ids should not overlap"

    assert (
        n_files(train_dir, "*.tiff") == n_train
    ), f"Missing train tiff files in {public_path.name}"
    assert (
        n_files(train_dir, "*-anatomical-structure.json") == n_train
    ), f"Missing train structure json files in {public_path.name}"
    assert (
        n_files(train_dir, "*.json") == n_train * 2
    ), f"Missing train json files in {public_path.name}"

    assert (
        n_files(test_dir, "*.tiff") == n_test
    ), f"Missing test tiff files in {public_path.name}"
    assert (
        n_files(test_dir, "*-anatomical-structure.json") == n_test
    ), f"Missing test structure json files in {public_path.name}"
116
+
117
+
118
def prepare(raw: Path, public: Path, private: Path):
    """
    Create the main split and an additional validation split.

    The main split (12 train / 3 test rows, `random_state=0`) is written to
    `public` and `private`. The 12 training rows are then split again
    (9 train / 3 test, `random_state=0`) and written to the sibling
    directories `public_val` and `private_val`, using the same file layout.

    Args:
        raw: directory holding the raw competition files.
        public: output directory for the public files of the main split.
        private: output directory for the private files of the main split.

    Raises:
        AssertionError: if the main split does not cover the whole raw
            training table, or if a check in `_create_split_files` fails.
    """
    old_train = read_csv(raw / "train.csv")
    old_dataset_info = read_csv(raw / "HuBMAP-20-dataset_information.csv")

    # --- First Split: Create the main train and test sets ---
    # Same sizes and seed as the original script so public/private are not changed.
    new_train, new_test = train_test_split(old_train, train_size=12, test_size=3, random_state=0)

    # Sanity check from the original script, done before anything is written
    # so that a bad split fails fast instead of after all files were copied.
    assert len(new_train) + len(new_test) == len(
        old_train
    ), "Length of new_train and new_test should equal length of old_train"

    # Process dataset_info once. This info is based on the full original train set
    # and will be used for both the main and validation splits.
    # regex=False: ".tiff" is a literal suffix. Interpreted as a regex the "." would match
    # any character, and whether `str.replace` uses regex by default depends on the
    # pandas version, so be explicit.
    old_dataset_info["id"] = old_dataset_info["image_file"].str.replace(
        ".tiff", "", regex=False
    )
    dataset_info = old_dataset_info[old_dataset_info["id"].isin(old_train["id"])]

    # Create the original public and private directories and their contents.
    _create_split_files(new_train, new_test, dataset_info, raw, public, private)

    # --- Second Split: Create a validation set from the main train set ---
    # This creates a new, smaller training set and a validation set.
    # The outputs are saved to parallel 'public_val' and 'private_val' directories.
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # Split new_train (12 samples) into train_val (9) and test_val (3).
    # This replicates the test set size (3) and random_state (0) from the first split.
    train_val, test_val = train_test_split(new_train, train_size=9, test_size=3, random_state=0)

    # Create the new validation directories and their contents.
    # The file structure and names inside these directories mirror the original ones.
    _create_split_files(train_val, test_val, dataset_info, raw, public_val, private_val)
@@ -0,0 +1,111 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from mlebench.grade_helpers import InvalidSubmissionError
5
+
6
+
7
def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> dict:
    """
    Validate a submission against the answers and extract the metric inputs.

    Both frames must have exactly the columns 'azimuth', 'zenith' and
    'event_id', with numeric angle columns that contain neither NaN nor
    infinite values, and must cover the same set of event ids. Both frames
    are sorted by 'event_id' so that the returned arrays line up row by row.

    Args:
        submission: the predictions to grade.
        answers: the ground-truth table.

    Returns:
        A dict with the numpy arrays 'az_true', 'zen_true', 'az_pred' and
        'zen_pred', i.e. the keyword arguments of `angular_dist_score`.

    Raises:
        InvalidSubmissionError: if the submission is malformed or does not
            match the answers in length or event ids.
        AssertionError: if the answers themselves are malformed.
    """
    # submission
    if set(submission.columns) != {"azimuth", "zenith", "event_id"}:
        raise InvalidSubmissionError(
            "Submission must contain columns 'azimuth','zenith' and 'event_id'"
        )

    if not np.issubdtype(submission["azimuth"].dtype, np.number):
        raise InvalidSubmissionError("Azimuth must be a number")
    if not np.issubdtype(submission["zenith"].dtype, np.number):
        raise InvalidSubmissionError("Zenith must be a number")
    # NaN has to be checked before finiteness: np.isfinite(nan) is False, so the
    # other order would report NaN as "infinite" and never reach the NaN message.
    if submission["azimuth"].isnull().any():
        raise InvalidSubmissionError("Azimuth must not be NaN")
    if submission["zenith"].isnull().any():
        raise InvalidSubmissionError("Zenith must not be NaN")
    if not np.all(np.isfinite(submission["azimuth"])):
        raise InvalidSubmissionError("Azimuth must not be infinite")
    if not np.all(np.isfinite(submission["zenith"])):
        raise InvalidSubmissionError("Zenith must not be infinite")

    # answers
    assert set(answers.columns) == {
        "azimuth",
        "zenith",
        "event_id",
    }, "Answers must contain columns 'azimuth','zenith' and 'event_id'"
    assert np.issubdtype(answers["azimuth"].dtype, np.number), "Azimuth must be a number"
    assert np.issubdtype(answers["zenith"].dtype, np.number), "Zenith must be a number"
    # same ordering as above: NaN first, then infinity
    assert not answers["azimuth"].isnull().any(), "Azimuth must not be NaN"
    assert not answers["zenith"].isnull().any(), "Zenith must not be NaN"
    assert np.all(np.isfinite(answers["azimuth"])), "Azimuth must not be infinite"
    assert np.all(np.isfinite(answers["zenith"])), "Zenith must not be infinite"

    # both
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission and answers must have the same length")
    if set(submission["event_id"]) != set(answers["event_id"]):
        raise InvalidSubmissionError("Submission and answers must have the same event_ids")

    # sort values by id so that the order is correct
    submission = submission.sort_values("event_id")
    answers = answers.sort_values("event_id")

    return {
        "az_true": answers["azimuth"].to_numpy(),
        "zen_true": answers["zenith"].to_numpy(),
        "az_pred": submission["azimuth"].to_numpy(),
        "zen_pred": submission["zenith"].to_numpy(),
    }
56
+
57
+
58
+ # courtesy of notebook from competition host: https://www.kaggle.com/code/sohier/mean-angular-error
59
def angular_dist_score(az_true, zen_true, az_pred, zen_pred):
    """
    Mean angular distance between true and predicted directions.

    Every (azimuth, zenith) pair describes a cartesian unit vector
    (x = sin(zen) * cos(az), y = sin(zen) * sin(az), z = cos(zen)). The scalar
    product of the true and the predicted vector equals the cosine of the
    angle between them, so the angle itself is recovered with arccos.

    Parameters:
    -----------

    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian

    Returns:
    --------

    dist : float
        mean over the angular distance(s) in radian
    """
    # trigonometric terms of the true direction ...
    sin_az_t, cos_az_t = np.sin(az_true), np.cos(az_true)
    sin_zen_t, cos_zen_t = np.sin(zen_true), np.cos(zen_true)

    # ... and of the predicted direction
    sin_az_p, cos_az_p = np.sin(az_pred), np.cos(az_pred)
    sin_zen_p, cos_zen_p = np.sin(zen_pred), np.cos(zen_pred)

    # scalar product of the two unit vectors, with the x and y terms factored
    horizontal = cos_az_t * cos_az_p + sin_az_t * sin_az_p
    cos_angle = sin_zen_t * sin_zen_p * horizontal + (cos_zen_t * cos_zen_p)

    # In exact arithmetic the product lies in [-1, 1]; clip away the numerical
    # noise of sin/cos so that arccos never receives an out-of-range value.
    cos_angle = np.clip(cos_angle, -1, 1)

    # back to angles (in radian), then average
    angles = np.abs(np.arccos(cos_angle))
    return np.mean(angles)
106
+
107
+
108
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """
    Grade a submission with the mean angular distance to the answers.

    `prepare_for_metric` validates both frames and returns the keyword
    arguments that `angular_dist_score` is then called with.
    """
    return angular_dist_score(**prepare_for_metric(submission, answers))
@@ -0,0 +1,127 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm.auto import tqdm
7
+
8
+ from mlebench.utils import get_logger
9
+
10
+ logger = get_logger(__name__)
11
+
12
+
13
def prepare(raw: Path, public: Path, private: Path):
    """
    Build the public/private split for this competition from the raw data.

    The raw training metadata is split by `batch_id` (10% of the batches go
    to the new test set, `random_state=0`). Written outputs:

    - public: `train_meta.parquet`, `test_meta.parquet` (the test metadata
      without the 'azimuth' and 'zenith' labels), `sample_submission.csv`,
      `sensor_geometry.csv`, and the raw batch parquet files sorted into
      `train/` and `test/`.
    - private: `test.csv` with 'event_id', 'azimuth' and 'zenith'.

    Args:
        raw: directory holding the raw competition files.
        public: output directory for the files visible to participants.
        private: output directory for the held-out answers.

    Raises:
        AssertionError: if any of the sanity checks at the end fails.
    """
    DEV = False

    # 66 instead of 660 batches when in dev mode; None keeps every batch
    batch_cutoff = 66 if DEV else None

    logger.info("Loading raw metadata")
    old_train = pd.read_parquet(raw / "train_meta.parquet")

    # this has batch_id and event_id, we will do a test-train split based on batch_id
    # each batch id is equally sized so we can proceed with a simple split
    batch_ids = old_train["batch_id"].unique()[:batch_cutoff]

    logger.info("Splitting batches into train and test")
    train_batch_ids, test_batch_ids = train_test_split(batch_ids, test_size=0.1, random_state=0)

    # new column tracking the split; rows of batches beyond the cutoff stay None
    old_train["split"] = None
    old_train.loc[old_train["batch_id"].isin(train_batch_ids), "split"] = "train"
    old_train.loc[old_train["batch_id"].isin(test_batch_ids), "split"] = "test"

    new_train = (
        old_train[old_train["split"] == "train"]
        .drop(columns=["split"])
        .reset_index(drop=True)
        .copy()
    )
    new_test = (
        old_train[old_train["split"] == "test"]
        .drop(columns=["split"])
        .reset_index(drop=True)
        .copy()
    )

    logger.info("Creating label-less test and sample submission")
    new_test_without_labels = new_test.drop(columns=["azimuth", "zenith"])

    # match sample submission format
    new_test = new_test[["event_id", "azimuth", "zenith"]]

    # copy the format as the private test and fill dummy values like kaggle.com
    sample_submission = new_test.copy()
    sample_submission["azimuth"] = 1
    sample_submission["zenith"] = 1

    logger.info("Saving files")
    # save the prepared tables
    new_train.to_parquet(public / "train_meta.parquet", index=False, engine="fastparquet")
    new_test_without_labels.to_parquet(
        public / "test_meta.parquet", index=False, engine="fastparquet"
    )
    sample_submission.to_csv(public / "sample_submission.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)

    logger.info("Copying remaining files")

    # sensor_geometry can be copied as is
    shutil.copy(raw / "sensor_geometry.csv", public / "sensor_geometry.csv")

    # copy the raw train files to train and test folders respectively
    train_batch_ids = set(train_batch_ids)
    train_dest = public / "train"
    train_dest.mkdir(exist_ok=True, parents=True)
    test_batch_ids = set(test_batch_ids)
    test_dest = public / "test"
    test_dest.mkdir(exist_ok=True, parents=True)
    for batch_file in tqdm(
        sorted((raw / "train").glob("*.parquet")), desc="Copying batch parquet files"
    ):
        # i.e. go from e.g. 'train_000.parquet' to '000' to 0
        batch_id = int(batch_file.stem.split("_")[-1])
        if batch_id in train_batch_ids:
            shutil.copy(batch_file, train_dest / batch_file.name)
        elif batch_id in test_batch_ids:
            shutil.copy(batch_file, test_dest / batch_file.name)
        # batches in neither set (beyond the dev cutoff) are not copied

    logger.info("Running checks")
    # Asserts
    assert len(list(public.glob("train/*.parquet"))) == len(
        train_batch_ids
    ), "Not all train batches copied"
    assert len(list(public.glob("test/*.parquet"))) == len(
        test_batch_ids
    ), "Not all test batches copied"
    assert len(train_batch_ids) + len(test_batch_ids) == len(
        batch_ids
    ), "Something went wrong with splitting the batches"

    assert len(new_train) + len(new_test) == len(
        old_train[old_train["split"].notnull()]
    ), "Expected train + test to equal the original data"
    assert len(sample_submission) == len(
        new_test
    ), "Length mismatch between private test and sample submission"

    # The helper "split" column is not part of any output, so it is excluded from
    # the expected columns; the messages report exactly what is being compared.
    expected_train_columns = old_train.drop(columns=["split"]).columns
    expected_test_columns = old_train.drop(columns=["azimuth", "zenith", "split"]).columns

    assert sample_submission.columns.equals(
        new_test.columns
    ), "Column mismatch between sample_submission and private test"
    assert new_train.columns.equals(
        expected_train_columns
    ), f"Unexpected columns in train, expected {expected_train_columns}, got {new_train.columns}"
    assert new_test_without_labels.columns.equals(
        expected_test_columns
    ), f"Unexpected columns in test, expected {expected_test_columns}, got {new_test_without_labels.columns}"

    assert set(new_train["event_id"]).isdisjoint(
        set(new_test["event_id"])
    ), "Event ids overlap between train and test"
    assert set(new_test["event_id"]) == set(
        sample_submission["event_id"]
    ), "Event ids mismatch between test and sample submission"
    logger.info("Done.")