dslighting 1.7.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,144 @@
1
+ import json
2
+ import shutil
3
+ from pathlib import Path
4
+ from typing import List, Dict, Any
5
+
6
+ import pandas as pd
7
+ from sklearn.model_selection import train_test_split
8
+
9
+
10
def _create_split(
    data_to_split: List[Dict],
    test_size: float,
    test_fields: List[str],
    public_path: Path,
    private_path: Path,
    random_state: int,
) -> List[Dict]:
    """
    Split *data_to_split* into train/test and write every artifact the
    competition layout expects.

    Written to *public_path*: ``train.json``, ``test.json`` (labels stripped),
    zipped copies of both, and ``sampleSubmission.csv`` (all predictions 0).
    Written to *private_path*: ``test.csv`` holding the true test labels.

    Args:
        data_to_split: The list of data samples to be split.
        test_size: The proportion of the dataset to allocate to the test split.
        test_fields: The list of fields to keep in the public (unlabeled) test set.
        public_path: The directory for public-facing files.
        private_path: The directory for private files (test set labels).
        random_state: The seed for the random number generator.

    Returns:
        The training portion of the split, usable for subsequent splits.
    """
    # Make sure both output directories exist before writing anything.
    for out_dir in (public_path, private_path):
        out_dir.mkdir(parents=True, exist_ok=True)

    train_set, test_set = train_test_split(
        data_to_split, test_size=test_size, random_state=random_state
    )

    # Public test set keeps only the whitelisted fields (i.e. labels removed).
    test_set_without_labels = [
        {field: sample[field] for field in test_fields} for sample in test_set
    ]

    # Persist the public train/test splits as JSON, plus zipped copies.
    (public_path / "train.json").write_text(json.dumps(train_set, indent=4))
    (public_path / "test.json").write_text(json.dumps(test_set_without_labels, indent=4))
    shutil.make_archive(public_path / "train", "zip", public_path, "train.json")
    shutil.make_archive(public_path / "test", "zip", public_path, "test.json")

    # Private answers: one row per held-out sample with its true label,
    # coerced to int so the CSV contains 0/1 rather than True/False.
    answers = pd.DataFrame(
        [
            {
                "request_id": sample["request_id"],
                "requester_received_pizza": int(sample["requester_received_pizza"]),
            }
            for sample in test_set
        ]
    )
    answers.to_csv(private_path / "test.csv", index=False)

    # Public sample submission: identical ids, every prediction zeroed out.
    sample_submission = answers.copy()
    sample_submission["requester_received_pizza"] = 0
    sample_submission.to_csv(public_path / "sampleSubmission.csv", index=False)

    # Consistency checks on the artifacts just produced.
    assert len(train_set) + len(test_set) == len(
        data_to_split
    ), f"Expected {len(data_to_split)} total samples, but got {len(train_set)} in train and {len(test_set)} in test"
    assert len(test_set) == len(
        test_set_without_labels
    ), "Test set and unlabeled test set must have the same length"
    assert len(answers) == len(test_set), "Answers must have the same length as the test set"
    assert len(sample_submission) == len(
        test_set
    ), "Sample submission must have the same length as the test set"
    assert set(answers.columns) == set(
        ["request_id", "requester_received_pizza"]
    ), "Answers must have 'request_id' and 'requester_received_pizza' columns"
    assert set(sample_submission.columns) == set(
        ["request_id", "requester_received_pizza"]
    ), "Sample submission must have 'request_id' and 'requester_received_pizza' columns"

    return train_set
92
+
93
+
94
def prepare(raw: Path, public: Path, private: Path):
    """
    Split the raw competition data into public and private datasets with
    appropriate test/train splits, then create a second, parallel validation
    split from the first training set.

    Args:
        raw: Directory containing the original `train.json` and `test.json`.
        public: Output directory for public files (train set, unlabeled test set).
        private: Output directory for private files (test set labels).
    """

    # Load the original competition splits.
    with open(raw / "train.json") as f:
        old_train = json.load(f)
    with open(raw / "test.json") as f:
        old_test = json.load(f)

    # Reproduce the original train/test proportions in the new split.
    test_ratio = len(old_test) / (len(old_train) + len(old_test))

    # Fix: `list(d)` yields the keys directly; the original
    # `list([key for key in d.keys()])` built a redundant intermediate list
    # (flake8-comprehensions C4xx). Behavior is identical.
    all_fields = list(old_train[0])
    assert all(set(all_fields) == set(sample) for sample in old_train)
    test_fields = list(old_test[0])
    assert all(set(test_fields) == set(sample) for sample in old_test)

    # --- Original Split ---
    # This split creates the primary `public` and `private` competition data.
    # The returned `new_train` set will be used for the subsequent validation split.
    new_train = _create_split(
        data_to_split=old_train,
        test_size=test_ratio,
        test_fields=test_fields,
        public_path=public,
        private_path=private,
        random_state=0,
    )

    # --- New Validation Split ---
    # Define new directories for the validation set, parallel to the original ones.
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # To keep the new test set (`test_val`) size consistent with the original
    # test set, adjust the ratio for the second split:
    # val_test_size = original_test_size / original_train_size
    val_test_size = test_ratio / (1.0 - test_ratio)

    # Create the validation split using the same logic, but on the `new_train`
    # data and saving to the new `_val` directories.
    _create_split(
        data_to_split=new_train,
        test_size=val_test_size,
        test_fields=test_fields,
        public_path=public_val,
        private_path=private_val,
        random_state=0,
    )
@@ -0,0 +1,11 @@
1
# Target columns for this catheter/line-placement classification task.
# Each entry is one binary prediction column expected in train.csv and in
# submissions, keyed by StudyInstanceUID.
# NOTE(review): only 9 labels are listed here; the original RANZCR CLiP
# competition defines 11 targets (additionally "CVC - Normal" and
# "Swan Ganz Catheter Present") — confirm the truncation is intentional.
CLASSES = [
    "ETT - Abnormal",
    "ETT - Borderline",
    "ETT - Normal",
    "NGT - Abnormal",
    "NGT - Borderline",
    "NGT - Incompletely Imaged",
    "NGT - Normal",
    "CVC - Abnormal",
    "CVC - Borderline",
]
@@ -0,0 +1,31 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.metrics import roc_auc_score
4
+
5
+ from mlebench.competitions.utils import prepare_for_auroc_metric
6
+ from mlebench.grade_helpers import InvalidSubmissionError
7
+
8
+ from .classes import CLASSES
9
+
10
+
11
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission as the unweighted mean of per-class ROC AUC.

    Args:
        submission: Predicted probabilities with one column per class in
            CLASSES, keyed by "StudyInstanceUID".
        answers: Ground-truth labels with the same columns.

    Returns:
        The mean ROC AUC over all classes in CLASSES.

    Raises:
        InvalidSubmissionError: If the submission is missing a class column
            or has the wrong number of rows.
    """
    for class_name in CLASSES:
        # A malformed answer set is a pipeline bug, so a hard assert is fine.
        assert class_name in answers.columns, f"Missing class {class_name} in answers."
        if class_name not in submission.columns:
            raise InvalidSubmissionError(f"Class {class_name} is not in the submission.")

    # A wrong row count is a defect of the *submission*, so raise
    # InvalidSubmissionError (consistent with the column check above) rather
    # than using `assert`, which signals an internal error and is stripped
    # under `python -O`.
    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            f"Expected {len(answers)} rows in submission, got {len(submission)}."
        )

    roc_auc_scores = []
    for class_name in CLASSES:
        roc_auc_inputs = prepare_for_auroc_metric(
            submission=submission,
            answers=answers,
            id_col="StudyInstanceUID",
            target_col=class_name,
        )
        roc_auc_scores.append(roc_auc_score(roc_auc_inputs["y_true"], roc_auc_inputs["y_score"]))

    # Unweighted average across classes.
    return float(np.mean(roc_auc_scores))
@@ -0,0 +1,53 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ from mlebench.utils import read_csv
7
+
8
+ from .classes import CLASSES
9
+
10
+
11
def prepare(raw: Path, public: Path, private: Path):
    """Split the raw training data into a public/private competition layout.

    Creates a 90/10 train/test split of `raw/train.csv`, copies the matching
    images into `public/train` and `public/test`, restricts the annotation
    table to the new training set, and writes the standard CSV files
    (`train.csv`, `train_annotations.csv`, `sample_submission.csv` under
    `public`; the held-out `test.csv` under `private`).
    """
    # Create train, test from train split
    old_train = read_csv(raw / "train.csv")
    new_train, new_test = train_test_split(old_train, test_size=0.1, random_state=0)

    # Keep only annotations whose study remains in the new training set.
    old_train_annotations = read_csv(raw / "train_annotations.csv")
    old_train_uids = old_train_annotations["StudyInstanceUID"]
    new_train_uids = new_train["StudyInstanceUID"]
    is_in_new_train = old_train_uids.isin(new_train_uids)

    new_train_annotations = old_train_annotations[is_in_new_train]

    # parents=True so this also works when `public`/`private` do not exist yet
    # (matches the behaviour of the parallel validation-split helper).
    (public / "train").mkdir(parents=True, exist_ok=True)
    (public / "test").mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    for file_id in new_train["StudyInstanceUID"]:
        shutil.copyfile(
            src=raw / "train" / f"{file_id}.jpg",
            dst=public / "train" / f"{file_id}.jpg",
        )

    for file_id in new_test["StudyInstanceUID"]:
        shutil.copyfile(
            src=raw / "train" / f"{file_id}.jpg",
            dst=public / "test" / f"{file_id}.jpg",
        )

    assert len(list(public.glob("train/*.jpg"))) == len(
        new_train
    ), f"Expected {len(new_train)} files in public train, got {len(list(public.glob('train/*.jpg')))}"
    assert len(list(public.glob("test/*.jpg"))) == len(
        new_test
    ), f"Expected {len(new_test)} files in public test, got {len(list(public.glob('test/*.jpg')))}"

    # Create a sample submission file. `.copy()` avoids pandas
    # chained-assignment (SettingWithCopyWarning / silently lost writes)
    # when zeroing the class columns of a slice of `new_test`.
    submission_df = new_test[["StudyInstanceUID"] + CLASSES].copy()
    submission_df[CLASSES] = 0

    # Copy over files
    new_train.to_csv(public / "train.csv", index=False)
    new_train_annotations.to_csv(public / "train_annotations.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)
    submission_df.to_csv(public / "sample_submission.csv", index=False)
@@ -0,0 +1,113 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ from mlebench.utils import read_csv
8
+
9
+ from .classes import CLASSES
10
+
11
+
12
def _create_split_files(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    all_annotations_df: pd.DataFrame,
    public_dir: Path,
    private_dir: Path,
    raw_image_dir: Path,
):
    """
    Populate one public/private directory pair for a single train/test split.

    Builds the directory layout, copies this split's images out of
    `raw_image_dir`, restricts the annotation table to the training studies,
    and writes the standard CSVs: `train.csv`, `train_annotations.csv` and an
    all-zero `sample_submission.csv` under `public_dir`, plus `test.csv`
    under `private_dir`.
    """
    # Build the expected directory layout up front.
    train_img_dir = public_dir / "train"
    test_img_dir = public_dir / "test"
    public_dir.mkdir(parents=True, exist_ok=True)
    private_dir.mkdir(parents=True, exist_ok=True)
    train_img_dir.mkdir(exist_ok=True)
    test_img_dir.mkdir(exist_ok=True)

    # Keep only annotations whose study belongs to this split's training set.
    keep = all_annotations_df["StudyInstanceUID"].isin(train_df["StudyInstanceUID"])
    split_train_annotations = all_annotations_df[keep]

    # Copy the images for both halves of the split (train first, then test).
    for frame, destination in ((train_df, train_img_dir), (test_df, test_img_dir)):
        for uid in frame["StudyInstanceUID"]:
            shutil.copyfile(
                src=raw_image_dir / f"{uid}.jpg",
                dst=destination / f"{uid}.jpg",
            )

    # Sanity-check that every expected image landed in place.
    n_train_copied = len(list(public_dir.glob("train/*.jpg")))
    n_test_copied = len(list(public_dir.glob("test/*.jpg")))
    assert n_train_copied == len(
        train_df
    ), f"Expected {len(train_df)} files in {public_dir}/train, got {n_train_copied}"
    assert n_test_copied == len(
        test_df
    ), f"Expected {len(test_df)} files in {public_dir}/test, got {n_test_copied}"

    # All-zero sample submission for this split's test set.
    submission_df = test_df[["StudyInstanceUID"] + CLASSES].copy()
    submission_df[CLASSES] = 0

    # Write every CSV under its standard filename.
    train_df.to_csv(public_dir / "train.csv", index=False)
    split_train_annotations.to_csv(public_dir / "train_annotations.csv", index=False)
    submission_df.to_csv(public_dir / "sample_submission.csv", index=False)
    test_df.to_csv(private_dir / "test.csv", index=False)
67
+
68
+
69
def prepare(raw: Path, public: Path, private: Path):
    """
    Build the main competition split plus a parallel validation split.

    The primary split (written to `public`/`private`) is byte-identical to
    the original preparation script's output. The resulting training set is
    then split a second time and written to sibling `public_val` /
    `private_val` directories.
    """
    # Read the raw tables a single time.
    raw_image_dir = raw / "train"
    old_train = read_csv(raw / "train.csv")
    old_train_annotations = read_csv(raw / "train_annotations.csv")

    # --- 1. Primary split: 90% train / 10% test, with the same seed as the
    # original script so the `public`/`private` contents are unchanged.
    new_train, new_test = train_test_split(old_train, test_size=0.1, random_state=0)
    _create_split_files(
        train_df=new_train,
        test_df=new_test,
        all_annotations_df=old_train_annotations,
        public_dir=public,
        private_dir=private,
        raw_image_dir=raw_image_dir,
    )

    # --- 2. Validation split, carved out of the primary training set.
    # The primary train set holds 90% of the data, so taking 1/9 of it makes
    # the validation test set ~10% of the original data — the same size as
    # the primary test set.
    train_val, test_val = train_test_split(new_train, test_size=1 / 9, random_state=0)
    _create_split_files(
        train_df=train_val,
        test_df=test_val,
        all_annotations_df=old_train_annotations,
        public_dir=public.parent / "public_val",
        private_dir=private.parent / "private_val",
        raw_image_dir=raw_image_dir,
    )
@@ -0,0 +1,124 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from mlebench.grade_helpers import InvalidSubmissionError
5
+
6
# Canonical order of the target columns: the patient-level label first, then
# the seven cervical vertebrae C1–C7. `prepare_for_metric` extracts both
# y_true and y_pred matrices in this order.
column_order = ["patient_overall", "C1", "C2", "C3", "C4", "C5", "C6", "C7"]
7
+
8
+
9
def unmelt_df(melted_df: pd.DataFrame, id_col: str, target_col: str) -> pd.DataFrame:
    """
    Pivot a long-format frame back to wide format.

    Each `id_col` value is expected to look like
    "<StudyInstanceUID>_<variable>"; the result has one row per
    StudyInstanceUID and one column per variable, filled from `target_col`.
    """
    wide = melted_df.copy()
    # Split on the *first* underscore only: variable names themselves can
    # contain underscores (e.g. "patient_overall").
    id_parts = wide[id_col].str.split("_", n=1, expand=True)
    wide["StudyInstanceUID"] = id_parts[0]
    wide["variable"] = id_parts[1]
    pivoted = wide.pivot(index="StudyInstanceUID", columns="variable", values=target_col)
    return pivoted.reset_index()
18
+
19
+
20
def binary_log_loss(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Element-wise binary cross-entropy with no reduction.

    Args:
        y_true: True binary labels, shape (n_samples, n_variables)
        y_pred: Predicted probabilities, shape (n_samples, n_variables)

    Returns:
        Binary log loss per sample and variable, shape
        (n_samples, n_variables)
    """
    # Clamp predictions away from exactly 0 and 1, where log() would be
    # undefined / infinite.
    eps = 1e-15
    clipped = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return -(positive_term + negative_term)
36
+
37
+
38
def custom_comp_weighted_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Competition-weighted binary log loss.

    Adapted from
    https://www.kaggle.com/competitions/rsna-2022-cervical-spine-fracture-detection/discussion/341854
    (numpy instead of torch, custom binary_log_loss, weights defined inline).

    Columns are assumed ordered [patient_overall, C1, ..., C7]; the overall
    label carries a 7x/14x weight versus 1x/2x for each vertebra.
    """
    # Per-column weights, keyed by label sign; see
    # https://www.kaggle.com/competitions/rsna-2022-cervical-spine-fracture-detection/discussion/340392
    negative_weights = np.array([7, 1, 1, 1, 1, 1, 1, 1], dtype=np.float32)
    positive_weights = np.array([14, 2, 2, 2, 2, 2, 2, 2], dtype=np.float32)

    # Pick the positive or negative weight per element from the true label.
    weights = y_true * positive_weights + (1 - y_true) * negative_weights

    # Unreduced (n_samples, n_variables) log loss, weighted element-wise.
    weighted = binary_log_loss(y_true, y_pred) * weights

    # Normalize each sample's summed loss by its summed weights, then
    # average over samples.
    per_sample = weighted.sum(axis=1) / weights.sum(axis=1)
    return np.mean(per_sample)
70
+
71
+
72
def prepare_for_metric(
    submission: pd.DataFrame, answers: pd.DataFrame
) -> tuple[np.ndarray, np.ndarray]:
    """
    Validate a melted submission against melted answers and return aligned
    (y_true, y_pred) matrices with columns in `column_order`, one row per
    StudyInstanceUID.

    Raises:
        InvalidSubmissionError: for defects attributable to the submission.
        AssertionError: for malformed answers (an internal pipeline bug).
    """
    id_col = "row_id"
    target_col = "fractured"

    # Problems with the submission are the user's fault.
    if id_col not in submission.columns:
        raise InvalidSubmissionError(f"Submission is missing the `{id_col}` column.")
    if target_col not in submission.columns:
        raise InvalidSubmissionError(f"Submission is missing the `{target_col}` column.")
    if not submission[target_col].between(0, 1).all():
        raise InvalidSubmissionError(f"Submission `{target_col}` values must be between 0 and 1.")
    if submission[target_col].isnull().any():
        raise InvalidSubmissionError(f"Submission `{target_col}` values must not be missing.")

    # Problems with the answers are ours.
    assert id_col in answers.columns, f"Answers are missing the `{id_col}` column."
    assert target_col in answers.columns, f"Answers are missing the `{target_col}` column."
    # Labels must be strictly binary and complete.
    assert answers[target_col].isin([0, 1]).all(), f"Answers `{target_col}` values must be 0 or 1."
    assert (
        not answers[target_col].isnull().any()
    ), f"Answers `{target_col}` values must not be missing."

    # Align row order on the id column, then require an exact id match.
    submission = submission.sort_values(by=id_col)
    answers = answers.sort_values(by=id_col)
    if (submission[id_col].values != answers[id_col].values).any():
        raise InvalidSubmissionError("Submission should contain same ids as answers.")

    # Validation complete: widen both frames back to one row per
    # StudyInstanceUID and align them by sorting on the UID.
    wide_submission = unmelt_df(melted_df=submission, id_col=id_col, target_col=target_col)
    wide_answers = unmelt_df(melted_df=answers, id_col=id_col, target_col=target_col)
    wide_submission = wide_submission.sort_values(by="StudyInstanceUID")
    wide_answers = wide_answers.sort_values(by="StudyInstanceUID")

    # Extract the target matrices in the canonical column order.
    y_true = wide_answers[column_order].to_numpy()
    y_pred = wide_submission[column_order].to_numpy()

    return y_true, y_pred
120
+
121
+
122
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a melted submission with the competition-weighted log loss."""
    y_true, y_pred = prepare_for_metric(submission=submission, answers=answers)
    return custom_comp_weighted_loss(y_true=y_true, y_pred=y_pred)