dslighting 1.7.1__py3-none-any.whl → 1.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/METADATA +3 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,124 @@
1
# Canonical list of the 120 dog breeds used as prediction columns in the
# dog-breed-identification competition. Order matters: the grading code
# indexes submission/answer DataFrames with this exact list.
_dogs_str = """
affenpinscher
afghan_hound
african_hunting_dog
airedale
american_staffordshire_terrier
appenzeller
australian_terrier
basenji
basset
beagle
bedlington_terrier
bernese_mountain_dog
black-and-tan_coonhound
blenheim_spaniel
bloodhound
bluetick
border_collie
border_terrier
borzoi
boston_bull
bouvier_des_flandres
boxer
brabancon_griffon
briard
brittany_spaniel
bull_mastiff
cairn
cardigan
chesapeake_bay_retriever
chihuahua
chow
clumber
cocker_spaniel
collie
curly-coated_retriever
dandie_dinmont
dhole
dingo
doberman
english_foxhound
english_setter
english_springer
entlebucher
eskimo_dog
flat-coated_retriever
french_bulldog
german_shepherd
german_short-haired_pointer
giant_schnauzer
golden_retriever
gordon_setter
great_dane
great_pyrenees
greater_swiss_mountain_dog
groenendael
ibizan_hound
irish_setter
irish_terrier
irish_water_spaniel
irish_wolfhound
italian_greyhound
japanese_spaniel
keeshond
kelpie
kerry_blue_terrier
komondor
kuvasz
labrador_retriever
lakeland_terrier
leonberg
lhasa
malamute
malinois
maltese_dog
mexican_hairless
miniature_pinscher
miniature_poodle
miniature_schnauzer
newfoundland
norfolk_terrier
norwegian_elkhound
norwich_terrier
old_english_sheepdog
otterhound
papillon
pekinese
pembroke
pomeranian
pug
redbone
rhodesian_ridgeback
rottweiler
saint_bernard
saluki
samoyed
schipperke
scotch_terrier
scottish_deerhound
sealyham_terrier
shetland_sheepdog
shih-tzu
siberian_husky
silky_terrier
soft-coated_wheaten_terrier
staffordshire_bullterrier
standard_poodle
standard_schnauzer
sussex_spaniel
tibetan_mastiff
tibetan_terrier
toy_poodle
toy_terrier
vizsla
walker_hound
weimaraner
welsh_springer_spaniel
west_highland_white_terrier
whippet
wire-haired_fox_terrier
yorkshire_terrier
"""

# One breed per line; strip the leading/trailing blank lines from the
# triple-quoted literal before splitting.
DOGS_LIST = _dogs_str.strip().splitlines()

# Fail loudly even under `python -O` (where a bare `assert` is stripped)
# if the breed list is ever edited to the wrong length.
if len(DOGS_LIST) != 120:
    raise RuntimeError(f"Expected 120 dog breeds, found {len(DOGS_LIST)}")
@@ -0,0 +1,42 @@
1
+ import numpy as np
2
+ from pandas import DataFrame
3
+ from sklearn.metrics import log_loss
4
+
5
+ from mlebench.competitions.utils import one_hot_dfs_to_log_loss_inputs
6
+ from mlebench.grade_helpers import InvalidSubmissionError
7
+
8
+ from .dogs import DOGS_LIST
9
+
10
+
11
def prepare_for_metric(submission: DataFrame, answers: DataFrame) -> dict:
    """Validate a submission against the answers and build the keyword
    arguments for sklearn's ``log_loss``.

    Malformed submissions raise ``InvalidSubmissionError``; the answers
    DataFrame is trusted internal data and is guarded with plain asserts.
    """
    # --- submission structure checks ---
    if any(dog not in submission.columns for dog in DOGS_LIST):
        raise InvalidSubmissionError(f"Submission must have columns for all dogs: {DOGS_LIST}")
    if "id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an `id` column")
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission should be the same length as the answers")

    # --- answers structure checks (internal data) ---
    assert "id" in answers.columns, "Answers must have an `id` column"
    assert all(
        dog in answers.columns for dog in DOGS_LIST
    ), f"Answers must have columns for all dogs: {DOGS_LIST}"

    # --- submission value checks: rows must be valid probability vectors ---
    probs = submission[DOGS_LIST]
    atol = 1e-6
    rows_sum_to_one = np.isclose(probs.sum(axis=1), 1, atol=atol)
    if not rows_sum_to_one.all():
        raise InvalidSubmissionError(
            "Dog probabilities in each row in submission should sum to one, as probabilities."
        )
    within_unit_interval = (probs >= 0) & (probs <= 1)
    if not within_unit_interval.all().all():
        raise InvalidSubmissionError(
            "All probabilities in submission DataFrame must be between 0 and 1."
        )

    # Probabilities are already normalized, so no softmax is applied.
    return one_hot_dfs_to_log_loss_inputs(
        submission, answers, id_column="id", apply_softmax=False
    )
38
+
39
+
40
def grade(submission: DataFrame, answers: DataFrame) -> float:
    """Score a submission with multi-class log loss (lower is better)."""
    return log_loss(**prepare_for_metric(submission, answers))
@@ -0,0 +1,55 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ from mlebench.competitions.utils import df_to_one_hot
7
+ from mlebench.utils import read_csv
8
+
9
+ from .dogs import DOGS_LIST
10
+
11
+
12
def to_one_hot(df):
    """One-hot encode the `breed` column into one indicator column per breed."""
    return df_to_one_hot(
        df,
        id_column="id",
        target_column="breed",
        classes=DOGS_LIST,
    )
14
+
15
+
16
def prepare(raw: Path, public: Path, private: Path):
    """Split the raw Kaggle dog-breed data into new public train/test sets.

    Writes:
        public/labels.csv            -- labels for the new training split
        public/train/*.jpg           -- training images
        public/test/*.jpg            -- test images (labels withheld)
        public/sample_submission.csv -- uniform-probability example submission
        private/test.csv             -- one-hot test labels used for grading
    """
    # Create train, test from train split
    old_train = read_csv(raw / "labels.csv")
    new_train, new_test = train_test_split(old_train, test_size=0.1, random_state=0)
    # one hot the private test because we will one-hot the submission, as per kaggle.com
    new_test = to_one_hot(new_test)

    # Copy over files
    new_train.to_csv(public / "labels.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)

    (public / "train").mkdir(parents=True, exist_ok=True)

    for file_id in new_train["id"]:
        shutil.copyfile(
            src=raw / "train" / f"{file_id}.jpg",
            dst=public / "train" / f"{file_id}.jpg",
        )

    (public / "test").mkdir(parents=True, exist_ok=True)

    for file_id in new_test["id"]:
        shutil.copyfile(
            src=raw / "train" / f"{file_id}.jpg",
            dst=public / "test" / f"{file_id}.jpg",
        )

    # Check integrity of the files copied
    assert len(list(public.glob("train/*.jpg"))) == len(new_train)
    assert len(list(public.glob("test/*.jpg"))) == len(new_test)

    # Create a sample submission file: a uniform distribution over the breeds.
    n_classes = len(DOGS_LIST)
    submission_df = new_test.copy()
    for col in submission_df.columns[1:]:
        submission_df[col] = submission_df[col].astype("float64")
    submission_df.iloc[:, 1:] = 1 / n_classes
    submission_df.to_csv(public / "sample_submission.csv", index=False)

    assert submission_df.shape == (len(new_test), n_classes + 1)  # 1 id column + one per breed
@@ -0,0 +1,104 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ from mlebench.competitions.utils import df_to_one_hot
8
+ from mlebench.utils import read_csv
9
+
10
+ from .dogs import DOGS_LIST
11
+
12
+
13
def to_one_hot(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the `breed` column into one indicator column per breed."""
    return df_to_one_hot(
        df,
        id_column="id",
        target_column="breed",
        classes=DOGS_LIST,
    )
15
+
16
+
17
def _save_split(
    raw_data_path: Path,
    public_dir: Path,
    private_dir: Path,
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
):
    """Saves a given train/test split to the specified public and private directories.

    Writes labels.csv, train/ and test/ images, and a sample submission under
    `public_dir`, and the one-hot test labels under `private_dir`.
    """
    # Ensure target directories exist (including any missing parents)
    public_dir.mkdir(parents=True, exist_ok=True)
    private_dir.mkdir(parents=True, exist_ok=True)

    # one hot the private test because we will one-hot the submission, as per kaggle.com
    test_labels_private = to_one_hot(test_df.copy())

    # Copy over files
    train_df.to_csv(public_dir / "labels.csv", index=False)
    test_labels_private.to_csv(private_dir / "test.csv", index=False)

    (public_dir / "train").mkdir(exist_ok=True)
    for file_id in train_df["id"]:
        shutil.copyfile(
            src=raw_data_path / "train" / f"{file_id}.jpg",
            dst=public_dir / "train" / f"{file_id}.jpg",
        )

    (public_dir / "test").mkdir(exist_ok=True)
    for file_id in test_df["id"]:
        shutil.copyfile(
            src=raw_data_path / "train" / f"{file_id}.jpg",
            dst=public_dir / "test" / f"{file_id}.jpg",
        )

    # Check integrity of the files copied
    assert len(list(public_dir.glob("train/*.jpg"))) == len(train_df)
    assert len(list(public_dir.glob("test/*.jpg"))) == len(test_df)

    # Create a sample submission file: a uniform distribution over the breeds.
    n_classes = len(DOGS_LIST)
    submission_df = test_labels_private.copy()
    for col in submission_df.columns[1:]:
        submission_df[col] = submission_df[col].astype("float64")
    submission_df.iloc[:, 1:] = 1 / n_classes
    submission_df.to_csv(public_dir / "sample_submission.csv", index=False)

    assert submission_df.shape == (len(test_df), n_classes + 1)  # 1 id column + one per breed
62
+
63
+
64
def prepare(raw: Path, public: Path, private: Path):
    """Build the main competition split plus a parallel validation split.

    The primary split is written to `public`/`private` exactly as in the
    original preparation script; a secondary split, carved out of the primary
    training set, is written to sibling `public_val`/`private_val` dirs.
    """
    labels = read_csv(raw / "labels.csv")

    # Primary split: identical parameters to the original preparation.
    main_train, main_test = train_test_split(labels, test_size=0.1, random_state=0)
    _save_split(
        raw_data_path=raw,
        public_dir=public,
        private_dir=private,
        train_df=main_train,
        test_df=main_test,
    )

    # Validation split: take a slice of the primary training data sized to
    # match the primary test set, reusing the same seed for reproducibility.
    val_public_dir = public.parent / "public_val"
    val_private_dir = private.parent / "private_val"
    val_fraction = len(main_test) / len(main_train)
    val_train, val_test = train_test_split(main_train, test_size=val_fraction, random_state=0)

    _save_split(
        raw_data_path=raw,
        public_dir=val_public_dir,
        private_dir=val_private_dir,
        train_df=val_train,
        test_df=val_test,
    )
@@ -0,0 +1,43 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sklearn.metrics import log_loss
6
+
7
+ from mlebench.grade_helpers import InvalidSubmissionError
8
+
9
+
10
def prepare_for_metric(
    submission: pd.DataFrame, answers: pd.DataFrame
) -> Tuple[np.ndarray, np.ndarray]:
    """Validate the submission and return aligned (y_true, y_pred) arrays.

    Both arrays are sorted by `id` so that positions correspond row-for-row.

    Raises:
        InvalidSubmissionError: if the submission is malformed.
    """
    # answers checks
    assert "id" in answers.columns, "Answers is missing `id` column"
    assert "label" in answers.columns, "Answers is missing `label` column"

    # submission checks
    if "id" not in submission.columns:
        raise InvalidSubmissionError("Submission is missing `id` column")
    if "label" not in submission.columns:
        raise InvalidSubmissionError("Submission is missing `label` column")
    if set(submission["id"]) != set(answers["id"]):
        raise InvalidSubmissionError("Submission and answers have different id's")
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission and answers have different lengths")
    if not ((submission["label"] >= 0) & (submission["label"] <= 1)).all():
        raise InvalidSubmissionError(
            "All values in submission `label` column must be between 0 and 1."
        )

    # sort by id to ensure correct order
    submission = submission.sort_values("id")
    answers = answers.sort_values("id")

    # Convert to numpy arrays, matching the declared return type.
    y_true = answers["label"].to_numpy()
    y_pred = submission["label"].to_numpy()
    return y_true, y_pred
38
+
39
+
40
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score the submission with binary log loss (lower is better)."""
    y_true, y_pred = prepare_for_metric(submission, answers)
    return log_loss(y_true=y_true, y_pred=y_pred)
@@ -0,0 +1,70 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+ from tqdm import tqdm
7
+
8
+ from mlebench.utils import compress, extract, read_csv
9
+
10
+
11
def prepare(raw: Path, public: Path, private: Path):
    """Create the dogs-vs-cats public/private split from the raw Kaggle data.

    Writes:
        public/train.zip, public/test.zip  -- compressed image splits
        public/sample_submission.csv       -- all-0.5 example predictions
        private/answers.csv                -- test labels (1 = dog, 0 = cat)
    """
    extract(raw / "train.zip", raw)
    extract(raw / "test.zip", raw)

    all_train_images = sorted(list((raw / "train").glob("*.jpg")))
    # Original test ratio has Train set - 25,000 samples; Test set - 12,500 samples (33% ratio)
    # We use 0.1 ratio to avoid removing too many samples from train
    train_images, test_images = train_test_split(all_train_images, test_size=0.1, random_state=0)

    # Copy over train images. Rename cat files to cat.0.jpg, cat.1.jpg, etc.
    # Rename dog files to dog.0.jpg, dog.1.jpg, etc.
    cat_ctr = 0
    dog_ctr = 0
    (public / "train").mkdir(exist_ok=True)
    for img in tqdm(train_images):
        if "cat" in img.name:
            shutil.copy(img, public / "train" / f"cat.{cat_ctr}.jpg")
            cat_ctr += 1
        else:
            shutil.copy(img, public / "train" / f"dog.{dog_ctr}.jpg")
            dog_ctr += 1
    assert cat_ctr + dog_ctr == len(
        train_images
    ), f"Expected {len(train_images)} train images but got {cat_ctr + dog_ctr} images."

    # Copy over test images. Rename files to 1.jpg, 2.jpg, etc.
    (public / "test").mkdir(exist_ok=True)
    copied = 0  # avoids an unbound loop variable if test_images is empty
    for i, img in enumerate(tqdm(test_images), start=1):
        shutil.copy(img, public / "test" / f"{i}.jpg")
        copied = i
    assert copied == len(
        test_images
    ), f"Expected {len(test_images)} test images but got {copied} images."

    # Compress train and test images, then remove the raw images
    compress(public / "train", public / "train.zip", exist_ok=True)
    compress(public / "test", public / "test.zip", exist_ok=True)
    shutil.rmtree(raw / "train")
    shutil.rmtree(raw / "test")

    # Make answers: label is 1 for dog images, 0 for cat images, keyed by the
    # sequential ids assigned during the copy above.
    test_ids = list(range(1, len(test_images) + 1))
    answers = pd.DataFrame(
        {
            "id": test_ids,
            "label": [int("dog" in img.name) for img in test_images],
        }
    )
    answers.to_csv(private / "answers.csv", index=False)
    assert len(answers) == len(
        test_images
    ), f"Expected {len(test_images)} answers but got {len(answers)} answers."

    # Make sample submission: an uninformative 0.5 probability for every image.
    sample_submission = pd.DataFrame({"id": test_ids, "label": [0.5] * len(test_images)})
    sample_submission.to_csv(public / "sample_submission.csv", index=False)
    assert len(sample_submission) == len(
        test_images
    ), f"Expected {len(test_images)} sample submission rows but got {len(sample_submission)} rows."
@@ -0,0 +1,143 @@
1
+ import shutil
2
+ from pathlib import Path
3
+ from typing import List
4
+
5
+ import pandas as pd
6
+ from sklearn.model_selection import train_test_split
7
+ from tqdm import tqdm
8
+
9
+ from mlebench.utils import compress, extract
10
+
11
+
12
def _process_split(
    image_paths: List[Path],
    public_dir: Path,
    private_dir: Path,
    test_size: float,
    random_state: int,
) -> List[Path]:
    """
    Splits a list of image paths into train/test sets and generates all required files.

    This function encapsulates the logic for:
    1. Splitting data.
    2. Copying and renaming images to public train/test subdirectories.
    3. Compressing the public train/test image directories.
    4. Creating the private answer key for the test set.
    5. Creating a public sample submission file.

    Args:
        image_paths: A list of Path objects for the images to be split.
        public_dir: The public output directory (e.g., 'data/public').
        private_dir: The private output directory (e.g., 'data/private').
        test_size: The proportion of the dataset to allocate to the test set.
        random_state: The seed for the random number generator.

    Returns:
        A list of Path objects corresponding to the training set of this split.
    """
    public_dir.mkdir(exist_ok=True, parents=True)
    private_dir.mkdir(exist_ok=True, parents=True)

    # Perform the split
    train_images, test_images = train_test_split(
        image_paths, test_size=test_size, random_state=random_state
    )

    # Copy over train images. Rename cat files to cat.0.jpg, cat.1.jpg, etc.
    # Rename dog files to dog.0.jpg, dog.1.jpg, etc.
    cat_ctr = 0
    dog_ctr = 0
    (public_dir / "train").mkdir(exist_ok=True)
    for img in tqdm(train_images, desc=f"Processing train set for {public_dir.name}"):
        if "cat" in img.name:
            shutil.copy(img, public_dir / "train" / f"cat.{cat_ctr}.jpg")
            cat_ctr += 1
        else:
            shutil.copy(img, public_dir / "train" / f"dog.{dog_ctr}.jpg")
            dog_ctr += 1
    assert cat_ctr + dog_ctr == len(
        train_images
    ), f"Expected {len(train_images)} train images but got {cat_ctr + dog_ctr} images."

    # Copy over test images. Rename files to 1.jpg, 2.jpg, etc.
    (public_dir / "test").mkdir(exist_ok=True)
    copied = 0  # avoids an unbound loop variable if test_images is empty
    for i, img in enumerate(
        tqdm(test_images, desc=f"Processing test set for {public_dir.name}"), start=1
    ):
        shutil.copy(img, public_dir / "test" / f"{i}.jpg")
        copied = i
    assert copied == len(
        test_images
    ), f"Expected {len(test_images)} test images but got {copied} images."

    # Compress train and test images
    compress(public_dir / "train", public_dir / "train.zip", exist_ok=True)
    compress(public_dir / "test", public_dir / "test.zip", exist_ok=True)

    # Make answers: label is 1 for dog images, 0 for cat images, keyed by the
    # sequential ids assigned during the copy above.
    test_ids = list(range(1, len(test_images) + 1))
    answers = pd.DataFrame(
        {
            "id": test_ids,
            "label": [int("dog" in img.name) for img in test_images],
        }
    )
    answers.to_csv(private_dir / "answers.csv", index=False)
    assert len(answers) == len(
        test_images
    ), f"Expected {len(test_images)} answers but got {len(answers)} answers."

    # Make sample submission: an uninformative 0.5 probability for every image.
    sample_submission = pd.DataFrame({"id": test_ids, "label": [0.5] * len(test_images)})
    sample_submission.to_csv(public_dir / "sample_submission.csv", index=False)
    assert len(sample_submission) == len(
        test_images
    ), f"Expected {len(test_images)} sample submission rows but got {len(sample_submission)} rows."

    return train_images
98
+
99
+
100
def prepare(raw: Path, public: Path, private: Path):
    """Generate the main competition split plus a parallel validation split.

    The primary split goes to `public`/`private` (unchanged from the original
    script); a secondary split of the primary training images goes to sibling
    `public_val`/`private_val` directories.
    """
    extract(raw / "train.zip", raw)
    extract(raw / "test.zip", raw)

    source_images = sorted(list((raw / "train").glob("*.jpg")))

    # Primary split. Kaggle's real ratio was 25,000 train / 12,500 test
    # (~33%), but a 0.1 ratio keeps more samples in the training set.
    primary_test_fraction = 0.1
    remaining_train = _process_split(
        image_paths=source_images,
        public_dir=public,
        private_dir=private,
        test_size=primary_test_fraction,
        random_state=0,
    )

    # Validation split, built only from the primary training images.
    val_public_dir = public.parent / "public_val"
    val_private_dir = private.parent / "private_val"

    # Size the validation test set to match the primary test set:
    # fraction = t / (1 - t), e.g. 0.1 / 0.9 = 1/9.
    val_test_fraction = primary_test_fraction / (1 - primary_test_fraction)

    _process_split(
        image_paths=remaining_train,
        public_dir=val_public_dir,
        private_dir=val_private_dir,
        test_size=val_test_fraction,
        random_state=0,  # same seed keeps both splits reproducible
    )

    # Remove the extracted raw images only after both splits are written.
    shutil.rmtree(raw / "train")
    shutil.rmtree(raw / "test")
@@ -0,0 +1,23 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.metrics import accuracy_score
4
+
5
+
6
def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """
    Grade the submission using accuracy metric.

    Args:
        submission: DataFrame with columns ['id', 'label']
        answers: DataFrame with columns ['id', 'label']

    Returns:
        Accuracy score (float between 0 and 1)

    Raises:
        ValueError: if the submission does not provide a prediction for every
            answer id.
    """
    # Merge on id to ensure alignment
    merged = pd.merge(answers, submission, on='id', suffixes=('_true', '_pred'))

    # An inner merge silently drops unmatched ids; a short result means the
    # submission is missing predictions and would otherwise be mis-scored.
    if len(merged) != len(answers):
        raise ValueError(
            f"Submission is missing predictions: expected {len(answers)} ids, matched {len(merged)}."
        )

    # Calculate accuracy
    accuracy = accuracy_score(merged['label_true'], merged['label_pred'])

    return accuracy