dslighting-1.7.1-py3-none-any.whl → dslighting-1.7.6-py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,222 @@
+ import json
+ import random
+ import shutil
+ from pathlib import Path
+ from typing import Dict, List, Tuple
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from tqdm import tqdm
+
+ from mlebench.utils import get_logger
+
+ logger = get_logger(__name__)
+
+
+ def _split_data_by_category(
+     data_by_category: Dict[int, List[dict]],
+ ) -> Tuple[Dict[int, List[dict]], Dict[int, List[dict]]]:
+     """
+     Splits data within each category into train and test sets.
+
+     This function replicates the original script's splitting logic to ensure
+     consistency.
+     """
+     train_split_by_category = {}
+     test_split_by_category = {}
+
+     for category_id, annotations_images in data_by_category.items():
+         test_size = 0.2
+         n_samples = len(annotations_images)
+         if n_samples == 1:
+             train_annotations_images = annotations_images
+             test_annotations_images = []
+         elif n_samples < 5:
+             test_size = max(1, int(n_samples * test_size))
+             train_annotations_images = annotations_images[:-test_size]
+             test_annotations_images = annotations_images[-test_size:]
+         else:
+             train_annotations_images, test_annotations_images = train_test_split(
+                 annotations_images, test_size=test_size, random_state=0
+             )
+         train_split_by_category[category_id] = train_annotations_images
+         test_split_by_category[category_id] = test_annotations_images
+     return train_split_by_category, test_split_by_category
+
+
+ def _process_and_save_split(
+     train_data_by_cat: Dict[int, List[dict]],
+     test_data_by_cat: Dict[int, List[dict]],
+     base_metadata: dict,
+     public_dir: Path,
+     private_dir: Path,
+     raw_data_path: Path,
+     dev_mode: bool,
+     dev_count: int,
+ ):
+     """
+     Processes and saves a single train/test split to the specified directories.
+
+     This function handles:
+     - Creating training set metadata and copying images.
+     - Creating test set metadata and copying/renaming images.
+     - Creating private ground-truth answers.
+     - Creating a public sample submission file.
+     """
+     # Create required directories
+     public_dir.mkdir(exist_ok=True, parents=True)
+     private_dir.mkdir(exist_ok=True, parents=True)
+     (public_dir / "train/images").mkdir(exist_ok=True, parents=True)
+     (public_dir / "test/images").mkdir(exist_ok=True, parents=True)
+
+     # Process train set
+     new_train_metadata = base_metadata.copy()
+     new_train_metadata.update({"annotations": [], "images": []})
+     train_sample_count = sum(len(v) for v in train_data_by_cat.values())
+
+     with tqdm(
+         desc=f"Creating train set for {public_dir.name}",
+         total=train_sample_count,
+     ) as pbar:
+         for category_id, annotations_images in train_data_by_cat.items():
+             category_subdir = f"{category_id // 100:03d}/{category_id % 100:02d}"
+             (public_dir / "train/images" / category_subdir).mkdir(exist_ok=True, parents=True)
+             for idx, annotation_image in enumerate(annotations_images):
+                 new_train_metadata["annotations"].append(annotation_image["annotation"].copy())
+                 new_train_metadata["images"].append(annotation_image["image"].copy())
+
+                 if not dev_mode or idx < dev_count:
+                     src_path = raw_data_path / "train" / annotation_image["image"]["file_name"]
+                     dst_path = public_dir / "train" / annotation_image["image"]["file_name"]
+                     shutil.copyfile(src=src_path, dst=dst_path)
+                 pbar.update(1)
+
+     with open(public_dir / "train/metadata.json", "w") as f:
+         json.dump(new_train_metadata, f, indent=4, sort_keys=True)
+
+     # Process test set
+     new_test_metadata = base_metadata.copy()
+     for key_to_del in ["categories", "institutions"]:
+         if key_to_del in new_test_metadata:
+             del new_test_metadata[key_to_del]
+     new_test_metadata.update({"annotations": [], "images": []})
+
+     test_annotations_images = [
+         item for sublist in test_data_by_cat.values() for item in sublist
+     ]
+     random.Random(0).shuffle(test_annotations_images)
+
+     for idx, annotation_image in tqdm(
+         enumerate(test_annotations_images),
+         desc=f"Creating test set for {public_dir.name}",
+         total=len(test_annotations_images),
+     ):
+         new_image_id = str(idx)
+         new_file_name = f"images/{idx // 1000:03d}/{idx}.jpg"
+
+         new_annotation = annotation_image["annotation"].copy()
+         new_annotation["image_id"] = new_image_id
+         new_test_metadata["annotations"].append(new_annotation)
+
+         new_image = annotation_image["image"].copy()
+         new_image["id"] = new_image_id
+         new_image["file_name"] = new_file_name
+         new_test_metadata["images"].append(new_image)
+
+         if not dev_mode or idx < dev_count:
+             src_path = raw_data_path / "train" / annotation_image["image"]["file_name"]
+             dst_path = public_dir / "test" / new_file_name
+             dst_path.parent.mkdir(exist_ok=True, parents=True)
+             shutil.copyfile(src=src_path, dst=dst_path)
+
+     # Save public test metadata (without answers)
+     public_new_test = new_test_metadata.copy()
+     del public_new_test["annotations"]
+     with open(public_dir / "test/metadata.json", "w") as f:
+         json.dump(public_new_test, f, indent=4, sort_keys=True)
+
+     # Save private test answers
+     answers_rows = [
+         {"Id": img["id"], "Predicted": ann["category_id"]}
+         for img, ann in zip(new_test_metadata["images"], new_test_metadata["annotations"])
+     ]
+     pd.DataFrame(answers_rows).to_csv(private_dir / "answers.csv", index=False)
+
+     # Save public sample submission
+     sample_rows = [{"Id": img["id"], "Predicted": 0} for img in new_test_metadata["images"]]
+     pd.DataFrame(sample_rows).to_csv(public_dir / "sample_submission.csv", index=False)
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the raw data into public and private datasets with appropriate test/train splits.
+
+     This script now performs two splits:
+     1. raw -> train + test (saved to `public`/`private`)
+     2. train -> train_val + test_val (saved to `public_val`/`private_val`)
+
+     The second split uses the exact same logic as the first, creating a smaller
+     dataset for validation that mirrors the structure of the main one.
+     """
+     dev_mode = False
+     dev_count = 2
+
+     # --- Start: New code for managing validation paths ---
+     # Define and create the new parallel directories for the validation set
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+     public_val.mkdir(exist_ok=True)
+     private_val.mkdir(exist_ok=True)
+     # --- End: New code for managing validation paths ---
+
+     json_path = raw / "train/metadata.json"
+     with open(json_path, "r", encoding="utf-8") as f:
+         old_train_metadata = json.load(f)
+
+     annotations_images_by_category = {}
+     for annotation, image in list(
+         zip(old_train_metadata["annotations"], old_train_metadata["images"])
+     ):
+         category_id = annotation["category_id"]
+         if category_id not in annotations_images_by_category:
+             annotations_images_by_category[category_id] = []
+         annotations_images_by_category[category_id].append(
+             {"annotation": annotation, "image": image}
+         )
+
+     # --- SPLIT 1: Create the original train and test sets ---
+     logger.info("Starting initial split: raw -> train + test")
+     train_annotations_images_by_category, test_annotations_images_by_category = (
+         _split_data_by_category(annotations_images_by_category)
+     )
+
+     _process_and_save_split(
+         train_data_by_cat=train_annotations_images_by_category,
+         test_data_by_cat=test_annotations_images_by_category,
+         base_metadata=old_train_metadata,
+         public_dir=public,
+         private_dir=private,
+         raw_data_path=raw,
+         dev_mode=dev_mode,
+         dev_count=dev_count,
+     )
+     logger.info(f"Original train/test sets saved to {public.name} and {private.name}")
+
+     # --- SPLIT 2: Create the new validation sets from the training set of SPLIT 1 ---
+     logger.info("Starting second split: train -> train_val + test_val")
+     # The input to this split is the *training data* from the first split
+     train_val_by_category, test_val_by_category = _split_data_by_category(
+         train_annotations_images_by_category
+     )
+
+     _process_and_save_split(
+         train_data_by_cat=train_val_by_category,
+         test_data_by_cat=test_val_by_category,
+         base_metadata=old_train_metadata,  # Use original for all top-level keys
+         public_dir=public_val,
+         private_dir=private_val,
+         raw_data_path=raw,
+         dev_mode=dev_mode,
+         dev_count=dev_count,
+     )
+     logger.info(f"Validation sets saved to {public_val.name} and {private_val.name}")
@@ -0,0 +1,31 @@
+ import pandas as pd
+ from sklearn.metrics import f1_score
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+     """
+     `submission` and `answers` are pd.DataFrames with "Id" and "Predicted" columns.
+     """
+     if len(submission) != len(answers):
+         raise InvalidSubmissionError(
+             f"Submission length {len(submission)} != answers length {len(answers)}"
+         )
+     if "Id" not in submission.columns or "Predicted" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have 'Id' and 'Predicted' columns")
+
+     # Sort submission and answers by "Id"
+     submission = submission.sort_values("Id")
+     answers = answers.sort_values("Id")
+     if (submission["Id"].values != answers["Id"].values).any():
+         raise InvalidSubmissionError("Submission and answers have mismatched 'Id' columns")
+
+     y_true = [int(y) for y in answers["Predicted"]]
+     y_pred = [int(y) for y in submission["Predicted"]]
+     return y_true, y_pred
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     y_true, y_pred = prepare_for_metric(submission, answers)
+     return f1_score(y_true=y_true, y_pred=y_pred, average="macro")
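Editor's note: a quick sanity check of the grader above, as a sketch with made-up data (it assumes grade and its imports are in scope; the toy values are not from the package).

import pandas as pd

# Toy three-class example; column names follow the grader's "Id"/"Predicted" contract.
answers = pd.DataFrame({"Id": ["0", "1", "2", "3"], "Predicted": [0, 1, 2, 1]})
submission = pd.DataFrame({"Id": ["3", "2", "1", "0"], "Predicted": [1, 2, 0, 0]})

# Rows are re-sorted by "Id" inside prepare_for_metric, so submission order doesn't matter.
print(grade(submission, answers))  # macro-F1 over classes {0, 1, 2}: (2/3 + 2/3 + 1) / 3 ≈ 0.778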
@@ -0,0 +1,233 @@
+ import json
+ import random
+ import shutil
+ from pathlib import Path
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from tqdm import tqdm
+
+ from mlebench.utils import get_logger
+
+ logger = get_logger(__name__)
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the raw data into public and private datasets with appropriate test/train splits.
+
+     `train_metadata.json` is the "table of contents" for our data, with the following structure:
+     (More details at https://www.kaggle.com/competitions/herbarium-2022-fgvc9/data)
+     ```
+     {
+         "annotations" : [annotation],
+         "categories" : [category],
+         "genera" : [genus],
+         "images" : [image],
+         "distances" : [distance],
+         "licenses" : [license],
+         "institutions" : [institution]
+     }
+     ```
+     - `images` and `annotations` are both N-length lists corresponding to the N samples.
+       We'll need to split each of these lists into train and test.
+     - The other fields are dataset-wide metadata that we don't need to touch.
+
+     Other notes:
+     - Train/test splits need to occur per category (each category should be in both train and test).
+     - The `test_images` and `train_images` folders have nested subdirs to make them easier to browse:
+       - `train_images` is structured as `{category_id[:3]}/{category_id[3:]}/{image_id}.jpg`
+       - `test_images` is structured as `{image_idx[:3]}/test-{image_idx}.jpg` (to not reveal the category)
+     - When we create the new splits, we re-assign image indices so that we don't give away labels based on the index:
+       - train images are indexed within their own category
+       - test images follow a flat index after shuffling the categories
+     """
+
+     # Create train, test from train split
+     with open(raw / "train_metadata.json") as f:
+         old_train_metadata = json.load(f)
+
+     # Organize data by category so that we can split per-category later
+     annotations_images_by_category = {}  # We'll collect both `annotations` and `images` here
+     for annotation, image in list(
+         zip(old_train_metadata["annotations"], old_train_metadata["images"])
+     ):
+         assert annotation["image_id"] == image["image_id"]
+         category_id = annotation["category_id"]
+         if category_id not in annotations_images_by_category:
+             annotations_images_by_category[category_id] = []
+         annotations_images_by_category[category_id].append(
+             {
+                 "annotation": annotation,
+                 "image": image,
+             }
+         )
+
+     # Split train/test
+     train_sample_count = 0  # Useful for tqdm later
+     train_annotations_images_by_category = {}
+     test_annotations_images_by_category = {}
+     for category_id, annotations_images in tqdm(
+         annotations_images_by_category.items(), desc="Assigning train/test splits"
+     ):
+         # Create split by "category" (class): each category needs to be in both train and test (80:20)
+         train_annotations_images, test_annotations_images = train_test_split(
+             annotations_images, test_size=0.2, random_state=0
+         )
+         assert len(train_annotations_images) > 0 and len(test_annotations_images) > 0
+         train_annotations_images_by_category[category_id] = train_annotations_images
+         test_annotations_images_by_category[category_id] = test_annotations_images
+         train_sample_count += len(train_annotations_images)
+
+     # Add to train set
+     new_train_metadata = old_train_metadata.copy()  # Keep peripheral metadata
+     new_train_metadata.update(
+         {
+             "annotations": [],
+             "images": [],
+         }
+     )
+     with tqdm(
+         desc="Creating new train dataset",
+         total=train_sample_count,
+     ) as pbar:
+         for category_id, annotations_images in train_annotations_images_by_category.items():
+             # Create a nested directory from category_id, e.g. 15504 -> "155/04" or 3 -> "000/03"
+             category_subdir = f"{category_id // 100:03d}/{category_id % 100:02d}"
+             (public / "train_images" / category_subdir).mkdir(exist_ok=True, parents=True)
+             for idx, annotation_image in enumerate(annotations_images):
+                 # Update the image_id and file_name so that we don't have gaps in the image_id
+                 # (after doing train/test split, image ids are not contiguous within train)
+
+                 # Make new image id from {category_id}__{idx}, e.g. 15504__037
+                 new_image_id = f"{category_id:05d}__{(idx + 1):03d}"
+                 # Make new filename from image id, e.g. "155/04/15504__037.jpg"
+                 new_file_name = f"{category_subdir}/{new_image_id}.jpg"
+
+                 new_annotation = annotation_image["annotation"].copy()
+                 new_annotation["image_id"] = new_image_id
+                 new_train_metadata["annotations"].append(new_annotation)
+
+                 new_image = annotation_image["image"].copy()
+                 new_image["image_id"] = new_image_id
+                 new_image["file_name"] = new_file_name
+                 new_train_metadata["images"].append(new_image)
+
+                 # Copy file from raw to public
+                 src_path = raw / "train_images" / annotation_image["image"]["file_name"]
+                 dst_path = public / "train_images" / new_file_name
+                 shutil.copyfile(src=src_path, dst=dst_path)
+
+                 pbar.update(1)
+
+     with open(public / "train_metadata.json", "w") as f:
+         json.dump(new_train_metadata, f, indent=4, sort_keys=True)
+
+     assert len(list((public / "train_images").glob("**/*.jpg"))) == len(
+         new_train_metadata["images"]
+     ), (
+         f"Expected {len(new_train_metadata['images'])} images in train_images, but found "
+         f"{len(list((public / 'train_images').glob('**/*.jpg')))}"
+     )
+     assert len(new_train_metadata["annotations"]) == len(new_train_metadata["images"]), (
+         f"Mismatching number of annotations ({len(new_train_metadata['annotations'])}) "
+         f"and images ({len(new_train_metadata['images'])})"
+     )
+
+     # Add to test set
+     new_test_metadata = {}  # Test doesn't need all that metadata
+     new_test_metadata.update(
+         {
+             "annotations": [],
+             "images": [],
+         }
+     )
+     # Flatten and shuffle test set so that we don't have all the same categories in a row
+     test_annotations_images = [
+         item for sublist in test_annotations_images_by_category.values() for item in sublist
+     ]
+     random.Random(0).shuffle(test_annotations_images)
+     for idx, annotation_image in tqdm(
+         enumerate(test_annotations_images),
+         desc="Creating new test dataset",
+         total=len(test_annotations_images),
+     ):
+         # Update the image_id and file_name so that we don't have gaps in the image_id
+         # (after doing train/test split, image ids are not contiguous within train and test)
+
+         # Make new image id; for the test set this is just the index
+         new_image_id = str(idx)
+         # Make new filename from image id, e.g. "000/test-000000.jpg"
+         new_file_name = f"{idx // 1000:03d}/test-{idx:06d}.jpg"
+
+         new_annotation = annotation_image["annotation"].copy()
+         new_annotation["image_id"] = new_image_id
+         new_test_metadata["annotations"].append(new_annotation)
+
+         new_image = annotation_image["image"].copy()
+         new_image["image_id"] = new_image_id
+         new_image["file_name"] = new_file_name
+         new_test_metadata["images"].append(new_image)
+
+         # Copy file from raw to public
+         src_path = raw / "train_images" / annotation_image["image"]["file_name"]
+         dst_path = public / "test_images" / new_file_name
+         dst_path.parent.mkdir(exist_ok=True, parents=True)
+         shutil.copyfile(src=src_path, dst=dst_path)
+
+     # Save new test metadata
+     with open(public / "test_metadata.json", "w") as f:
+         # The public data only contains the image metadata, not the annotations nor anything else
+         json.dump(new_test_metadata["images"], f, indent=4, sort_keys=True)
+
+     assert len(list((public / "test_images").glob("**/*.jpg"))) == len(
+         new_test_metadata["images"]
+     ), (
+         f"Expected {len(new_test_metadata['images'])} images in test_images, but found "
+         f"{len(list((public / 'test_images').glob('**/*.jpg')))}"
+     )
+     assert len(new_test_metadata["annotations"]) == len(new_test_metadata["images"]), (
+         f"Mismatching number of annotations ({len(new_test_metadata['annotations'])}) "
+         f"and images ({len(new_test_metadata['images'])})"
+     )
+     assert len(new_train_metadata["annotations"]) + len(new_test_metadata["annotations"]) == len(
+         old_train_metadata["annotations"]
+     ), (
+         f"Expected {len(old_train_metadata['annotations'])} annotations in total, but found "
+         f"{len(new_train_metadata['annotations'])} in train and {len(new_test_metadata['annotations'])} in test"
+     )
+
+     # Save private test answers
+     answers_rows = []
+     for image, annotation in zip(new_test_metadata["images"], new_test_metadata["annotations"]):
+         assert image["image_id"] == annotation["image_id"]
+         answers_rows.append(
+             {
+                 "Id": image["image_id"],
+                 "Predicted": annotation["category_id"],
+             }
+         )
+     answers_df = pd.DataFrame(answers_rows)
+     answers_df.to_csv(private / "answers.csv", index=False)
+
+     # Create a new sample submission that matches raw/sample_submission.csv, but for the new test set
+     sample_rows = []
+     for image in new_test_metadata["images"]:
+         sample_rows.append(
+             {
+                 "Id": image["image_id"],
+                 "Predicted": 42,
+             }
+         )
+     sample_df = pd.DataFrame(sample_rows)
+     sample_df.to_csv(public / "sample_submission.csv", index=False)
+
+     assert len(answers_df) == len(
+         new_test_metadata["images"]
+     ), f"Expected {len(new_test_metadata['images'])} rows in answers, but found {len(answers_df)}"
+     assert len(sample_df) == len(
+         answers_df
+     ), f"Expected {len(answers_df)} rows in sample submission, but found {len(sample_df)}"
+     assert answers_df["Id"].equals(
+         sample_df["Id"]
+     ), "Mismatched 'Id' columns between answers and sample submission"