dslighting 1.7.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,127 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from tqdm.auto import tqdm
6
+
7
+ from mlebench.competitions.utils import get_ids_from_tf_records
8
+ from mlebench.utils import get_logger
9
+
10
+ logger = get_logger(__name__)
11
+
12
+
13
def prepare(raw: Path, public: Path, private: Path) -> None:
    """Split the raw melanoma-classification training data into new train/test sets.

    The split is made on TFRecord file boundaries: there are 33126 train
    samples spread across 16 TFRecord files of 2071 samples each, so two
    arbitrary TFRecord files are held out as the test set (4142 samples,
    roughly 10% of the data).

    Args:
        raw: Directory containing the original competition files.
        public: Destination for participant-visible files (train/test CSVs,
            sample submission, DICOM/JPEG images, TFRecords).
        private: Destination for the held-out test labels used for grading.
    """
    old_train = pd.read_csv(raw / "train.csv")

    # Two arbitrary TFRecord files form the held-out test set.
    test_tf_records = {"train00-2071.tfrec", "train06-2071.tfrec"}

    # Parse the image IDs contained in the test TFRecords. sorted() gives a
    # deterministic iteration order (plain set iteration varies across runs).
    test_ids = []
    for tfrec in tqdm(sorted(test_tf_records), desc="Splitting test ids from train TFRecords"):
        test_ids.extend(get_ids_from_tf_records(raw / "tfrecords" / tfrec))

    old_train["split"] = "train"
    old_train.loc[old_train["image_name"].isin(test_ids), "split"] = "test"

    new_train = old_train[old_train["split"] == "train"].drop(columns=["split"])
    new_test = old_train[old_train["split"] == "test"].drop(columns=["split"])
    new_test_without_labels = new_test.copy()[
        ["image_name", "patient_id", "sex", "age_approx", "anatom_site_general_challenge"]
    ]

    # Match format of the sample submission.
    new_test = new_test[["image_name", "target"]]

    # Sample submission: same IDs as the test set, all-zero predictions.
    sample_submission = new_test.copy()
    sample_submission["target"] = 0

    # Save the CSVs.
    new_train.to_csv(public / "train.csv", index=False)
    new_test_without_labels.to_csv(public / "test.csv", index=False)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)

    # Split raw train image files into the appropriate public train/test
    # directories. The image files themselves do not contain target metadata,
    # so we are free to move them around. DICOMs and JPEGs:
    (public / "train").mkdir(parents=True, exist_ok=True)
    (public / "jpeg" / "train").mkdir(parents=True, exist_ok=True)
    for image_name in tqdm(new_train["image_name"], desc="Train Images", total=len(new_train)):
        dcm_file = raw / "train" / f"{image_name}.dcm"
        jpg_file = raw / "jpeg" / "train" / f"{image_name}.jpg"
        shutil.copy(dcm_file, public / "train" / f"{image_name}.dcm")
        shutil.copy(jpg_file, public / "jpeg" / "train" / f"{image_name}.jpg")
    (public / "test").mkdir(parents=True, exist_ok=True)
    (public / "jpeg" / "test").mkdir(parents=True, exist_ok=True)
    for image_name in tqdm(new_test["image_name"], desc="Test Images", total=len(new_test)):
        dcm_file = raw / "train" / f"{image_name}.dcm"
        jpg_file = raw / "jpeg" / "train" / f"{image_name}.jpg"
        shutil.copy(dcm_file, public / "test" / f"{image_name}.dcm")
        shutil.copy(jpg_file, public / "jpeg" / "test" / f"{image_name}.jpg")

    # TFRecords: copy every raw train TFRecord, renaming the held-out files
    # to test* and renumbering the remaining train* files contiguously.
    train_count = 0
    test_count = 0
    tfrecords_dest_path = public / "tfrecords"
    tfrecords_dest_path.mkdir(parents=True, exist_ok=True)
    for file in tqdm(
        sorted((raw / "tfrecords").glob("train*.tfrec")), desc="Copying TFRecord files"
    ):
        record_count = file.stem.split("-")[1]  # i.e. get 2071 from train00-2071
        if file.name in test_tf_records:
            shutil.copy(file, tfrecords_dest_path / f"test{test_count:02d}-{record_count}.tfrec")
            test_count += 1
        else:
            shutil.copy(file, tfrecords_dest_path / f"train{train_count:02d}-{record_count}.tfrec")
            train_count += 1

    # Sanity checks: file counts, disjoint splits, and expected CSV schemas.
    logger.info("Running asserts...")
    assert len(list(public.glob("train/*.dcm"))) == len(new_train), "Train DICOM count mismatch"
    assert len(list(public.glob("test/*.dcm"))) == len(new_test), "Test DICOM count mismatch"
    assert len(list(public.glob("jpeg/train/*.jpg"))) == len(new_train), "Train JPEG count mismatch"
    assert len(list(public.glob("jpeg/test/*.jpg"))) == len(new_test), "Test JPEG count mismatch"

    assert not set(new_train["image_name"]).intersection(
        new_test["image_name"]
    ), "Train/Test overlap"

    assert len(sample_submission) == len(new_test), "Sample submission length mismatch"
    assert (
        sample_submission["image_name"]
        .sort_values()
        .reset_index(drop=True)
        .equals(new_test["image_name"].sort_values().reset_index(drop=True))
    ), "Sample submission IDs mismatch"

    # check columns
    assert new_test_without_labels.columns.tolist() == [
        "image_name",
        "patient_id",
        "sex",
        "age_approx",
        "anatom_site_general_challenge",
    ], "new_test_without_labels columns mismatch"
    assert new_train.columns.tolist() == [
        "image_name",
        "patient_id",
        "sex",
        "age_approx",
        "anatom_site_general_challenge",
        "diagnosis",
        "benign_malignant",
        "target",
    ], "new_train columns mismatch"
    assert new_test.columns.tolist() == ["image_name", "target"], "new_test columns mismatch"
    assert sample_submission.columns.tolist() == [
        "image_name",
        "target",
    ], "sample_submission columns mismatch"
@@ -0,0 +1,158 @@
1
+ import shutil
2
+ from pathlib import Path
3
+ from typing import Set, List
4
+
5
+ import pandas as pd
6
+ from tqdm.auto import tqdm
7
+
8
+ from mlebench.competitions.utils import get_ids_from_tf_records
9
+ from mlebench.utils import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
def _get_ids_from_tfrec_set(raw_path: Path, tfrec_set: Set[str]) -> List[str]:
    """Collect the image IDs contained in the given set of TFRecord files.

    Each name in `tfrec_set` is resolved against `raw_path / "tfrecords"` and the
    IDs found in that file are appended to one flat list, which is returned.
    """
    collected: List[str] = []
    for name in tqdm(tfrec_set, desc="Splitting IDs from TFRecords"):
        collected += get_ids_from_tf_records(raw_path / "tfrecords" / name)
    return collected
20
+
21
+
22
def _create_dataset_files(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    source_tfrec_files: List[Path],
    test_tfrec_set: Set[str],
    raw_path: Path,
    public_path: Path,
    private_path: Path,
):
    """
    Generate all on-disk artifacts for one train/test split.

    Writes, under `public_path`: train.csv, test.csv (labels stripped),
    sample_submission.csv (all targets zeroed), the DICOM and JPEG images for
    both splits, and renamed copies of the TFRecords. Writes the labelled
    test.csv under `private_path`. Finishes with integrity assertions.

    Args:
        train_df: rows (with labels) belonging to the train split.
        test_df: rows (with labels) belonging to the test split.
        source_tfrec_files: all candidate TFRecord files to copy/rename.
        test_tfrec_set: file names (not paths) within `source_tfrec_files`
            whose records belong to the test split.
        raw_path: root of the raw competition download; images for BOTH splits
            are read from its train/ directories, since the split is carved out
            of the original training data.
        public_path: destination for participant-visible files.
        private_path: destination for grading-only files.
    """
    # Create destination directories
    public_path.mkdir(parents=True, exist_ok=True)
    private_path.mkdir(parents=True, exist_ok=True)

    # Prepare dataframes for saving: public test.csv keeps only metadata
    # columns; the labelled version goes to the private dir for grading.
    test_df_without_labels = test_df.copy()[
        ["image_name", "patient_id", "sex", "age_approx", "anatom_site_general_challenge"]
    ]
    test_df_with_labels = test_df[["image_name", "target"]]
    sample_submission = test_df_with_labels.copy()
    # Sample submission is a template: every prediction zeroed out.
    sample_submission["target"] = 0

    # Save the CSVs
    train_df.to_csv(public_path / "train.csv", index=False)
    test_df_without_labels.to_csv(public_path / "test.csv", index=False)
    sample_submission.to_csv(public_path / "sample_submission.csv", index=False)
    test_df_with_labels.to_csv(private_path / "test.csv", index=False)

    # Copy image files (DICOMs and JPEGs). Note both loops read from the raw
    # *train* folders: the test split here is a subset of the original train set.
    (public_path / "train").mkdir(parents=True, exist_ok=True)
    (public_path / "jpeg" / "train").mkdir(parents=True, exist_ok=True)
    for image_name in tqdm(train_df["image_name"], desc=f"Train Images -> {public_path.name}", total=len(train_df)):
        shutil.copy(raw_path / "train" / f"{image_name}.dcm", public_path / "train" / f"{image_name}.dcm")
        shutil.copy(raw_path / "jpeg" / "train" / f"{image_name}.jpg", public_path / "jpeg" / "train" / f"{image_name}.jpg")

    (public_path / "test").mkdir(parents=True, exist_ok=True)
    (public_path / "jpeg" / "test").mkdir(parents=True, exist_ok=True)
    for image_name in tqdm(test_df["image_name"], desc=f"Test Images -> {public_path.name}", total=len(test_df)):
        shutil.copy(raw_path / "train" / f"{image_name}.dcm", public_path / "test" / f"{image_name}.dcm")
        shutil.copy(raw_path / "jpeg" / "train" / f"{image_name}.jpg", public_path / "jpeg" / "test" / f"{image_name}.jpg")

    # Copy and rename TFRecords, renumbering each split from 00 while keeping
    # the record count embedded in the stem (e.g. "2071" from "train00-2071").
    train_count = 0
    test_count = 0
    tfrecords_dest_path = public_path / "tfrecords"
    tfrecords_dest_path.mkdir(parents=True, exist_ok=True)
    for file in tqdm(source_tfrec_files, desc=f"Copying TFRecords -> {public_path.name}"):
        record_count = file.stem.split("-")[1]
        if file.name in test_tfrec_set:
            shutil.copy(file, tfrecords_dest_path / f"test{test_count:02d}-{record_count}.tfrec")
            test_count += 1
        else:
            shutil.copy(file, tfrecords_dest_path / f"train{train_count:02d}-{record_count}.tfrec")
            train_count += 1

    # Assertions to ensure data integrity: file counts match the dataframes,
    # the splits are disjoint, and the sample submission mirrors the test IDs.
    logger.info(f"Running asserts for {public_path.name} split...")
    assert len(list(public_path.glob("train/*.dcm"))) == len(train_df), "Train DICOM count mismatch"
    assert len(list(public_path.glob("test/*.dcm"))) == len(test_df), "Test DICOM count mismatch"
    assert len(list(public_path.glob("jpeg/train/*.jpg"))) == len(train_df), "Train JPEG count mismatch"
    assert len(list(public_path.glob("jpeg/test/*.jpg"))) == len(test_df), "Test JPEG count mismatch"
    assert not set(train_df["image_name"]).intersection(test_df["image_name"]), "Train/Test overlap"
    assert len(sample_submission) == len(test_df), "Sample submission length mismatch"
    assert (
        sample_submission["image_name"].sort_values().reset_index(drop=True)
        .equals(test_df["image_name"].sort_values().reset_index(drop=True))
    ), "Sample submission IDs mismatch"
92
+
93
+
94
def prepare(raw: Path, public: Path, private: Path) -> None:
    """Build two train/test splits from the raw competition training data.

    1. The "original" split: two fixed TFRecord files become the held-out test
       set, written to `public`/`private`.
    2. A "validation" split: from the remaining training data, two further
       TFRecord files become a second held-out set, written to sibling
       `public_val`/`private_val` directories.
    """
    # Common setup. DEV truncates the data for quick local runs.
    DEV = False
    cutoff_index = 10000 if DEV else None
    all_data_df = pd.read_csv(raw / "train.csv")[:cutoff_index]
    all_raw_tfrec_files = sorted((raw / "tfrecords").glob("train*.tfrec"))

    # --- 1. Original Competition Split (train -> train/test) ---
    logger.info("--- Creating original public/private split ---")

    # The original split used 2 arbitrary TFRecord files as the test set
    original_test_tfrec_set = {"train00-2071.tfrec", "train06-2071.tfrec"}
    original_test_ids = _get_ids_from_tfrec_set(raw, original_test_tfrec_set)

    # Split the main dataframe by tagging each row, then separating on the tag.
    all_data_df["split"] = "train"
    all_data_df.loc[all_data_df["image_name"].isin(original_test_ids), "split"] = "test"

    # These are the final dataframes for the original competition.
    # `.drop(...)` returns new frames, so `all_data_df` is left untouched.
    final_train_df = all_data_df[all_data_df["split"] == "train"].drop(columns=["split"])
    final_test_df = all_data_df[all_data_df["split"] == "test"].drop(columns=["split"])

    # Create all files for the original split
    _create_dataset_files(
        train_df=final_train_df,
        test_df=final_test_df,
        source_tfrec_files=all_raw_tfrec_files,
        test_tfrec_set=original_test_tfrec_set,
        raw_path=raw,
        public_path=public,
        private_path=private,
    )

    # --- 2. New Validation Split (train -> train_val/test_val) ---
    logger.info("--- Creating new public_val/private_val split ---")

    # Define new directories for the validation set, as siblings of the
    # original public/private directories.
    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # The source for this new split is the *training* data from the *first* split.
    # We replicate the split logic by taking 2 TFRecord files for the validation-test set.
    val_source_tfrec_files = [f for f in all_raw_tfrec_files if f.name not in original_test_tfrec_set]
    val_test_tfrec_set = {"train01-2071.tfrec", "train02-2071.tfrec"}
    val_test_ids = _get_ids_from_tfrec_set(raw, val_test_tfrec_set)

    # Split the *training dataframe* from the first split to create the new sets.
    # NOTE(review): this mutates `final_train_df` in place (adds a "split"
    # column) after it was already consumed above — harmless here since the
    # first `_create_dataset_files` call has completed, but may emit a pandas
    # SettingWithCopyWarning; confirm if re-ordering this function.
    final_train_df["split"] = "train_val"
    final_train_df.loc[final_train_df["image_name"].isin(val_test_ids), "split"] = "test_val"

    train_val_df = final_train_df[final_train_df["split"] == "train_val"].drop(columns=["split"])
    test_val_df = final_train_df[final_train_df["split"] == "test_val"].drop(columns=["split"])

    # Create all files for the new validation split
    _create_dataset_files(
        train_df=train_val_df,
        test_df=test_val_df,
        source_tfrec_files=val_source_tfrec_files,
        test_tfrec_set=val_test_tfrec_set,
        raw_path=raw,
        public_path=public_val,
        private_path=private_val,
    )

    logger.info("--- Data preparation complete for all splits. ---")
@@ -0,0 +1,55 @@
1
+ import numpy as np
2
+ from pandas import DataFrame
3
+
4
+ from mlebench.grade_helpers import InvalidSubmissionError
5
+
6
+ from .notebook import calc_score as get_score
7
+
8
+
9
def grade(submission: DataFrame, answers: DataFrame) -> float:
    """Score a GNSS submission: mean of per-trip `calc_score` values.

    For every `tripId` in `answers`, the matching submission rows are scored
    with the notebook's `calc_score` (mean of the 50th and 95th percentile
    haversine error); the final grade is the mean over trips.

    Args:
        submission: participant predictions; must contain tripId,
            UnixTimeMillis, LatitudeDegrees and LongitudeDegrees columns.
        answers: ground-truth rows with the same four columns.

    Returns:
        The mean per-trip score as a plain float.

    Raises:
        InvalidSubmissionError: if the submission is missing a required
            column, or its row counts (overall or per trip) do not match
            the answers.
    """
    required_columns = ("tripId", "UnixTimeMillis", "LatitudeDegrees", "LongitudeDegrees")

    # Answers are produced by our own prepare step, so a hard assert is fine.
    for column in required_columns:
        assert column in answers.columns, f"Answers DataFrame must have a '{column}' column."

    # The submission is untrusted user input: raise a graded error instead.
    for column in required_columns:
        if column not in submission.columns:
            raise InvalidSubmissionError(f"Submission DataFrame must have a '{column}' column.")

    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            "Submission and answers DataFrames must have the same number of rows."
        )

    scores = []
    for trip_id in answers["tripId"].unique():
        filtered_answers = answers[answers["tripId"] == trip_id]
        filtered_submission = submission[submission["tripId"] == trip_id]

        if len(filtered_submission) != len(filtered_answers):
            # Fixed: the first fragment previously ended "...'tripId'." with no
            # trailing space, gluing the two sentences together ("tripId'.Got").
            raise InvalidSubmissionError(
                "Submission and answers DataFrames must have the same number of rows for each 'tripId'. "
                f"Got {len(filtered_submission)} rows in the submission and {len(filtered_answers)} rows "
                f"in the answers for tripId {trip_id}."
            )

        score = get_score(filtered_submission, filtered_answers)
        scores.append(score)

    # float() so the return type matches the annotation (np.mean -> np.float64).
    return float(np.mean(scores))
@@ -0,0 +1,86 @@
1
+ """
2
+ Helper functions from the Kaggle notebook `GSDC2 - baseline submission`.
3
+ Adapted from https://www.kaggle.com/code/saitodevel01/gsdc2-baseline-submission.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+
8
+ import numpy as np
9
+
10
# WGS84 reference-ellipsoid parameters used by the ECEF -> geodetic conversion.
# Semi-axes are in meters; the squared eccentricities are dimensionless.
WGS84_SEMI_MAJOR_AXIS = 6378137.0
WGS84_SEMI_MINOR_AXIS = 6356752.314245
WGS84_SQUARED_FIRST_ECCENTRICITY = 6.69437999013e-3
WGS84_SQUARED_SECOND_ECCENTRICITY = 6.73949674226e-3

# Mean Earth radius (meters) used by the haversine distance formula.
HAVERSINE_RADIUS = 6_371_000
16
+
17
+
18
@dataclass
class ECEF:
    """Earth-Centered, Earth-Fixed cartesian coordinates.

    Fields hold per-sample coordinate arrays; `to_numpy`/`from_numpy` convert
    between this record form and a stacked ndarray representation.
    """

    # Fixed: fields were annotated `np.array`, which is a factory function,
    # not a type. `np.ndarray` is the correct annotation; runtime behavior of
    # the dataclass is unchanged.
    x: np.ndarray
    y: np.ndarray
    z: np.ndarray

    def to_numpy(self):
        """Stack x, y, z along a new leading axis (result shape (3, ...))."""
        return np.stack([self.x, self.y, self.z], axis=0)

    @staticmethod
    def from_numpy(pos):
        """Build an ECEF from an array whose last axis holds (x, y, z)."""
        x, y, z = [np.squeeze(w) for w in np.split(pos, 3, axis=-1)]
        return ECEF(x=x, y=y, z=z)
31
+
32
+
33
@dataclass
class BLH:
    """Geodetic coordinates: latitude (B), longitude (L), ellipsoidal height (H).

    Latitude and longitude are in radians where produced by `ECEF_to_BLH`;
    height is in meters.
    """

    # Fixed: fields were annotated `np.array` (a function, not a type);
    # `np.ndarray` is the correct annotation. Dataclass behavior is unchanged.
    lat: np.ndarray
    lng: np.ndarray
    hgt: np.ndarray
38
+
39
+
40
def ECEF_to_BLH(ecef):
    """Convert ECEF cartesian coordinates (meters) to geodetic BLH (radians/meters).

    Uses the closed-form single-pass conversion based on the WGS84 ellipsoid
    parameters and the parametric latitude `t`.
    """
    semi_major = WGS84_SEMI_MAJOR_AXIS
    semi_minor = WGS84_SEMI_MINOR_AXIS
    ecc2 = WGS84_SQUARED_FIRST_ECCENTRICITY
    ecc2_prime = WGS84_SQUARED_SECOND_ECCENTRICITY

    # Distance from the Earth's rotation axis and the parametric latitude.
    axis_dist = np.sqrt(ecef.x**2 + ecef.y**2)
    param_lat = np.arctan2(ecef.z * (semi_major / semi_minor), axis_dist)

    lat = np.arctan2(
        ecef.z + (ecc2_prime * semi_minor) * np.sin(param_lat) ** 3,
        axis_dist - (ecc2 * semi_major) * np.cos(param_lat) ** 3,
    )
    lng = np.arctan2(ecef.y, ecef.x)

    # Prime-vertical radius of curvature, then height above the ellipsoid.
    prime_vertical = semi_major / np.sqrt(1 - ecc2 * np.sin(lat) ** 2)
    hgt = (axis_dist / np.cos(lat)) - prime_vertical

    return BLH(lat=lat, lng=lng, hgt=hgt)
56
+
57
+
58
def haversine_distance(blh_1, blh_2):
    """Great-circle distance in meters between paired coordinates (radians)."""
    delta_lat = blh_2.lat - blh_1.lat
    delta_lng = blh_2.lng - blh_1.lng
    # Standard haversine term combining the latitude and longitude deltas.
    hav = (
        np.sin(delta_lat / 2) ** 2
        + np.cos(blh_1.lat) * np.cos(blh_2.lat) * np.sin(delta_lng / 2) ** 2
    )
    return 2 * HAVERSINE_RADIUS * np.arcsin(np.sqrt(hav))
65
+
66
+
67
def pandas_haversine_distance(df1, df2):
    """Row-wise haversine distance between the lat/lng columns of two dataframes.

    Both inputs must carry LatitudeDegrees and LongitudeDegrees columns; the
    degrees are converted to radians and heights are taken as zero.
    """

    def _as_blh(frame):
        # Build a BLH record from a dataframe's degree columns.
        return BLH(
            lat=np.deg2rad(frame["LatitudeDegrees"].to_numpy()),
            lng=np.deg2rad(frame["LongitudeDegrees"].to_numpy()),
            hgt=0,
        )

    return haversine_distance(_as_blh(df1), _as_blh(df2))
81
+
82
+
83
def calc_score(pred_df, gt_df):
    """Per-trip score: mean of the 50th and 95th percentile haversine error."""
    distances = pandas_haversine_distance(pred_df, gt_df)
    p50 = np.quantile(distances, 0.50)
    p95 = np.quantile(distances, 0.95)
    return np.mean([p50, p95])
@@ -0,0 +1,143 @@
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+
7
+
8
+ def get_date(s: str) -> str:
9
+ """Gets date from string in the format YYYY-MM-DD-X where `X` is an arbitrary string."""
10
+
11
+ split = s.split("-")
12
+
13
+ assert (
14
+ len(split) >= 3
15
+ ), f"Expected the string to have at least 3 parts separated by `-`. Got {len(split)} parts."
16
+
17
+ year, month, day = split[:3]
18
+
19
+ assert (
20
+ isinstance(year, str) and year.isdigit()
21
+ ), f"Expected the year to be a string of digits. Got {year} instead."
22
+
23
+ assert (
24
+ isinstance(month, str) and month.isdigit()
25
+ ), f"Expected the month to be a string of digits. Got {month} instead."
26
+
27
+ assert (
28
+ isinstance(day, str) and day.isdigit()
29
+ ), f"Expected the day to be a string of digits. Got {day} instead."
30
+
31
+ date = f"{year}-{month.zfill(2)}-{day.zfill(2)}"
32
+
33
+ return date
34
+
35
+
36
+ def prepare(raw: Path, public: Path, private: Path) -> None:
37
+ old_train_ids = sorted([folder.name for folder in (raw / "train").glob("*") if folder.is_dir()])
38
+ dates = sorted(set([get_date(s) for s in old_train_ids]))
39
+ new_train_dates, new_test_dates = train_test_split(dates, test_size=0.1, random_state=0)
40
+
41
+ assert (
42
+ len(new_train_dates) >= 1
43
+ ), "Expected the new train set to have at least one date. Got 0 dates."
44
+
45
+ assert (
46
+ len(new_test_dates) >= 1
47
+ ), "Expected the new test set to have at least one date. Got 0 dates."
48
+
49
+ new_train_ids = sorted([i for i in old_train_ids if get_date(i) in new_train_dates])
50
+ new_test_ids = sorted([i for i in old_train_ids if get_date(i) in new_test_dates])
51
+
52
+ assert len(set(new_train_ids).intersection(set(new_test_ids))) == 0, (
53
+ f"Expected the new train and test instances to be disjoint. Got an intersection of "
54
+ f"{set(new_train_ids).intersection(set(new_test_ids))}."
55
+ )
56
+
57
+ assert len(new_train_ids) + len(new_test_ids) == len(old_train_ids), (
58
+ f"Expected the number of new train and test instances to sum up to the number of old train "
59
+ f"instances. Got {len(new_train_ids)} new train instances and {len(new_test_ids)} new test "
60
+ f"instances which sum to {len(new_train_ids) + len(new_test_ids)} instead of "
61
+ f"{len(old_train_ids)}."
62
+ )
63
+
64
+ assert set(new_train_ids).intersection(new_test_ids) == set(), (
65
+ f"Expected the new train and test instances to be disjoint. Got an intersection of "
66
+ f"{set(new_train_ids).intersection(new_test_ids)}."
67
+ )
68
+
69
+ for new_train_id in new_train_ids:
70
+ shutil.copytree(
71
+ src=raw / "train" / new_train_id,
72
+ dst=public / "train" / new_train_id,
73
+ )
74
+
75
+ for new_test_id in new_test_ids:
76
+ shutil.copytree(
77
+ src=raw / "train" / new_test_id,
78
+ dst=public / "test" / new_test_id,
79
+ )
80
+
81
+ # Construct test set by concatenating all ground truth csvs for the test journeys
82
+ dfs = []
83
+
84
+ for fpath in sorted((public / "test").rglob("ground_truth.csv")):
85
+ drive_id = fpath.parent.parent.name
86
+ phone_id = fpath.parent.name
87
+
88
+ assert (
89
+ drive_id in new_test_ids
90
+ ), f"Expected the drive {drive_id} to be one of the new test instances. Got {drive_id} instead."
91
+
92
+ raw_df = pd.read_csv(fpath)
93
+ df = raw_df.copy()
94
+ df.loc[:, "tripId"] = f"{drive_id}-{phone_id}"
95
+ df = df[["tripId", "UnixTimeMillis", "LatitudeDegrees", "LongitudeDegrees"]]
96
+ dfs.append(df)
97
+
98
+ new_test = pd.concat(dfs, ignore_index=True)
99
+ new_test.to_csv(private / "test.csv", index=False)
100
+
101
+ for fpath in (public / "test").rglob("ground_truth.csv"):
102
+ fpath.unlink() # don't include ground truth in public test data
103
+
104
+ shutil.copytree(
105
+ src=raw / "metadata",
106
+ dst=public / "metadata",
107
+ )
108
+
109
+ actual_journey_ids = set(["-".join(s.split("-")[:-1]) for s in new_test["tripId"]])
110
+
111
+ assert len(actual_journey_ids) == len(new_test_ids), (
112
+ f"Expected the new test instances to have {len(new_test_ids)} unique trip IDs. Got "
113
+ f"{len(new_test['tripId'].unique())} unique trip IDs."
114
+ )
115
+
116
+ sample_submission = new_test.copy()
117
+ sample_submission.loc[:, "LatitudeDegrees"] = 37.904611315634504
118
+ sample_submission.loc[:, "LongitudeDegrees"] = -86.48107806249548
119
+
120
+ assert len(sample_submission) == len(new_test), (
121
+ f"Expected the sample submission to have the same number of instances as the new test "
122
+ f"instances. Got {len(sample_submission)} instances in the sample submission and "
123
+ f"{len(new_test)} new test instances."
124
+ )
125
+
126
+ sample_submission.to_csv(public / "sample_submission.csv", index=False)
127
+
128
+ assert sorted(list(public.glob("train/*"))) == sorted(
129
+ set([public / "train" / drive_id for drive_id in new_train_ids])
130
+ ), "Expected the public train directory to contain the new train instances."
131
+
132
+ assert sorted(list(public.glob("test/*"))) == sorted(
133
+ set([public / "test" / drive_id for drive_id in new_test_ids])
134
+ ), "Expected the public test directory to contain the new test instances."
135
+
136
+ assert (
137
+ len(list((public / "test").rglob("ground_truth.csv"))) == 0
138
+ ), "Expected the public test directory to not contain any ground truth files."
139
+
140
+ assert len(list((public / "train").rglob("ground_truth.csv"))) >= len(new_train_ids), (
141
+ "Expected the public train directory to contain at least one ground truth file per new "
142
+ "train instance."
143
+ )