dslighting 1.7.1-py3-none-any.whl → 1.7.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/METADATA +3 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,56 @@
+ from pathlib import Path
+
+ import numpy as np
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import read_csv
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+
+     old_train = read_csv(raw / "train.csv")
+
+     # 900k train, 1.6m - 900k = 700k test; so 700k/1.6m = 0.4375
+     # We create our split at 100,000 test samples to get same OOM while keeping as many samples as possible in train
+     new_train, new_test = train_test_split(old_train, test_size=100_000, random_state=0)
+
+     # make ids go from 0 to len(new_train) - 1
+     new_train.id = np.arange(len(new_train))
+     # and from len(new_train) to len(new_train) + len(new_test) - 1
+     new_test.id = np.arange(len(new_train), len(new_train) + len(new_test))
+
+     # make downstream files
+     new_test_without_labels = new_test.drop(columns=["target"]).copy()
+     gold_submission = new_test[["id", "target"]].copy()
+     sample_submission = gold_submission.copy()
+     sample_submission.target = 0.5
+
+     # save
+     new_train.to_csv(public / "train.csv", index=False)
+     new_test.to_csv(private / "test.csv", index=False)
+     new_test_without_labels.to_csv(public / "test.csv", index=False)
+     gold_submission.to_csv(private / "gold_submission.csv", index=False)
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # checks
+     assert len(new_train) + len(new_test) == len(
+         old_train
+     ), "Expected the sum of the lengths of the new train and test to be equal to the length of the original train."
+     assert len(new_test) == len(
+         sample_submission
+     ), "Expected the length of the private test to be equal to the length of the sample submission."
+     assert len(new_test) == len(
+         gold_submission
+     ), "Expected the length of the private test to be equal to the length of the gold submission."
+
+     assert (
+         new_train.columns.to_list() == old_train.columns.to_list()
+     ), "Expected the columns of the new train to be the same as the columns of the original train."
+     assert (
+         new_test.columns.to_list() == old_train.columns.to_list()
+     ), "Expected the columns of the new test to be the same as the columns of the original train"
+
+     # check that ids dont overlap between train and test
+     assert set(new_train.id).isdisjoint(
+         set(new_test.id)
+     ), "Expected the ids of the new train and test to be disjoint."
@@ -0,0 +1,116 @@
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import read_csv
+
+
+ def _create_and_save_split(
+     df_to_split: pd.DataFrame,
+     public_path: Path,
+     private_path: Path,
+     test_size: int,
+     random_state: int,
+ ):
+     """
+     Helper function to perform a data split, re-index, and save all required files.
+
+     Args:
+         df_to_split (pd.DataFrame): The dataframe to be split.
+         public_path (Path): The directory for public-facing files (train, test features).
+         private_path (Path): The directory for private-facing files (test labels).
+         test_size (int): The number of samples for the test set.
+         random_state (int): The random state for reproducibility.
+
+     Returns:
+         pd.DataFrame: The new, smaller training dataframe from the split.
+     """
+     # Create output directories if they don't exist
+     public_path.mkdir(parents=True, exist_ok=True)
+     private_path.mkdir(parents=True, exist_ok=True)
+
+     # Perform the split
+     train_df, test_df = train_test_split(
+         df_to_split, test_size=test_size, random_state=random_state
+     )
+
+     # make ids go from 0 to len(train_df) - 1
+     train_df.id = np.arange(len(train_df))
+     # and from len(train_df) to len(train_df) + len(test_df) - 1
+     test_df.id = np.arange(len(train_df), len(train_df) + len(test_df))
+
+     # make downstream files
+     test_df_without_labels = test_df.drop(columns=["target"]).copy()
+     gold_submission = test_df[["id", "target"]].copy()
+     sample_submission = gold_submission.copy()
+     sample_submission.target = 0.5
+
+     # save files with required names
+     train_df.to_csv(public_path / "train.csv", index=False)
+     test_df.to_csv(private_path / "test.csv", index=False)
+     test_df_without_labels.to_csv(public_path / "test.csv", index=False)
+     gold_submission.to_csv(private_path / "gold_submission.csv", index=False)
+     sample_submission.to_csv(public_path / "sample_submission.csv", index=False)
+
+     # run checks for this split
+     assert len(train_df) + len(test_df) == len(
+         df_to_split
+     ), "Expected the sum of the lengths of the new train and test to be equal to the length of the input data."
+     assert len(test_df) == len(
+         sample_submission
+     ), "Expected the length of the private test to be equal to the length of the sample submission."
+     assert len(test_df) == len(
+         gold_submission
+     ), "Expected the length of the private test to be equal to the length of the gold submission."
+     assert (
+         train_df.columns.to_list() == df_to_split.columns.to_list()
+     ), "Expected the columns of the new train to be the same as the columns of the input data."
+     assert (
+         test_df.columns.to_list() == df_to_split.columns.to_list()
+     ), "Expected the columns of the new test to be the same as the columns of the input data"
+     assert set(train_df.id).isdisjoint(
+         set(test_df.id)
+     ), "Expected the ids of the new train and test to be disjoint."
+
+     return train_df
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+
+     old_train = read_csv(raw / "train.csv")
+
+     # --- Stage 1: Create the original train/test split ---
+     # This section produces the primary competition data. Its outputs in `public/`
+     # and `private/` are guaranteed to be identical to the original script.
+
+     # 900k train, 1.6m - 900k = 700k test; so 700k/1.6m = 0.4375
+     # We create our split at 100,000 test samples to get same OOM while keeping as many samples as possible in train
+     new_train = _create_and_save_split(
+         df_to_split=old_train,
+         public_path=public,
+         private_path=private,
+         test_size=100_000,
+         random_state=0,
+     )
+
+     # --- Stage 2: Create the new train/validation split ---
+     # This section takes the `new_train` set from Stage 1 and splits it again
+     # to create a smaller training set and a new validation set.
+     # The outputs are saved to parallel `public_val/` and `private_val/` directories.
+
+     # Define paths for the new validation set directories, parallel to the original ones.
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+
+     # Split `new_train` again using the exact same logic and test size.
+     # The resulting `test_val` set will have 100,000 samples, same as the original `test` set.
+     # The filenames and directory structure are mirrored for consistency.
+     _create_and_save_split(
+         df_to_split=new_train,
+         public_path=public_val,
+         private_path=private_val,
+         test_size=100_000,
+         random_state=0,
+     )
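Because _create_and_save_split re-indexes ids from zero on every call, the stage-2 outputs can be sanity-checked on their own. A minimal sketch, assuming prepare() has already run and that public_val/ and private_val/ were created next to public/ and private/ as above:

    import pandas as pd

    train_val = pd.read_csv("public_val/train.csv")
    test_val = pd.read_csv("private_val/test.csv")

    # ids restart at 0 in the stage-2 train set and continue into its test set
    assert train_val["id"].tolist() == list(range(len(train_val)))
    assert test_val["id"].min() == len(train_val)
    assert set(train_val["id"]).isdisjoint(set(test_val["id"]))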
@@ -0,0 +1,11 @@
+ import pandas as pd
+ from sklearn.metrics import accuracy_score
+
+ from mlebench.competitions.utils import prepare_for_accuracy_metric
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     accuracy_inputs = prepare_for_accuracy_metric(
+         submission=submission, answers=answers, target_column="label", id_column="fname"
+     )
+     return accuracy_score(**accuracy_inputs)
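prepare_for_accuracy_metric lives in the shared mlebench/competitions/utils.py added in this release; its body is not shown in this hunk. Judging from the call site, it aligns submission and answer rows on the id column and returns keyword arguments for sklearn's accuracy_score. A rough stand-in under that assumption:

    import pandas as pd
    from sklearn.metrics import accuracy_score

    submission = pd.DataFrame({"fname": ["a.wav", "b.wav"], "label": ["yes", "no"]})
    answers = pd.DataFrame({"fname": ["b.wav", "a.wav"], "label": ["no", "yes"]})

    # Align on the id column first, since row order may differ between files.
    merged = answers.merge(submission, on="fname", suffixes=("_true", "_pred"))
    print(accuracy_score(merged["label_true"], merged["label_pred"]))  # 1.0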
@@ -0,0 +1,90 @@
+ import shutil
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from tqdm import tqdm
+
+ from mlebench.utils import extract
+
+ CLASSES = [
+     "yes",
+     "no",
+     "up",
+     "down",
+     "left",
+     "right",
+     "on",
+     "off",
+     "stop",
+     "go",
+     "unknown",
+     "silence",
+ ]
+
+
+ @dataclass(frozen=True)
+ class AudioFile:
+     label: str
+     path: Path
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     # extract only what we need
+     extract(raw / "train.7z", raw)
+
+     # Create train, test from train split
+     audio_dir = raw / "train" / "audio"
+     audio_files = sorted(
+         [AudioFile(fpath.parent.name, fpath) for fpath in audio_dir.rglob("*.wav")],
+         key=lambda x: f"{x.label}_{x.path.name}",
+     )
+     train_files, test_files = train_test_split(audio_files, test_size=0.1, random_state=0)
+
+     # Make necessary directories
+     labels = list(
+         dict.fromkeys([file.label for file in train_files])
+     )  # Gets unique elements deterministically
+
+     for label in labels:
+         (public / "train" / "audio" / label).mkdir(parents=True, exist_ok=True)
+
+     (public / "test" / "audio").mkdir(parents=True, exist_ok=True)
+
+     # Copy over train and test files
+     for file in tqdm(train_files, desc="Copying train files"):
+         shutil.copyfile(
+             src=file.path,
+             dst=public / "train" / "audio" / file.label / file.path.name,
+         )
+
+     test_records = []
+
+     for idx, file in enumerate(tqdm(test_files, desc="Copying test files")):
+         # Rename files, since training audio files across labels aren't necessarily unique.
+         new_id = str(idx).zfill(8)
+         new_name = f"clip_{new_id}.wav"
+         test_records.append({"fname": new_name, "label": file.label})
+
+         shutil.copyfile(
+             src=file.path,
+             dst=public / "test" / "audio" / new_name,
+         )
+
+     test = pd.DataFrame.from_records(test_records)
+     test.to_csv(private / "test.csv", index=False)
+
+     test_without_labels = test.drop(columns=["label"])
+     sample_submission = test_without_labels.copy()
+     sample_submission["label"] = "silence"
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # Sanity checks
+     test_audio_files = list((public / "test" / "audio").glob("*.wav"))
+     num_test_files = len(test_audio_files)
+     num_submission_entries = len(sample_submission)
+     assert num_test_files == num_submission_entries, (
+         f"The number of test audio files ({num_test_files}) does not match the number of entries "
+         f"in sample_submission.csv ({num_submission_entries}). Please check the file copying process."
+     )
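Note the sorted() call before the split: rglob() returns files in filesystem-dependent order, so sorting on the stable "label_filename" key is what makes the seeded split reproducible across machines. A toy illustration of the same pattern (filenames are made up):

    from sklearn.model_selection import train_test_split

    # Without the sort, the same seed could yield different splits on
    # different machines; with it, the split is fully deterministic.
    files = sorted(["no_b.wav", "yes_a.wav", "no_a.wav"])
    train, test = train_test_split(files, test_size=1, random_state=0)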
@@ -0,0 +1,148 @@
+ import shutil
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import List
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from tqdm import tqdm
+
+ from mlebench.utils import extract
+
+ CLASSES = [
+     "yes",
+     "no",
+     "up",
+     "down",
+     "left",
+     "right",
+     "on",
+     "off",
+     "stop",
+     "go",
+     "unknown",
+     "silence",
+ ]
+
+
+ @dataclass(frozen=True)
+ class AudioFile:
+     label: str
+     path: Path
+
+
+ def _create_and_populate_split(
+     files_to_split: List[AudioFile],
+     public_dir: Path,
+     private_dir: Path,
+     test_size: float,
+     random_state: int,
+ ) -> List[AudioFile]:
+     """
+     Splits a list of audio files into train and test sets, and populates the
+     corresponding public and private directories.
+
+     Args:
+         files_to_split: A list of AudioFile objects to be split.
+         public_dir: The public directory to save training data and test stubs.
+         private_dir: The private directory to save test ground truth.
+         test_size: The proportion of the dataset to allocate to the test split.
+         random_state: The seed used by the random number generator.
+
+     Returns:
+         A list of AudioFile objects that were assigned to the training set.
+     """
+     train_files, test_files = train_test_split(
+         files_to_split, test_size=test_size, random_state=random_state
+     )
+
+     # Make necessary directories
+     public_dir.mkdir(parents=True, exist_ok=True)
+     private_dir.mkdir(parents=True, exist_ok=True)
+
+     labels = list(
+         dict.fromkeys([file.label for file in train_files])
+     )  # Gets unique elements deterministically
+
+     for label in labels:
+         (public_dir / "train" / "audio" / label).mkdir(parents=True, exist_ok=True)
+
+     (public_dir / "test" / "audio").mkdir(parents=True, exist_ok=True)
+
+     # Copy over train and test files
+     desc_suffix = public_dir.name
+     for file in tqdm(train_files, desc=f"Copying train files to {desc_suffix}"):
+         shutil.copyfile(
+             src=file.path,
+             dst=public_dir / "train" / "audio" / file.label / file.path.name,
+         )
+
+     test_records = []
+
+     for idx, file in enumerate(tqdm(test_files, desc=f"Copying test files to {desc_suffix}")):
+         # Rename files, since training audio files across labels aren't necessarily unique.
+         new_id = str(idx).zfill(8)
+         new_name = f"clip_{new_id}.wav"
+         test_records.append({"fname": new_name, "label": file.label})
+
+         shutil.copyfile(
+             src=file.path,
+             dst=public_dir / "test" / "audio" / new_name,
+         )
+
+     test = pd.DataFrame.from_records(test_records)
+     test.to_csv(private_dir / "test.csv", index=False)
+
+     test_without_labels = test.drop(columns=["label"])
+     sample_submission = test_without_labels.copy()
+     sample_submission["label"] = "silence"
+     sample_submission.to_csv(public_dir / "sample_submission.csv", index=False)
+
+     # Sanity checks
+     test_audio_files = list((public_dir / "test" / "audio").glob("*.wav"))
+     num_test_files = len(test_audio_files)
+     num_submission_entries = len(sample_submission)
+     assert num_test_files == num_submission_entries, (
+         f"The number of test audio files ({num_test_files}) does not match the number of entries "
+         f"in sample_submission.csv ({num_submission_entries}). Please check the file copying process."
+     )
+
+     return train_files
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     # extract only what we need
+     extract(raw / "train.7z", raw)
+
+     # Create train, test from train split
+     audio_dir = raw / "train" / "audio"
+     all_audio_files = sorted(
+         [AudioFile(fpath.parent.name, fpath) for fpath in audio_dir.rglob("*.wav")],
+         key=lambda x: f"{x.label}_{x.path.name}",
+     )
+
+     # --- First Split: Create the original train/test sets ---
+     # This call populates the original `public` and `private` directories.
+     # It returns the files assigned to the training set for the next split.
+     train_files_for_val_split = _create_and_populate_split(
+         files_to_split=all_audio_files,
+         public_dir=public,
+         private_dir=private,
+         test_size=0.1,
+         random_state=0,
+     )
+
+     # --- Second Split: Create the new train_val/test_val sets ---
+     # Define new paths for the validation split, parallel to the original ones.
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+
+     # To get a validation test set of the same size as the original test set,
+     # we take a 1/9 slice of the remaining 90% of the data (0.1 / 0.9 = 1/9).
+     _create_and_populate_split(
+         files_to_split=train_files_for_val_split,
+         public_dir=public_val,
+         private_dir=private_val,
+         test_size=1 / 9,
+         random_state=0,  # Use the same random_state for deterministic behavior
+     )
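The 1/9 fraction is chosen so the stage-2 test set matches the original test set's size, since only 90% of the data survives the first split. A quick integer-arithmetic check (the total count here is arbitrary):

    N = 60_000                       # arbitrary total number of clips
    stage1_train = N * 9 // 10       # 90% survives the first split
    stage2_test = stage1_train // 9  # a 1/9 slice of what remains
    assert stage2_test == N // 10    # same size as the original 10% test set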
@@ -0,0 +1,122 @@
+ import pandas as pd
+ from sklearn.metrics import f1_score
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def prepare_for_metric(submission: pd.DataFrame, answers: dict):
+     """
+     `submission` is a pd.DataFrame with "example_id" and "PredictionString" columns.
+
+     `answers` is a dict in the format of "simplified-nq-train.jsonl" described here:
+     https://www.kaggle.com/c/tensorflow2-question-answering/data
+
+     The competition uses a micro F1 score metric, which is a binary classification metric.
+     For retrieval, we convert the submission and answers for each sample as:
+     - y_pred: if the submission has an answer, it is 1, else 0
+     - y_true:
+         - if the submission has an answer and it exists in the true labels, y_true=1
+         - if the submission has an answer but it does not exist in the true labels, y_true=0
+         - if the submission has no answer but there exists a true label, y_true=1
+         - if the submission has no answer and there is no true label, y_true=0
+
+     This is consistent with
+     https://www.kaggle.com/competitions/tensorflow2-question-answering/overview/evaluation
+     - TP = the predicted indices match one of the possible ground truth indices
+     - FP = the predicted indices do NOT match one of the possible ground truth indices, OR
+       a prediction has been made where no ground truth exists
+     - FN = no prediction has been made where a ground truth exists
+
+     Returns y_true, y_pred which are lists of 0s and 1s.
+     """
+     if len(submission) != 2 * len(answers):
+         raise InvalidSubmissionError(
+             f"Submission length {len(submission)} != 2 * answers length {len(answers)}"
+         )
+     # Empty strings are read as NaN by pandas, but we want these to remain empty strings
+     submission.fillna("", inplace=True)
+     submission = submission.astype(str)
+
+     expected_ids = []
+     for sample in answers:
+         expected_ids.append(f"{sample['example_id']}_long")
+         expected_ids.append(f"{sample['example_id']}_short")
+
+     if not ({"example_id", "PredictionString"}).issubset(submission.columns):
+         raise InvalidSubmissionError(
+             "Submission requires 'example_id' and 'PredictionString' columns."
+         )
+     if not set(submission["example_id"]) == set(expected_ids):
+         raise InvalidSubmissionError(
+             "Submission example_ids do not match expected_ids. Please ensure you have a "
+             "long and short answer for each example_id in the answers."
+         )
+     if not len(set(submission["example_id"])) == len(submission):
+         raise InvalidSubmissionError(
+             "Submission example_ids are not unique. Please ensure you have a "
+             "long and short answer for each example_id in the answers."
+         )
+
+     y_pred = [1 if row["PredictionString"].strip() else 0 for _, row in submission.iterrows()]
+
+     y_true = []
+     true_annotations = {sample["example_id"]: sample["annotations"][0] for sample in answers}
+     for idx, row in submission.iterrows():
+         sample_id = row["example_id"]
+
+         # Parse prediction
+         # Prediction may be any of "{start_token}:{end_token}", "YES", "NO", ""
+         pred = row["PredictionString"].strip()
+         if ":" in pred and len(pred.split(":")) == 2:
+             start_token, end_token = pred.split(":")
+             pred = (int(start_token), int(end_token))
+         elif pred in ["YES", "NO", ""]:
+             pass
+         else:
+             raise InvalidSubmissionError(f"Invalid submission format: {pred}")
+
+         # Parse valid answers
+         if sample_id.endswith("_long"):
+             sample_id = sample_id.split("_long")[0]
+             annotation = true_annotations[sample_id]
+             valid_answers = []
+             if annotation["long_answer"]["start_token"] != -1:
+                 valid_answers.append(
+                     (
+                         int(annotation["long_answer"]["start_token"]),
+                         int(annotation["long_answer"]["end_token"]),
+                     ),
+                 )
+         elif sample_id.endswith("_short"):
+             sample_id = sample_id.split("_short")[0]
+             annotation = true_annotations[sample_id]
+             valid_answers = [
+                 (int(short_answer["start_token"]), int(short_answer["end_token"]))
+                 for short_answer in annotation["short_answers"]
+             ]
+             if annotation["yes_no_answer"] != "NONE":
+                 valid_answers.append(annotation["yes_no_answer"])
+         else:
+             raise InvalidSubmissionError(
+                 f"sample_id {sample_id} does not end with '_long' or '_short'"
+             )
+
+         # Mark as 0 or 1 based on the above rules
+         if pred:
+             if pred in valid_answers:
+                 y_true.append(1)
+             else:
+                 y_true.append(0)
+         else:
+             if valid_answers:
+                 y_true.append(1)
+             else:
+                 y_true.append(0)
+
+     assert len(y_true) == len(y_pred), f"y_true {len(y_true)} != y_pred {len(y_pred)}"
+     return y_true, y_pred
+
+
+ def grade(submission: pd.DataFrame, answers: dict) -> float:
+     y_true, y_pred = prepare_for_metric(submission, answers)
+     return f1_score(y_true=y_true, y_pred=y_pred, average="micro")
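A worked example of the conversion rules in the docstring above, with illustrative predictions covering all four cases:

    from sklearn.metrics import f1_score

    # pred "3:7" matching a gold span   -> y_pred=1, y_true=1 (TP)
    # pred "2:5" matching no gold span  -> y_pred=1, y_true=0 (FP)
    # empty pred but a gold span exists -> y_pred=0, y_true=1 (FN)
    # empty pred and no gold answer     -> y_pred=0, y_true=0 (TN)
    y_pred = [1, 1, 0, 0]
    y_true = [1, 0, 1, 0]
    print(f1_score(y_true=y_true, y_pred=y_pred, average="micro"))  # 0.5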
@@ -0,0 +1,122 @@
+ from pathlib import Path
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from tqdm import tqdm
+
+ from mlebench.utils import get_logger
+
+ logger = get_logger(__name__)
+
+
+ def extract_string(document_text: str, start_token_idx: int, end_token_idx: int) -> str:
+     document_tokens = document_text.split(" ")
+     extract_tokens = document_tokens[start_token_idx:end_token_idx]
+     return " ".join(extract_tokens)
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the data in raw into public and private datasets with appropriate test/train splits.
+     """
+
+     # Create train, test from train split
+     train_file = "simplified-nq-train.jsonl"
+
+     logger.info("Counting lines in train file...")
+     with open(raw / train_file, "r") as f:
+         n_lines = sum(1 for _ in f)
+     logger.info(f"Found {n_lines} lines in train file.")
+
+     # Read data in chunks to avoid memory issues
+     train_ids, test_ids = [], []
+     lightweight_test = []  # We'll use this to create a gold submission later
+     with tqdm(total=n_lines, desc="Splitting data") as pbar:
+         for df in pd.read_json(raw / train_file, orient="records", lines=True, chunksize=1_000):
+             # Convert IDs to strings, Kaggle.com is inconsistent about this but strings make more sense
+             df["example_id"] = df["example_id"].astype(str)
+             new_train, new_test = train_test_split(df, test_size=0.1, random_state=0)
+
+             keys_to_keep = [
+                 "example_id",
+                 "question_text",
+                 "document_text",
+                 "long_answer_candidates",
+             ]
+             new_test_without_labels = new_test.copy()[keys_to_keep]
+
+             # Append lines to new train and test
+             with open(public / "simplified-nq-train.jsonl", "a") as f:
+                 f.write(new_train.to_json(orient="records", lines=True))
+             with open(private / "test.jsonl", "a") as f:
+                 f.write(new_test.to_json(orient="records", lines=True))
+             with open(public / "simplified-nq-test.jsonl", "a") as f:
+                 f.write(new_test_without_labels.to_json(orient="records", lines=True))
+
+             train_ids.extend(new_train["example_id"].tolist())
+             test_ids.extend(new_test["example_id"].tolist())
+             lightweight_test.append(
+                 new_test.copy()[["example_id", "question_text", "annotations"]]
+             )  # For gold submission
+             pbar.update(len(df))
+
+     lightweight_test = pd.concat(lightweight_test, ignore_index=True)
+
+     assert len(train_ids) + len(test_ids) == n_lines
+     assert len(lightweight_test) == len(test_ids)
+
+     # Create a gold submission with columns "example_id", "PredictionString"
+     gold_rows = []
+     for idx, sample in tqdm(
+         lightweight_test.iterrows(), total=len(lightweight_test), desc="Creating gold submission"
+     ):
+         sample = sample.to_dict()
+         assert len(sample["annotations"]) == 1
+         annotation = sample["annotations"][0]
+
+         # Create short answer
+
+         # Multiple answers are possible: yes_no_answer or one of short_answers
+         # We just take the first one
+         if annotation["yes_no_answer"] != "NONE":
+             answer = annotation["yes_no_answer"]
+         elif len(annotation["short_answers"]) > 0:
+             start_token = annotation["short_answers"][0]["start_token"]
+             end_token = annotation["short_answers"][0]["end_token"]
+             answer = f"{start_token}:{end_token}"
+         else:
+             answer = ""
+
+         logger.debug(f"q: {sample['question_text']}")
+         logger.debug(f"a: {answer}")
+         logger.debug("")
+
+         gold_rows.append(
+             {"example_id": f"{sample['example_id']}_short", "PredictionString": answer}
+         )
+
+         # Create long answer
+
+         if annotation["long_answer"]["start_token"] != -1:
+             start_token = annotation["long_answer"]["start_token"]
+             end_token = annotation["long_answer"]["end_token"]
+             answer = f"{start_token}:{end_token}"
+         else:
+             answer = ""
+
+         logger.debug(f"q: {sample['question_text']}")
+         logger.debug(f"a: {answer}")
+         logger.debug("")
+
+         gold_rows.append({"example_id": f"{sample['example_id']}_long", "PredictionString": answer})
+
+     gold_submission = pd.DataFrame(gold_rows)
+     gold_submission.to_csv(private / "gold_submission.csv", index=False)
+
+     # Sample submission
+     sample_submission = gold_submission.copy()
+     sample_submission["PredictionString"] = ""
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     assert len(gold_submission) == 2 * len(test_ids)
+     assert len(sample_submission) == 2 * len(test_ids)
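Each test example thus contributes exactly two rows to gold_submission.csv, one "_short" and one "_long". A hedged post-hoc check of the output (the relative path assumes the private directory used above; keep_default_na=False preserves the intentionally empty PredictionString values):

    import pandas as pd

    gold = pd.read_csv("private/gold_submission.csv", keep_default_na=False)
    assert len(gold) % 2 == 0
    assert gold["example_id"].map(lambda s: s.endswith(("_short", "_long"))).all()
    # Valid PredictionString values: "", "YES", "NO", or "{start_token}:{end_token}"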