dslighting 1.7.1-py3-none-any.whl → 1.7.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0

mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py
@@ -0,0 +1,96 @@
+ from pathlib import Path
+ from typing import Tuple
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import read_csv
+
+
+ def _create_split(
+     input_df: pd.DataFrame, test_size: float, random_state: int
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+     """Helper function to perform a single data split and generate associated files."""
+     # Create train, test from the input dataframe
+     train_df, answers_df = train_test_split(
+         input_df, test_size=test_size, random_state=random_state
+     )
+     test_df = answers_df.drop(columns=["score"])
+
+     sample_submission_df = answers_df[["essay_id"]].copy()
+     sample_submission_df["score"] = np.random.RandomState(42).randint(
+         1, 7, size=len(sample_submission_df)
+     )
+
+     # Checks
+     assert set(train_df["essay_id"]).isdisjoint(
+         set(test_df["essay_id"])
+     ), "Essay IDs in train and test sets are not disjoint"
+     assert len(train_df) + len(test_df) == len(
+         input_df
+     ), f"Train and test sets do not sum to original train set"
+     assert len(test_df) == len(
+         sample_submission_df
+     ), f"Test and sample submission sets do not have the same length"
+     assert (
+         train_df.columns.tolist() == input_df.columns.tolist()
+     ), f"Train set columns do not match original train set, got {train_df.columns.tolist()}"
+     assert test_df.columns.tolist() == [
+         "essay_id",
+         "full_text",
+     ], f"Test set columns do not match expected columns, got {test_df.columns.tolist()}"
+     assert sample_submission_df.columns.tolist() == [
+         "essay_id",
+         "score",
+     ], f"Sample submission set columns do not match expected columns, got {sample_submission_df.columns.tolist()}"
+
+     return train_df, test_df, answers_df, sample_submission_df
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the data in raw into public and private datasets with appropriate test/train splits.
+     Also creates a second, parallel split for validation purposes.
+     """
+
+     # Read the original raw data
+     old_train = read_csv(raw / "train.csv")
+
+     # --- Stage 1: Create the original train/test split for the main competition ---
+     # This block produces the exact same output as the original script.
+
+     # Original train has 17307 rows. Original hidden test has approx 8k rows. We just take 10% of the original train as the test set.
+     main_train, main_test, main_answers, main_sample_submission = _create_split(
+         input_df=old_train, test_size=0.1, random_state=0
+     )
+
+     # Write original CSVs to public/ and private/
+     main_answers.to_csv(private / "answers.csv", index=False)
+     main_train.to_csv(public / "train.csv", index=False)
+     main_test.to_csv(public / "test.csv", index=False)
+     main_sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # --- Stage 2: Create a new validation split from the main training data ---
+     # This block creates a new set of directories and files for validation.
+
+     # Define and create the new parallel directories for the validation set
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+     public_val.mkdir(parents=True, exist_ok=True)
+     private_val.mkdir(parents=True, exist_ok=True)
+
+     # Calculate the test size for the second split to make the validation test set
+     # have the same number of samples as the original test set.
+     val_test_size = len(main_test) / len(main_train)
+
+     # Create the new split using the main training data as input
+     val_train, val_test, val_answers, val_sample_submission = _create_split(
+         input_df=main_train, test_size=val_test_size, random_state=0
+     )
+
+     # Write validation CSVs to public_val/ and private_val/ using identical filenames
+     val_answers.to_csv(private_val / "answers.csv", index=False)
+     val_train.to_csv(public_val / "train.csv", index=False)
+     val_test.to_csv(public_val / "test.csv", index=False)
+     val_sample_submission.to_csv(public_val / "sample_submission.csv", index=False)
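
Stage 2's test fraction is derived rather than hardcoded: dividing the main test-set size by the main train-set size makes the validation test set the same size as the Stage 1 holdout. A quick sketch of the arithmetic (the 17307 row count comes from the comment above; variable names are illustrative only):

    # Illustration of the val_test_size arithmetic used in prepare() above.
    total = 17307                    # rows in the original train.csv, per the comment
    main_test = round(0.1 * total)   # Stage 1 holds out 10% of the rows
    main_train = total - main_test   # the remaining 90% becomes the main train set

    # Fraction of main_train that yields a validation test set of equal size:
    val_test_size = main_test / main_train   # ~0.1111, i.e. 1/9 for a 10% split
    assert round(val_test_size * main_train) == main_test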

mlebench/competitions/liverpool-ion-switching/grade.py
@@ -0,0 +1,52 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics import f1_score
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+     """Prepare submission and answers for Macro F1 calculation."""
+     if len(submission) != len(answers):
+         raise InvalidSubmissionError(
+             f"Submission length ({len(submission)}) != answers length ({len(answers)})"
+         )
+
+     if "time" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have a 'time' column")
+
+     if "open_channels" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have an 'open_channels' column")
+
+     # Sort by time
+     submission = submission.sort_values("time").reset_index(drop=True)
+     answers = answers.sort_values("time").reset_index(drop=True)
+
+     # Check time alignment
+     if not np.allclose(submission["time"].values, answers["time"].values, atol=1e-4):
+         raise InvalidSubmissionError("Submission and answers time mismatch")
+
+     # Extract predictions and true values
+     y_pred = submission["open_channels"].values
+     y_true = answers["open_channels"].values
+
+     # Validate predictions: reject NaNs before the int cast (casting NaN to int yields garbage, not an error)
+     if np.any(pd.isna(y_pred)):
+         raise InvalidSubmissionError("Predictions cannot contain NaN values")
+
+     try:
+         y_pred = y_pred.astype(int)
+     except (ValueError, TypeError):
+         raise InvalidSubmissionError("Predictions must be integers")
+
+     return y_true, y_pred
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     """Calculate Macro F1 score."""
+     y_true, y_pred = prepare_for_metric(submission, answers)
+
+     # Macro F1: compute F1 for each class and average
+     macro_f1 = f1_score(y_true, y_pred, average='macro')
+
+     return macro_f1
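
For orientation, a minimal sketch of calling this grader on toy data (the frames below are invented; only the column names come from the module above, and `grade` is assumed to be imported from it):

    import pandas as pd

    answers = pd.DataFrame({"time": [0.0001, 0.0002, 0.0003], "open_channels": [0, 1, 2]})
    submission = pd.DataFrame({"time": [0.0001, 0.0002, 0.0003], "open_channels": [0, 1, 1]})

    # Macro F1 averages the per-class F1 over the classes present, here {0, 1, 2}.
    print(grade(submission, answers))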

mlebench/competitions/liverpool-ion-switching/prepare.py
@@ -0,0 +1,27 @@
+ from pathlib import Path
+ import pandas as pd
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Prepare liverpool-ion-switching dataset.
+     """
+     # Read data
+     train = pd.read_csv(raw / "train.csv")
+     test = pd.read_csv(raw / "test.csv")
+     sample_submission = pd.read_csv(raw / "sample_submission.csv")
+     test_answer = pd.read_csv(raw / "test_answer.csv")
+
+     # Public files (visible to agents)
+     train.to_csv(public / "train.csv", index=False)
+     test.to_csv(public / "test.csv", index=False)
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # Private files (for grading)
+     test_answer.to_csv(private / "test.csv", index=False)
+
+     # Validation checks
+     assert len(test_answer) == len(sample_submission), \
+         f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
+     assert "time" in test_answer.columns, "Test answer must have 'time' column"
+     assert "open_channels" in test_answer.columns, "Test answer must have 'open_channels' column"
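
A hedged usage sketch for prepare() (the directory names are hypothetical; the function writes into public/ and private/ without creating them, so the caller makes the directories first):

    from pathlib import Path

    raw, public, private = Path("raw"), Path("public"), Path("private")
    public.mkdir(exist_ok=True)
    private.mkdir(exist_ok=True)

    prepare(raw, public, private)
    # public/:  train.csv, test.csv, sample_submission.csv
    # private/: test.csv (the held-out answers consumed by grade.py)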

mlebench/competitions/liverpool_ion_switching/grade.py
@@ -0,0 +1,52 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics import f1_score
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+     """Prepare submission and answers for Macro F1 calculation."""
+     if len(submission) != len(answers):
+         raise InvalidSubmissionError(
+             f"Submission length ({len(submission)}) != answers length ({len(answers)})"
+         )
+
+     if "time" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have a 'time' column")
+
+     if "open_channels" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have an 'open_channels' column")
+
+     # Sort by time
+     submission = submission.sort_values("time").reset_index(drop=True)
+     answers = answers.sort_values("time").reset_index(drop=True)
+
+     # Check time alignment
+     if not np.allclose(submission["time"].values, answers["time"].values, atol=1e-4):
+         raise InvalidSubmissionError("Submission and answers time mismatch")
+
+     # Extract predictions and true values
+     y_pred = submission["open_channels"].values
+     y_true = answers["open_channels"].values
+
+     # Validate predictions: reject NaNs before the int cast (casting NaN to int yields garbage, not an error)
+     if np.any(pd.isna(y_pred)):
+         raise InvalidSubmissionError("Predictions cannot contain NaN values")
+
+     try:
+         y_pred = y_pred.astype(int)
+     except (ValueError, TypeError):
+         raise InvalidSubmissionError("Predictions must be integers")
+
+     return y_true, y_pred
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     """Calculate Macro F1 score."""
+     y_true, y_pred = prepare_for_metric(submission, answers)
+
+     # Macro F1: compute F1 for each class and average
+     macro_f1 = f1_score(y_true, y_pred, average='macro')
+
+     return macro_f1

mlebench/competitions/liverpool_ion_switching/prepare.py
@@ -0,0 +1,27 @@
+ from pathlib import Path
+ import pandas as pd
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Prepare liverpool-ion-switching dataset.
+     """
+     # Read data
+     train = pd.read_csv(raw / "train.csv")
+     test = pd.read_csv(raw / "test.csv")
+     sample_submission = pd.read_csv(raw / "sample_submission.csv")
+     test_answer = pd.read_csv(raw / "test_answer.csv")
+
+     # Public files (visible to agents)
+     train.to_csv(public / "train.csv", index=False)
+     test.to_csv(public / "test.csv", index=False)
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+     # Private files (for grading)
+     test_answer.to_csv(private / "test.csv", index=False)
+
+     # Validation checks
+     assert len(test_answer) == len(sample_submission), \
+         f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
+     assert "time" in test_answer.columns, "Test answer must have 'time' column"
+     assert "open_channels" in test_answer.columns, "Test answer must have 'open_channels' column"

mlebench/competitions/lmsys-chatbot-arena/grade.py
@@ -0,0 +1,63 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics import log_loss
+
+ from mlebench.competitions.utils import one_hot_dfs_to_log_loss_inputs
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> dict:
+     """
+     The submission and answers are already one-hotted
+     """
+     classes = ["winner_model_a", "winner_model_b", "winner_tie"]
+     required_columns = ["id"] + classes
+
+     # Check if submission has the required columns
+     missing_columns = [col for col in required_columns if col not in submission.columns]
+     if missing_columns:
+         raise InvalidSubmissionError(
+             f"Submission DataFrame is missing required columns: {missing_columns}"
+         )
+
+     # Check if answers has the required columns
+     assert set(required_columns).issubset(
+         answers.columns
+     ), f"Answers DataFrame is missing required columns: {set(required_columns) - set(answers.columns)}"
+
+     # Check if submission has the correct number of rows
+     if len(submission) != len(answers):
+         raise InvalidSubmissionError(
+             f"Submission DataFrame must have {len(answers)} rows, but has {len(submission)} rows."
+         )
+
+     # Check if all values in submission are between 0 and 1
+     if (
+         not ((submission[classes] >= 0) & (submission[classes] <= 1)).all().all()
+     ):  # the first all() reduces over the rows within each column, the second over the columns
+         raise InvalidSubmissionError("All values in submission DataFrame must be between 0 and 1.")
+
+     # Check if each row in submission sums to 1
+     if not submission[classes].sum(axis=1).round(6).eq(1).all():
+         raise InvalidSubmissionError("Each row in submission DataFrame must sum to 1.")
+
+     # Use only the required columns for further processing
+     submission = submission[required_columns]
+     answers = answers[required_columns]
+
+     submission = submission.sort_values("id").reset_index(drop=True)
+     answers = answers.sort_values("id").reset_index(drop=True)
+
+     if (submission["id"].values != answers["id"].values).any():
+         raise InvalidSubmissionError("Submission and answer IDs do not match after sorting.")
+
+     log_loss_inputs = one_hot_dfs_to_log_loss_inputs(
+         submission, answers, id_column="id", apply_softmax=False
+     )
+
+     return log_loss_inputs
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     log_loss_inputs = prepare_for_metric(submission, answers)
+     return log_loss(**log_loss_inputs)

mlebench/competitions/lmsys-chatbot-arena/prepare.py
@@ -0,0 +1,52 @@
+ import shutil
+ from pathlib import Path
+
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import extract, read_csv
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the data in raw into public and private datasets with appropriate test/train splits.
+     """
+
+     # Create train, test from train split
+     old_train = read_csv(raw / "train.csv")
+     # Original train has 55k rows. Original hidden test has 25k rows. We make a new test set with 10% of the original train.
+     new_train, answers = train_test_split(old_train, test_size=0.1, random_state=0)
+     new_test = answers[["id", "prompt", "response_a", "response_b"]].copy()
+
+     sample_submission = answers[["id"]].copy()
+     sample_submission["winner_model_a"] = 0.3333333333333333
+     sample_submission["winner_model_b"] = 0.3333333333333333
+     sample_submission["winner_tie"] = 0.3333333333333333
+
+     # Checks
+     assert len(new_train) + len(new_test) == len(
+         old_train
+     ), f"New train and test should have the same number of rows as the original train"
+     assert set(new_train["id"]).isdisjoint(
+         set(new_test["id"])
+     ), f"New train and test should have no overlapping ids"
+     assert new_test.columns.tolist() == [
+         "id",
+         "prompt",
+         "response_a",
+         "response_b",
+     ], f"New test should have columns id, prompt, response_a, response_b"
+     assert sample_submission.columns.tolist() == [
+         "id",
+         "winner_model_a",
+         "winner_model_b",
+         "winner_tie",
+     ], f"Sample submission should have columns id, winner_model_a, winner_model_b, winner_tie"
+     assert (
+         new_train.columns.tolist() == old_train.columns.tolist()
+     ), f"New train should have the same columns as the original train"
+
+     # Write CSVs
+     answers.to_csv(private / "answers.csv", index=False)
+     new_train.to_csv(public / "train.csv", index=False)
+     new_test.to_csv(public / "test.csv", index=False)
+     sample_submission.to_csv(public / "sample_submission.csv", index=False)
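
The uniform 1/3 sample submission doubles as a baseline: under the log-loss metric used by the companion grade.py it scores ln(3) ≈ 1.0986 regardless of the true labels, which is a handy sanity check. A sketch with invented toy labels:

    import numpy as np
    from sklearn.metrics import log_loss

    y_true = ["winner_model_a", "winner_model_b", "winner_tie", "winner_tie"]
    uniform = np.full((4, 3), 1 / 3)   # every row matches the sample submission
    print(log_loss(y_true, uniform, labels=["winner_model_a", "winner_model_b", "winner_tie"]))
    # ~1.0986, i.e. np.log(3)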

mlebench/competitions/lmsys-chatbot-arena/prepare_val.py
@@ -0,0 +1,115 @@
+ import shutil
+ from pathlib import Path
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import extract, read_csv
+
+
+ def _create_split(
+     df_in: pd.DataFrame,
+     public_path: Path,
+     private_path: Path,
+     test_size: float,
+     random_state: int,
+ ) -> pd.DataFrame:
+     """
+     Helper function to perform a data split and write files to specified directories.
+
+     Args:
+         df_in: The input DataFrame to split.
+         public_path: The directory for public-facing files (train set, test features).
+         private_path: The directory for private files (test answers).
+         test_size: The proportion of the dataset to allocate to the test set.
+         random_state: The seed for the random number generator.
+
+     Returns:
+         The DataFrame for the newly created training set.
+     """
+     # Create output directories if they don't exist
+     public_path.mkdir(exist_ok=True, parents=True)
+     private_path.mkdir(exist_ok=True, parents=True)
+
+     # Create train, test from the input dataframe
+     new_train, answers = train_test_split(
+         df_in, test_size=test_size, random_state=random_state
+     )
+     new_test = answers[["id", "prompt", "response_a", "response_b"]].copy()
+
+     sample_submission = answers[["id"]].copy()
+     sample_submission["winner_model_a"] = 0.3333333333333333
+     sample_submission["winner_model_b"] = 0.3333333333333333
+     sample_submission["winner_tie"] = 0.3333333333333333
+
+     # Checks
+     assert len(new_train) + len(new_test) == len(
+         df_in
+     ), f"New train and test should have the same number of rows as the original dataframe"
+     assert set(new_train["id"]).isdisjoint(
+         set(new_test["id"])
+     ), f"New train and test should have no overlapping ids"
+     assert new_test.columns.tolist() == [
+         "id",
+         "prompt",
+         "response_a",
+         "response_b",
+     ], f"New test should have columns id, prompt, response_a, response_b"
+     assert sample_submission.columns.tolist() == [
+         "id",
+         "winner_model_a",
+         "winner_model_b",
+         "winner_tie",
+     ], f"Sample submission should have columns id, winner_model_a, winner_model_b, winner_tie"
+     assert (
+         new_train.columns.tolist() == df_in.columns.tolist()
+     ), f"New train should have the same columns as the original dataframe"
+
+     # Write CSVs
+     answers.to_csv(private_path / "answers.csv", index=False)
+     new_train.to_csv(public_path / "train.csv", index=False)
+     new_test.to_csv(public_path / "test.csv", index=False)
+     sample_submission.to_csv(public_path / "sample_submission.csv", index=False)
+
+     return new_train
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the data in raw into public and private datasets with appropriate test/train splits.
+     Also creates a secondary validation split (public_val, private_val) for local testing.
+     """
+
+     # --- Stage 1: Create the original competition split (train/test) ---
+     # This block generates the primary `public` and `private` directories.
+     # Its outputs MUST remain identical to the original script's outputs.
+     old_train_df = read_csv(raw / "train.csv")
+     train_for_val_split = _create_split(
+         df_in=old_train_df,
+         public_path=public,
+         private_path=private,
+         test_size=0.1,
+         random_state=0,
+     )
+
+     # --- Stage 2: Create the new validation split (train_val/test_val) ---
+     # This block takes the training set from Stage 1 and splits it again
+     # to create a new, smaller training set and a validation set.
+     # The outputs are saved to parallel `public_val` and `private_val` directories.
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+
+     # Calculate the test_size needed to make the new validation set (`test_val`)
+     # have the same number of samples as the original test set from Stage 1.
+     # Original test size = 0.1 * total. New train size = 0.9 * total.
+     # We need a fraction `p` such that p * (0.9 * total) = 0.1 * total.
+     # p = 0.1 / 0.9 = 1/9.
+     val_test_size = 1 / 9
+
+     _create_split(
+         df_in=train_for_val_split,
+         public_path=public_val,
+         private_path=private_val,
+         test_size=val_test_size,
+         random_state=0,  # Use same random state for consistency
+     )

mlebench/competitions/mcm_2024_c_test/grade.py
@@ -0,0 +1,107 @@
+ import os
+ from pathlib import Path
+ from typing import Any
+ import pandas as pd
+
+ # This is a generic LLM-based grader for open-ended tasks.
+ # It reads 'rubric.md' from the task directory and evaluates the submission.
+
+ try:
+     from dsat.services.llm import LLMService
+     from dsat.config import LLMConfig
+ except ImportError:
+     # Fallback for when running outside of dsat package context
+     import sys
+     sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent))
+     from dsat.services.llm import LLMService
+     from dsat.config import LLMConfig
+
+ class Report:
+     def __init__(self, score, feedback):
+         self.score = score
+         self.feedback = feedback
+         # Standard fields expected by the framework
+         self.is_lower_better = False
+         self.submission_exists = True
+         self.valid_submission = True
+         self.gold_medal = score >= 0.9
+         self.silver_medal = score >= 0.7
+         self.bronze_medal = score >= 0.5
+         self.above_median = score >= 0.5
+         self.submission_path = ""
+         self.competition_id = "open_ended_task"
+
+ def grade(submission_path: Path, competition: Any) -> Report:
+     """
+     Grades the submission using an LLM Judge against rubric.md.
+     """
+     # 1. Load the Rubric
+     task_dir = competition.raw_dir.parent
+     rubric_path = task_dir / "rubric.md"
+
+     if not rubric_path.exists():
+         # Fallback if no rubric exists
+         print(f"Warning: Rubric not found at {rubric_path}. Returning default score.")
+         return Report(0.5, "No grading rubric defined.")
+
+     rubric_content = rubric_path.read_text(encoding="utf-8")
+
+     # 2. Load the Submission Content (Preview)
+     # Since it's open-ended, the 'submission_path' might be a CSV, code, or just a marker.
+     # We'll try to peek at the output artifacts if possible, or assume the agent's recent work
+     # is what we are grading. Ideally, AIDE produces a submission file.
+
+     submission_content = "No submission content readable."
+     if submission_path.exists():
+         try:
+             if submission_path.suffix == '.csv':
+                 df = pd.read_csv(submission_path)
+                 submission_content = f"CSV Submission Preview:\n{df.head().to_markdown()}"
+             else:
+                 submission_content = submission_path.read_text(encoding="utf-8")[:2000]
+         except Exception as e:
+             submission_content = f"Error reading submission: {e}"
+
+     # 3. Setup LLM for Judging
+     # Note: In a real run, we might want to inject the API key securely.
+     # Here we assume environment variables are set (which they are in DSATRunner).
+     try:
+         api_key = os.getenv("API_KEY", "EMPTY")
+         base_url = os.getenv("API_BASE", "https://api.openai.com/v1")
+         model = os.getenv("LLM_MODEL", "gpt-4o")
+
+         llm = LLMService(LLMConfig(api_key=api_key, api_base=base_url, model=model))
+
+         prompt = f"""You are an impartial Judge. Evaluate the following submission against the provided Rubric.
+
+ # RUBRIC
+ {rubric_content}
+
+ # SUBMISSION CONTENT
+ {submission_content}
+
+ # INSTRUCTION
+ Assess the submission.
+ Output ONLY a float number between 0.0 and 1.0 on the first line.
+ On subsequent lines, provide brief feedback.
+ """
+         # Synchronous call wrapper or direct call if possible.
+         # Since grade() is synchronous in standard mlebench, we need a way to run async code.
+         import asyncio
+         response = asyncio.run(llm.achat([{"role": "user", "content": prompt}]))
+
+         lines = response.strip().split('\n')
+         try:
+             score = float(lines[0].strip())
+         except ValueError:
+             # Fallback if LLM is chatty
+             import re
+             match = re.search(r"(\d+(\.\d+)?)", lines[0])
+             score = float(match.group(1)) if match else 0.5
+
+         feedback = "\n".join(lines[1:])
+         return Report(score, feedback)
+
+     except Exception as e:
+         print(f"LLM Judging failed: {e}")
+         return Report(0.0, f"Judging failed: {e}")
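
The chatty-judge fallback leans on a bare numeric regex, so the first run of digits on the first line is taken as the score. A small sketch of what that parsing accepts (the strings are invented examples, not real judge output):

    import re

    for first_line in ["0.85", "Score: 0.9, good work", "no number here"]:
        match = re.search(r"(\d+(\.\d+)?)", first_line)
        score = float(match.group(1)) if match else 0.5
        print(repr(first_line), "->", score)
    # '0.85' -> 0.85; 'Score: 0.9, good work' -> 0.9; 'no number here' -> 0.5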

mlebench/competitions/mcm_2024_c_test/prepare.py
@@ -0,0 +1,2 @@
+ def prepare(raw, public, private):
+     pass

mlebench/competitions/ml2021spring-hw2/grade.py
@@ -0,0 +1,11 @@
+ import pandas as pd
+ from sklearn.metrics import accuracy_score
+
+ from mlebench.competitions.utils import prepare_for_accuracy_metric
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     accuracy_inputs = prepare_for_accuracy_metric(
+         submission=submission, answers=answers, target_column="ClassId", id_column="Id"
+     )
+     return accuracy_score(y_true=accuracy_inputs["y_true"], y_pred=accuracy_inputs["y_pred"])

mlebench/competitions/ml2021spring-hw2/prepare.py
@@ -0,0 +1,58 @@
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Create train/test split from old train set, using same train/test proportion
+     """
+     old_train = np.load(raw / "timit_11" / "timit_11" / "train_11.npy")
+     old_train_label = np.load(raw / "timit_11" / "timit_11" / "train_label_11.npy")
+     old_train_idxs = range(len(old_train))
+
+     # Create new splits
+     old_test = np.load(raw / "timit_11" / "timit_11" / "test_11.npy")
+     old_test_prop = len(old_test) / (len(old_train) + len(old_test))  # ~= 0.268
+     new_train_idxs, new_test_idxs = train_test_split(
+         old_train_idxs, test_size=old_test_prop, random_state=0
+     )
+
+     new_train = old_train[new_train_idxs]
+     new_train_label = old_train_label[new_train_idxs]
+     new_test = old_train[new_test_idxs]
+     new_test_label = old_train_label[new_test_idxs]
+
+     answers_df = pd.DataFrame({"Id": range(len(new_test)), "ClassId": new_test_label})
+
+     # Create sample submission
+     sample_submission = answers_df.copy()
+     sample_submission["ClassId"] = 0
+
+     # Save files
+     (public / "timit_11" / "timit_11").mkdir(parents=True, exist_ok=True)
+
+     np.save(public / "timit_11" / "timit_11" / "train_11.npy", new_train)
+     np.save(public / "timit_11" / "timit_11" / "train_label_11.npy", new_train_label)
+     np.save(public / "timit_11" / "timit_11" / "test_11.npy", new_test)
+     sample_submission.to_csv(public / "sampleSubmission.csv", index=False)
+     answers_df.to_csv(private / "answers.csv", index=False)
+
+     # Sanity checks
+     assert (
+         public / "timit_11" / "timit_11" / "train_11.npy"
+     ).exists(), "`train_11.npy` doesn't exist!"
+     assert (
+         public / "timit_11" / "timit_11" / "train_label_11.npy"
+     ).exists(), "`train_label_11.npy` doesn't exist!"
+     assert (
+         public / "timit_11" / "timit_11" / "test_11.npy"
+     ).exists(), "`test_11.npy` doesn't exist!"
+     assert (public / "sampleSubmission.csv").exists(), "`sampleSubmission.csv` doesn't exist!"
+     assert (private / "answers.csv").exists(), "`answers.csv` doesn't exist!"
+
+     assert len(new_train) + len(new_test) == len(
+         old_train
+     ), f"Expected {len(old_train)} samples in combined new train and test splits, got {len(new_train) + len(new_test)}!"