dslighting-1.7.1-py3-none-any.whl → dslighting-1.7.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py
@@ -0,0 +1,120 @@
+ import shutil
+ from pathlib import Path
+
+ from pandas import DataFrame, read_csv
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import extract
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Prepares the dataset by performing two sequential splits.
+     1. Splits the raw data into a main train/test set for the `public` and
+        `private` directories.
+     2. Splits the main training set again to create a smaller train/validation
+        set for the `public_val` and `private_val` directories.
+     """
+
+     def _split_and_save(
+         data_to_split: DataFrame,
+         test_ratio: float,
+         public_dir: Path,
+         private_dir: Path,
+         random_state: int,
+     ) -> DataFrame:
+         """
+         Helper function to perform a data split, save files to the specified
+         directories, and return the resulting training set for a potential
+         subsequent split.
+         """
+         # Ensure output directories exist
+         public_dir.mkdir(parents=True, exist_ok=True)
+         private_dir.mkdir(parents=True, exist_ok=True)
+
+         # Create train and test splits from the provided dataframe
+         new_train, answers = train_test_split(
+             data_to_split, test_size=test_ratio, random_state=random_state
+         )
+
+         # Create public test set (unlabeled)
+         new_test = answers.copy()
+         new_test = new_test.drop("Sentiment", axis="columns")
+
+         # Create sample submission
+         sample_submission = answers[["PhraseId", "Sentiment"]].copy()
+         sample_submission["Sentiment"] = 2
+
+         # Checks
+         assert new_train["PhraseId"].is_unique, f"PhraseId in new_train ({public_dir.name}) should be unique"
+         assert new_test["PhraseId"].is_unique, f"PhraseId in new_test ({public_dir.name}) should be unique"
+         assert set(new_train["PhraseId"]).isdisjoint(
+             set(new_test["PhraseId"])
+         ), f"PhraseId in new_train and new_test ({public_dir.name}) should be disjoint"
+         assert (
+             new_train.shape[0] + new_test.shape[0] == data_to_split.shape[0]
+         ), "New train and new test together should have the same number of rows as the input data"
+         assert (
+             new_train.columns.tolist() == data_to_split.columns.tolist()
+         ), "New train and input data should have the same columns"
+         assert new_test.columns.tolist() == [
+             "PhraseId",
+             "SentenceId",
+             "Phrase",
+         ], "new_test should have columns ['PhraseId', 'SentenceId', 'Phrase']"
+
+         # Write CSVs to their respective directories
+         answers.to_csv(private_dir / "answers.csv", index=False)
+         new_train.to_csv(public_dir / "train.tsv", index=False, sep="\t")
+         new_test.to_csv(public_dir / "test.tsv", index=False, sep="\t")
+         sample_submission.to_csv(public_dir / "sampleSubmission.csv", index=False)
+
+         # Zip files
+         shutil.make_archive(str(public_dir / "train.tsv"), "zip", public_dir, "train.tsv")
+         shutil.make_archive(str(public_dir / "test.tsv"), "zip", public_dir, "test.tsv")
+
+         # Delete unzipped files
+         (public_dir / "train.tsv").unlink()
+         (public_dir / "test.tsv").unlink()
+
+         return new_train
+
+     # --- Main script execution starts here ---
+
+     # Extract the raw data file
+     extract(raw / "train.tsv.zip", raw)
+     old_train = read_csv(raw / "train.tsv", sep="\t")
+
+     # Define the test ratio for the first split
+     test_ratio_1 = 0.3  # 66293 / (156061 + 66293) ≈ 0.3
+
+     # --- First Split: Create the original public/private sets ---
+     # This call produces the original, unmodified competition output.
+     # The resulting training set is captured for the second split.
+     train_for_val_split = _split_and_save(
+         data_to_split=old_train,
+         test_ratio=test_ratio_1,
+         public_dir=public,
+         private_dir=private,
+         random_state=0,
+     )
+
+     # --- Second Split: Create the new validation sets ---
+     # Define the new parallel directories for the validation split
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+
+     # Calculate the test ratio for the second split. This ensures the new
+     # validation set (`test_val`) is the same size as the original test set.
+     # test_ratio_2 = (size of original test set) / (size of new training set)
+     #              = (total * test_ratio_1) / (total * (1 - test_ratio_1))
+     test_ratio_2 = test_ratio_1 / (1 - test_ratio_1)
+
+     # Perform the second split on the training data from the first split
+     _split_and_save(
+         data_to_split=train_for_val_split,
+         test_ratio=test_ratio_2,
+         public_dir=public_val,
+         private_dir=private_val,
+         random_state=0,
+     )
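The `test_ratio_2` derivation in the comments is easy to sanity-check numerically. A minimal sketch, using the row counts quoted in the script's own comment (everything else here is illustrative):

```python
# Quick numeric check of the second-split ratio used above.
total = 156_061 + 66_293                      # rows in the raw train.tsv, per the comment
test_ratio_1 = 0.3
remaining_train = total * (1 - test_ratio_1)  # rows left after the first split
test_ratio_2 = test_ratio_1 / (1 - test_ratio_1)
val_test_size = remaining_train * test_ratio_2
# The new validation test set comes out the same size as the original test set.
assert round(val_test_size) == round(total * test_ratio_1)
```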
mlebench/competitions/multi-modal-gesture-recognition/grade.py
@@ -0,0 +1,58 @@
+ from typing import List
+
+ import numpy as np
+ import pandas as pd
+ from Levenshtein import distance
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def final_edit_distance(y_true: List[List[int]], y_pred: List[List[int]]) -> float:
+     """
+     > The overall score we compute is the sum of the Levenshtein distances for all the lines of the
+     result file compared to the corresponding lines in the truth value file, divided by the total
+     number of gestures in the truth value file. This score is analogous to an error rate.
+     However, it can exceed one.
+     (https://www.kaggle.com/competitions/multi-modal-gesture-recognition/overview)
+     """
+     sum_of_distances = np.sum([distance(a, b) for a, b in zip(y_true, y_pred)])
+     total_num_gestures = sum(len(x) for x in y_true)
+     return sum_of_distances / total_num_gestures
+
+
+ def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+     if len(submission) != len(answers):
+         raise InvalidSubmissionError("Submission and answers must have the same length")
+
+     if "Id" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have an 'Id' column")
+
+     if "Sequence" not in submission.columns:
+         raise InvalidSubmissionError("Submission must have a 'Sequence' column")
+
+     assert "Id" in answers.columns, "Answers must have an 'Id' column"
+     assert "Sequence" in answers.columns, "Answers must have a 'Sequence' column"
+
+     submission = submission.sort_values("Id")
+     answers = answers.sort_values("Id")
+
+     if (submission["Id"].values != answers["Id"].values).any():
+         raise InvalidSubmissionError("Submission and answers must have the same ids")
+
+     # Read as strings, convert to lists of numbers
+     submission["Sequence"] = submission["Sequence"].astype(str)
+     answers["Sequence"] = answers["Sequence"].astype(str)
+     answers_sequences = [list(map(int, x.split())) for x in answers["Sequence"]]
+     try:
+         submission_sequences = [list(map(int, x.split())) for x in submission["Sequence"]]
+     except ValueError as e:
+         raise InvalidSubmissionError(
+             f"Submission sequences must be integers separated by spaces. Failed to convert Sequence to a list of integers: {e}"
+         )
+
+     return answers_sequences, submission_sequences
+
+
+ def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+     y_true, y_pred = prepare_for_metric(submission, answers)
+     return final_edit_distance(y_true, y_pred)
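A toy invocation of `final_edit_distance` from the grader above makes the metric concrete. The sequences are made up; the sketch assumes the modern rapidfuzz-backed `Levenshtein` package, whose `distance()` accepts arbitrary sequences such as lists of ints, not only strings:

```python
# Toy check: one substitution plus one insertion across 5 total gestures.
y_true = [[1, 2, 3], [4, 5]]        # 5 gestures in the ground truth
y_pred = [[1, 9, 3], [4, 5, 6]]     # distance 1 on each line

# distance([1,2,3], [1,9,3]) == 1; distance([4,5], [4,5,6]) == 1
print(final_edit_distance(y_true, y_pred))  # (1 + 1) / 5 = 0.4
```

As the docstring notes, a wildly long prediction can push the score above 1.0, which is why it behaves like an unbounded error rate.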
mlebench/competitions/multi-modal-gesture-recognition/prepare.py
@@ -0,0 +1,85 @@
+ import random
+ import shutil
+ from pathlib import Path
+
+ import pandas as pd
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the data in raw into public and private datasets with appropriate test/train splits.
+
+     Raw dataset has:
+     - Train: training1, training2, training3, training4
+     - Val: validation1, validation2, validation3 (no labels)
+     - Test: (not available)
+
+     New prepared dataset has:
+     - Train: training1, training2, training3
+     - Val: validation1, validation2, validation3 (no labels)
+     - Test: training4 (renamed to `test.tar.gz`)
+
+     Furthermore,
+     - We modify `training.csv` to remove training4 samples
+     - We modify `test.csv` and `randomPredictions.csv` to include only training4 IDs
+
+     No other changes. We copy over the remaining files (devel01-40.7z, valid_all_files_combined.7z, sample_code_mmrgc.zip) as-is.
+     """
+
+     # Unzip the training4 file to get the new test IDs
+     shutil.unpack_archive(raw / "training4.tar.gz", raw / "training4")
+     # training4 contains samples like "Sample00300.zip"; the ID is the last 4 digits ("0300")
+     test_ids = sorted([fp.stem[-4:] for fp in (raw / "training4").glob("*.zip")])
+
+     # Update training.csv to remove training4 samples
+     training_df = pd.read_csv(raw / "training.csv", dtype={"Id": str, "Sequence": str})
+     new_training_df = training_df[~training_df["Id"].isin(test_ids)]
+     new_training_df.to_csv(public / "training.csv", index=False)
+     assert len(new_training_df) == len(training_df) - len(
+         test_ids
+     ), f"Expected {len(training_df) - len(test_ids)} samples in training.csv, but got {len(new_training_df)}"
+
+     # Make private answers
+     answers_df = training_df[training_df["Id"].isin(test_ids)]
+     answers_df.to_csv(private / "test.csv", index=False)
+     assert len(answers_df) == len(
+         test_ids
+     ), f"Expected {len(test_ids)} samples in private/test.csv, but got {len(answers_df)}"
+
+     # Make new public test.csv
+     test_df = pd.DataFrame({"Id": test_ids})
+     test_df.to_csv(public / "test.csv", index=False)
+     assert len(test_df) == len(
+         test_ids
+     ), f"Expected {len(test_ids)} samples in public/test.csv, but got {len(test_df)}"
+
+     # Make new public randomPredictions.csv
+     # Predictions are random shufflings of the numbers 1-20 (no repeats)
+     random.seed(0)
+     preds = []
+     for _ in range(len(test_ids)):
+         pred = " ".join(str(x) for x in random.sample(range(1, 21), 20))
+         preds.append(pred)
+     random_predictions_df = pd.DataFrame({"Id": test_ids, "Sequence": preds})
+     random_predictions_df.to_csv(public / "randomPredictions.csv", index=False)
+     assert len(random_predictions_df) == len(
+         test_ids
+     ), f"Expected {len(test_ids)} samples in public/randomPredictions.csv, but got {len(random_predictions_df)}"
+
+     # Copy over training4 as the new test set
+     shutil.copyfile(src=raw / "training4.tar.gz", dst=public / "test.tar.gz")
+
+     # Copy over train and validation tars
+     for file in [
+         "training1.tar.gz",
+         "training2.tar.gz",
+         "training3.tar.gz",
+         "validation1.tar.gz",
+         "validation2.tar.gz",
+         "validation3.tar.gz",
+     ]:
+         shutil.copyfile(src=raw / file, dst=public / file)
+
+     # Copy over the rest of the files
+     for file in ["devel01-40.7z", "valid_all_files_combined.7z", "sample_code_mmrgc.zip"]:
+         shutil.copyfile(src=raw / file, dst=public / file)
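Both this script and the `prepare_val.py` that follows hinge on one ID convention: the last four characters of each sample file's stem. A quick sketch with hypothetical filenames:

```python
# The ID convention the scripts rely on: last four characters of the stem.
from pathlib import Path

for name in ["Sample00300.zip", "Sample01234.zip"]:
    print(Path(name).stem[-4:])  # -> "0300", "1234"
```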
mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py
@@ -0,0 +1,139 @@
+ import random
+ import shutil
+ from pathlib import Path
+
+ import pandas as pd
+
+
+ def _create_split(
+     source_df: pd.DataFrame,
+     test_tar_name: str,
+     train_tar_names: list[str],
+     raw_dir: Path,
+     public_dir: Path,
+     private_dir: Path,
+ ) -> pd.DataFrame:
+     """
+     Helper function to perform a data split based on specified tar files.
+
+     It unpacks a given test tarball to identify test sample IDs, splits the
+     source dataframe into train/test sets, creates all necessary public and private
+     CSV files, copies the relevant data tarballs, and returns the newly created
+     training dataframe for potential subsequent splits.
+     """
+     # Unpack the test file to get test IDs.
+     # The ID is the last 4 digits of the sample filename (e.g., "0300" from "Sample00300.zip")
+     test_data_dir_name = test_tar_name.replace(".tar.gz", "")
+     shutil.unpack_archive(raw_dir / test_tar_name, raw_dir / test_data_dir_name)
+     test_ids = sorted([fp.stem[-4:] for fp in (raw_dir / test_data_dir_name).glob("*.zip")])
+
+     # Create the new training dataframe for this split
+     new_training_df = source_df[~source_df["Id"].isin(test_ids)]
+     new_training_df.to_csv(public_dir / "training.csv", index=False)
+     assert len(new_training_df) == len(source_df) - len(test_ids)
+
+     # Make private answers
+     answers_df = source_df[source_df["Id"].isin(test_ids)]
+     answers_df.to_csv(private_dir / "test.csv", index=False)
+     assert len(answers_df) == len(test_ids)
+
+     # Make new public test.csv (IDs only)
+     test_df = pd.DataFrame({"Id": test_ids})
+     test_df.to_csv(public_dir / "test.csv", index=False)
+     assert len(test_df) == len(test_ids)
+
+     # Make new public randomPredictions.csv
+     # Predictions are random shufflings of the numbers 1-20 (no repeats)
+     random.seed(0)
+     preds = []
+     for _ in range(len(test_ids)):
+         pred = " ".join(str(x) for x in random.sample(range(1, 21), 20))
+         preds.append(pred)
+     random_predictions_df = pd.DataFrame({"Id": test_ids, "Sequence": preds})
+     random_predictions_df.to_csv(public_dir / "randomPredictions.csv", index=False)
+     assert len(random_predictions_df) == len(test_ids)
+
+     # Copy over the designated test set tarball
+     shutil.copyfile(src=raw_dir / test_tar_name, dst=public_dir / "test.tar.gz")
+
+     # Copy over the designated training tarballs for this split
+     for file in train_tar_names:
+         shutil.copyfile(src=raw_dir / file, dst=public_dir / file)
+
+     return new_training_df
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     """
+     Splits the data in raw into public and private datasets with appropriate test/train splits.
+
+     Raw dataset has:
+     - Train: training1, training2, training3, training4
+     - Val: validation1, validation2, validation3 (no labels)
+     - Test: (not available)
+
+     New prepared dataset has:
+     - Train: training1, training2, training3
+     - Val: validation1, validation2, validation3 (no labels)
+     - Test: training4 (renamed to `test.tar.gz`)
+
+     Furthermore,
+     - We modify `training.csv` to remove training4 samples
+     - We modify `test.csv` and `randomPredictions.csv` to include only training4 IDs
+
+     No other changes. We copy over the remaining files (devel01-40.7z, valid_all_files_combined.7z, sample_code_mmrgc.zip) as-is.
+     """
+     # --- Setup new directories for the validation split ---
+     public_val = public.parent / "public_val"
+     private_val = private.parent / "private_val"
+     public_val.mkdir(exist_ok=True)
+     private_val.mkdir(exist_ok=True)
+
+     # Load the complete training data manifest
+     full_training_df = pd.read_csv(raw / "training.csv", dtype={"Id": str, "Sequence": str})
+
+     # --- 1. Create the original public/private split ---
+     # This split is identical to the original script's behavior.
+     # Train set: training1, 2, 3. Test set: training4.
+     original_train_tars = ["training1.tar.gz", "training2.tar.gz", "training3.tar.gz"]
+     original_test_tar = "training4.tar.gz"
+
+     train_df_for_val_split = _create_split(
+         source_df=full_training_df,
+         test_tar_name=original_test_tar,
+         train_tar_names=original_train_tars,
+         raw_dir=raw,
+         public_dir=public,
+         private_dir=private,
+     )
+
+     # Copy over validation and other miscellaneous files to the 'public' directory
+     files_to_copy = [
+         "validation1.tar.gz",
+         "validation2.tar.gz",
+         "validation3.tar.gz",
+         "devel01-40.7z",
+         "valid_all_files_combined.7z",
+         "sample_code_mmrgc.zip",
+     ]
+     for file in files_to_copy:
+         shutil.copyfile(src=raw / file, dst=public / file)
+
+     # --- 2. Create the new public_val/private_val split ---
+     # This second split uses the training data from the first split as its source.
+     # New train set: training1, 2. New test (validation) set: training3.
+     val_train_tars = ["training1.tar.gz", "training2.tar.gz"]
+     val_test_tar = "training3.tar.gz"
+
+     _create_split(
+         source_df=train_df_for_val_split,
+         test_tar_name=val_test_tar,
+         train_tar_names=val_train_tars,
+         raw_dir=raw,
+         public_dir=public_val,
+         private_dir=private_val,
+     )
+
+     # Copy over validation and other files to 'public_val' to mirror the 'public' structure
+     for file in files_to_copy:
+         shutil.copyfile(src=raw / file, dst=public_val / file)
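A minimal driver sketch may help show where the two chained splits land. The paths are hypothetical and the raw tarballs are assumed to be present; `prepare` is the function from `prepare_val.py` above:

```python
# Hypothetical driver for prepare() from prepare_val.py above.
from pathlib import Path

base = Path("/tmp/mmgr")                  # hypothetical working directory
raw = base / "raw"                        # assumed to hold the raw tarballs
public = base / "prepared" / "public"
private = base / "prepared" / "private"
public.mkdir(parents=True, exist_ok=True)
private.mkdir(parents=True, exist_ok=True)

prepare(raw, public, private)
# Expected layout afterwards:
#   prepared/public,     prepared/private       (test set = training4)
#   prepared/public_val, prepared/private_val   (test set = training3)
```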
mlebench/competitions/my-custom-task-01/prepare.py
@@ -0,0 +1,2 @@
+ from pathlib import Path
+ def prepare(r, pub, priv): return pub
mlebench/competitions/new-my-task-01/prepare.py
@@ -0,0 +1,2 @@
+ from pathlib import Path
+ def prepare(r, pub, priv): return pub
mlebench/competitions/new-my-task-03/grade.py
@@ -0,0 +1,107 @@
+ import os
+ from pathlib import Path
+ from typing import Any
+ import pandas as pd
+
+ # This is a generic LLM-based grader for open-ended tasks.
+ # It reads 'rubric.md' from the task directory and evaluates the submission.
+
+ try:
+     from dsat.services.llm import LLMService
+     from dsat.config import LLMConfig
+ except ImportError:
+     # Fallback for when running outside of the dsat package context
+     import sys
+     sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent))
+     from dsat.services.llm import LLMService
+     from dsat.config import LLMConfig
+
+ class Report:
+     def __init__(self, score, feedback):
+         self.score = score
+         self.feedback = feedback
+         # Standard fields expected by the framework
+         self.is_lower_better = False
+         self.submission_exists = True
+         self.valid_submission = True
+         self.gold_medal = score >= 0.9
+         self.silver_medal = score >= 0.7
+         self.bronze_medal = score >= 0.5
+         self.above_median = score >= 0.5
+         self.submission_path = ""
+         self.competition_id = "open_ended_task"
+
+ def grade(submission_path: Path, competition: Any) -> Report:
+     """
+     Grades the submission using an LLM judge against rubric.md.
+     """
+     # 1. Load the rubric
+     task_dir = competition.raw_dir.parent
+     rubric_path = task_dir / "rubric.md"
+
+     if not rubric_path.exists():
+         # Fallback if no rubric exists
+         print(f"Warning: Rubric not found at {rubric_path}. Returning default score.")
+         return Report(0.5, "No grading rubric defined.")
+
+     rubric_content = rubric_path.read_text(encoding="utf-8")
+
+     # 2. Load the submission content (preview).
+     # Since the task is open-ended, 'submission_path' might be a CSV, code, or just a marker.
+     # We try to peek at the output artifacts if possible, or assume the agent's recent work
+     # is what we are grading. Ideally, AIDE produces a submission file.
+
+     submission_content = "No submission content readable."
+     if submission_path.exists():
+         try:
+             if submission_path.suffix == '.csv':
+                 df = pd.read_csv(submission_path)
+                 submission_content = f"CSV Submission Preview:\n{df.head().to_markdown()}"
+             else:
+                 submission_content = submission_path.read_text(encoding="utf-8")[:2000]
+         except Exception as e:
+             submission_content = f"Error reading submission: {e}"
+
+     # 3. Set up the LLM for judging.
+     # Note: in a real run, we might want to inject the API key securely.
+     # Here we assume environment variables are set (which they are in DSATRunner).
+     try:
+         api_key = os.getenv("API_KEY", "EMPTY")
+         base_url = os.getenv("API_BASE", "https://api.openai.com/v1")
+         model = os.getenv("LLM_MODEL", "gpt-4o")
+
+         llm = LLMService(LLMConfig(api_key=api_key, api_base=base_url, model=model))
+
+         prompt = f"""You are an impartial Judge. Evaluate the following submission against the provided Rubric.
+
+ # RUBRIC
+ {rubric_content}
+
+ # SUBMISSION CONTENT
+ {submission_content}
+
+ # INSTRUCTION
+ Assess the submission.
+ Output ONLY a float number between 0.0 and 1.0 on the first line.
+ On subsequent lines, provide brief feedback.
+ """
+         # Synchronous call wrapper around the async client:
+         # grade() is synchronous in standard mlebench, so we need a way to run async code.
+         import asyncio
+         response = asyncio.run(llm.achat([{"role": "user", "content": prompt}]))
+
+         lines = response.strip().split('\n')
+         try:
+             score = float(lines[0].strip())
+         except ValueError:
+             # Fallback if the LLM is chatty
+             import re
+             match = re.search(r"(\d+(\.\d+)?)", lines[0])
+             score = float(match.group(1)) if match else 0.5
+
+         feedback = "\n".join(lines[1:])
+         return Report(score, feedback)
+
+     except Exception as e:
+         print(f"LLM judging failed: {e}")
+         return Report(0.0, f"Judging failed: {e}")
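The chatty-response fallback at the end of the judge is worth seeing in isolation. A minimal sketch with a made-up response string, replaying the same parsing logic:

```python
# Sketch of the score-parsing fallback on a "chatty" LLM response.
import re

response = "Score: 0.75\nGood structure, but missing error analysis."
lines = response.strip().split("\n")
try:
    score = float(lines[0].strip())        # fails: the line is not a bare float
except ValueError:
    match = re.search(r"(\d+(\.\d+)?)", lines[0])
    score = float(match.group(1)) if match else 0.5
print(score)  # -> 0.75, recovered by the regex fallback
```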
mlebench/competitions/new-my-task-03/prepare.py
@@ -0,0 +1,2 @@
+ def prepare(raw, public, private):
+     pass
mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py
@@ -0,0 +1,28 @@
+ from pandas import DataFrame
+ from sklearn.metrics import root_mean_squared_error
+
+ from mlebench.grade_helpers import InvalidSubmissionError
+
+
+ def prepare_for_metric(submission: DataFrame, answers: DataFrame) -> tuple:
+
+     assert "fare_amount" in answers.columns, "Answers should have a fare_amount column"
+     assert "key" in answers.columns, "Answers should have a key column"
+     if "fare_amount" not in submission.columns:
+         raise InvalidSubmissionError("Submission should have a fare_amount column")
+     if "key" not in submission.columns:
+         raise InvalidSubmissionError("Submission should have a key column")
+
+     # Sort by 'key' to ensure alignment
+     submission = submission.sort_values("key")
+     answers = answers.sort_values("key")
+
+     y_true = answers["fare_amount"]
+     y_pred = submission["fare_amount"]
+
+     return y_true, y_pred
+
+
+ def grade(submission: DataFrame, answers: DataFrame) -> float:
+     y_true, y_pred = prepare_for_metric(submission, answers)
+     return root_mean_squared_error(y_true, y_pred)
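A toy invocation of `grade` from the file above, with made-up fares, shows the sort-by-key alignment in action. The sketch assumes scikit-learn >= 1.4, where `root_mean_squared_error` exists:

```python
# Toy grading run: rows align on 'key' after sorting, so order doesn't matter.
import pandas as pd

answers = pd.DataFrame({"key": ["a", "b", "c"], "fare_amount": [5.0, 10.0, 8.0]})
submission = pd.DataFrame({"key": ["c", "a", "b"], "fare_amount": [9.0, 5.0, 10.0]})
print(grade(submission, answers))  # errors (0, 0, 1) -> RMSE = sqrt(1/3) ≈ 0.577
```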
mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py
@@ -0,0 +1,44 @@
+ import shutil
+ from pathlib import Path
+
+ from sklearn.model_selection import train_test_split
+
+ from mlebench.utils import read_csv
+
+
+ def prepare(raw: Path, public: Path, private: Path):
+     # Create train, test from train split
+     old_train = read_csv(raw / "train.csv")
+
+     # Train is c. 55M rows; the original test is 9914 rows
+     new_train, new_test = train_test_split(old_train, test_size=9914, random_state=0)
+     new_test_without_labels = new_test.drop(columns=["fare_amount"])
+
+     # Create a sample submission file
+     submission_df = new_test.copy()[["key"]]
+     submission_df["fare_amount"] = 11.35
+
+     # Write CSVs
+     new_train.to_csv(public / "labels.csv", index=False)
+     new_test_without_labels.to_csv(public / "test.csv", index=False)
+     submission_df.to_csv(public / "sample_submission.csv", index=False)
+     new_test.to_csv(private / "test.csv", index=False)
+
+     # Copy over other files
+     shutil.copy(raw / "GCP-Coupons-Instructions.rtf", public / "GCP-Coupons-Instructions.rtf")
+
+     # Checks
+     assert set(new_train["key"]).isdisjoint(
+         set(new_test["key"])
+     ), "Train and test sets share samples!"
+     assert new_test.shape[1] == 8, f"Test set should have 8 columns, but has {new_test.shape[1]}"
+     assert (
+         new_test_without_labels.shape[1] == 7
+     ), f"Test set without labels should have 7 columns, but has {new_test_without_labels.shape[1]}"
+     assert new_train.shape[1] == 8, f"Train set should have 8 columns, but has {new_train.shape[1]}"
+     assert (
+         submission_df.shape[1] == 2
+     ), f"Sample submission should have 2 columns, but has {submission_df.shape[1]}"
+     assert (
+         submission_df.shape[0] == new_test.shape[0]
+     ), f"Sample submission should have {new_test.shape[0]} rows, but has {submission_df.shape[0]}"