dslighting 1.7.1__py3-none-any.whl → 1.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dslighting/__init__.py +1 -1
- dslighting/core/agent.py +78 -62
- {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/METADATA +3 -1
- {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/RECORD +352 -7
- {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/top_level.txt +1 -0
- mlebench/README.md +39 -0
- mlebench/__init__.py +0 -0
- mlebench/cli.py +221 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
- mlebench/competitions/AI4Code/grade.py +70 -0
- mlebench/competitions/AI4Code/prepare.py +84 -0
- mlebench/competitions/AI4Code/prepare_val.py +159 -0
- mlebench/competitions/__init__.py +0 -0
- mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
- mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
- mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
- mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
- mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
- mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
- mlebench/competitions/bike-sharing-demand/grade.py +55 -0
- mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
- mlebench/competitions/billion-word-imputation/grade.py +37 -0
- mlebench/competitions/billion-word-imputation/prepare.py +107 -0
- mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
- mlebench/competitions/bms-molecular-translation/grade.py +40 -0
- mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
- mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
- mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
- mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
- mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
- mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
- mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
- mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
- mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
- mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
- mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
- mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
- mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
- mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
- mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
- mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
- mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
- mlebench/competitions/dog-breed-identification/dogs.py +124 -0
- mlebench/competitions/dog-breed-identification/grade.py +42 -0
- mlebench/competitions/dog-breed-identification/prepare.py +55 -0
- mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
- mlebench/competitions/ethanol-concentration/grade.py +23 -0
- mlebench/competitions/ethanol-concentration/prepare.py +90 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
- mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
- mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
- mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
- mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
- mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
- mlebench/competitions/google-quest-challenge/classes.py +32 -0
- mlebench/competitions/google-quest-challenge/grade.py +45 -0
- mlebench/competitions/google-quest-challenge/prepare.py +58 -0
- mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
- mlebench/competitions/handwriting/grade.py +23 -0
- mlebench/competitions/handwriting/prepare.py +179 -0
- mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
- mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
- mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
- mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
- mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
- mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
- mlebench/competitions/ili/grade.py +60 -0
- mlebench/competitions/ili/prepare.py +99 -0
- mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
- mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
- mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
- mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
- mlebench/competitions/instant-gratification/__init__.py +0 -0
- mlebench/competitions/instant-gratification/grade.py +55 -0
- mlebench/competitions/instant-gratification/prepare.py +25 -0
- mlebench/competitions/instant_gratification/__init__.py +0 -0
- mlebench/competitions/instant_gratification/grade.py +55 -0
- mlebench/competitions/instant_gratification/prepare.py +25 -0
- mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
- mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
- mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
- mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
- mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
- mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
- mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
- mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
- mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
- mlebench/competitions/leaf-classification/classes.py +101 -0
- mlebench/competitions/leaf-classification/grade.py +44 -0
- mlebench/competitions/leaf-classification/prepare.py +60 -0
- mlebench/competitions/leaf-classification/prepare_val.py +116 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
- mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
- mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
- mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
- mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
- mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
- mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
- mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
- mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
- mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
- mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
- mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
- mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
- mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
- mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
- mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
- mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
- mlebench/competitions/my-custom-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-03/grade.py +107 -0
- mlebench/competitions/new-my-task-03/prepare.py +2 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
- mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
- mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
- mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
- mlebench/competitions/paddy-disease-classification/grade.py +35 -0
- mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
- mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
- mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
- mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
- mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
- mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
- mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e1/grade.py +52 -0
- mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e11/grade.py +55 -0
- mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e18/grade.py +39 -0
- mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
- mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
- mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e1/grade.py +52 -0
- mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
- mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e11/grade.py +55 -0
- mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
- mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
- mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
- mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
- mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
- mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
- mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
- mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
- mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
- mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
- mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
- mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
- mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
- mlebench/competitions/see-click-predict-fix/grade.py +66 -0
- mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
- mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
- mlebench/competitions/see_click_predict_fix/grade.py +66 -0
- mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
- mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
- mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
- mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
- mlebench/competitions/siim-covid19-detection/grade.py +194 -0
- mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
- mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
- mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
- mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
- mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
- mlebench/competitions/spaceship-titanic/grade.py +11 -0
- mlebench/competitions/spaceship-titanic/prepare.py +23 -0
- mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
- mlebench/competitions/spooky-author-identification/classes.py +1 -0
- mlebench/competitions/spooky-author-identification/grade.py +38 -0
- mlebench/competitions/spooky-author-identification/prepare.py +40 -0
- mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
- mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
- mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
- mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
- mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
- mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
- mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
- mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
- mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
- mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
- mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
- mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
- mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
- mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
- mlebench/competitions/utils.py +266 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
- mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
- mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
- mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
- mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
- mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
- mlebench/competitions/whale-categorization-playground/grade.py +41 -0
- mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
- mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
- mlebench/data.py +420 -0
- mlebench/grade.py +209 -0
- mlebench/grade_helpers.py +235 -0
- mlebench/metrics.py +75 -0
- mlebench/registry.py +332 -0
- mlebench/utils.py +346 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/WHEEL +0 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,96 @@
+from pathlib import Path
+from typing import Tuple
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import read_csv
+
+
+def _create_split(
+    input_df: pd.DataFrame, test_size: float, random_state: int
+) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """Helper function to perform a single data split and generate associated files."""
+    # Create train, test from the input dataframe
+    train_df, answers_df = train_test_split(
+        input_df, test_size=test_size, random_state=random_state
+    )
+    test_df = answers_df.drop(columns=["score"])
+
+    sample_submission_df = answers_df[["essay_id"]].copy()
+    sample_submission_df["score"] = np.random.RandomState(42).randint(
+        1, 7, size=len(sample_submission_df)
+    )
+
+    # Checks
+    assert set(train_df["essay_id"]).isdisjoint(
+        set(test_df["essay_id"])
+    ), "Essay IDs in train and test sets are not disjoint"
+    assert len(train_df) + len(test_df) == len(
+        input_df
+    ), f"Train and test sets do not sum to original train set"
+    assert len(test_df) == len(
+        sample_submission_df
+    ), f"Test and sample submission sets do not have the same length"
+    assert (
+        train_df.columns.tolist() == input_df.columns.tolist()
+    ), f"Train set columns do not match original train set, got {train_df.columns.tolist()}"
+    assert test_df.columns.tolist() == [
+        "essay_id",
+        "full_text",
+    ], f"Test set columns do not match expected columns, got {test_df.columns.tolist()}"
+    assert sample_submission_df.columns.tolist() == [
+        "essay_id",
+        "score",
+    ], f"Sample submission set columns do not match expected columns, got {sample_submission_df.columns.tolist()}"
+
+    return train_df, test_df, answers_df, sample_submission_df
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Splits the data in raw into public and private datasets with appropriate test/train splits.
+    Also creates a second, parallel split for validation purposes.
+    """
+
+    # Read the original raw data
+    old_train = read_csv(raw / "train.csv")
+
+    # --- Stage 1: Create the original train/test split for the main competition ---
+    # This block produces the exact same output as the original script.
+
+    # Original train has 17307 rows. Original hidden test has approx 8k rows. We just take 10% of the original train as the test set.
+    main_train, main_test, main_answers, main_sample_submission = _create_split(
+        input_df=old_train, test_size=0.1, random_state=0
+    )
+
+    # Write original CSVs to public/ and private/
+    main_answers.to_csv(private / "answers.csv", index=False)
+    main_train.to_csv(public / "train.csv", index=False)
+    main_test.to_csv(public / "test.csv", index=False)
+    main_sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+    # --- Stage 2: Create a new validation split from the main training data ---
+    # This block creates a new set of directories and files for validation.
+
+    # Define and create the new parallel directories for the validation set
+    public_val = public.parent / "public_val"
+    private_val = private.parent / "private_val"
+    public_val.mkdir(parents=True, exist_ok=True)
+    private_val.mkdir(parents=True, exist_ok=True)
+
+    # Calculate the test size for the second split to make the validation test set
+    # have the same number of samples as the original test set.
+    val_test_size = len(main_test) / len(main_train)
+
+    # Create the new split using the main training data as input
+    val_train, val_test, val_answers, val_sample_submission = _create_split(
+        input_df=main_train, test_size=val_test_size, random_state=0
+    )
+
+    # Write validation CSVs to public_val/ and private_val/ using identical filenames
+    val_answers.to_csv(private_val / "answers.csv", index=False)
+    val_train.to_csv(public_val / "train.csv", index=False)
+    val_test.to_csv(public_val / "test.csv", index=False)
+    val_sample_submission.to_csv(public_val / "sample_submission.csv", index=False)
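For orientation, a minimal illustrative sketch of driving a prepare-style script such as the one above (directory names are hypothetical; in practice mlebench resolves these paths from its registry and CLI):

    from pathlib import Path

    # Hypothetical local layout; real paths come from the mlebench registry/CLI.
    base = Path("/tmp/automated-essay-scoring")
    raw, public, private = base / "raw", base / "public", base / "private"
    public.mkdir(parents=True, exist_ok=True)
    private.mkdir(parents=True, exist_ok=True)

    # Assuming the prepare() defined above is in scope: it writes train/test/sample_submission
    # under public/ (and public_val/), and answers.csv under private/ (and private_val/).
    prepare(raw, public, private)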
File without changes
@@ -0,0 +1,52 @@
+import numpy as np
+import pandas as pd
+from sklearn.metrics import f1_score
+
+from mlebench.grade_helpers import InvalidSubmissionError
+
+
+def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+    """Prepare submission and answers for Macro F1 calculation."""
+    if len(submission) != len(answers):
+        raise InvalidSubmissionError(
+            f"Submission length ({len(submission)}) != answers length ({len(answers)})"
+        )
+
+    if "time" not in submission.columns:
+        raise InvalidSubmissionError("Submission must have a 'time' column")
+
+    if "open_channels" not in submission.columns:
+        raise InvalidSubmissionError("Submission must have an 'open_channels' column")
+
+    # Sort by time
+    submission = submission.sort_values("time").reset_index(drop=True)
+    answers = answers.sort_values("time").reset_index(drop=True)
+
+    # Check time alignment
+    if not np.allclose(submission["time"].values, answers["time"].values, atol=1e-4):
+        raise InvalidSubmissionError("Submission and answers time mismatch")
+
+    # Extract predictions and true values
+    y_pred = submission["open_channels"].values
+    y_true = answers["open_channels"].values
+
+    # Validate predictions
+    try:
+        y_pred = y_pred.astype(int)
+    except (ValueError, TypeError):
+        raise InvalidSubmissionError("Predictions must be integers")
+
+    if np.any(np.isnan(y_pred)):
+        raise InvalidSubmissionError("Predictions cannot contain NaN values")
+
+    return y_true, y_pred
+
+
+def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+    """Calculate Macro F1 score."""
+    y_true, y_pred = prepare_for_metric(submission, answers)
+
+    # Macro F1: compute F1 for each class and average
+    macro_f1 = f1_score(y_true, y_pred, average='macro')
+
+    return macro_f1
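A quick illustrative call of the grader above, assuming the grade() defined in this file is in scope; the toy frames carry the expected 'time' and 'open_channels' columns:

    import pandas as pd

    answers = pd.DataFrame({"time": [0.0001, 0.0002, 0.0003, 0.0004],
                            "open_channels": [0, 1, 1, 2]})
    submission = answers.copy()
    submission.loc[1, "open_channels"] = 2  # one wrong prediction

    print(grade(submission, answers))  # macro F1 over classes {0, 1, 2}; here 7/9 ≈ 0.778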
@@ -0,0 +1,27 @@
+from pathlib import Path
+import pandas as pd
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Prepare liverpool-ion-switching dataset.
+    """
+    # Read data
+    train = pd.read_csv(raw / "train.csv")
+    test = pd.read_csv(raw / "test.csv")
+    sample_submission = pd.read_csv(raw / "sample_submission.csv")
+    test_answer = pd.read_csv(raw / "test_answer.csv")
+
+    # Public files (visible to agents)
+    train.to_csv(public / "train.csv", index=False)
+    test.to_csv(public / "test.csv", index=False)
+    sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+    # Private files (for grading)
+    test_answer.to_csv(private / "test.csv", index=False)
+
+    # Validation checks
+    assert len(test_answer) == len(sample_submission), \
+        f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
+    assert "time" in test_answer.columns, "Test answer must have 'time' column"
+    assert "open_channels" in test_answer.columns, "Test answer must have 'open_channels' column"
File without changes
@@ -0,0 +1,52 @@
+import numpy as np
+import pandas as pd
+from sklearn.metrics import f1_score
+
+from mlebench.grade_helpers import InvalidSubmissionError
+
+
+def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+    """Prepare submission and answers for Macro F1 calculation."""
+    if len(submission) != len(answers):
+        raise InvalidSubmissionError(
+            f"Submission length ({len(submission)}) != answers length ({len(answers)})"
+        )
+
+    if "time" not in submission.columns:
+        raise InvalidSubmissionError("Submission must have a 'time' column")
+
+    if "open_channels" not in submission.columns:
+        raise InvalidSubmissionError("Submission must have an 'open_channels' column")
+
+    # Sort by time
+    submission = submission.sort_values("time").reset_index(drop=True)
+    answers = answers.sort_values("time").reset_index(drop=True)
+
+    # Check time alignment
+    if not np.allclose(submission["time"].values, answers["time"].values, atol=1e-4):
+        raise InvalidSubmissionError("Submission and answers time mismatch")
+
+    # Extract predictions and true values
+    y_pred = submission["open_channels"].values
+    y_true = answers["open_channels"].values
+
+    # Validate predictions
+    try:
+        y_pred = y_pred.astype(int)
+    except (ValueError, TypeError):
+        raise InvalidSubmissionError("Predictions must be integers")
+
+    if np.any(np.isnan(y_pred)):
+        raise InvalidSubmissionError("Predictions cannot contain NaN values")
+
+    return y_true, y_pred
+
+
+def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+    """Calculate Macro F1 score."""
+    y_true, y_pred = prepare_for_metric(submission, answers)
+
+    # Macro F1: compute F1 for each class and average
+    macro_f1 = f1_score(y_true, y_pred, average='macro')
+
+    return macro_f1
@@ -0,0 +1,27 @@
+from pathlib import Path
+import pandas as pd
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Prepare liverpool-ion-switching dataset.
+    """
+    # Read data
+    train = pd.read_csv(raw / "train.csv")
+    test = pd.read_csv(raw / "test.csv")
+    sample_submission = pd.read_csv(raw / "sample_submission.csv")
+    test_answer = pd.read_csv(raw / "test_answer.csv")
+
+    # Public files (visible to agents)
+    train.to_csv(public / "train.csv", index=False)
+    test.to_csv(public / "test.csv", index=False)
+    sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+    # Private files (for grading)
+    test_answer.to_csv(private / "test.csv", index=False)
+
+    # Validation checks
+    assert len(test_answer) == len(sample_submission), \
+        f"Test answer ({len(test_answer)}) and sample submission ({len(sample_submission)}) must have same length"
+    assert "time" in test_answer.columns, "Test answer must have 'time' column"
+    assert "open_channels" in test_answer.columns, "Test answer must have 'open_channels' column"
@@ -0,0 +1,63 @@
+import numpy as np
+import pandas as pd
+from sklearn.metrics import log_loss
+
+from mlebench.competitions.utils import one_hot_dfs_to_log_loss_inputs
+from mlebench.grade_helpers import InvalidSubmissionError
+
+
+def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> dict:
+    """
+    The submission and answers are already one-hotted
+    """
+    classes = ["winner_model_a", "winner_model_b", "winner_tie"]
+    required_columns = ["id"] + classes
+
+    # Check if submission has the required columns
+    missing_columns = [col for col in required_columns if col not in submission.columns]
+    if missing_columns:
+        raise InvalidSubmissionError(
+            f"Submission DataFrame is missing required columns: {missing_columns}"
+        )
+
+    # Check if answers has the required columns
+    assert set(required_columns).issubset(
+        answers.columns
+    ), f"Answers DataFrame is missing required columns: {set(required_columns) - set(answers.columns)}"
+
+    # Check if submission has the correct number of rows
+    if len(submission) != len(answers):
+        raise InvalidSubmissionError(
+            f"Submission DataFrame must have {len(answers)} rows, but has {len(submission)} rows."
+        )
+
+    # Check if all values in submission are between 0 and 1
+    if (
+        not ((submission[classes] >= 0) & (submission[classes] <= 1)).all().all()
+    ):  # first all() checks if all rows are valid, second all() checks if all columns are valid
+        raise InvalidSubmissionError("All values in submission DataFrame must be between 0 and 1.")
+
+    # Check if each row in submission sums to 1
+    if not submission[classes].sum(axis=1).round(6).eq(1).all():
+        raise InvalidSubmissionError("Each row in submission DataFrame must sum to 1.")
+
+    # Use only the required columns for further processing
+    submission = submission[required_columns]
+    answers = answers[required_columns]
+
+    submission = submission.sort_values("id").reset_index(drop=True)
+    answers = answers.sort_values("id").reset_index(drop=True)
+
+    if (submission["id"].values != answers["id"].values).any():
+        raise InvalidSubmissionError("Submission and answer IDs do not match after sorting.")
+
+    log_loss_inputs = one_hot_dfs_to_log_loss_inputs(
+        submission, answers, id_column="id", apply_softmax=False
+    )
+
+    return log_loss_inputs
+
+
+def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+    log_loss_inputs = prepare_for_metric(submission, answers)
+    return log_loss(**log_loss_inputs)
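For reference, the value grade() returns above is sklearn's multi-class log loss over the three outcome columns; a self-contained toy computation of the same metric (numbers are made up) looks like this:

    import numpy as np
    from sklearn.metrics import log_loss

    # Class order: winner_model_a, winner_model_b, winner_tie.
    y_true = [0, 1, 2]                      # true outcome per battle, as class indices
    y_pred = np.array([[0.70, 0.20, 0.10],  # predicted probabilities, each row sums to 1
                       [0.20, 0.50, 0.30],
                       [0.34, 0.33, 0.33]])

    print(log_loss(y_true, y_pred))  # ≈ 0.72; lower is better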
@@ -0,0 +1,52 @@
+import shutil
+from pathlib import Path
+
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import extract, read_csv
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Splits the data in raw into public and private datasets with appropriate test/train splits.
+    """
+
+    # Create train, test from train split
+    old_train = read_csv(raw / "train.csv")
+    # Original train has 55k rows. Original hidden test has 25k rows. We make a new test set with 10% of the original train.
+    new_train, answers = train_test_split(old_train, test_size=0.1, random_state=0)
+    new_test = answers[["id", "prompt", "response_a", "response_b"]].copy()
+
+    sample_submission = answers[["id"]].copy()
+    sample_submission["winner_model_a"] = 0.3333333333333333
+    sample_submission["winner_model_b"] = 0.3333333333333333
+    sample_submission["winner_tie"] = 0.3333333333333333
+
+    # Checks
+    assert len(new_train) + len(new_test) == len(
+        old_train
+    ), f"New train and test should have the same number of rows as the original train"
+    assert set(new_train["id"]).isdisjoint(
+        set(new_test["id"])
+    ), f"New train and test should have no overlapping ids"
+    assert new_test.columns.tolist() == [
+        "id",
+        "prompt",
+        "response_a",
+        "response_b",
+    ], f"New test should have columns id, prompt, response_a, response_b"
+    assert sample_submission.columns.tolist() == [
+        "id",
+        "winner_model_a",
+        "winner_model_b",
+        "winner_tie",
+    ], f"Sample submission should have columns id, winner_model_a, winner_model_b, winner_tie"
+    assert (
+        new_train.columns.tolist() == old_train.columns.tolist()
+    ), f"New train should have the same columns as the original train"
+
+    # Write CSVs
+    answers.to_csv(private / "answers.csv", index=False)
+    new_train.to_csv(public / "train.csv", index=False)
+    new_test.to_csv(public / "test.csv", index=False)
+    sample_submission.to_csv(public / "sample_submission.csv", index=False)
@@ -0,0 +1,115 @@
+import shutil
+from pathlib import Path
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import extract, read_csv
+
+
+def _create_split(
+    df_in: pd.DataFrame,
+    public_path: Path,
+    private_path: Path,
+    test_size: float,
+    random_state: int,
+) -> pd.DataFrame:
+    """
+    Helper function to perform a data split and write files to specified directories.
+
+    Args:
+        df_in: The input DataFrame to split.
+        public_path: The directory for public-facing files (train set, test features).
+        private_path: The directory for private files (test answers).
+        test_size: The proportion of the dataset to allocate to the test set.
+        random_state: The seed for the random number generator.
+
+    Returns:
+        The DataFrame for the newly created training set.
+    """
+    # Create output directories if they don't exist
+    public_path.mkdir(exist_ok=True, parents=True)
+    private_path.mkdir(exist_ok=True, parents=True)
+
+    # Create train, test from the input dataframe
+    new_train, answers = train_test_split(
+        df_in, test_size=test_size, random_state=random_state
+    )
+    new_test = answers[["id", "prompt", "response_a", "response_b"]].copy()
+
+    sample_submission = answers[["id"]].copy()
+    sample_submission["winner_model_a"] = 0.3333333333333333
+    sample_submission["winner_model_b"] = 0.3333333333333333
+    sample_submission["winner_tie"] = 0.3333333333333333
+
+    # Checks
+    assert len(new_train) + len(new_test) == len(
+        df_in
+    ), f"New train and test should have the same number of rows as the original dataframe"
+    assert set(new_train["id"]).isdisjoint(
+        set(new_test["id"])
+    ), f"New train and test should have no overlapping ids"
+    assert new_test.columns.tolist() == [
+        "id",
+        "prompt",
+        "response_a",
+        "response_b",
+    ], f"New test should have columns id, prompt, response_a, response_b"
+    assert sample_submission.columns.tolist() == [
+        "id",
+        "winner_model_a",
+        "winner_model_b",
+        "winner_tie",
+    ], f"Sample submission should have columns id, winner_model_a, winner_model_b, winner_tie"
+    assert (
+        new_train.columns.tolist() == df_in.columns.tolist()
+    ), f"New train should have the same columns as the original dataframe"
+
+    # Write CSVs
+    answers.to_csv(private_path / "answers.csv", index=False)
+    new_train.to_csv(public_path / "train.csv", index=False)
+    new_test.to_csv(public_path / "test.csv", index=False)
+    sample_submission.to_csv(public_path / "sample_submission.csv", index=False)
+
+    return new_train
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Splits the data in raw into public and private datasets with appropriate test/train splits.
+    Also creates a secondary validation split (public_val, private_val) for local testing.
+    """
+
+    # --- Stage 1: Create the original competition split (train/test) ---
+    # This block generates the primary `public` and `private` directories.
+    # Its outputs MUST remain identical to the original script's outputs.
+    old_train_df = read_csv(raw / "train.csv")
+    train_for_val_split = _create_split(
+        df_in=old_train_df,
+        public_path=public,
+        private_path=private,
+        test_size=0.1,
+        random_state=0,
+    )
+
+    # --- Stage 2: Create the new validation split (train_val/test_val) ---
+    # This block takes the training set from Stage 1 and splits it again
+    # to create a new, smaller training set and a validation set.
+    # The outputs are saved to parallel `public_val` and `private_val` directories.
+    public_val = public.parent / "public_val"
+    private_val = private.parent / "private_val"
+
+    # Calculate the test_size needed to make the new validation set (`test_val`)
+    # have the same number of samples as the original test set from Stage 1.
+    # Original test size = 0.1 * total. New train size = 0.9 * total.
+    # We need a fraction `p` such that p * (0.9 * total) = 0.1 * total.
+    # p = 0.1 / 0.9 = 1/9.
+    val_test_size = 1 / 9
+
+    _create_split(
+        df_in=train_for_val_split,
+        public_path=public_val,
+        private_path=private_val,
+        test_size=val_test_size,
+        random_state=0,  # Use same random state for consistency
+    )
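The 1/9 used above can be sanity-checked with a quick back-of-the-envelope calculation (the row count below is illustrative, not the real dataset size):

    total = 57477                                # illustrative row count
    stage1_test = round(0.1 * total)             # held out by the Stage 1 split
    stage1_train = total - stage1_test           # 90% carried into Stage 2
    stage2_test = round((1 / 9) * stage1_train)  # held out by the Stage 2 split

    # Both held-out sets come out the same size, up to a row of rounding.
    print(stage1_test, stage2_test)              # 5748 5748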
@@ -0,0 +1,107 @@
+import os
+from pathlib import Path
+from typing import Any
+import pandas as pd
+
+# This is a generic LLM-based grader for open-ended tasks.
+# It reads 'rubric.md' from the task directory and evaluates the submission.
+
+try:
+    from dsat.services.llm import LLMService
+    from dsat.config import LLMConfig
+except ImportError:
+    # Fallback for when running outside of dsat package context
+    import sys
+    sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent))
+    from dsat.services.llm import LLMService
+    from dsat.config import LLMConfig
+
+class Report:
+    def __init__(self, score, feedback):
+        self.score = score
+        self.feedback = feedback
+        # Standard fields expected by the framework
+        self.is_lower_better = False
+        self.submission_exists = True
+        self.valid_submission = True
+        self.gold_medal = score >= 0.9
+        self.silver_medal = score >= 0.7
+        self.bronze_medal = score >= 0.5
+        self.above_median = score >= 0.5
+        self.submission_path = ""
+        self.competition_id = "open_ended_task"
+
+def grade(submission_path: Path, competition: Any) -> Report:
+    """
+    Grades the submission using an LLM Judge against rubric.md.
+    """
+    # 1. Load the Rubric
+    task_dir = competition.raw_dir.parent
+    rubric_path = task_dir / "rubric.md"
+
+    if not rubric_path.exists():
+        # Fallback if no rubric exists
+        print(f"Warning: Rubric not found at {rubric_path}. Returning default score.")
+        return Report(0.5, "No grading rubric defined.")
+
+    rubric_content = rubric_path.read_text(encoding="utf-8")
+
+    # 2. Load the Submission Content (Preview)
+    # Since it's open-ended, the 'submission_path' might be a CSV, code, or just a marker.
+    # We'll try to peek at the output artifacts if possible, or assume the agent's recent work
+    # is what we are grading. Ideally, AIDE produces a submission file.
+
+    submission_content = "No submission content readable."
+    if submission_path.exists():
+        try:
+            if submission_path.suffix == '.csv':
+                df = pd.read_csv(submission_path)
+                submission_content = f"CSV Submission Preview:\n{df.head().to_markdown()}"
+            else:
+                submission_content = submission_path.read_text(encoding="utf-8")[:2000]
+        except Exception as e:
+            submission_content = f"Error reading submission: {e}"
+
+    # 3. Setup LLM for Judging
+    # Note: In a real run, we might want to inject the API key securely.
+    # Here we assume environment variables are set (which they are in DSATRunner).
+    try:
+        api_key = os.getenv("API_KEY", "EMPTY")
+        base_url = os.getenv("API_BASE", "https://api.openai.com/v1")
+        model = os.getenv("LLM_MODEL", "gpt-4o")
+
+        llm = LLMService(LLMConfig(api_key=api_key, api_base=base_url, model=model))
+
+        prompt = f"""You are an impartial Judge. Evaluate the following submission against the provided Rubric.
+
+# RUBRIC
+{rubric_content}
+
+# SUBMISSION CONTENT
+{submission_content}
+
+# INSTRUCTION
+Assess the submission.
+Output ONLY a float number between 0.0 and 1.0 on the first line.
+On subsequent lines, provide brief feedback.
+"""
+        # Synchronous call wrapper or direct call if possible.
+        # Since grade() is synchronous in standard mlebench, we need a way to run async code.
+        import asyncio
+        response = asyncio.run(llm.achat([{"role": "user", "content": prompt}]))
+
+        lines = response.strip().split('\n')
+        try:
+            score = float(lines[0].strip())
+        except ValueError:
+            # Fallback if LLM is chatty
+            import re
+            match = re.search(r"(\d+(\.\d+)?)", lines[0])
+            score = float(match.group(1)) if match else 0.5
+
+        feedback = "\n".join(lines[1:])
+        return Report(score, feedback)
+
+    except Exception as e:
+        print(f"LLM Judging failed: {e}")
+        return Report(0.0, f"Judging failed: {e}")
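The score-parsing fallback in grade() above simply takes the first numeric token from an otherwise chatty first line; a standalone sketch of that behaviour (parse_score is a hypothetical helper mirroring the logic, not part of the package):

    import re

    def parse_score(first_line: str, default: float = 0.5) -> float:
        # Mirror of the fallback in grade(): try a plain float, else grab the first numeric token.
        try:
            return float(first_line.strip())
        except ValueError:
            match = re.search(r"(\d+(\.\d+)?)", first_line)
            return float(match.group(1)) if match else default

    print(parse_score("0.85"))                      # 0.85
    print(parse_score("Score: 0.85 - solid work"))  # 0.85
    print(parse_score("no numeric score here"))     # 0.5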
@@ -0,0 +1,11 @@
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+from mlebench.competitions.utils import prepare_for_accuracy_metric
+
+
+def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+    accuracy_inputs = prepare_for_accuracy_metric(
+        submission=submission, answers=answers, target_column="ClassId", id_column="Id"
+    )
+    return accuracy_score(y_true=accuracy_inputs["y_true"], y_pred=accuracy_inputs["y_pred"])
@@ -0,0 +1,58 @@
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Create train/test split from old train set, using same train/test proportion
+    """
+    old_train = np.load(raw / "timit_11" / "timit_11" / "train_11.npy")
+    old_train_label = np.load(raw / "timit_11" / "timit_11" / "train_label_11.npy")
+    old_train_idxs = range(len(old_train))
+
+    # Create new splits
+    old_test = np.load(raw / "timit_11" / "timit_11" / "test_11.npy")
+    old_test_prop = len(old_test) / (len(old_train) + len(old_test))  # ~= 0.268
+    new_train_idxs, new_test_idxs = train_test_split(
+        old_train_idxs, test_size=old_test_prop, random_state=0
+    )
+
+    new_train = old_train[new_train_idxs]
+    new_train_label = old_train_label[new_train_idxs]
+    new_test = old_train[new_test_idxs]
+    new_test_label = old_train_label[new_test_idxs]
+
+    answers_df = pd.DataFrame({"Id": range(len(new_test)), "ClassId": new_test_label})
+
+    # Create sample submission
+    sample_submission = answers_df.copy()
+    sample_submission["ClassId"] = 0
+
+    # Save files
+    (public / "timit_11" / "timit_11").mkdir(parents=True, exist_ok=True)
+
+    np.save(public / "timit_11" / "timit_11" / "train_11.npy", new_train)
+    np.save(public / "timit_11" / "timit_11" / "train_label_11.npy", new_train_label)
+    np.save(public / "timit_11" / "timit_11" / "test_11.npy", new_test)
+    sample_submission.to_csv(public / "sampleSubmission.csv", index=False)
+    answers_df.to_csv(private / "answers.csv", index=False)
+
+    # Sanity checks
+    assert (
+        public / "timit_11" / "timit_11" / "train_11.npy"
+    ).exists(), "`train_11.npy` doesn't exist!"
+    assert (
+        public / "timit_11" / "timit_11" / "train_label_11.npy"
+    ).exists(), "`train_label_11.npy` doesn't exist!"
+    assert (
+        public / "timit_11" / "timit_11" / "test_11.npy"
+    ).exists(), "`test_11.npy` doesn't exist!"
+    assert (public / "sampleSubmission.csv").exists(), "`sampleSubmission.csv` doesn't exist!"
+    assert (private / "answers.csv").exists(), "`answers.csv` doesn't exist!"
+
+    assert len(new_train) + len(new_test) == len(
+        old_train
+    ), f"Expected {len(old_train)} samples in combined new train and test splits, got {len(new_train) + len(new_test)}!"
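Given the layout written by prepare() above, a submission on the agent side would be shaped like this (a hedged sketch; the constant prediction is a placeholder for a real model's output):

    from pathlib import Path

    import numpy as np
    import pandas as pd

    public = Path("public")  # wherever the prepared public files were written
    test = np.load(public / "timit_11" / "timit_11" / "test_11.npy")

    # sampleSubmission.csv uses columns Id, ClassId.
    submission = pd.DataFrame({"Id": range(len(test)), "ClassId": 0})
    submission.to_csv("submission.csv", index=False)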