dslighting-1.7.1-py3-none-any.whl → dslighting-1.7.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dslighting/__init__.py +1 -1
- dslighting/core/agent.py +78 -62
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
- mlebench/README.md +39 -0
- mlebench/__init__.py +0 -0
- mlebench/cli.py +221 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
- mlebench/competitions/AI4Code/grade.py +70 -0
- mlebench/competitions/AI4Code/prepare.py +84 -0
- mlebench/competitions/AI4Code/prepare_val.py +159 -0
- mlebench/competitions/__init__.py +0 -0
- mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
- mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
- mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
- mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
- mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
- mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
- mlebench/competitions/bike-sharing-demand/grade.py +55 -0
- mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
- mlebench/competitions/billion-word-imputation/grade.py +37 -0
- mlebench/competitions/billion-word-imputation/prepare.py +107 -0
- mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
- mlebench/competitions/bms-molecular-translation/grade.py +40 -0
- mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
- mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
- mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
- mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
- mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
- mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
- mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
- mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
- mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
- mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
- mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
- mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
- mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
- mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
- mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
- mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
- mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
- mlebench/competitions/dog-breed-identification/dogs.py +124 -0
- mlebench/competitions/dog-breed-identification/grade.py +42 -0
- mlebench/competitions/dog-breed-identification/prepare.py +55 -0
- mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
- mlebench/competitions/ethanol-concentration/grade.py +23 -0
- mlebench/competitions/ethanol-concentration/prepare.py +90 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
- mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
- mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
- mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
- mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
- mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
- mlebench/competitions/google-quest-challenge/classes.py +32 -0
- mlebench/competitions/google-quest-challenge/grade.py +45 -0
- mlebench/competitions/google-quest-challenge/prepare.py +58 -0
- mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
- mlebench/competitions/handwriting/grade.py +23 -0
- mlebench/competitions/handwriting/prepare.py +179 -0
- mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
- mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
- mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
- mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
- mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
- mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
- mlebench/competitions/ili/grade.py +60 -0
- mlebench/competitions/ili/prepare.py +99 -0
- mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
- mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
- mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
- mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
- mlebench/competitions/instant-gratification/__init__.py +0 -0
- mlebench/competitions/instant-gratification/grade.py +55 -0
- mlebench/competitions/instant-gratification/prepare.py +25 -0
- mlebench/competitions/instant_gratification/__init__.py +0 -0
- mlebench/competitions/instant_gratification/grade.py +55 -0
- mlebench/competitions/instant_gratification/prepare.py +25 -0
- mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
- mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
- mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
- mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
- mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
- mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
- mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
- mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
- mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
- mlebench/competitions/leaf-classification/classes.py +101 -0
- mlebench/competitions/leaf-classification/grade.py +44 -0
- mlebench/competitions/leaf-classification/prepare.py +60 -0
- mlebench/competitions/leaf-classification/prepare_val.py +116 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
- mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
- mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
- mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
- mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
- mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
- mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
- mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
- mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
- mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
- mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
- mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
- mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
- mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
- mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
- mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
- mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
- mlebench/competitions/my-custom-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-03/grade.py +107 -0
- mlebench/competitions/new-my-task-03/prepare.py +2 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
- mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
- mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
- mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
- mlebench/competitions/paddy-disease-classification/grade.py +35 -0
- mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
- mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
- mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
- mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
- mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
- mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
- mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e1/grade.py +52 -0
- mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e11/grade.py +55 -0
- mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e18/grade.py +39 -0
- mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
- mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
- mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e1/grade.py +52 -0
- mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
- mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e11/grade.py +55 -0
- mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
- mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
- mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
- mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
- mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
- mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
- mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
- mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
- mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
- mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
- mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
- mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
- mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
- mlebench/competitions/see-click-predict-fix/grade.py +66 -0
- mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
- mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
- mlebench/competitions/see_click_predict_fix/grade.py +66 -0
- mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
- mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
- mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
- mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
- mlebench/competitions/siim-covid19-detection/grade.py +194 -0
- mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
- mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
- mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
- mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
- mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
- mlebench/competitions/spaceship-titanic/grade.py +11 -0
- mlebench/competitions/spaceship-titanic/prepare.py +23 -0
- mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
- mlebench/competitions/spooky-author-identification/classes.py +1 -0
- mlebench/competitions/spooky-author-identification/grade.py +38 -0
- mlebench/competitions/spooky-author-identification/prepare.py +40 -0
- mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
- mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
- mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
- mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
- mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
- mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
- mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
- mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
- mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
- mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
- mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
- mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
- mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
- mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
- mlebench/competitions/utils.py +266 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
- mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
- mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
- mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
- mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
- mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
- mlebench/competitions/whale-categorization-playground/grade.py +41 -0
- mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
- mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
- mlebench/data.py +420 -0
- mlebench/grade.py +209 -0
- mlebench/grade_helpers.py +235 -0
- mlebench/metrics.py +75 -0
- mlebench/registry.py +332 -0
- mlebench/utils.py +346 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py (new file):

```diff
@@ -0,0 +1,42 @@
+from pathlib import Path
+
+from mlebench.utils import extract, read_csv
+
+from .classes import CLASSES
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    # extract only what is needed
+    extract(raw / "train.csv.zip", raw)
+    extract(raw / "test.csv.zip", raw)
+    extract(raw / "test_labels.csv.zip", raw)
+    extract(raw / "sample_submission.csv.zip", raw)
+
+    # the test set is provided, so we dont have to split the train set nor form the sample submission
+    train_with_labels = read_csv(raw / "train.csv")
+    test_without_labels = read_csv(raw / "test.csv")
+    answers = read_csv(raw / "test_labels.csv")
+    sample_submission = read_csv(raw / "sample_submission.csv")
+    sample_submission[CLASSES] = 0.5
+
+    # save to public
+    train_with_labels.to_csv(public / "train.csv", index=False)
+    test_without_labels.to_csv(public / "test.csv", index=False)
+    sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+    # save to private
+    answers.to_csv(private / "test.csv", index=False)
+
+    assert len(answers) == len(
+        sample_submission
+    ), "Private test set and sample submission should be of the same length"
+
+    assert sorted(answers["id"]) == sorted(
+        test_without_labels["id"]
+    ), "Private and Public test IDs should match"
+    assert sorted(sample_submission["id"]) == sorted(
+        test_without_labels["id"]
+    ), "Public test and sample submission IDs should match"
+    assert (
+        len(set(train_with_labels["id"]) & set(test_without_labels["id"])) == 0
+    ), "Train and test IDs should not overlap"
```
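Every `prepare.py` in this tree follows the same three-directory contract seen above: `raw` holds the original Kaggle download, `public` receives the files an agent may see, and `private` receives the grading answers. A minimal invocation sketch (the directory names here are illustrative assumptions, not paths mandated by mlebench):

```python
from pathlib import Path

# Hypothetical layout; prepare() itself only requires that `raw` already
# contains the Kaggle *.csv.zip archives referenced above.
base = Path("data/jigsaw-toxic-comment-classification-challenge")
raw, public, private = base / "raw", base / "public", base / "private"
public.mkdir(parents=True, exist_ok=True)
private.mkdir(parents=True, exist_ok=True)

# prepare(raw, public, private)  # runs the extraction, writes, and ID checks above
```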
mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py (new file):

```diff
@@ -0,0 +1,88 @@
+from pathlib import Path
+
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import extract, read_csv
+
+from .classes import CLASSES
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    # extract only what is needed
+    extract(raw / "train.csv.zip", raw)
+    extract(raw / "test.csv.zip", raw)
+    extract(raw / "test_labels.csv.zip", raw)
+    extract(raw / "sample_submission.csv.zip", raw)
+
+    # the test set is provided, so we dont have to split the train set nor form the sample submission
+    train_with_labels = read_csv(raw / "train.csv")
+    test_without_labels = read_csv(raw / "test.csv")
+    answers = read_csv(raw / "test_labels.csv")
+    sample_submission = read_csv(raw / "sample_submission.csv")
+    sample_submission[CLASSES] = 0.5
+
+    # save to public
+    train_with_labels.to_csv(public / "train.csv", index=False)
+    test_without_labels.to_csv(public / "test.csv", index=False)
+    sample_submission.to_csv(public / "sample_submission.csv", index=False)
+
+    # save to private
+    answers.to_csv(private / "test.csv", index=False)
+
+    assert len(answers) == len(
+        sample_submission
+    ), "Private test set and sample submission should be of the same length"
+
+    assert sorted(answers["id"]) == sorted(
+        test_without_labels["id"]
+    ), "Private and Public test IDs should match"
+    assert sorted(sample_submission["id"]) == sorted(
+        test_without_labels["id"]
+    ), "Public test and sample submission IDs should match"
+    assert (
+        len(set(train_with_labels["id"]) & set(test_without_labels["id"])) == 0
+    ), "Train and test IDs should not overlap"
+
+    # ==================================================================
+    # === New code for creating the validation set starts here ===
+    # The code above this line is untouched to ensure original outputs
+    # remain identical.
+    # ==================================================================
+
+    # 1. Define new paths and create the directories for the validation set.
+    public_val = public.parent / "public_val"
+    private_val = private.parent / "private_val"
+    public_val.mkdir(exist_ok=True)
+    private_val.mkdir(exist_ok=True)
+
+    # 2. Split the original training data to create a new, smaller training set
+    # and a new validation set. The size of the validation set will be the same
+    # as the size of the original test set.
+    validation_set_size = len(test_without_labels)
+    train_val, test_val_with_labels = train_test_split(
+        train_with_labels,
+        test_size=validation_set_size,
+        random_state=42,  # Use a fixed random state for reproducibility
+    )
+
+    # 3. Prepare the validation set files, mirroring the original test set structure.
+    # The public part (input features, without labels)
+    test_val_without_labels = test_val_with_labels[["id", "comment_text"]].copy()
+
+    # The private part (ground truth labels for the validation set)
+    answers_val = test_val_with_labels[["id"] + CLASSES].copy()
+
+    # 4. Create a sample submission file for the new validation set,
+    # mirroring the original sample submission format.
+    sample_submission_val = test_val_without_labels[["id"]].copy()
+    sample_submission_val[CLASSES] = 0.5
+
+    # 5. Save the new sets to the 'public_val' and 'private_val' directories,
+    # using the same filenames as in the original 'public' and 'private' dirs.
+    # Save to public_val
+    train_val.to_csv(public_val / "train.csv", index=False)
+    test_val_without_labels.to_csv(public_val / "test.csv", index=False)
+    sample_submission_val.to_csv(public_val / "sample_submission.csv", index=False)
+
+    # Save to private_val
+    answers_val.to_csv(private_val / "test.csv", index=False)
```
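The `_val` variant above carves the validation split out of the original training data with an integer `test_size` and a fixed seed. A small self-contained demonstration of those two properties of `train_test_split` (toy data, not the competition files):

```python
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.DataFrame({"id": range(10), "comment_text": [f"c{i}" for i in range(10)]})

# An integer test_size carves off exactly that many rows...
train_val, test_val = train_test_split(df, test_size=3, random_state=42)
assert len(test_val) == 3 and len(train_val) == 7

# ...and a fixed random_state makes the split reproducible across runs.
_, test_val_again = train_test_split(df, test_size=3, random_state=42)
assert list(test_val["id"]) == list(test_val_again["id"])
```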
mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py (new file):

```diff
@@ -0,0 +1,153 @@
+from typing import Tuple
+
+from pandas import DataFrame
+from sklearn.metrics import roc_auc_score
+
+from mlebench.grade_helpers import InvalidSubmissionError
+
+
+def _filter_for_subgroup(
+    answers: DataFrame, submission: DataFrame, identity_column: str, target_value: int = 1
+) -> Tuple[DataFrame, DataFrame]:
+    """
+    Restricts the data to only the examples that mention the specific identity subgroup.
+    A low value in this metric means the model does a poor job of distinguishing between toxic and non-toxic comments that mention the identity.
+    """
+
+    filtered_answers = answers[answers[identity_column] == target_value]
+    filtered_submission = submission[submission["id"].isin(filtered_answers["id"])]
+    return filtered_answers, filtered_submission
+
+
+def _filter_for_bpsn(
+    answers: DataFrame, submission: DataFrame, identity_column: str, target_value: int = 1
+) -> Tuple[DataFrame, DataFrame]:
+    """
+    Filter for BPSN (Background Positive, Subgroup Negative) AUC
+    Restricts the data to only the non-toxic examples that mention the identity and the toxic examples that do not.
+    A low value in this metric means that the model confuses non-toxic examples that mention the identity with toxic examples that do not, likely meaning that the model predicts higher toxicity scores than it should for non-toxic examples mentioning the identity.
+    """
+    filtered_answers = answers[
+        ((answers["toxicity"] == 0) & (answers[identity_column] == target_value))
+        | ((answers["toxicity"] == 1) & (answers[identity_column] != target_value))
+    ]
+    filtered_submission = submission[submission["id"].isin(filtered_answers["id"])]
+    return filtered_answers, filtered_submission
+
+
+def _filter_for_bnsp(
+    answers: DataFrame, submission: DataFrame, identity_column: str, target_value: int = 1
+) -> Tuple[DataFrame, DataFrame]:
+    """
+    Filter for BNSP (Background Negative, Subgroup Positive) AUC
+    Restricts the data to only the toxic examples that mention the identity and the non-toxic examples that do not.
+    A low value in this metric means that the model confuses toxic examples that mention the identity with non-toxic examples that do not, likely meaning that the model predicts lower toxicity scores than it should for toxic examples mentioning the identity.
+    """
+    filtered_answers = answers[
+        ((answers["toxicity"] == 1) & (answers[identity_column] == target_value))
+        | ((answers["toxicity"] == 0) & (answers[identity_column] != target_value))
+    ]
+    filtered_submission = submission[submission["id"].isin(filtered_answers["id"])]
+    return filtered_answers, filtered_submission
+
+
+def _generalized_mean(scores: list, p: int, identity_columns: list[str]) -> float:
+    return (sum([s**p for s in scores]) / len(identity_columns)) ** (1 / p)
+
+
+def prepare_for_metric(
+    submission: DataFrame, answers: DataFrame, identity_columns: list[str]
+) -> Tuple[DataFrame, DataFrame]:
+
+    # Ensure that the submission DataFrame contains the necessary columns
+    if "id" not in submission.columns:
+        raise InvalidSubmissionError("Submission DataFrame must have an 'id' column.")
+    if (
+        "prediction" not in submission.columns and "toxicity" not in submission.columns
+    ):  # Allow for the submission to be the CSV of answers, which has a "toxicity" column instead of a "prediction" column
+        raise InvalidSubmissionError("Submission DataFrame must have a 'prediction' column.")
+
+    # Ensure that the answers DataFrame contains the necessary columns
+    assert "id" in answers.columns, "Answers DataFrame must have an 'id' column."
+    assert "toxicity" in answers.columns, "Answers DataFrame must have a 'toxicity' column."
+
+    submission = submission.sort_values("id")
+    answers = answers.sort_values("id")
+
+    if (submission["id"].values != answers["id"].values).any():
+        raise InvalidSubmissionError("IDs in the submission and answers DataFrames do not match.")
+
+    # Ensure that all identity columns are present in the answers DataFrame
+    for identity_column in identity_columns:
+        assert (
+            identity_column in answers.columns
+        ), f"Answers DataFrame must have an '{identity_column}' column."
+
+    submission.rename(columns={"prediction": "toxicity"}, inplace=True)
+
+    # Convert continuous toxicity scores to binary toxicity labels
+    submission["toxicity"] = submission["toxicity"].apply(lambda x: 1 if x >= 0.5 else 0)
+    answers["toxicity"] = answers["toxicity"].apply(lambda x: 1 if x >= 0.5 else 0)
+
+    # Convert continuous identity columns to binary identity columns
+    for identity_column in identity_columns:
+        answers[identity_column] = answers[identity_column].apply(lambda x: 1 if x >= 0.5 else 0)
+
+    return submission, answers
+
+
+def grade(
+    submission: DataFrame,
+    answers: DataFrame,
+    identity_columns: list[str] = [
+        "male",
+        "female",
+        "homosexual_gay_or_lesbian",
+        "christian",
+        "jewish",
+        "muslim",
+        "black",
+        "white",
+        "psychiatric_or_mental_illness",
+    ],
+    p: int = -5,
+    w: float = 0.25,
+) -> float:
+    """
+    Computes the custom metric for Jigsaw Unintended Bias in Toxicity Classification.
+    See https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview/evaluation for details on this metric.
+    """
+
+    submission, answers = prepare_for_metric(submission, answers, identity_columns)
+
+    overall_auc = roc_auc_score(answers["toxicity"], submission["toxicity"])
+
+    # To measure unintended bias, we again calculate the ROC-AUC, this time on three specific subsets of the test set for each identity, each capturing a different aspect of unintended bias.
+    bias_aucs = []
+    for identity_column in identity_columns:
+        subgroup_answers, subgroup_submission = _filter_for_subgroup(
+            answers, submission, identity_column
+        )
+        bpsn_answers, bpsn_submission = _filter_for_bpsn(answers, submission, identity_column)
+        bnsp_answers, bnsp_submission = _filter_for_bnsp(answers, submission, identity_column)
+
+        subgroup_aucs = roc_auc_score(subgroup_answers["toxicity"], subgroup_submission["toxicity"])
+        bpsn_aucs = roc_auc_score(bpsn_answers["toxicity"], bpsn_submission["toxicity"])
+        bnsp_aucs = roc_auc_score(bnsp_answers["toxicity"], bnsp_submission["toxicity"])
+
+        bias_aucs.append([subgroup_aucs, bpsn_aucs, bnsp_aucs])
+
+    # Calculate generalized mean for each identity column
+    generalized_means = [
+        _generalized_mean([bias_auc[i] for bias_auc in bias_aucs], p, identity_columns)
+        for i in range(3)  # there are 3 bias metrics
+    ]
+
+    # Combine the overall AUC with the generalized mean of the Bias AUCs
+    weighted_overall_auc = w * overall_auc
+    weighted_generalized_means = sum(
+        [w * generalized_means[i] for i in range(len(generalized_means))]
+    )
+    final_score = weighted_overall_auc + weighted_generalized_means
+
+    return final_score
```
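For reference, the score computed by `grade` above matches the competition's published formula (linked in its docstring). With the defaults shown, N = 9 identity subgroups, A = 3 bias AUCs per subgroup (Subgroup, BPSN, BNSP), p = -5, and a uniform weight w = 0.25:

$$M_p(m_s) = \left(\frac{1}{N}\sum_{s=1}^{N} m_s^{\,p}\right)^{1/p}, \qquad \text{score} = w \cdot \mathrm{AUC}_{\text{overall}} + \sum_{a=1}^{A} w \cdot M_p(m_{s,a})$$

Because p is negative, this power mean is dominated by the smallest AUC in each group, so one poorly handled identity subgroup pulls the whole score down.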
mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py (new file):

```diff
@@ -0,0 +1,36 @@
+import shutil
+from pathlib import Path
+
+from mlebench.utils import read_csv
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    # Create test and test without labels
+    test = read_csv(raw / "test_private_expanded.csv")
+    test_without_labels = test[["id", "comment_text"]]
+
+    # Copy over files
+    shutil.copy(raw / "test_private_expanded.csv", private / "test.csv")
+    shutil.copy(raw / "train.csv", public / "train.csv")
+    test_without_labels.to_csv(public / "test.csv", index=False)
+
+    # Create a sample submission file
+    submission_df = test_without_labels.copy()
+    submission_df = submission_df.drop(columns=["comment_text"])
+    submission_df["prediction"] = 0.0
+    submission_df.to_csv(public / "sample_submission.csv", index=False)
+
+    # Checks
+    assert test.shape == (
+        len(test),
+        45,
+    ), "test.csv should have 45 columns as per raw data"
+    assert test_without_labels.shape == (
+        len(test_without_labels),
+        2,
+    ), "test.csv should have 2 columns: id, comment_text"
+
+    assert submission_df.shape == (
+        len(test),
+        2,
+    ), "sample_submission.csv should have 2 columns: id, prediction"
```
mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py (new file):

```diff
@@ -0,0 +1,117 @@
+import shutil
+from pathlib import Path
+
+# New import required for the data splitting
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import read_csv
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Prepares the data for the competition.
+
+    This script performs two main operations:
+    1. Processes the original raw data into `public` and `private` directories
+       for the official test set, ensuring the output is identical to the
+       original script's output.
+    2. Creates a new validation split from the original training data. It splits
+       the original `train.csv` into a new, smaller training set (`train_val`)
+       and a validation set (`test_val`). The results are saved in parallel
+       `public_val` and `private_val` directories with a mirrored file structure.
+    """
+    # --- Part 1: Original Data Preparation (UNCHANGED OUTPUT) ---
+    # This block ensures the 'public' and 'private' directories are populated
+    # exactly as they were in the original script.
+
+    # Ensure original output directories exist
+    public.mkdir(parents=True, exist_ok=True)
+    private.mkdir(parents=True, exist_ok=True)
+
+    # Create test and test without labels
+    test = read_csv(raw / "test_private_expanded.csv")
+    test_without_labels = test[["id", "comment_text"]]
+
+    # Copy over files
+    shutil.copy(raw / "test_private_expanded.csv", private / "test.csv")
+    shutil.copy(raw / "train.csv", public / "train.csv")
+    test_without_labels.to_csv(public / "test.csv", index=False)
+
+    # Create a sample submission file
+    submission_df = test_without_labels.copy()
+    submission_df = submission_df.drop(columns=["comment_text"])
+    submission_df["prediction"] = 0.0
+    submission_df.to_csv(public / "sample_submission.csv", index=False)
+
+    # Checks
+    assert test.shape == (
+        len(test),
+        45,
+    ), "test.csv should have 45 columns as per raw data"
+    assert test_without_labels.shape == (
+        len(test_without_labels),
+        2,
+    ), "test.csv should have 2 columns: id, comment_text"
+
+    assert submission_df.shape == (
+        len(test),
+        2,
+    ), "sample_submission.csv should have 2 columns: id, prediction"
+
+    # --- Part 2: New Validation Set Creation ---
+    # This block creates a new split from the original training data to form
+    # a new, smaller training set and a validation set. Outputs are saved
+    # to 'public_val' and 'private_val' directories.
+
+    # Define and create the new parallel directories for the validation split
+    public_val = public.parent / "public_val"
+    private_val = private.parent / "private_val"
+    public_val.mkdir(parents=True, exist_ok=True)
+    private_val.mkdir(parents=True, exist_ok=True)
+
+    # Load the full original training data, which will be split
+    full_train_df = read_csv(raw / "train.csv")
+
+    # The size of the new validation set ('test_val') should be the same
+    # as the size of the original test set to replicate the split ratio.
+    test_set_size = len(test)
+
+    # Split the full training data into a new training and validation set.
+    # A fixed random_state is used to ensure the split is deterministic.
+    train_val, test_val = train_test_split(
+        full_train_df, test_size=test_set_size, random_state=42
+    )
+
+    # Create the public version of the validation set (without labels)
+    test_val_without_labels = test_val[["id", "comment_text"]]
+
+    # Save the new split data, mirroring the original file structure and names.
+    # private_val contains the ground truth for the new validation set
+    test_val.to_csv(private_val / "test.csv", index=False)
+
+    # public_val contains the new smaller training set and the unlabeled validation set
+    train_val.to_csv(public_val / "train.csv", index=False)
+    test_val_without_labels.to_csv(public_val / "test.csv", index=False)
+
+    # Create a sample submission file for the new validation set
+    submission_df_val = test_val_without_labels.copy()
+    submission_df_val = submission_df_val.drop(columns=["comment_text"])
+    submission_df_val["prediction"] = 0.0
+    submission_df_val.to_csv(public_val / "sample_submission.csv", index=False)
+
+    # Add checks for the new validation split dataframes
+    assert test_val.shape == (
+        len(test_val),
+        full_train_df.shape[1],
+    ), f"private_val/test.csv should have {full_train_df.shape[1]} columns"
+    assert test_val_without_labels.shape == (
+        len(test_val_without_labels),
+        2,
+    ), "public_val/test.csv should have 2 columns: id, comment_text"
+    assert submission_df_val.shape == (
+        len(test_val),
+        2,
+    ), "public_val/sample_submission.csv should have 2 columns: id, prediction"
+    assert len(train_val) + len(test_val) == len(
+        full_train_df
+    ), "Split should conserve all data from original train set"
```
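The parallel validation directories are derived purely from the `.parent` of the paths passed in, so they always land next to the originals. A runnable sketch with hypothetical paths (only the `.parent` relationship is taken from the code above):

```python
from pathlib import Path

public = Path("data/jigsaw-unintended-bias/public")
private = Path("data/jigsaw-unintended-bias/private")

public_val = public.parent / "public_val"      # .../jigsaw-unintended-bias/public_val
private_val = private.parent / "private_val"   # .../jigsaw-unintended-bias/private_val
assert public_val.parent == public.parent      # created as siblings of the originals
```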
mlebench/competitions/kuzushiji-recognition/grade.py (new file):

```diff
@@ -0,0 +1,58 @@
+import pandas as pd
+
+from mlebench.grade_helpers import InvalidSubmissionError
+
+from .kuzushiji_metric import kuzushiji_f1
+
+
+def prepare_for_metric(
+    submission: pd.DataFrame, answers: pd.DataFrame
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """
+    No changes needed, since kuzushiji_metric.kuzushiji_f1 expects the submission and answers
+    exactly as they are provided. In this function we just check for invalid submissions.
+    """
+    if len(submission) != len(answers):
+        raise InvalidSubmissionError(
+            f"Submission has {len(submission)} rows, but expected {len(answers)} rows."
+        )
+    if not {"image_id", "labels"}.issubset(submission.columns):
+        raise InvalidSubmissionError("Submission must have columns 'image_id' and 'labels'.")
+
+    submission = submission.sort_values("image_id").reset_index(drop=True)
+    answers = answers.sort_values("image_id").reset_index(drop=True)
+
+    if not all(submission["image_id"] == answers["image_id"]):
+        raise InvalidSubmissionError("Submission image_id codes don't match solution.")
+
+    # Check that labels have the correct format
+    for pred in submission["labels"]:
+        # Skip empty predictions
+        if pd.isna(pred):
+            continue
+
+        # Pred is a string of the form "label1 X1 Y1 label2 X2 Y2 ..."
+        # where label must be a str and X, Y are floats
+        preds = pred.split(" ")
+        if len(preds) % 3 != 0:
+            raise InvalidSubmissionError(f"Malformed prediction string: {pred}")
+        # Check that the X and Y values can be converted to floats
+        if not all(isinstance(float(preds[i]), float) for i in range(1, len(preds), 3)) or not all(
+            isinstance(float(preds[i]), float) for i in range(2, len(preds), 3)
+        ):
+            raise InvalidSubmissionError(f"X and Y values must be floats: {pred}")
+
+    return submission, answers
+
+
+def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+    """
+    Submissions are evaluated on a modified version of the F1 Score. To score a true positive,
+    you must provide center point coordinates that are within the ground truth bounding box and
+    a matching label. The ground truth bounding boxes are defined in the format {label X Y Width Height},
+    so if the ground truth label is U+003F 1 1 10 10 then a prediction of U+003F 3 3 would pass.
+
+    See https://www.kaggle.com/competitions/kuzushiji-recognition/overview for more details.
+    """
+    submission, answers = prepare_for_metric(submission, answers)
+    return float(kuzushiji_f1(sub=submission, solution=answers))
```
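A minimal sketch of the inputs `grade` expects, built around the example in its docstring (the image ids and the second row here are hypothetical):

```python
import pandas as pd

submission = pd.DataFrame({
    "image_id": ["page_001", "page_002"],
    "labels": ["U+003F 3 3", None],  # "label X Y" center points; NaN = no detections
})
answers = pd.DataFrame({
    "image_id": ["page_001", "page_002"],
    "labels": ["U+003F 1 1 10 10", None],  # "label X Y Width Height" boxes
})
# grade(submission, answers) -> 1.0: the predicted center (3, 3) lies inside the
# ground-truth box spanning (1, 1)-(11, 11) and the labels match; the empty page
# contributes nothing.
```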
mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py (new file):

```diff
@@ -0,0 +1,118 @@
+# ADAPTED FROM: https://gist.github.com/SohierDane/a90ef46d79808fe3afc70c80bae45972
+"""
+Python equivalent of the Kuzushiji competition metric (https://www.kaggle.com/c/kuzushiji-recognition/)
+Kaggle's backend uses a C# implementation of the same metric. This version is
+provided for convenience only; in the event of any discrepancies the C# implementation
+is the master version.
+
+Tested on Python 3.6 with numpy 1.16.4 and pandas 0.24.2.
+
+Update 2024/06/05: Also tested on Python 3.12 with numpy 1.26.4 and pandas 2.2.2.
+"""
+
+
+import multiprocessing
+
+import numpy as np
+import pandas as pd
+
+
+def score_page(preds, truth):
+    """
+    Scores a single page.
+    Args:
+        preds: prediction string of labels and center points.
+        truth: ground truth string of labels and bounding boxes.
+    Returns:
+        True/false positive and false negative counts for the page
+    """
+    tp = 0
+    fp = 0
+    fn = 0
+
+    truth_indices = {"label": 0, "X": 1, "Y": 2, "Width": 3, "Height": 4}
+    preds_indices = {"label": 0, "X": 1, "Y": 2}
+
+    if pd.isna(truth) and pd.isna(preds):
+        return {"tp": tp, "fp": fp, "fn": fn}
+
+    if pd.isna(truth):
+        fp += len(preds.split(" ")) // len(preds_indices)
+        return {"tp": tp, "fp": fp, "fn": fn}
+
+    if pd.isna(preds):
+        fn += len(truth.split(" ")) // len(truth_indices)
+        return {"tp": tp, "fp": fp, "fn": fn}
+
+    truth = truth.split(" ")
+    if len(truth) % len(truth_indices) != 0:
+        raise ValueError("Malformed solution string")
+    truth_label = np.array(truth[truth_indices["label"] :: len(truth_indices)])
+    truth_xmin = np.array(truth[truth_indices["X"] :: len(truth_indices)]).astype(float)
+    truth_ymin = np.array(truth[truth_indices["Y"] :: len(truth_indices)]).astype(float)
+    truth_xmax = truth_xmin + np.array(truth[truth_indices["Width"] :: len(truth_indices)]).astype(
+        float
+    )
+    truth_ymax = truth_ymin + np.array(truth[truth_indices["Height"] :: len(truth_indices)]).astype(
+        float
+    )
+
+    preds = preds.split(" ")
+    if len(preds) % len(preds_indices) != 0:
+        raise ValueError("Malformed prediction string")
+    preds_label = np.array(preds[preds_indices["label"] :: len(preds_indices)])
+    preds_x = np.array(preds[preds_indices["X"] :: len(preds_indices)]).astype(float)
+    preds_y = np.array(preds[preds_indices["Y"] :: len(preds_indices)]).astype(float)
+    preds_unused = np.ones(len(preds_label)).astype(bool)
+
+    for xmin, xmax, ymin, ymax, label in zip(
+        truth_xmin, truth_xmax, truth_ymin, truth_ymax, truth_label
+    ):
+        # Matching = point inside box & character same & prediction not already used
+        matching = (
+            (xmin < preds_x)
+            & (xmax > preds_x)
+            & (ymin < preds_y)
+            & (ymax > preds_y)
+            & (preds_label == label)
+            & preds_unused
+        )
+        if matching.sum() == 0:
+            fn += 1
+        else:
+            tp += 1
+            preds_unused[np.argmax(matching)] = False
+    fp += preds_unused.sum()
+    return {"tp": tp, "fp": fp, "fn": fn}
+
+
+def kuzushiji_f1(sub, solution):
+    """
+    Calculates the competition metric.
+    Args:
+        sub: submissions, as a Pandas dataframe
+        solution: solution, as a Pandas dataframe
+    Returns:
+        f1 score
+    """
+    if not all(sub["image_id"].values == solution["image_id"].values):
+        raise ValueError("Submission image id codes don't match solution")
+
+    pool = multiprocessing.Pool()
+    results = pool.starmap(score_page, zip(sub["labels"].values, solution["labels"].values))
+    pool.close()
+    pool.join()
+
+    tp = sum([x["tp"] for x in results])
+    fp = sum([x["fp"] for x in results])
+    fn = sum([x["fn"] for x in results])
+
+    if (tp + fp) == 0 or (tp + fn) == 0:
+        return 0
+    precision = tp / (tp + fp)
+    recall = tp / (tp + fn)
+    if precision > 0 and recall > 0:
+        f1 = (2 * precision * recall) / (precision + recall)
+    else:
+        f1 = 0
+    return f1
```
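A worked check of `score_page` on the docstring example from grade.py (this assumes `score_page` as defined above is in scope; the hyphenated directory name means a plain `import` may need importlib in practice):

```python
# Center (3, 3) falls inside the box spanning (1, 1)-(11, 11) and the label matches:
result = score_page(preds="U+003F 3 3", truth="U+003F 1 1 10 10")
assert result == {"tp": 1, "fp": 0, "fn": 0}

# An extra prediction whose label matches no box becomes a false positive:
result = score_page(preds="U+003F 3 3 U+0041 5 5", truth="U+003F 1 1 10 10")
assert result == {"tp": 1, "fp": 1, "fn": 0}
# Aggregated, that second page alone gives precision 1/2 and recall 1/1, so
# F1 = 2 * 0.5 * 1.0 / (0.5 + 1.0) = 2/3.
```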