dslighting 1.7.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dslighting/__init__.py +1 -1
- dslighting/core/agent.py +78 -62
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
- mlebench/README.md +39 -0
- mlebench/__init__.py +0 -0
- mlebench/cli.py +221 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
- mlebench/competitions/AI4Code/grade.py +70 -0
- mlebench/competitions/AI4Code/prepare.py +84 -0
- mlebench/competitions/AI4Code/prepare_val.py +159 -0
- mlebench/competitions/__init__.py +0 -0
- mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
- mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
- mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
- mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
- mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
- mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
- mlebench/competitions/bike-sharing-demand/grade.py +55 -0
- mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
- mlebench/competitions/billion-word-imputation/grade.py +37 -0
- mlebench/competitions/billion-word-imputation/prepare.py +107 -0
- mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
- mlebench/competitions/bms-molecular-translation/grade.py +40 -0
- mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
- mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
- mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
- mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
- mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
- mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
- mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
- mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
- mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
- mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
- mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
- mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
- mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
- mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
- mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
- mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
- mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
- mlebench/competitions/dog-breed-identification/dogs.py +124 -0
- mlebench/competitions/dog-breed-identification/grade.py +42 -0
- mlebench/competitions/dog-breed-identification/prepare.py +55 -0
- mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
- mlebench/competitions/ethanol-concentration/grade.py +23 -0
- mlebench/competitions/ethanol-concentration/prepare.py +90 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
- mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
- mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
- mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
- mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
- mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
- mlebench/competitions/google-quest-challenge/classes.py +32 -0
- mlebench/competitions/google-quest-challenge/grade.py +45 -0
- mlebench/competitions/google-quest-challenge/prepare.py +58 -0
- mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
- mlebench/competitions/handwriting/grade.py +23 -0
- mlebench/competitions/handwriting/prepare.py +179 -0
- mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
- mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
- mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
- mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
- mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
- mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
- mlebench/competitions/ili/grade.py +60 -0
- mlebench/competitions/ili/prepare.py +99 -0
- mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
- mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
- mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
- mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
- mlebench/competitions/instant-gratification/__init__.py +0 -0
- mlebench/competitions/instant-gratification/grade.py +55 -0
- mlebench/competitions/instant-gratification/prepare.py +25 -0
- mlebench/competitions/instant_gratification/__init__.py +0 -0
- mlebench/competitions/instant_gratification/grade.py +55 -0
- mlebench/competitions/instant_gratification/prepare.py +25 -0
- mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
- mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
- mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
- mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
- mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
- mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
- mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
- mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
- mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
- mlebench/competitions/leaf-classification/classes.py +101 -0
- mlebench/competitions/leaf-classification/grade.py +44 -0
- mlebench/competitions/leaf-classification/prepare.py +60 -0
- mlebench/competitions/leaf-classification/prepare_val.py +116 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
- mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
- mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
- mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
- mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
- mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
- mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
- mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
- mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
- mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
- mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
- mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
- mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
- mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
- mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
- mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
- mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
- mlebench/competitions/my-custom-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-03/grade.py +107 -0
- mlebench/competitions/new-my-task-03/prepare.py +2 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
- mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
- mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
- mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
- mlebench/competitions/paddy-disease-classification/grade.py +35 -0
- mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
- mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
- mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
- mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
- mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
- mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
- mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e1/grade.py +52 -0
- mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e11/grade.py +55 -0
- mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e18/grade.py +39 -0
- mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
- mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
- mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e1/grade.py +52 -0
- mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
- mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e11/grade.py +55 -0
- mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
- mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
- mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
- mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
- mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
- mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
- mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
- mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
- mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
- mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
- mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
- mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
- mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
- mlebench/competitions/see-click-predict-fix/grade.py +66 -0
- mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
- mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
- mlebench/competitions/see_click_predict_fix/grade.py +66 -0
- mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
- mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
- mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
- mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
- mlebench/competitions/siim-covid19-detection/grade.py +194 -0
- mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
- mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
- mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
- mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
- mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
- mlebench/competitions/spaceship-titanic/grade.py +11 -0
- mlebench/competitions/spaceship-titanic/prepare.py +23 -0
- mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
- mlebench/competitions/spooky-author-identification/classes.py +1 -0
- mlebench/competitions/spooky-author-identification/grade.py +38 -0
- mlebench/competitions/spooky-author-identification/prepare.py +40 -0
- mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
- mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
- mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
- mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
- mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
- mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
- mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
- mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
- mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
- mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
- mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
- mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
- mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
- mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
- mlebench/competitions/utils.py +266 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
- mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
- mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
- mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
- mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
- mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
- mlebench/competitions/whale-categorization-playground/grade.py +41 -0
- mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
- mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
- mlebench/data.py +420 -0
- mlebench/grade.py +209 -0
- mlebench/grade_helpers.py +235 -0
- mlebench/metrics.py +75 -0
- mlebench/registry.py +332 -0
- mlebench/utils.py +346 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from tqdm.auto import tqdm
|
|
6
|
+
|
|
7
|
+
from mlebench.competitions.utils import get_ids_from_tf_records
|
|
8
|
+
from mlebench.utils import get_logger
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def prepare(raw: Path, public: Path, private: Path):
    """Carve a train/test split out of the raw training data and write it out.

    Every row of ``raw/train.csv`` whose ``image_name`` appears in one of the
    two held-out TFRecord files becomes a test row; all other rows stay in
    train.  The function then writes the CSVs, copies the DICOM and JPEG
    images, copies the TFRecord files under renumbered names, and finishes
    with a series of consistency assertions.

    Args:
        raw: Directory containing ``train.csv``, ``train/*.dcm``,
            ``jpeg/train/*.jpg`` and ``tfrecords/train*.tfrec``.
        public: Directory that receives ``train.csv``, the unlabeled
            ``test.csv``, ``sample_submission.csv``, the images and the
            TFRecord files.
        private: Directory that receives the labeled ``test.csv``.

    Raises:
        AssertionError: If any of the final consistency checks fails.
    """
    # Original author's note: there are 33126 train samples in 16 TFRecord
    # files of 2071 samples each, so two arbitrary TFRecord files are taken as
    # the test set (4142 samples, roughly 10% of the data).
    dev_mode = False
    # In dev mode only the first 10000 rows of train.csv are used.
    cutoff_index = 10000 if dev_mode else None

    old_train = pd.read_csv(raw / "train.csv")[:cutoff_index]

    test_tf_records = {"train00-2071.tfrec", "train06-2071.tfrec"}
    raw_tfrecords_dir = raw / "tfrecords"

    # Gather the IDs stored in the held-out TFRecord files.
    test_ids = []
    for tfrec in tqdm(test_tf_records, desc="Splitting test ids from train TFRecords"):
        test_ids.extend(get_ids_from_tf_records(raw_tfrecords_dir / tfrec))

    # Rows whose image lives in a held-out TFRecord form the test set.
    is_test = old_train["image_name"].isin(test_ids)
    new_train = old_train[~is_test]
    new_test = old_train[is_test]

    feature_columns = [
        "image_name",
        "patient_id",
        "sex",
        "age_approx",
        "anatom_site_general_challenge",
    ]
    submission_columns = ["image_name", "target"]

    new_test_without_labels = new_test.copy()[feature_columns]

    # The private answers use the same layout as the sample submission.
    new_test = new_test[submission_columns]

    # The sample submission predicts 0 for every test image.
    sample_submission = new_test.copy()
    sample_submission["target"] = 0

    # Write the CSVs.
    new_train.to_csv(public / "train.csv", index=False)
    new_test_without_labels.to_csv(public / "test.csv", index=False)
    sample_submission.to_csv(public / "sample_submission.csv", index=False)
    new_test.to_csv(private / "test.csv", index=False)

    # Copy the raw *train* images (DICOM and JPEG) into the public train/test
    # directories.  Original author's note: the files themselves do not
    # contain target metadata, so they can be moved around freely.
    raw_dcm_dir = raw / "train"
    raw_jpg_dir = raw / "jpeg" / "train"
    image_splits = (
        ("train", new_train, "Train Images"),
        ("test", new_test, "Test Images"),
    )
    for split_name, frame, progress_label in image_splits:
        dcm_dest = public / split_name
        jpg_dest = public / "jpeg" / split_name
        dcm_dest.mkdir(parents=True, exist_ok=True)
        jpg_dest.mkdir(parents=True, exist_ok=True)
        for image_name in tqdm(frame["image_name"], desc=progress_label, total=len(frame)):
            shutil.copy(raw_dcm_dir / f"{image_name}.dcm", dcm_dest / f"{image_name}.dcm")
            shutil.copy(raw_jpg_dir / f"{image_name}.jpg", jpg_dest / f"{image_name}.jpg")

    # Copy the TFRecord files, renumbering train and test files independently.
    tfrecords_dest_path = public / "tfrecords"
    tfrecords_dest_path.mkdir(parents=True, exist_ok=True)
    next_index = {"train": 0, "test": 0}
    for file in tqdm(
        sorted(raw_tfrecords_dir.glob("train*.tfrec")), desc="Copying TFRecord files"
    ):
        record_count = file.stem.split("-")[1]  # e.g. "2071" from "train00-2071"
        prefix = "test" if file.name in test_tf_records else "train"
        new_name = f"{prefix}{next_index[prefix]:02d}-{record_count}.tfrec"
        shutil.copy(file, tfrecords_dest_path / new_name)
        next_index[prefix] += 1

    logger.info("Running asserts...")

    # Copied image counts must match the CSV row counts.
    n_train = len(new_train)
    n_test = len(new_test)
    assert len(list(public.glob("train/*.dcm"))) == n_train, "Train DICOM count mismatch"
    assert len(list(public.glob("test/*.dcm"))) == n_test, "Test DICOM count mismatch"
    assert len(list(public.glob("jpeg/train/*.jpg"))) == n_train, "Train JPEG count mismatch"
    assert len(list(public.glob("jpeg/test/*.jpg"))) == n_test, "Test JPEG count mismatch"

    # No image may appear in both splits.
    assert set(new_train["image_name"]).isdisjoint(new_test["image_name"]), "Train/Test overlap"

    # The sample submission must cover exactly the test IDs.
    assert len(sample_submission) == n_test, "Sample submission length mismatch"
    submission_ids = sample_submission["image_name"].sort_values().reset_index(drop=True)
    expected_ids = new_test["image_name"].sort_values().reset_index(drop=True)
    assert submission_ids.equals(expected_ids), "Sample submission IDs mismatch"

    # Column layout checks.
    assert (
        new_test_without_labels.columns.tolist() == feature_columns
    ), "new_test_without_labels columns mismatch"
    assert new_train.columns.tolist() == feature_columns + [
        "diagnosis",
        "benign_malignant",
        "target",
    ], "new_train columns mismatch"
    assert new_test.columns.tolist() == ["image_name", "target"], "new_test columns mismatch"
    assert sample_submission.columns.tolist() == [
        "image_name",
        "target",
    ], "sample_submission columns mismatch"
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Set, List
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from tqdm.auto import tqdm
|
|
7
|
+
|
|
8
|
+
from mlebench.competitions.utils import get_ids_from_tf_records
|
|
9
|
+
from mlebench.utils import get_logger
|
|
10
|
+
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_ids_from_tfrec_set(raw_path: Path, tfrec_set: Set[str]) -> List[str]:
    """Collect the image IDs stored in the named TFRecord files.

    Every name in ``tfrec_set`` is resolved against ``raw_path / "tfrecords"``
    and passed to ``get_ids_from_tf_records``; the per-file results are
    concatenated in the iteration order of ``tfrec_set``.
    """
    tfrecords_dir = raw_path / "tfrecords"
    collected_ids = []
    progress = tqdm(tfrec_set, desc="Splitting IDs from TFRecords")
    for record_name in progress:
        collected_ids += get_ids_from_tf_records(tfrecords_dir / record_name)
    return collected_ids
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _create_dataset_files(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    source_tfrec_files: List[Path],
    test_tfrec_set: Set[str],
    raw_path: Path,
    public_path: Path,
    private_path: Path,
):
    """
    Write every artefact of one train/test split to disk.

    Produces the CSVs (public train/test/sample submission, private labelled
    test), copies the DICOM and JPEG images of both splits out of the raw
    *train* folders, copies the TFRecords under renumbered names, and finally
    asserts that the written files are consistent with the dataframes.
    """
    # Destination roots
    for directory in (public_path, private_path):
        directory.mkdir(parents=True, exist_ok=True)

    # Frames that get written out
    feature_columns = ["image_name", "patient_id", "sex", "age_approx", "anatom_site_general_challenge"]
    test_df_without_labels = test_df.copy()[feature_columns]
    test_df_with_labels = test_df[["image_name", "target"]]
    sample_submission = test_df_with_labels.copy()
    sample_submission["target"] = 0

    # CSV outputs
    train_df.to_csv(public_path / "train.csv", index=False)
    test_df_without_labels.to_csv(public_path / "test.csv", index=False)
    sample_submission.to_csv(public_path / "sample_submission.csv", index=False)
    test_df_with_labels.to_csv(private_path / "test.csv", index=False)

    # Images: both splits are sourced from the raw "train" folders
    raw_dicom_dir = raw_path / "train"
    raw_jpeg_dir = raw_path / "jpeg" / "train"
    for split_name, label, split_df in (("train", "Train", train_df), ("test", "Test", test_df)):
        dicom_dir = public_path / split_name
        jpeg_dir = public_path / "jpeg" / split_name
        dicom_dir.mkdir(parents=True, exist_ok=True)
        jpeg_dir.mkdir(parents=True, exist_ok=True)
        progress = tqdm(
            split_df["image_name"],
            desc=f"{label} Images -> {public_path.name}",
            total=len(split_df),
        )
        for image_name in progress:
            shutil.copy(raw_dicom_dir / f"{image_name}.dcm", dicom_dir / f"{image_name}.dcm")
            shutil.copy(raw_jpeg_dir / f"{image_name}.jpg", jpeg_dir / f"{image_name}.jpg")

    # TFRecords: renumber per split, keep the record-count suffix of the source name
    tfrecords_dest_path = public_path / "tfrecords"
    tfrecords_dest_path.mkdir(parents=True, exist_ok=True)
    next_index = {"train": 0, "test": 0}
    for file in tqdm(source_tfrec_files, desc=f"Copying TFRecords -> {public_path.name}"):
        record_count = file.stem.split("-")[1]
        prefix = "test" if file.name in test_tfrec_set else "train"
        shutil.copy(file, tfrecords_dest_path / f"{prefix}{next_index[prefix]:02d}-{record_count}.tfrec")
        next_index[prefix] += 1

    # Integrity checks
    logger.info(f"Running asserts for {public_path.name} split...")
    expected_file_counts = (
        ("train/*.dcm", train_df, "Train DICOM count mismatch"),
        ("test/*.dcm", test_df, "Test DICOM count mismatch"),
        ("jpeg/train/*.jpg", train_df, "Train JPEG count mismatch"),
        ("jpeg/test/*.jpg", test_df, "Test JPEG count mismatch"),
    )
    for pattern, frame, message in expected_file_counts:
        assert len(list(public_path.glob(pattern))) == len(frame), message

    shared_names = set(train_df["image_name"]).intersection(test_df["image_name"])
    assert not shared_names, "Train/Test overlap"
    assert len(sample_submission) == len(test_df), "Sample submission length mismatch"

    submission_ids = sample_submission["image_name"].sort_values().reset_index(drop=True)
    test_ids = test_df["image_name"].sort_values().reset_index(drop=True)
    assert submission_ids.equals(test_ids), "Sample submission IDs mismatch"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def prepare(raw: Path, public: Path, private: Path):
    """
    Build the original public/private split and an additional validation split.

    The first split holds out two fixed TFRecord files of the raw training data
    as the test set. The second split repeats the procedure on the *training*
    portion of the first split and is written to ``public_val`` / ``private_val``
    next to ``public`` / ``private``.
    """

    def _split_on_ids(frame, held_out_ids, kept_label, held_out_label):
        # Tags rows through a temporary "split" column on `frame` (left in place
        # on `frame` itself), then returns the two partitions without it.
        frame["split"] = kept_label
        frame.loc[frame["image_name"].isin(held_out_ids), "split"] = held_out_label
        kept = frame[frame["split"] == kept_label].drop(columns=["split"])
        held_out = frame[frame["split"] == held_out_label].drop(columns=["split"])
        return kept, held_out

    # Common setup
    DEV = False
    cutoff_index = 10000 if DEV else None
    all_data_df = pd.read_csv(raw / "train.csv")[:cutoff_index]
    all_raw_tfrec_files = sorted((raw / "tfrecords").glob("train*.tfrec"))

    # --- 1. Original Competition Split (train -> train/test) ---
    logger.info("--- Creating original public/private split ---")

    # Two arbitrary TFRecord files serve as the test set of the original split
    original_test_tfrec_set = {"train00-2071.tfrec", "train06-2071.tfrec"}
    original_test_ids = _get_ids_from_tfrec_set(raw, original_test_tfrec_set)

    final_train_df, final_test_df = _split_on_ids(all_data_df, original_test_ids, "train", "test")

    _create_dataset_files(
        train_df=final_train_df,
        test_df=final_test_df,
        source_tfrec_files=all_raw_tfrec_files,
        test_tfrec_set=original_test_tfrec_set,
        raw_path=raw,
        public_path=public,
        private_path=private,
    )

    # --- 2. New Validation Split (train -> train_val/test_val) ---
    logger.info("--- Creating new public_val/private_val split ---")

    public_val = public.parent / "public_val"
    private_val = private.parent / "private_val"

    # Only TFRecords that were not already held out above are candidates here;
    # two of them become the validation-test set.
    val_source_tfrec_files = [
        tfrec for tfrec in all_raw_tfrec_files if tfrec.name not in original_test_tfrec_set
    ]
    val_test_tfrec_set = {"train01-2071.tfrec", "train02-2071.tfrec"}
    val_test_ids = _get_ids_from_tfrec_set(raw, val_test_tfrec_set)

    # The source frame is the training frame of the first split
    train_val_df, test_val_df = _split_on_ids(final_train_df, val_test_ids, "train_val", "test_val")

    _create_dataset_files(
        train_df=train_val_df,
        test_df=test_val_df,
        source_tfrec_files=val_source_tfrec_files,
        test_tfrec_set=val_test_tfrec_set,
        raw_path=raw,
        public_path=public_val,
        private_path=private_val,
    )

    logger.info("--- Data preparation complete for all splits. ---")
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import DataFrame
|
|
3
|
+
|
|
4
|
+
from mlebench.grade_helpers import InvalidSubmissionError
|
|
5
|
+
|
|
6
|
+
from .notebook import calc_score as get_score
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def grade(submission: DataFrame, answers: DataFrame) -> float:
    """Score a submission as the mean of the per-trip scores.

    For every ``tripId`` found in ``answers`` the matching submission rows are
    scored with ``get_score`` and the per-trip results are averaged.

    Args:
        submission: Predicted positions; must contain ``tripId``,
            ``UnixTimeMillis``, ``LatitudeDegrees`` and ``LongitudeDegrees``.
        answers: Ground-truth positions with the same four columns.

    Returns:
        The mean per-trip score as a Python float.

    Raises:
        InvalidSubmissionError: If the submission lacks a required column, has
            a different total row count than ``answers``, or has a different
            row count than ``answers`` for some trip.
        AssertionError: If ``answers`` lacks a required column.
    """
    required_columns = ("tripId", "UnixTimeMillis", "LatitudeDegrees", "LongitudeDegrees")

    for column in required_columns:
        assert column in answers.columns, f"Answers DataFrame must have a '{column}' column."

    for column in required_columns:
        if column not in submission.columns:
            raise InvalidSubmissionError(f"Submission DataFrame must have a '{column}' column.")

    if len(submission) != len(answers):
        raise InvalidSubmissionError(
            "Submission and answers DataFrames must have the same number of rows."
        )

    scores = []

    for trip_id in answers["tripId"].unique():
        filtered_answers = answers[answers["tripId"] == trip_id]
        filtered_submission = submission[submission["tripId"] == trip_id]

        if len(filtered_submission) != len(filtered_answers):
            raise InvalidSubmissionError(
                "Submission and answers DataFrames must have the same number of rows for each 'tripId'. "
                f"Got {len(filtered_submission)} rows in the submission and {len(filtered_answers)} rows "
                f"in the answers for tripId {trip_id}."
            )

        # get_score pairs rows by position, so order both frames by timestamp
        # first; otherwise a submission listing the same predictions in another
        # row order would be compared against the wrong ground-truth rows. The
        # stable sort leaves already-matching row orders paired exactly as before.
        filtered_answers = filtered_answers.sort_values("UnixTimeMillis", kind="stable")
        filtered_submission = filtered_submission.sort_values("UnixTimeMillis", kind="stable")

        scores.append(get_score(filtered_submission, filtered_answers))

    return float(np.mean(scores))
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Helper functions from the Kaggle notebook `GSDC2 - baseline submission`.
|
|
3
|
+
Adapted from https://www.kaggle.com/code/saitodevel01/gsdc2-baseline-submission.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
# Ellipsoid semi-major axis; read as `a` in ECEF_to_BLH (presumably metres -- confirm).
WGS84_SEMI_MAJOR_AXIS = 6378137.0
|
|
11
|
+
# Ellipsoid semi-minor axis; read as `b` in ECEF_to_BLH (presumably metres -- confirm).
WGS84_SEMI_MINOR_AXIS = 6356752.314245
|
|
12
|
+
# Squared first eccentricity; read as `e2` in ECEF_to_BLH.
WGS84_SQUARED_FIRST_ECCENTRICITY = 6.69437999013e-3
|
|
13
|
+
# Squared second eccentricity; read as `e2_` in ECEF_to_BLH.
WGS84_SQUARED_SECOND_ECCENTRICITY = 6.73949674226e-3
|
|
14
|
+
|
|
15
|
+
# Sphere radius that haversine_distance multiplies the angular term by, so the
# returned distance carries this constant's unit (presumably metres -- confirm).
HAVERSINE_RADIUS = 6_371_000
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ECEF:
    """ECEF coordinates held as one array per Cartesian component.

    NOTE: ``to_numpy`` stacks the components along a new *leading* axis while
    ``from_numpy`` splits its input along the *last* axis, so the two methods
    are not inverses of each other in general.
    """

    # np.ndarray is the array type; np.array is only the factory function.
    x: np.ndarray
    y: np.ndarray
    z: np.ndarray

    def to_numpy(self):
        """Return x, y and z stacked, in that order, along a new axis 0."""
        return np.stack([self.x, self.y, self.z], axis=0)

    @staticmethod
    def from_numpy(pos):
        """Build an ``ECEF`` from an array whose last axis holds (x, y, z).

        The last axis is split into three equal parts and every length-1 axis
        of each part is squeezed away before it is stored.
        """
        x, y, z = (np.squeeze(part) for part in np.split(pos, 3, axis=-1))
        return ECEF(x=x, y=y, z=z)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class BLH:
    """Latitude / longitude / height triple.

    Within this module ``lat`` and ``lng`` hold radians: ``ECEF_to_BLH`` fills
    them from ``np.arctan2`` and ``haversine_distance`` feeds them straight
    into ``np.sin`` / ``np.cos``.
    """

    # np.ndarray is the array type; np.array is only the factory function.
    lat: np.ndarray
    lng: np.ndarray
    # pandas_haversine_distance passes a plain scalar 0 here.
    hgt: np.ndarray
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def ECEF_to_BLH(ecef):
    """Convert ECEF coordinates to latitude, longitude and height.

    Uses a closed-form (non-iterative) conversion built on the module's WGS84
    constants. ``ecef`` only needs ``x``, ``y`` and ``z`` attributes. The
    returned ``BLH`` carries latitude and longitude as ``np.arctan2`` results,
    i.e. in radians.
    """
    semi_major = WGS84_SEMI_MAJOR_AXIS
    semi_minor = WGS84_SEMI_MINOR_AXIS
    ecc_sq = WGS84_SQUARED_FIRST_ECCENTRICITY
    second_ecc_sq = WGS84_SQUARED_SECOND_ECCENTRICITY

    # Distance from the z axis
    planar_radius = np.sqrt(ecef.x**2 + ecef.y**2)
    # Auxiliary angle that feeds the latitude expression
    aux_angle = np.arctan2(ecef.z * (semi_major / semi_minor), planar_radius)

    lat_numerator = ecef.z + (second_ecc_sq * semi_minor) * np.sin(aux_angle) ** 3
    lat_denominator = planar_radius - (ecc_sq * semi_major) * np.cos(aux_angle) ** 3
    latitude = np.arctan2(lat_numerator, lat_denominator)
    longitude = np.arctan2(ecef.y, ecef.x)

    curvature_radius = semi_major / np.sqrt(1 - ecc_sq * np.sin(latitude) ** 2)
    height = (planar_radius / np.cos(latitude)) - curvature_radius

    return BLH(lat=latitude, lng=longitude, hgt=height)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def haversine_distance(blh_1, blh_2):
    """Return the haversine distance between two sets of positions.

    Args:
        blh_1: Object with ``lat`` and ``lng`` in radians (they are passed
            directly to ``np.sin`` / ``np.cos``).
        blh_2: Same as ``blh_1``; combined element-wise with it.

    Returns:
        ``2 * HAVERSINE_RADIUS * arcsin(sqrt(a))`` for the haversine term
        ``a``, element-wise for array inputs.
    """
    dlat = blh_2.lat - blh_1.lat
    dlng = blh_2.lng - blh_1.lng
    a = np.sin(dlat / 2) ** 2 + np.cos(blh_1.lat) * np.cos(blh_2.lat) * np.sin(dlng / 2) ** 2
    # Mathematically `a` lies in [0, 1], but rounding can push it marginally
    # outside that range (e.g. for antipodal points), which would make
    # sqrt/arcsin return NaN. Clipping leaves every in-range value untouched.
    a = np.clip(a, 0.0, 1.0)
    dist = 2 * HAVERSINE_RADIUS * np.arcsin(np.sqrt(a))

    return dist
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def pandas_haversine_distance(df1, df2):
    """Row-wise haversine distance between two position frames.

    Both frames must provide ``LatitudeDegrees`` and ``LongitudeDegrees``
    columns; the values are converted from degrees to radians and rows are
    paired by position. Height is fixed to 0 for both sides.
    """

    def _as_blh(frame):
        # Degrees -> radians, wrapped in a BLH with zero height.
        lat_rad = np.deg2rad(frame["LatitudeDegrees"].to_numpy())
        lng_rad = np.deg2rad(frame["LongitudeDegrees"].to_numpy())
        return BLH(lat=lat_rad, lng=lng_rad, hgt=0)

    first = _as_blh(df1)
    second = _as_blh(df2)
    return haversine_distance(first, second)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def calc_score(pred_df, gt_df):
    """Return the mean of the 50th and 95th percentile of the row-wise
    haversine distances between predictions and ground truth."""
    distances = pandas_haversine_distance(pred_df, gt_df)
    median_error = np.quantile(distances, 0.50)
    tail_error = np.quantile(distances, 0.95)
    return np.mean([median_error, tail_error])
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from sklearn.model_selection import train_test_split
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_date(s: str) -> str:
    """Extract the leading date of a ``YYYY-MM-DD-X`` string as ``YYYY-MM-DD``.

    ``X`` may be any trailing text (or absent). Month and day are left-padded
    with zeros to two characters. An ``AssertionError`` is raised when fewer
    than three ``-``-separated parts exist or when year, month or day is not
    made up of digits only.
    """
    parts = s.split("-")
    n_parts = len(parts)
    assert (
        n_parts >= 3
    ), f"Expected the string to have at least 3 parts separated by `-`. Got {n_parts} parts."

    year, month, day = parts[:3]

    # str.split only yields strings, so checking for digits is sufficient.
    for label, value in (("year", year), ("month", month), ("day", day)):
        assert value.isdigit(), f"Expected the {label} to be a string of digits. Got {value} instead."

    return "-".join((year, month.zfill(2), day.zfill(2)))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def prepare(raw: Path, public: Path, private: Path) -> None:
    """Split the raw training drives into new train and test sets by date.

    Drive folders under ``raw / "train"`` are grouped by the date returned by
    ``get_date`` and 10% of the dates are held out as the test set, so all
    drives recorded on one date land in the same split.

    Outputs:
        * ``public / "train"``: copies of the training drives, ground truth included.
        * ``public / "test"``: copies of the test drives with every
          ``ground_truth.csv`` removed.
        * ``public / "metadata"``: copy of ``raw / "metadata"``.
        * ``public / "sample_submission.csv"``: the test rows with constant
          placeholder coordinates.
        * ``private / "test.csv"``: concatenated ground truth of the test drives.

    Raises:
        AssertionError: If any consistency check on the split or on the written
            files fails.
    """
    old_train_ids = sorted(folder.name for folder in (raw / "train").glob("*") if folder.is_dir())
    dates = sorted({get_date(s) for s in old_train_ids})
    new_train_dates, new_test_dates = train_test_split(dates, test_size=0.1, random_state=0)

    assert (
        len(new_train_dates) >= 1
    ), "Expected the new train set to have at least one date. Got 0 dates."

    assert (
        len(new_test_dates) >= 1
    ), "Expected the new test set to have at least one date. Got 0 dates."

    # Sets give constant-time membership tests inside the comprehensions below.
    train_date_set = set(new_train_dates)
    test_date_set = set(new_test_dates)
    new_train_ids = sorted(i for i in old_train_ids if get_date(i) in train_date_set)
    new_test_ids = sorted(i for i in old_train_ids if get_date(i) in test_date_set)

    overlap = set(new_train_ids).intersection(new_test_ids)
    assert not overlap, (
        f"Expected the new train and test instances to be disjoint. Got an intersection of "
        f"{overlap}."
    )

    assert len(new_train_ids) + len(new_test_ids) == len(old_train_ids), (
        f"Expected the number of new train and test instances to sum up to the number of old train "
        f"instances. Got {len(new_train_ids)} new train instances and {len(new_test_ids)} new test "
        f"instances which sum to {len(new_train_ids) + len(new_test_ids)} instead of "
        f"{len(old_train_ids)}."
    )

    for new_train_id in new_train_ids:
        shutil.copytree(
            src=raw / "train" / new_train_id,
            dst=public / "train" / new_train_id,
        )

    for new_test_id in new_test_ids:
        shutil.copytree(
            src=raw / "train" / new_test_id,
            dst=public / "test" / new_test_id,
        )

    # Construct test set by concatenating all ground truth csvs for the test journeys
    test_id_set = set(new_test_ids)
    ground_truth_paths = sorted((public / "test").rglob("ground_truth.csv"))
    dfs = []

    for fpath in ground_truth_paths:
        # The assignments below take the drive from the grandparent folder name
        # and the phone from the parent folder name of each ground-truth file.
        drive_id = fpath.parent.parent.name
        phone_id = fpath.parent.name

        assert (
            drive_id in test_id_set
        ), f"Expected the drive {drive_id} to be one of the new test instances. Got {drive_id} instead."

        df = pd.read_csv(fpath)
        df["tripId"] = f"{drive_id}-{phone_id}"
        dfs.append(df[["tripId", "UnixTimeMillis", "LatitudeDegrees", "LongitudeDegrees"]])

    new_test = pd.concat(dfs, ignore_index=True)
    private.mkdir(parents=True, exist_ok=True)  # no-op when the caller already created it
    new_test.to_csv(private / "test.csv", index=False)

    # Don't include ground truth in public test data. The paths were collected
    # up front so files are not deleted while the tree is still being walked.
    for fpath in ground_truth_paths:
        fpath.unlink()

    shutil.copytree(
        src=raw / "metadata",
        dst=public / "metadata",
    )

    # tripId is "<drive>-<phone>": dropping the last "-" field recovers the drive.
    actual_journey_ids = {"-".join(s.split("-")[:-1]) for s in new_test["tripId"]}

    assert len(actual_journey_ids) == len(new_test_ids), (
        f"Expected the new test instances to cover {len(new_test_ids)} unique journey IDs. Got "
        f"{len(actual_journey_ids)} unique journey IDs."
    )

    sample_submission = new_test.copy()
    sample_submission.loc[:, "LatitudeDegrees"] = 37.904611315634504
    sample_submission.loc[:, "LongitudeDegrees"] = -86.48107806249548

    assert len(sample_submission) == len(new_test), (
        f"Expected the sample submission to have the same number of instances as the new test "
        f"instances. Got {len(sample_submission)} instances in the sample submission and "
        f"{len(new_test)} new test instances."
    )

    sample_submission.to_csv(public / "sample_submission.csv", index=False)

    assert sorted(public.glob("train/*")) == sorted(
        {public / "train" / drive_id for drive_id in new_train_ids}
    ), "Expected the public train directory to contain the new train instances."

    assert sorted(public.glob("test/*")) == sorted(
        {public / "test" / drive_id for drive_id in new_test_ids}
    ), "Expected the public test directory to contain the new test instances."

    assert (
        len(list((public / "test").rglob("ground_truth.csv"))) == 0
    ), "Expected the public test directory to not contain any ground truth files."

    assert len(list((public / "train").rglob("ground_truth.csv"))) >= len(new_train_ids), (
        "Expected the public train directory to contain at least one ground truth file per new "
        "train instance."
    )
|