julearn 0.3.4.dev19__tar.gz → 0.3.4.dev34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/PKG-INFO +1 -1
- julearn-0.3.4.dev34/docs/changes/newsfragments/274.enh +1 -0
- julearn-0.3.4.dev34/docs/changes/newsfragments/293.enh +1 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/configuration.rst +7 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/02_inspection/plot_preprocess.py +1 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/02_inspection/run_binary_inspect_folds.py +0 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/04_confounds/run_return_confounds.py +1 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_model_inspection_docs.py +1 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/_version.py +2 -2
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/api.py +25 -13
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/config.py +1 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/_pipeline.py +4 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/tests/test_pipeline.py +24 -2
- julearn-0.3.4.dev34/julearn/model_selection/final_model_cv.py +96 -0
- julearn-0.3.4.dev34/julearn/model_selection/tests/test_final_model_cv.py +53 -0
- julearn-0.3.4.dev34/julearn/model_selection/utils.py +55 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/pipeline_creator.py +39 -5
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/tests/test_pipeline_creator.py +50 -3
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/tests/test_api.py +7 -5
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/ju_column_transformer.py +4 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/_cv.py +7 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/typing.py +8 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn.egg-info/PKG-INFO +1 -1
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn.egg-info/SOURCES.txt +5 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/ISSUE_TEMPLATE/documentation_request.yaml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/check-stale.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/ci-docs.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/ci.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/docs-preview.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/docs.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/lint.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.github/workflows/pypi.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.gitignore +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/.pre-commit-config.yaml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/AUTHORS.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/LICENSE.md +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/README.md +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/codecov.yml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/Makefile +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/_static/css/custom.css +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/_static/js/custom.js +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/_templates/class.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/_templates/function.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/_templates/function_warning.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/_templates/versions.html +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/base.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/index.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/inspect.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/main.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/model_selection.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/models.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/pipeline.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/prepare.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/scoring.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/stats.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/transformers.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/utils.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/api/viz.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/available_pipeline_steps.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/changes/contributors.inc +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/changes/newsfragments/.gitignore +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/changes/newsfragments/268.bugfix +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/changes/newsfragments/270.enh +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/changes/newsfragments/271.enh +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/conf.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/contributing.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/examples.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/faq.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/getting_started.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/corrected_ttest.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/final_estimator.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/iris_X.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/iris_df.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/iris_y.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_calm.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_confbias.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_cv.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_generalization.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_it.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_ml.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/julearn_logo_mlit.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/multiple_scorers_run_cv.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/plot_scores.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/scores_run_cv.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/scores_run_cv_splitter.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/images/scores_run_cv_train.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/index.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/links.inc +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/maintaining.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/redirect.html +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/CBPM.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/confound_removal.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/cross_validation_splitter.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/hyperparameter_tuning.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/index.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/model_inspect.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/stacked_models.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/selected_deeper_topics/target_transformers.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/sphinxext/gh_substitutions.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/what_really_need_know/cross_validation.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/what_really_need_know/data.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/what_really_need_know/index.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/what_really_need_know/model_comparison.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/what_really_need_know/model_evaluation.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/what_really_need_know/pipeline.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/docs/whats_new.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/plot_cm_acc_multiclass.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/plot_example_regression.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/plot_stratified_kfold_reg.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/run_combine_pandas.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/run_grouped_cv.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/00_starting/run_simple_binary_classification.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/01_model_comparison/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/01_model_comparison/plot_simple_model_comparison.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/02_inspection/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/02_inspection/plot_groupcv_inspect_svm.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/02_inspection/plot_inspect_random_forest.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/run_apply_to_target.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/run_example_pca_featsets.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/run_hyperparameter_multiple_grids.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/run_hyperparameter_tuning.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/run_hyperparameter_tuning_bayessearch.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/03_complex_models/run_stacked_models.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/04_confounds/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/04_confounds/plot_confound_removal_classification.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/05_customization/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/05_customization/run_custom_scorers_regression.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_cbpm_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_confound_removal_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_cv_splitters_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_data_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_hyperparameters_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_model_comparison_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_model_evaluation_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_pipeline_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_stacked_models_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/99_docs/run_target_transformer_docs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/README.rst +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/XX_disabled/dis_run_n_jobs.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/XX_disabled/dis_run_target_confound_removal.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/ignore_words.txt +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/base/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/base/column_types.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/base/estimators.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/base/tests/test_base_estimators.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/base/tests/test_column_types.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/conftest.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/external/optuna_searchcv.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/_cv.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/_preprocess.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/inspector.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/tests/test_cv.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/tests/test_inspector.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/inspect/tests/test_preprocess.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/_optuna_searcher.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/_skopt_searcher.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/available_searchers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/continuous_stratified_kfold.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/stratified_bootstrap.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/tests/test_available_searchers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/tests/test_continous_stratified_kfold.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/tests/test_optuna_searcher.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/tests/test_skopt_searcher.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/model_selection/tests/test_stratified_bootstrap.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/models/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/models/available_models.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/models/dynamic.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/models/tests/test_available_models.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/models/tests/test_dynamic.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/models/tests/test_models.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/merger.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/target_pipeline.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/target_pipeline_creator.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/tests/test_merger.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/tests/test_target_pipeline.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/pipeline/tests/test_target_pipeline_creator.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/prepare.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/scoring/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/scoring/available_scorers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/scoring/metrics.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/scoring/tests/test_available_scorers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/scoring/tests/test_metrics.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/stats/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/stats/corrected_ttest.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/stats/tests/test_corrected_ttest.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/tests/test_config.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/tests/test_prepare.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/available_transformers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/cbpm.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/confound_remover.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/change_column_types.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/drop_columns.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/filter_columns.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/set_column_types.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/tests/test_change_column_types.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/tests/test_drop_columns.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/tests/test_filter_columns.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/dataframe/tests/test_set_column_types.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/available_target_transformers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/ju_target_transformer.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/ju_transformed_target_model.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/target_confound_remover.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/tests/test_available_target_transformers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/tests/test_ju_target_transformer.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/tests/test_ju_transformed_target_model.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/target/tests/test_target_confound_remover.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/tests/test_available_transformers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/tests/test_cbpm.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/tests/test_confounds.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/transformers/tests/test_jucolumntransformers.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/checks.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/logging.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/testing.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/tests/test_logging.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/tests/test_version.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/utils/versions.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/viz/__init__.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/viz/_scores.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn/viz/res/julearn_logo_generalization.png +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn.egg-info/dependency_links.txt +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn.egg-info/requires.txt +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/julearn.egg-info/top_level.txt +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/pyproject.toml +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/setup.cfg +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/setup.py +0 -0
- {julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: julearn
|
|
3
|
-
Version: 0.3.4.
|
|
3
|
+
Version: 0.3.4.dev34
|
|
4
4
|
Summary: Juelich Machine Learning Library
|
|
5
5
|
Author-email: Fede Raimondo <f.raimondo@fz-juelich.de>, Sami Hamdan <s.hamdan@fz-juelich.de>
|
|
6
6
|
Maintainer-email: Sami Hamdan <s.hamdan@fz-juelich.de>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Optimise wrapping of steps and models in the pipeline only when a subset of features is being used, by `Fede Raimondo`_
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Change the internal logic of :func:`.run_cross_validation` to optimise joblib calls by `Fede Raimondo`_
|
|
@@ -47,3 +47,10 @@ Here you can find the comprehensive list of flags that can be set:
|
|
|
47
47
|
- | Disable printing the list of expanded column names in ``X_types``.
|
|
48
48
|
| If set to ``True``, the list of types of X will not be printed.
|
|
49
49
|
- The user will not see the expanded ``X_types`` column names.
|
|
50
|
+
* - ``enable_parallel_column_transformers``
|
|
51
|
+
- | This flag enables parallel execution of column transformers by
|
|
52
|
+
| reverting to the default behaviour of scikit-learn
|
|
53
|
+
| (instead of using ``n_jobs=1``)
|
|
54
|
+
| If set to ``True``, the parameter will be set back to None.
|
|
55
|
+
- | Column transformers will be applied in parallel, using more resources.
|
|
56
|
+
| than expected.
|
|
@@ -121,7 +121,7 @@ X_after_zscore = preprocess(model, X=X, data=df, until="zscore")
|
|
|
121
121
|
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
|
|
122
122
|
sns.scatterplot(x=X[0], y=X[1], data=df, ax=axes[0])
|
|
123
123
|
axes[0].set_title("Raw features")
|
|
124
|
-
sns.scatterplot(x="
|
|
124
|
+
sns.scatterplot(x="pca0", y="pca1", data=X_after_pca, ax=axes[1])
|
|
125
125
|
axes[1].set_title("PCA components")
|
|
126
126
|
|
|
127
127
|
###############################################################################
|
{julearn-0.3.4.dev19 → julearn-0.3.4.dev34}/examples/02_inspection/run_binary_inspect_folds.py
RENAMED
|
@@ -44,7 +44,6 @@ creator = PipelineCreator(problem_type="classification")
|
|
|
44
44
|
creator.add("zscore")
|
|
45
45
|
creator.add("svm")
|
|
46
46
|
|
|
47
|
-
cv = ShuffleSplit(n_splits=5, train_size=0.7, random_state=200)
|
|
48
47
|
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=4, random_state=200)
|
|
49
48
|
|
|
50
49
|
scores, model, inspector = run_cross_validation(
|
|
@@ -140,7 +140,7 @@ c_values = []
|
|
|
140
140
|
for fold_inspector in inspector.folds:
|
|
141
141
|
fold_model = fold_inspector.model
|
|
142
142
|
c_values.append(
|
|
143
|
-
fold_model.get_fitted_params()["
|
|
143
|
+
fold_model.get_fitted_params()["svm__C"]
|
|
144
144
|
)
|
|
145
145
|
|
|
146
146
|
##############################################################################
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '0.3.4.
|
|
16
|
-
__version_tuple__ = version_tuple = (0, 3, 4, '
|
|
15
|
+
__version__ = version = '0.3.4.dev34'
|
|
16
|
+
__version_tuple__ = version_tuple = (0, 3, 4, 'dev34')
|
|
@@ -11,13 +11,13 @@ import pandas as pd
|
|
|
11
11
|
import sklearn
|
|
12
12
|
from sklearn.base import BaseEstimator
|
|
13
13
|
from sklearn.model_selection import (
|
|
14
|
-
check_cv,
|
|
15
14
|
cross_validate,
|
|
16
15
|
)
|
|
17
16
|
from sklearn.model_selection._search import BaseSearchCV
|
|
18
17
|
from sklearn.pipeline import Pipeline
|
|
19
18
|
|
|
20
19
|
from .inspect import Inspector
|
|
20
|
+
from .model_selection.utils import check_cv
|
|
21
21
|
from .pipeline import PipelineCreator
|
|
22
22
|
from .pipeline.merger import merge_pipelines
|
|
23
23
|
from .prepare import check_consistency, prepare_input_data
|
|
@@ -541,16 +541,19 @@ def run_cross_validation(
|
|
|
541
541
|
seed=seed,
|
|
542
542
|
)
|
|
543
543
|
|
|
544
|
+
include_final_model = return_estimator in ["final", "all"]
|
|
545
|
+
cv_return_estimator = return_estimator in ["cv", "all", "final"]
|
|
546
|
+
|
|
544
547
|
# Prepare cross validation
|
|
545
548
|
cv_outer = check_cv(
|
|
546
549
|
cv, # type: ignore
|
|
547
550
|
classifier=problem_type == "classification",
|
|
551
|
+
include_final_model=include_final_model,
|
|
548
552
|
)
|
|
549
553
|
logger.info(f"Using outer CV scheme {cv_outer}")
|
|
550
554
|
|
|
551
555
|
check_consistency(df_y, cv, groups, problem_type) # type: ignore
|
|
552
556
|
|
|
553
|
-
cv_return_estimator = return_estimator in ["cv", "all"]
|
|
554
557
|
scoring = check_scoring(
|
|
555
558
|
pipeline, # type: ignore
|
|
556
559
|
scoring,
|
|
@@ -583,18 +586,28 @@ def run_cross_validation(
|
|
|
583
586
|
**_sklearn_deprec_fit_params,
|
|
584
587
|
)
|
|
585
588
|
|
|
586
|
-
n_repeats = getattr(cv_outer, "n_repeats", 1)
|
|
587
|
-
n_folds = len(scores["fit_time"]) // n_repeats
|
|
588
|
-
|
|
589
|
-
repeats = np.repeat(np.arange(n_repeats), n_folds)
|
|
590
|
-
folds = np.tile(np.arange(n_folds), n_repeats)
|
|
591
|
-
|
|
592
589
|
fold_sizes = np.array(
|
|
593
590
|
[
|
|
594
591
|
list(map(len, x))
|
|
595
592
|
for x in cv_outer.split(df_X, df_y, groups=df_groups)
|
|
596
593
|
]
|
|
597
594
|
)
|
|
595
|
+
|
|
596
|
+
if include_final_model:
|
|
597
|
+
# If we include the final model, we need to remove the last item in
|
|
598
|
+
# the scores as this is the final model
|
|
599
|
+
pipeline = scores["estimator"][-1]
|
|
600
|
+
if return_estimator == "final":
|
|
601
|
+
scores.pop("estimator")
|
|
602
|
+
scores = {k: v[:-1] for k, v in scores.items()}
|
|
603
|
+
fold_sizes = fold_sizes[:-1]
|
|
604
|
+
|
|
605
|
+
n_repeats = getattr(cv_outer, "n_repeats", 1)
|
|
606
|
+
n_folds = len(scores["fit_time"]) // n_repeats
|
|
607
|
+
|
|
608
|
+
repeats = np.repeat(np.arange(n_repeats), n_folds)
|
|
609
|
+
folds = np.tile(np.arange(n_folds), n_repeats)
|
|
610
|
+
|
|
598
611
|
scores["n_train"] = fold_sizes[:, 0]
|
|
599
612
|
scores["n_test"] = fold_sizes[:, 1]
|
|
600
613
|
scores["repeat"] = repeats
|
|
@@ -602,11 +615,10 @@ def run_cross_validation(
|
|
|
602
615
|
scores["cv_mdsum"] = cv_mdsum
|
|
603
616
|
|
|
604
617
|
scores_df = pd.DataFrame(scores)
|
|
618
|
+
|
|
605
619
|
out = scores_df
|
|
606
|
-
if
|
|
607
|
-
|
|
608
|
-
pipeline.fit(df_X, df_y, **fit_params)
|
|
609
|
-
out = scores_df, pipeline
|
|
620
|
+
if include_final_model:
|
|
621
|
+
out = out, pipeline
|
|
610
622
|
|
|
611
623
|
if return_inspector:
|
|
612
624
|
inspector = Inspector(
|
|
@@ -615,7 +627,7 @@ def run_cross_validation(
|
|
|
615
627
|
X=df_X,
|
|
616
628
|
y=df_y,
|
|
617
629
|
groups=df_groups,
|
|
618
|
-
cv=cv_outer,
|
|
630
|
+
cv=cv_outer.cv if include_final_model else cv_outer,
|
|
619
631
|
)
|
|
620
632
|
if isinstance(out, tuple):
|
|
621
633
|
out = (*out, inspector)
|
|
@@ -14,6 +14,7 @@ _global_config["disable_x_check"] = False
|
|
|
14
14
|
_global_config["disable_xtypes_check"] = False
|
|
15
15
|
_global_config["disable_x_verbose"] = False
|
|
16
16
|
_global_config["disable_xtypes_verbose"] = False
|
|
17
|
+
_global_config["enable_parallel_column_transformers"] = False
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
def set_config(key: str, value: Any) -> None:
|
|
@@ -63,11 +63,14 @@ class _EstimatorInspector:
|
|
|
63
63
|
),
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
private_params = {
|
|
67
67
|
param: val
|
|
68
68
|
for param, val in all_params.items()
|
|
69
69
|
if re.match(r"^[a-zA-Z].*[a-zA-Z0-9]*_$", param)
|
|
70
70
|
}
|
|
71
|
+
out = self.get_params()
|
|
72
|
+
out.update(private_params)
|
|
73
|
+
return out
|
|
71
74
|
|
|
72
75
|
@property
|
|
73
76
|
def estimator(self):
|
|
@@ -152,14 +152,25 @@ def test_steps(
|
|
|
152
152
|
@pytest.mark.parametrize(
|
|
153
153
|
"est,fitted_params",
|
|
154
154
|
[
|
|
155
|
-
[
|
|
155
|
+
[
|
|
156
|
+
MockTestEst(),
|
|
157
|
+
{"hype_0": 0, "hype_1": 1, "param_0_": 0, "param_1_": 1},
|
|
158
|
+
],
|
|
156
159
|
[
|
|
157
160
|
JuColumnTransformer(
|
|
158
161
|
"test",
|
|
159
162
|
MockTestEst(), # type: ignore
|
|
160
163
|
"continuous",
|
|
161
164
|
),
|
|
162
|
-
{
|
|
165
|
+
{
|
|
166
|
+
"hype_0": 0,
|
|
167
|
+
"hype_1": 1,
|
|
168
|
+
"param_0_": 0,
|
|
169
|
+
"param_1_": 1,
|
|
170
|
+
"needed_types": None,
|
|
171
|
+
"row_select_col_type": None,
|
|
172
|
+
"row_select_vals": None,
|
|
173
|
+
},
|
|
163
174
|
],
|
|
164
175
|
],
|
|
165
176
|
)
|
|
@@ -183,6 +194,9 @@ def test_inspect_estimator(
|
|
|
183
194
|
assert est.get_params() == inspector.get_params()
|
|
184
195
|
inspect_params = inspector.get_fitted_params()
|
|
185
196
|
inspect_params.pop("column_transformer_", None)
|
|
197
|
+
inspect_params.pop("apply_to", None)
|
|
198
|
+
inspect_params.pop("transformer", None)
|
|
199
|
+
inspect_params.pop("name", None)
|
|
186
200
|
assert fitted_params == inspect_params
|
|
187
201
|
|
|
188
202
|
|
|
@@ -196,8 +210,14 @@ def test_inspect_pipeline(df_iris: "pd.DataFrame") -> None:
|
|
|
196
210
|
|
|
197
211
|
"""
|
|
198
212
|
expected_fitted_params = {
|
|
213
|
+
"jucolumntransformer__hype_0": 0,
|
|
214
|
+
"jucolumntransformer__hype_1": 1,
|
|
199
215
|
"jucolumntransformer__param_0_": 0,
|
|
200
216
|
"jucolumntransformer__param_1_": 1,
|
|
217
|
+
"jucolumntransformer__needed_types": None,
|
|
218
|
+
"jucolumntransformer__row_select_col_type": None,
|
|
219
|
+
"jucolumntransformer__row_select_vals": None,
|
|
220
|
+
"jucolumntransformer__name": "test",
|
|
201
221
|
}
|
|
202
222
|
|
|
203
223
|
pipe = (
|
|
@@ -216,6 +236,8 @@ def test_inspect_pipeline(df_iris: "pd.DataFrame") -> None:
|
|
|
216
236
|
inspector = PipelineInspector(pipe)
|
|
217
237
|
inspect_params = inspector.get_fitted_params()
|
|
218
238
|
inspect_params.pop("jucolumntransformer__column_transformer_", None)
|
|
239
|
+
inspect_params.pop("jucolumntransformer__transformer", None)
|
|
240
|
+
inspect_params.pop("jucolumntransformer__apply_to", None)
|
|
219
241
|
inspect_params = {
|
|
220
242
|
key: val
|
|
221
243
|
for key, val in inspect_params.items()
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""CV Wrapper that includes a fold with all the data."""
|
|
2
|
+
|
|
3
|
+
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de>
|
|
4
|
+
# License: AGPL
|
|
5
|
+
|
|
6
|
+
from typing import TYPE_CHECKING, Generator, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from sklearn.model_selection import BaseCrossValidator
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class _JulearnFinalModelCV:
|
|
16
|
+
"""Final model cross-validation iterator.
|
|
17
|
+
|
|
18
|
+
Wraps any CV iterator to provide an extra iteration with the full dataset.
|
|
19
|
+
|
|
20
|
+
Parameters
|
|
21
|
+
----------
|
|
22
|
+
cv : BaseCrossValidator
|
|
23
|
+
The cross-validation iterator to wrap.
|
|
24
|
+
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def __init__(self, cv: "BaseCrossValidator") -> None:
|
|
28
|
+
self.cv = cv
|
|
29
|
+
if hasattr(cv, "n_repeats"):
|
|
30
|
+
self.n_repeats = cv.n_repeats
|
|
31
|
+
|
|
32
|
+
def split(
|
|
33
|
+
self,
|
|
34
|
+
X: np.ndarray, # noqa: N803
|
|
35
|
+
y: np.ndarray,
|
|
36
|
+
groups: Optional[np.ndarray] = None,
|
|
37
|
+
) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]:
|
|
38
|
+
"""Generate indices to split data into training and test set.
|
|
39
|
+
|
|
40
|
+
Parameters
|
|
41
|
+
----------
|
|
42
|
+
X : array-like of shape (n_samples, n_features)
|
|
43
|
+
Training data, where n_samples is the number of samples
|
|
44
|
+
and n_features is the number of features.
|
|
45
|
+
Note that providing ``y`` is sufficient to generate the splits and
|
|
46
|
+
hence ``np.zeros(n_samples)`` may be used as a placeholder for
|
|
47
|
+
``X`` instead of actual training data.
|
|
48
|
+
|
|
49
|
+
y : array-like of shape (n_samples,), default=None
|
|
50
|
+
The target variable for supervised learning problems.
|
|
51
|
+
|
|
52
|
+
groups : array-like of shape (n_samples,), default=None
|
|
53
|
+
Group labels for the samples used while splitting the dataset into
|
|
54
|
+
train/test set.
|
|
55
|
+
|
|
56
|
+
Yields
|
|
57
|
+
------
|
|
58
|
+
train : ndarray
|
|
59
|
+
The training set indices for that split.
|
|
60
|
+
test : ndarray
|
|
61
|
+
The testing set indices for that split.
|
|
62
|
+
|
|
63
|
+
Notes
|
|
64
|
+
-----
|
|
65
|
+
This CV Splitter will generate an extra fold where the full dataset is
|
|
66
|
+
used for training and testing. This is useful to train the final model
|
|
67
|
+
on the full dataset at the same time as the cross-validation,
|
|
68
|
+
profitting for joblib calls.
|
|
69
|
+
|
|
70
|
+
"""
|
|
71
|
+
yield from self.cv.split(X, y, groups)
|
|
72
|
+
all_inds = np.arange(len(X))
|
|
73
|
+
# For the last fold, train on all samples and return only 2 for testing
|
|
74
|
+
yield all_inds, all_inds[:2]
|
|
75
|
+
|
|
76
|
+
def get_n_splits(self) -> int:
|
|
77
|
+
"""Get the number of splits.
|
|
78
|
+
|
|
79
|
+
Returns
|
|
80
|
+
-------
|
|
81
|
+
int
|
|
82
|
+
The number of splits.
|
|
83
|
+
|
|
84
|
+
"""
|
|
85
|
+
return self.cv.get_n_splits() + 1
|
|
86
|
+
|
|
87
|
+
def __repr__(self) -> str:
|
|
88
|
+
"""Return the representation of the object.
|
|
89
|
+
|
|
90
|
+
Returns
|
|
91
|
+
-------
|
|
92
|
+
str
|
|
93
|
+
The representation of the object.
|
|
94
|
+
|
|
95
|
+
"""
|
|
96
|
+
return f"{self.cv} (incl. final model)"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Provides tests for the final model CV."""
|
|
2
|
+
|
|
3
|
+
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de>
|
|
4
|
+
# License: AGPL
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from numpy.testing import assert_array_equal
|
|
8
|
+
from sklearn.model_selection import RepeatedStratifiedKFold
|
|
9
|
+
|
|
10
|
+
from julearn.model_selection.final_model_cv import _JulearnFinalModelCV
|
|
11
|
+
from julearn.utils import _compute_cvmdsum
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_final_model_cv() -> None:
    """Test the final model CV."""
    sklearn_cv = RepeatedStratifiedKFold(
        n_repeats=2, n_splits=5, random_state=42
    )

    julearn_cv = _JulearnFinalModelCV(sklearn_cv)

    # 2 repeats x 5 splits, plus the extra final-model fold
    assert julearn_cv.get_n_splits() == 11
    assert julearn_cv.n_repeats == 2

    n_features = 10
    n_samples = 123
    X = np.zeros((n_samples, n_features))
    y = np.zeros(n_samples)

    all_ju = list(julearn_cv.split(X, y))
    all_sk = list(sklearn_cv.split(X, y))

    # The wrapper yields every fold of the wrapped CV, then one extra.
    assert len(all_ju) == len(all_sk) + 1
    for (ju_train, ju_test), (sk_train, sk_test) in zip(all_ju, all_sk):
        assert_array_equal(ju_train, sk_train)
        assert_array_equal(ju_test, sk_test)

    # The extra fold trains on the full dataset and tests on 2 samples.
    final_train, final_test = all_ju[-1]
    assert final_train.shape[0] == n_samples
    assert final_test.shape[0] == 2
    assert_array_equal(final_train, np.arange(n_samples))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_final_model_cv_mdsum() -> None:
    """Test the mdsum of the final model CV."""
    base_cv = RepeatedStratifiedKFold(
        n_repeats=2, n_splits=5, random_state=42
    )
    wrapped_cv = _JulearnFinalModelCV(base_cv)

    # Wrapping must not change the CV checksum.
    assert _compute_cvmdsum(wrapped_cv) == _compute_cvmdsum(base_cv)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Utility functions for model selection in julearn."""
|
|
2
|
+
|
|
3
|
+
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de>
|
|
4
|
+
# License: AGPL
|
|
5
|
+
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from sklearn.model_selection import check_cv as sk_check_cv
|
|
9
|
+
|
|
10
|
+
from .final_model_cv import _JulearnFinalModelCV
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from ..utils.typing import CVLike
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def check_cv(
    cv: "CVLike", classifier: bool = False, include_final_model: bool = False
) -> "CVLike":
    """Check the CV instance and return the proper CV for julearn.

    Parameters
    ----------
    cv : int, str or cross-validation generator | None
        Cross-validation splitting strategy to use for model evaluation.

        Options are:

        * None: defaults to 5-fold
        * int: the number of folds in a `(Stratified)KFold`
        * CV Splitter (see scikit-learn documentation on CV)
        * An iterable yielding (train, test) splits as arrays of indices.

    classifier : bool, default=False
        Whether the task is a classification task, in which case
        stratified KFold will be used.

    include_final_model : bool, default=False
        Whether to include the final model in the cross-validation. If true,
        one more fold will be added to the cross-validation, where the full
        dataset is used for training and testing.

    Returns
    -------
    checked_cv : a cross-validator instance.
        The return value is a cross-validator which generates the train/test
        splits via the ``split`` method.

    """
    # Let scikit-learn normalise ints / None / iterables into a CV splitter.
    checked_cv = sk_check_cv(cv, classifier=classifier)
    if include_final_model:
        # Append the extra full-dataset fold used to fit the final model.
        checked_cv = _JulearnFinalModelCV(checked_cv)
    return checked_cv
|
|
@@ -42,6 +42,37 @@ from .target_pipeline import JuTargetPipeline
|
|
|
42
42
|
from .target_pipeline_creator import TargetPipelineCreator
|
|
43
43
|
|
|
44
44
|
|
|
45
|
+
def _should_wrap_this_step(
    X_types: Dict[str, List[str]],  # noqa: N803
    apply_to: ColumnTypesLike,
) -> bool:
    """Check if we should wrap the step.

    Parameters
    ----------
    X_types : Dict[str, List[str]]
        The types of the columns in the data.
    apply_to : ColumnTypesLike
        The types to apply this step to.

    Returns
    -------
    bool
        Whether we should wrap the step.

    """
    # A wildcard means the step targets every column: no wrapping needed.
    if any(pattern in ("*", ".*") for pattern in apply_to):
        return False

    # Wrap as soon as one of the present column types is not targeted.
    return not all(col_type in apply_to for col_type in X_types)
|
|
74
|
+
|
|
75
|
+
|
|
45
76
|
def _params_to_pipeline(
|
|
46
77
|
param: Any,
|
|
47
78
|
X_types: Dict[str, List], # noqa: N803
|
|
@@ -511,7 +542,9 @@ class PipelineCreator:
|
|
|
511
542
|
logger.debug(f"\t Params to tune: {step_params_to_tune}")
|
|
512
543
|
|
|
513
544
|
# Wrap in a JuTransformer if needed
|
|
514
|
-
if
|
|
545
|
+
if _should_wrap_this_step(
|
|
546
|
+
X_types, step_dict.apply_to
|
|
547
|
+
) and not isinstance(estimator, JuTransformer):
|
|
515
548
|
estimator = self._wrap_step(
|
|
516
549
|
name,
|
|
517
550
|
estimator,
|
|
@@ -539,7 +572,9 @@ class PipelineCreator:
|
|
|
539
572
|
for k, v in model_params.items()
|
|
540
573
|
}
|
|
541
574
|
model_estimator.set_params(**model_params)
|
|
542
|
-
if
|
|
575
|
+
if _should_wrap_this_step(
|
|
576
|
+
X_types, model_step.apply_to
|
|
577
|
+
) and not isinstance(model_estimator, JuModelLike):
|
|
543
578
|
logger.debug(f"Wrapping {model_name}")
|
|
544
579
|
model_estimator = WrapModel(model_estimator, model_step.apply_to)
|
|
545
580
|
|
|
@@ -789,12 +824,11 @@ class PipelineCreator:
|
|
|
789
824
|
"this type."
|
|
790
825
|
)
|
|
791
826
|
|
|
792
|
-
self.wrap = needed_types != {"continuous"}
|
|
793
827
|
return X_types
|
|
794
828
|
|
|
795
829
|
@staticmethod
|
|
796
830
|
def _is_transformer_step(
|
|
797
|
-
step: Union[str, EstimatorLike, TargetPipelineCreator]
|
|
831
|
+
step: Union[str, EstimatorLike, TargetPipelineCreator],
|
|
798
832
|
) -> bool:
|
|
799
833
|
"""Check if a step is a transformer."""
|
|
800
834
|
if step in list_transformers():
|
|
@@ -805,7 +839,7 @@ class PipelineCreator:
|
|
|
805
839
|
|
|
806
840
|
@staticmethod
|
|
807
841
|
def _is_model_step(
|
|
808
|
-
step: Union[EstimatorLike, str, TargetPipelineCreator]
|
|
842
|
+
step: Union[EstimatorLike, str, TargetPipelineCreator],
|
|
809
843
|
) -> bool:
|
|
810
844
|
"""Check if a step is a model."""
|
|
811
845
|
if step in list_models():
|
|
@@ -26,10 +26,10 @@ if TYPE_CHECKING:
|
|
|
26
26
|
from sklearn.pipeline import Pipeline
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def
|
|
29
|
+
def test_construction_working_wrapping(
|
|
30
30
|
model: str, preprocess: Union[str, List[str]], problem_type: str
|
|
31
31
|
) -> None:
|
|
32
|
-
"""Test that the pipeline constructions works as expected.
|
|
32
|
+
"""Test that the pipeline constructions works as expected (wrapping).
|
|
33
33
|
|
|
34
34
|
Parameters
|
|
35
35
|
----------
|
|
@@ -46,7 +46,7 @@ def test_construction_working(
|
|
|
46
46
|
for step in preprocess:
|
|
47
47
|
creator.add(step, apply_to="categorical")
|
|
48
48
|
creator.add(model)
|
|
49
|
-
X_types = {"categorical": ["A"]}
|
|
49
|
+
X_types = {"categorical": ["A"], "continuous": ["B"]}
|
|
50
50
|
pipeline = creator.to_pipeline(X_types=X_types)
|
|
51
51
|
|
|
52
52
|
# check preprocessing steps
|
|
@@ -72,6 +72,53 @@ def test_construction_working(
|
|
|
72
72
|
assert len(preprocess) + 2 == len(pipeline.steps)
|
|
73
73
|
|
|
74
74
|
|
|
75
|
+
def test_construction_working_nowrapping(
|
|
76
|
+
model: str, preprocess: Union[str, List[str]], problem_type: str
|
|
77
|
+
) -> None:
|
|
78
|
+
"""Test that the pipeline constructions works as expected (no wrapping).
|
|
79
|
+
|
|
80
|
+
Parameters
|
|
81
|
+
----------
|
|
82
|
+
model : str
|
|
83
|
+
The model to test.
|
|
84
|
+
preprocess : str or list of str
|
|
85
|
+
The preprocessing steps to test.
|
|
86
|
+
problem_type : str
|
|
87
|
+
The problem type to test.
|
|
88
|
+
|
|
89
|
+
"""
|
|
90
|
+
creator = PipelineCreator(problem_type=problem_type)
|
|
91
|
+
preprocess = preprocess if isinstance(preprocess, list) else [preprocess]
|
|
92
|
+
for step in preprocess:
|
|
93
|
+
creator.add(step, apply_to="*")
|
|
94
|
+
creator.add(model, apply_to=["categorical", "continuous"])
|
|
95
|
+
X_types = {"categorical": ["A"], "continuous": ["B"]}
|
|
96
|
+
pipeline = creator.to_pipeline(X_types=X_types)
|
|
97
|
+
|
|
98
|
+
# check preprocessing steps
|
|
99
|
+
# ignoring first step for types and last for model
|
|
100
|
+
for element in zip(preprocess, pipeline.steps[1:-1]):
|
|
101
|
+
_preprocess, (name, transformer) = element
|
|
102
|
+
assert name.startswith(f"{_preprocess}")
|
|
103
|
+
assert not isinstance(transformer, JuColumnTransformer)
|
|
104
|
+
assert isinstance(
|
|
105
|
+
transformer, get_transformer(_preprocess).__class__
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# check model step
|
|
109
|
+
model_name, model = pipeline.steps[-1]
|
|
110
|
+
assert not isinstance(model, WrapModel)
|
|
111
|
+
assert isinstance(
|
|
112
|
+
model,
|
|
113
|
+
get_model(
|
|
114
|
+
model_name,
|
|
115
|
+
problem_type=problem_type,
|
|
116
|
+
).__class__,
|
|
117
|
+
)
|
|
118
|
+
assert len(preprocess) + 2 == len(pipeline.steps)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
75
122
|
def test_fit_and_transform_no_error(
|
|
76
123
|
X_iris: pd.DataFrame, # noqa: N803
|
|
77
124
|
y_iris: pd.Series,
|
|
@@ -415,8 +415,8 @@ def test_tune_hyperparam_gridsearch(df_iris: pd.DataFrame) -> None:
|
|
|
415
415
|
scoring = "accuracy"
|
|
416
416
|
|
|
417
417
|
np.random.seed(42)
|
|
418
|
-
cv_outer = RepeatedKFold(n_splits=
|
|
419
|
-
cv_inner = RepeatedKFold(n_splits=
|
|
418
|
+
cv_outer = RepeatedKFold(n_splits=3, n_repeats=2)
|
|
419
|
+
cv_inner = RepeatedKFold(n_splits=3, n_repeats=2)
|
|
420
420
|
|
|
421
421
|
model_params = {"svm__C": [0.01, 0.001]}
|
|
422
422
|
search_params = {"cv": cv_inner}
|
|
@@ -434,10 +434,12 @@ def test_tune_hyperparam_gridsearch(df_iris: pd.DataFrame) -> None:
|
|
|
434
434
|
problem_type="classification",
|
|
435
435
|
)
|
|
436
436
|
|
|
437
|
+
assert len(actual["repeat"].unique()) == 2
|
|
438
|
+
|
|
437
439
|
# Now do the same with scikit-learn
|
|
438
440
|
np.random.seed(42)
|
|
439
|
-
cv_outer = RepeatedKFold(n_splits=
|
|
440
|
-
cv_inner = RepeatedKFold(n_splits=
|
|
441
|
+
cv_outer = RepeatedKFold(n_splits=3, n_repeats=2)
|
|
442
|
+
cv_inner = RepeatedKFold(n_splits=3, n_repeats=2)
|
|
441
443
|
|
|
442
444
|
clf = make_pipeline(SVC())
|
|
443
445
|
gs = GridSearchCV(
|
|
@@ -1225,7 +1227,7 @@ def test_api_stacking_models() -> None:
|
|
|
1225
1227
|
# The final model should be a stacking model im which the first estimator
|
|
1226
1228
|
# is a grid search
|
|
1227
1229
|
assert isinstance(
|
|
1228
|
-
final.steps[1][1].
|
|
1230
|
+
final.steps[1][1].estimators[0][1], # type: ignore
|
|
1229
1231
|
GridSearchCV,
|
|
1230
1232
|
)
|
|
1231
1233
|
|
|
@@ -12,6 +12,7 @@ from sklearn.compose import ColumnTransformer
|
|
|
12
12
|
from sklearn.utils.validation import check_is_fitted
|
|
13
13
|
|
|
14
14
|
from ..base import ColumnTypesLike, JuTransformer, ensure_column_types
|
|
15
|
+
from ..config import get_config
|
|
15
16
|
from ..utils.logging import raise_error
|
|
16
17
|
from ..utils.typing import DataLike, EstimatorLike
|
|
17
18
|
|
|
@@ -93,6 +94,9 @@ class JuColumnTransformer(JuTransformer):
|
|
|
93
94
|
[(self.name, self.transformer, self.apply_to.to_type_selector())],
|
|
94
95
|
verbose_feature_names_out=verbose_feature_names_out,
|
|
95
96
|
remainder="passthrough",
|
|
97
|
+
n_jobs=None
|
|
98
|
+
if get_config("enable_parallel_column_transformers")
|
|
99
|
+
else 1,
|
|
96
100
|
)
|
|
97
101
|
self.column_transformer_.fit(X, y, **fit_params)
|
|
98
102
|
|