panelsplit 2.0.4.dev0__tar.gz → 2.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/ci.yml +1 -1
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/PKG-INFO +4 -4
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/README.md +1 -1
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/metrics.py +152 -37
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/model_selection/model_selection.py +10 -117
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/pipeline.py +57 -25
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/pyproject.toml +2 -2
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_metrics.py +8 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_pipeline.py +21 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_search.py +18 -0
- panelsplit-2.0.5/tests/test_sequentialcvpipeline_indices.py +148 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/uv.lock +1434 -1959
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/lint.yml +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/pre-commit.yml +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/releases.yml +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.gitignore +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.pre-commit-config.yaml +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CHANGELOG.md +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CITATION.cff +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CNAME +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CODE_OF_CONDUCT.md +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/LICENSE +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/examples/An introduction to PanelSplit.ipynb +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/__init__.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/application.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/cross_validation.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/model_selection/__init__.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/model_selection/_validation.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/plot.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/__init__.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/_response.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/typing.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/utils.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/validation.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/__init__.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/df_generation.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_PanelSplit.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_check_fitted_fix.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_cross_validation.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_edge_cases.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_issue_59_fix.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_narwhals_compatibility.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_plot.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_scorer.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_set_params.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_utils.py +0 -0
- {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_validation_coverage.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: panelsplit
|
|
3
|
-
Version: 2.0.4.dev0
|
|
3
|
+
Version: 2.0.5
|
|
4
4
|
Summary: A tool for panel data analysis.
|
|
5
5
|
Project-URL: Homepage, https://github.com/4Freye/panelsplit
|
|
6
6
|
Project-URL: Repository, https://github.com/4Freye/panelsplit
|
|
@@ -11,13 +11,13 @@ License-File: LICENSE
|
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
12
|
Classifier: Operating System :: OS Independent
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Requires-Python: >=3.
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
15
|
Requires-Dist: joblib>=1.0.1
|
|
16
16
|
Requires-Dist: matplotlib>=3.4.3
|
|
17
17
|
Requires-Dist: narwhals>=1.42.1
|
|
18
18
|
Requires-Dist: numpy>=1.21.0
|
|
19
19
|
Requires-Dist: pandas>=1.3.0
|
|
20
|
-
Requires-Dist: scikit-learn>=
|
|
20
|
+
Requires-Dist: scikit-learn>=1.8.0
|
|
21
21
|
Requires-Dist: scipy>=1.10.1
|
|
22
22
|
Requires-Dist: tqdm>=4.67.1
|
|
23
23
|
Requires-Dist: typing-extensions>=4.13.2
|
|
@@ -32,7 +32,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati
|
|
|
32
32
|
|
|
33
33
|
## Installation
|
|
34
34
|
|
|
35
|
-
panelsplit is tested for compatibility with python versions >= 3.
|
|
35
|
+
panelsplit is tested for compatibility with python versions >= 3.11. You can install panelsplit using pip:
|
|
36
36
|
|
|
37
37
|
```bash
|
|
38
38
|
pip install panelsplit
|
|
@@ -7,7 +7,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati
|
|
|
7
7
|
|
|
8
8
|
## Installation
|
|
9
9
|
|
|
10
|
-
panelsplit is tested for compatibility with python versions >= 3.
|
|
10
|
+
panelsplit is tested for compatibility with python versions >= 3.11. You can install panelsplit using pip:
|
|
11
11
|
|
|
12
12
|
```bash
|
|
13
13
|
pip install panelsplit
|
|
@@ -1,37 +1,42 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Metrics that are equivalent to their sklearn counterparts, except that they work with SequentialCVPipeline.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# Standard library
|
|
6
|
+
import warnings
|
|
2
7
|
from inspect import signature
|
|
3
8
|
from collections.abc import Iterable
|
|
4
9
|
from functools import partial
|
|
5
|
-
from sklearn.metrics._scorer import _MultimetricScorer
|
|
6
|
-
from sklearn.utils._param_validation import (
|
|
7
|
-
validate_params,
|
|
8
|
-
)
|
|
9
|
-
from sklearn.metrics._scorer import _PassthroughScorer, _get_response_method_name
|
|
10
10
|
from copy import deepcopy
|
|
11
|
-
from sklearn.utils.validation import _check_response_method
|
|
12
|
-
import warnings
|
|
13
|
-
from sklearn.base import is_regressor
|
|
14
|
-
from panelsplit.utils._response import _get_response_values
|
|
15
|
-
from sklearn.utils.metadata_routing import (
|
|
16
|
-
_MetadataRequester,
|
|
17
|
-
_raise_for_params,
|
|
18
|
-
_routing_enabled,
|
|
19
|
-
MetadataRequest,
|
|
20
|
-
)
|
|
21
|
-
from .utils.typing import EstimatorLike, ArrayLike
|
|
22
|
-
from numpy.typing import NDArray
|
|
23
11
|
from typing import Callable, Optional, List, Union, Any, Dict
|
|
12
|
+
|
|
13
|
+
# Third-party / typing
|
|
24
14
|
from typing_extensions import Self
|
|
15
|
+
from numpy.typing import NDArray
|
|
25
16
|
|
|
26
|
-
#
|
|
17
|
+
# Local package utilities
|
|
18
|
+
from .utils.validation import _safe_indexing
|
|
19
|
+
from .utils.typing import EstimatorLike, ArrayLike
|
|
20
|
+
from panelsplit.utils._response import _get_response_values
|
|
21
|
+
|
|
22
|
+
# sklearn public metrics (single consolidated import)
|
|
27
23
|
from sklearn.metrics import (
|
|
28
24
|
accuracy_score,
|
|
25
|
+
adjusted_mutual_info_score,
|
|
26
|
+
adjusted_rand_score,
|
|
29
27
|
average_precision_score,
|
|
30
28
|
balanced_accuracy_score,
|
|
31
29
|
brier_score_loss,
|
|
32
30
|
class_likelihood_ratios,
|
|
31
|
+
completeness_score,
|
|
33
32
|
d2_absolute_error_score,
|
|
33
|
+
d2_brier_score,
|
|
34
|
+
d2_log_loss_score,
|
|
34
35
|
explained_variance_score,
|
|
36
|
+
f1_score,
|
|
37
|
+
fowlkes_mallows_score,
|
|
38
|
+
jaccard_score,
|
|
39
|
+
homogeneity_score,
|
|
35
40
|
log_loss,
|
|
36
41
|
matthews_corrcoef,
|
|
37
42
|
max_error,
|
|
@@ -42,24 +47,37 @@ from sklearn.metrics import (
|
|
|
42
47
|
mean_squared_error,
|
|
43
48
|
mean_squared_log_error,
|
|
44
49
|
median_absolute_error,
|
|
50
|
+
mutual_info_score,
|
|
51
|
+
normalized_mutual_info_score,
|
|
52
|
+
precision_score,
|
|
53
|
+
rand_score,
|
|
45
54
|
r2_score,
|
|
55
|
+
recall_score,
|
|
46
56
|
roc_auc_score,
|
|
47
57
|
root_mean_squared_error,
|
|
48
58
|
root_mean_squared_log_error,
|
|
49
59
|
top_k_accuracy_score,
|
|
50
|
-
)
|
|
51
|
-
from sklearn.metrics.cluster import (
|
|
52
|
-
adjusted_mutual_info_score,
|
|
53
|
-
adjusted_rand_score,
|
|
54
|
-
completeness_score,
|
|
55
|
-
fowlkes_mallows_score,
|
|
56
|
-
homogeneity_score,
|
|
57
|
-
mutual_info_score,
|
|
58
|
-
normalized_mutual_info_score,
|
|
59
|
-
rand_score,
|
|
60
60
|
v_measure_score,
|
|
61
61
|
)
|
|
62
62
|
|
|
63
|
+
# sklearn internals / utilities (note: private APIs)
|
|
64
|
+
from sklearn.metrics._scorer import (
|
|
65
|
+
_MultimetricScorer,
|
|
66
|
+
_PassthroughScorer,
|
|
67
|
+
_get_response_method_name,
|
|
68
|
+
)
|
|
69
|
+
from sklearn.utils._param_validation import validate_params
|
|
70
|
+
from sklearn.utils.validation import _check_response_method
|
|
71
|
+
from sklearn.base import is_regressor
|
|
72
|
+
|
|
73
|
+
# metadata routing utilities (used by some sklearn internals)
|
|
74
|
+
from sklearn.utils.metadata_routing import (
|
|
75
|
+
_MetadataRequester,
|
|
76
|
+
_raise_for_params,
|
|
77
|
+
_routing_enabled,
|
|
78
|
+
MetadataRequest,
|
|
79
|
+
)
|
|
80
|
+
|
|
63
81
|
|
|
64
82
|
def _get_idx_from_last_cv(estimator: EstimatorLike) -> Union[None, List[NDArray]]:
|
|
65
83
|
"""
|
|
@@ -88,14 +106,63 @@ def make_SequentialCV_scorer(
|
|
|
88
106
|
greater_is_better: bool = True,
|
|
89
107
|
**kwargs: Any,
|
|
90
108
|
) -> Callable[..., float]:
|
|
109
|
+
"""
|
|
110
|
+
Make a SequentialCVPipeline-compatible scorer from a performance metric.
|
|
111
|
+
|
|
112
|
+
A scorer is a wrapper around an arbitrary metric or loss function that is called
|
|
113
|
+
with the signature `scorer(estimator, X, y_true, **kwargs)`.
|
|
114
|
+
|
|
115
|
+
The parameter `response_method` specifies which method of the estimator
|
|
116
|
+
should be used to feed the scoring/loss function.
|
|
117
|
+
|
|
118
|
+
Parameters
|
|
119
|
+
----------
|
|
120
|
+
score_func : callable
|
|
121
|
+
Score function (or loss function) with signature
|
|
122
|
+
``score_func(y, y_pred, **kwargs)``.
|
|
123
|
+
|
|
124
|
+
response_method : {"predict_proba", "decision_function", "predict"} or \
|
|
125
|
+
list/tuple of such str, default="predict"
|
|
126
|
+
|
|
127
|
+
Specifies the response method used to get predictions from an estimator
|
|
128
|
+
(i.e. :term:`predict_proba`, :term:`decision_function` or
|
|
129
|
+
:term:`predict`). Possible choices are:
|
|
130
|
+
|
|
131
|
+
- if `str`, it corresponds to the name of the method to return;
|
|
132
|
+
- if a list or tuple of `str`, it provides the method names in order of
|
|
133
|
+
preference. The method returned corresponds to the first method in
|
|
134
|
+
the list that is implemented by `estimator`.
|
|
135
|
+
|
|
136
|
+
greater_is_better : bool, default=True
|
|
137
|
+
Whether `score_func` is a score function (default), meaning high is
|
|
138
|
+
good, or a loss function, meaning low is good. In the latter case, the
|
|
139
|
+
scorer object will sign-flip the outcome of the `score_func`.
|
|
140
|
+
|
|
141
|
+
**kwargs : additional arguments
|
|
142
|
+
Additional parameters to be passed to `score_func`.
|
|
143
|
+
|
|
144
|
+
Returns
|
|
145
|
+
-------
|
|
146
|
+
Callable
|
|
147
|
+
Callable object that returns a scalar score; greater is better.
|
|
148
|
+
|
|
149
|
+
Examples
|
|
150
|
+
--------
|
|
151
|
+
>>> from panelsplit.metrics import make_SequentialCV_scorer
|
|
152
|
+
>>> from sklearn.metrics import brier_score_loss
|
|
153
|
+
>>> brier_loss_scorer= make_SequentialCV_scorer(brier_score_loss, response_method='predict_proba', greater_is_better=False)
|
|
154
|
+
|
|
155
|
+
>>> from panelsplit.pipeline import SequentialCVPipeline
|
|
156
|
+
>>> from sklearn.ensemble import RandomForestClassifier
|
|
157
|
+
>>> from sklearn.datasets import load_iris
|
|
158
|
+
>>> X, y = load_iris(return_X_y=True)
|
|
159
|
+
>>> p = SequentialCVPipeline(steps = [('rf', RandomForestClassifier())], cv_steps = [None])
|
|
160
|
+
>>> p.fit(X, y)
|
|
161
|
+
>>> brier_loss_scorer(p, X, y)
|
|
162
|
+
"""
|
|
91
163
|
sign = 1 if greater_is_better else -1
|
|
92
164
|
|
|
93
165
|
if response_method is None:
|
|
94
|
-
warnings.warn(
|
|
95
|
-
"response_method=None is deprecated in version 1.6 and will be removed "
|
|
96
|
-
"in version 1.8. Leave it to its default value to avoid this warning.",
|
|
97
|
-
FutureWarning,
|
|
98
|
-
)
|
|
99
166
|
response_method = "predict"
|
|
100
167
|
elif response_method == "default":
|
|
101
168
|
response_method = "predict"
|
|
@@ -158,7 +225,6 @@ class _BaseScorer(_MetadataRequester):
|
|
|
158
225
|
self._sign = sign
|
|
159
226
|
self._kwargs = kwargs
|
|
160
227
|
self._response_method = response_method
|
|
161
|
-
# TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)
|
|
162
228
|
self._deprecation_msg = None
|
|
163
229
|
|
|
164
230
|
def _get_pos_label(self) -> Optional[Any]:
|
|
@@ -170,7 +236,6 @@ class _BaseScorer(_MetadataRequester):
|
|
|
170
236
|
return None
|
|
171
237
|
|
|
172
238
|
def _accept_sample_weight(self) -> bool:
|
|
173
|
-
# TODO(slep006): remove when metadata routing is the only way
|
|
174
239
|
return "sample_weight" in signature(self._score_func).parameters
|
|
175
240
|
|
|
176
241
|
def __repr__(self) -> str:
|
|
@@ -217,7 +282,6 @@ class _BaseScorer(_MetadataRequester):
|
|
|
217
282
|
float
|
|
218
283
|
Score function applied to prediction of estimator on X.
|
|
219
284
|
"""
|
|
220
|
-
# TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)
|
|
221
285
|
if self._deprecation_msg is not None:
|
|
222
286
|
warnings.warn(
|
|
223
287
|
self._deprecation_msg, category=DeprecationWarning, stacklevel=2
|
|
@@ -314,6 +378,7 @@ class _Scorer(_BaseScorer):
|
|
|
314
378
|
X,
|
|
315
379
|
pos_label=pos_label,
|
|
316
380
|
)
|
|
381
|
+
|
|
317
382
|
# make lookup dict for fast matching
|
|
318
383
|
pred_dict = dict(zip(idx, y_pred))
|
|
319
384
|
|
|
@@ -340,6 +405,36 @@ class _Scorer(_BaseScorer):
|
|
|
340
405
|
prefer_skip_nested_validation=True,
|
|
341
406
|
)
|
|
342
407
|
def get_scorer(scoring: Union[str, Callable]) -> Any:
|
|
408
|
+
"""
|
|
409
|
+
Get a scorer from string.
|
|
410
|
+
|
|
411
|
+
`sklearn.metrics.get_scorer_names` can be used to retrieve the names
|
|
412
|
+
of all available scorers.
|
|
413
|
+
|
|
414
|
+
Parameters
|
|
415
|
+
----------
|
|
416
|
+
scoring : str, callable or None
|
|
417
|
+
Scoring method as string. If callable it is returned as is.
|
|
418
|
+
If None, returns None.
|
|
419
|
+
|
|
420
|
+
Returns
|
|
421
|
+
-------
|
|
422
|
+
callable
|
|
423
|
+
The scorer.
|
|
424
|
+
|
|
425
|
+
Notes
|
|
426
|
+
-----
|
|
427
|
+
When passed a string, this function always returns a copy of the scorer
|
|
428
|
+
object. Calling `get_scorer` twice for the same scorer results in two
|
|
429
|
+
separate scorer objects.
|
|
430
|
+
|
|
431
|
+
Examples
|
|
432
|
+
--------
|
|
433
|
+
>>> from panelsplit.metrics import get_scorer
|
|
434
|
+
>>> accuracy = get_scorer("accuracy")
|
|
435
|
+
>>> accuracy(classifier, X, y)
|
|
436
|
+
"""
|
|
437
|
+
|
|
343
438
|
if isinstance(scoring, str):
|
|
344
439
|
try:
|
|
345
440
|
scorer = deepcopy(_SCORERS[scoring])
|
|
@@ -489,7 +584,11 @@ neg_mean_poisson_deviance_scorer = make_SequentialCV_scorer(
|
|
|
489
584
|
neg_mean_gamma_deviance_scorer = make_SequentialCV_scorer(
|
|
490
585
|
mean_gamma_deviance, greater_is_better=False
|
|
491
586
|
)
|
|
587
|
+
# D^2 scorers (fraction of explained Brier / log-loss)
|
|
492
588
|
d2_absolute_error_scorer = make_SequentialCV_scorer(d2_absolute_error_score)
|
|
589
|
+
d2_brier_scorer = make_SequentialCV_scorer(d2_brier_score)
|
|
590
|
+
d2_log_loss_scorer = make_SequentialCV_scorer(d2_log_loss_score)
|
|
591
|
+
|
|
493
592
|
|
|
494
593
|
# Standard Classification Scores
|
|
495
594
|
accuracy_scorer = make_SequentialCV_scorer(accuracy_score)
|
|
@@ -583,6 +682,8 @@ _SCORERS = dict(
|
|
|
583
682
|
neg_mean_poisson_deviance=neg_mean_poisson_deviance_scorer,
|
|
584
683
|
neg_mean_gamma_deviance=neg_mean_gamma_deviance_scorer,
|
|
585
684
|
d2_absolute_error_score=d2_absolute_error_scorer,
|
|
685
|
+
d2_brier_score=d2_brier_scorer,
|
|
686
|
+
d2_log_loss_score=d2_log_loss_scorer,
|
|
586
687
|
accuracy=accuracy_scorer,
|
|
587
688
|
top_k_accuracy=top_k_accuracy_scorer,
|
|
588
689
|
roc_auc=roc_auc_scorer,
|
|
@@ -607,3 +708,17 @@ _SCORERS = dict(
|
|
|
607
708
|
normalized_mutual_info_score=normalized_mutual_info_scorer,
|
|
608
709
|
fowlkes_mallows_score=fowlkes_mallows_scorer,
|
|
609
710
|
)
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
for name, metric in [
|
|
714
|
+
("precision", precision_score),
|
|
715
|
+
("recall", recall_score),
|
|
716
|
+
("f1", f1_score),
|
|
717
|
+
("jaccard", jaccard_score),
|
|
718
|
+
]:
|
|
719
|
+
_SCORERS[name] = make_SequentialCV_scorer(metric, average="binary")
|
|
720
|
+
for average in ["macro", "micro", "samples", "weighted"]:
|
|
721
|
+
qualified_name = "{0}_{1}".format(name, average)
|
|
722
|
+
_SCORERS[qualified_name] = make_SequentialCV_scorer(
|
|
723
|
+
metric, pos_label=None, average=average
|
|
724
|
+
)
|
|
@@ -970,8 +970,8 @@ class GridSearch(BaseSearch):
|
|
|
970
970
|
|
|
971
971
|
If `scoring` represents a single score, one can use:
|
|
972
972
|
|
|
973
|
-
- a single string (see
|
|
974
|
-
- a callable (see
|
|
973
|
+
- a single string (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-string-names);
|
|
974
|
+
- a callable (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-callable) that returns a single value;
|
|
975
975
|
- `None`, the `estimator`'s default evaluation criterion is used.
|
|
976
976
|
|
|
977
977
|
If `scoring` represents multiple scores, one can use:
|
|
@@ -981,16 +981,13 @@ class GridSearch(BaseSearch):
|
|
|
981
981
|
names and the values are the metric scores;
|
|
982
982
|
- a dictionary with metric names as keys and callables as values.
|
|
983
983
|
|
|
984
|
-
See
|
|
984
|
+
See https://scikit-learn.org/stable/modules/grid_search.html#multimetric-grid-search for an example.
|
|
985
985
|
|
|
986
986
|
n_jobs : int, default=None
|
|
987
987
|
Number of jobs to run in parallel.
|
|
988
988
|
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
|
989
989
|
``-1`` means using all processors.
|
|
990
990
|
|
|
991
|
-
.. versionchanged:: v0.20
|
|
992
|
-
`n_jobs` default changed from 1 to None
|
|
993
|
-
|
|
994
991
|
refit : bool, str, or callable, default=True
|
|
995
992
|
Refit an estimator using the best found parameters on the whole
|
|
996
993
|
dataset.
|
|
@@ -1054,67 +1051,20 @@ class GridSearch(BaseSearch):
|
|
|
1054
1051
|
expensive and is not strictly required to select the parameters that
|
|
1055
1052
|
yield the best generalization performance.
|
|
1056
1053
|
|
|
1057
|
-
.. versionadded:: 0.19
|
|
1058
|
-
|
|
1059
|
-
.. versionchanged:: 0.21
|
|
1060
|
-
Default value was changed from ``True`` to ``False``
|
|
1061
|
-
|
|
1062
1054
|
Attributes
|
|
1063
1055
|
----------
|
|
1064
1056
|
cv_results_ : dict of numpy (masked) ndarrays
|
|
1065
1057
|
A dict with keys as column headers and values as columns, that can be
|
|
1066
1058
|
imported into a pandas ``DataFrame``.
|
|
1067
1059
|
|
|
1068
|
-
For instance the below given table
|
|
1069
|
-
|
|
1070
|
-
+------------+-----------+------------+-----------------+---+---------+
|
|
1071
|
-
|param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|
|
|
1072
|
-
+============+===========+============+=================+===+=========+
|
|
1073
|
-
| 'poly' | -- | 2 | 0.80 |...| 2 |
|
|
1074
|
-
+------------+-----------+------------+-----------------+---+---------+
|
|
1075
|
-
| 'poly' | -- | 3 | 0.70 |...| 4 |
|
|
1076
|
-
+------------+-----------+------------+-----------------+---+---------+
|
|
1077
|
-
| 'rbf' | 0.1 | -- | 0.80 |...| 3 |
|
|
1078
|
-
+------------+-----------+------------+-----------------+---+---------+
|
|
1079
|
-
| 'rbf' | 0.2 | -- | 0.93 |...| 1 |
|
|
1080
|
-
+------------+-----------+------------+-----------------+---+---------+
|
|
1081
|
-
|
|
1082
|
-
will be represented by a ``cv_results_`` dict of::
|
|
1083
|
-
|
|
1084
|
-
{
|
|
1085
|
-
'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],
|
|
1086
|
-
mask = [False False False False]...)
|
|
1087
|
-
'param_gamma': masked_array(data = [-- -- 0.1 0.2],
|
|
1088
|
-
mask = [ True True False False]...),
|
|
1089
|
-
'param_degree': masked_array(data = [2.0 3.0 -- --],
|
|
1090
|
-
mask = [False False True True]...),
|
|
1091
|
-
'split0_test_score' : [0.80, 0.70, 0.80, 0.93],
|
|
1092
|
-
'split1_test_score' : [0.82, 0.50, 0.70, 0.78],
|
|
1093
|
-
'mean_test_score' : [0.81, 0.60, 0.75, 0.85],
|
|
1094
|
-
'std_test_score' : [0.01, 0.10, 0.05, 0.08],
|
|
1095
|
-
'rank_test_score' : [2, 4, 3, 1],
|
|
1096
|
-
'split0_train_score' : [0.80, 0.92, 0.70, 0.93],
|
|
1097
|
-
'split1_train_score' : [0.82, 0.55, 0.70, 0.87],
|
|
1098
|
-
'mean_train_score' : [0.81, 0.74, 0.70, 0.90],
|
|
1099
|
-
'std_train_score' : [0.01, 0.19, 0.00, 0.03],
|
|
1100
|
-
'mean_fit_time' : [0.73, 0.63, 0.43, 0.49],
|
|
1101
|
-
'std_fit_time' : [0.01, 0.02, 0.01, 0.01],
|
|
1102
|
-
'mean_score_time' : [0.01, 0.06, 0.04, 0.04],
|
|
1103
|
-
'std_score_time' : [0.00, 0.00, 0.00, 0.01],
|
|
1104
|
-
'params' : [{'kernel': 'poly', 'degree': 2}, ...],
|
|
1105
|
-
}
|
|
1106
|
-
|
|
1107
1060
|
For an example of visualization and interpretation of GridSearch results,
|
|
1108
|
-
see
|
|
1061
|
+
see https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#sphx-glr-auto-examples-model-selection-plot-grid-search-stats-py.
|
|
1109
1062
|
|
|
1110
1063
|
NOTE
|
|
1111
1064
|
|
|
1112
1065
|
The key ``'params'`` is used to store a list of parameter
|
|
1113
1066
|
settings dicts for all the parameter candidates.
|
|
1114
1067
|
|
|
1115
|
-
The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
|
|
1116
|
-
``std_score_time`` are all in seconds.
|
|
1117
|
-
|
|
1118
1068
|
For multi-metric evaluation, the scores for all the scorers are
|
|
1119
1069
|
available in the ``cv_results_`` dict at the keys ending with that
|
|
1120
1070
|
scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown
|
|
@@ -1167,8 +1117,6 @@ class GridSearch(BaseSearch):
|
|
|
1167
1117
|
|
|
1168
1118
|
This is present only if ``refit`` is not False.
|
|
1169
1119
|
|
|
1170
|
-
.. versionadded:: 0.20
|
|
1171
|
-
|
|
1172
1120
|
multimetric_ : bool
|
|
1173
1121
|
Whether or not the scorers compute several metrics.
|
|
1174
1122
|
|
|
@@ -1182,16 +1130,12 @@ class GridSearch(BaseSearch):
|
|
|
1182
1130
|
parameter for more details) and that `best_estimator_` exposes
|
|
1183
1131
|
`n_features_in_` when fit.
|
|
1184
1132
|
|
|
1185
|
-
.. versionadded:: 0.24
|
|
1186
|
-
|
|
1187
1133
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
1188
1134
|
Names of features seen during :term:`fit`. Only defined if
|
|
1189
1135
|
`best_estimator_` is defined (see the documentation for the `refit`
|
|
1190
1136
|
parameter for more details) and that `best_estimator_` exposes
|
|
1191
1137
|
`feature_names_in_` when fit.
|
|
1192
1138
|
|
|
1193
|
-
.. versionadded:: 1.0
|
|
1194
|
-
|
|
1195
1139
|
See Also
|
|
1196
1140
|
--------
|
|
1197
1141
|
ParameterGrid : Generates all the combinations of a hyperparameter grid.
|
|
@@ -1226,11 +1170,11 @@ class GridSearch(BaseSearch):
|
|
|
1226
1170
|
GridSearch(estimator=SVC(),
|
|
1227
1171
|
param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})
|
|
1228
1172
|
>>> sorted(clf.cv_results_.keys())
|
|
1229
|
-
['
|
|
1173
|
+
['mean_test_score',...
|
|
1230
1174
|
'param_C', 'param_kernel', 'params',...
|
|
1231
1175
|
'rank_test_score', 'split0_test_score',...
|
|
1232
1176
|
'split2_test_score', ...
|
|
1233
|
-
'
|
|
1177
|
+
'std_test_score']
|
|
1234
1178
|
"""
|
|
1235
1179
|
|
|
1236
1180
|
_parameter_constraints: dict = {
|
|
@@ -1320,8 +1264,8 @@ class RandomizedSearch(BaseSearch):
|
|
|
1320
1264
|
|
|
1321
1265
|
If `scoring` represents a single score, one can use:
|
|
1322
1266
|
|
|
1323
|
-
- a single string (see
|
|
1324
|
-
- a callable (see
|
|
1267
|
+
- a single string (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-string-names);
|
|
1268
|
+
- a callable (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-callable) that returns a single value;
|
|
1325
1269
|
- `None`, the `estimator`'s default evaluation criterion is used.
|
|
1326
1270
|
|
|
1327
1271
|
If `scoring` represents multiple scores, one can use:
|
|
@@ -1331,7 +1275,7 @@ class RandomizedSearch(BaseSearch):
|
|
|
1331
1275
|
names and the values are the metric scores;
|
|
1332
1276
|
- a dictionary with metric names as keys and callables as values.
|
|
1333
1277
|
|
|
1334
|
-
See
|
|
1278
|
+
See https://scikit-learn.org/stable/modules/grid_search.html#multimetric-grid-search for an example.
|
|
1335
1279
|
|
|
1336
1280
|
If None, the estimator's score method is used.
|
|
1337
1281
|
|
|
@@ -1341,9 +1285,6 @@ class RandomizedSearch(BaseSearch):
|
|
|
1341
1285
|
``-1`` means using all processors.
|
|
1342
1286
|
for more details.
|
|
1343
1287
|
|
|
1344
|
-
.. versionchanged:: v0.20
|
|
1345
|
-
`n_jobs` default changed from 1 to None
|
|
1346
|
-
|
|
1347
1288
|
refit : bool, str, or callable, default=True
|
|
1348
1289
|
Refit an estimator using the best found parameters on the whole
|
|
1349
1290
|
dataset.
|
|
@@ -1413,62 +1354,20 @@ class RandomizedSearch(BaseSearch):
|
|
|
1413
1354
|
expensive and is not strictly required to select the parameters that
|
|
1414
1355
|
yield the best generalization performance.
|
|
1415
1356
|
|
|
1416
|
-
.. versionadded:: 0.19
|
|
1417
|
-
|
|
1418
|
-
.. versionchanged:: 0.21
|
|
1419
|
-
Default value was changed from ``True`` to ``False``
|
|
1420
|
-
|
|
1421
1357
|
Attributes
|
|
1422
1358
|
----------
|
|
1423
1359
|
cv_results_ : dict of numpy (masked) ndarrays
|
|
1424
1360
|
A dict with keys as column headers and values as columns, that can be
|
|
1425
1361
|
imported into a pandas ``DataFrame``.
|
|
1426
1362
|
|
|
1427
|
-
For instance the below given table
|
|
1428
|
-
|
|
1429
|
-
+--------------+-------------+-------------------+---+---------------+
|
|
1430
|
-
| param_kernel | param_gamma | split0_test_score |...|rank_test_score|
|
|
1431
|
-
+==============+=============+===================+===+===============+
|
|
1432
|
-
| 'rbf' | 0.1 | 0.80 |...| 1 |
|
|
1433
|
-
+--------------+-------------+-------------------+---+---------------+
|
|
1434
|
-
| 'rbf' | 0.2 | 0.84 |...| 3 |
|
|
1435
|
-
+--------------+-------------+-------------------+---+---------------+
|
|
1436
|
-
| 'rbf' | 0.3 | 0.70 |...| 2 |
|
|
1437
|
-
+--------------+-------------+-------------------+---+---------------+
|
|
1438
|
-
|
|
1439
|
-
will be represented by a ``cv_results_`` dict of::
|
|
1440
|
-
|
|
1441
|
-
{
|
|
1442
|
-
'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],
|
|
1443
|
-
mask = False),
|
|
1444
|
-
'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),
|
|
1445
|
-
'split0_test_score' : [0.80, 0.84, 0.70],
|
|
1446
|
-
'split1_test_score' : [0.82, 0.50, 0.70],
|
|
1447
|
-
'mean_test_score' : [0.81, 0.67, 0.70],
|
|
1448
|
-
'std_test_score' : [0.01, 0.24, 0.00],
|
|
1449
|
-
'rank_test_score' : [1, 3, 2],
|
|
1450
|
-
'split0_train_score' : [0.80, 0.92, 0.70],
|
|
1451
|
-
'split1_train_score' : [0.82, 0.55, 0.70],
|
|
1452
|
-
'mean_train_score' : [0.81, 0.74, 0.70],
|
|
1453
|
-
'std_train_score' : [0.01, 0.19, 0.00],
|
|
1454
|
-
'mean_fit_time' : [0.73, 0.63, 0.43],
|
|
1455
|
-
'std_fit_time' : [0.01, 0.02, 0.01],
|
|
1456
|
-
'mean_score_time' : [0.01, 0.06, 0.04],
|
|
1457
|
-
'std_score_time' : [0.00, 0.00, 0.00],
|
|
1458
|
-
'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],
|
|
1459
|
-
}
|
|
1460
|
-
|
|
1461
1363
|
For an example of analysing ``cv_results_``,
|
|
1462
|
-
see
|
|
1364
|
+
see https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#sphx-glr-auto-examples-model-selection-plot-grid-search-stats-py.
|
|
1463
1365
|
|
|
1464
1366
|
NOTE
|
|
1465
1367
|
|
|
1466
1368
|
The key ``'params'`` is used to store a list of parameter
|
|
1467
1369
|
settings dicts for all the parameter candidates.
|
|
1468
1370
|
|
|
1469
|
-
The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
|
|
1470
|
-
``std_score_time`` are all in seconds.
|
|
1471
|
-
|
|
1472
1371
|
For multi-metric evaluation, the scores for all the scorers are
|
|
1473
1372
|
available in the ``cv_results_`` dict at the keys ending with that
|
|
1474
1373
|
scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown
|
|
@@ -1524,8 +1423,6 @@ class RandomizedSearch(BaseSearch):
|
|
|
1524
1423
|
|
|
1525
1424
|
This is present only if ``refit`` is not False.
|
|
1526
1425
|
|
|
1527
|
-
.. versionadded:: 0.20
|
|
1528
|
-
|
|
1529
1426
|
multimetric_ : bool
|
|
1530
1427
|
Whether or not the scorers compute several metrics.
|
|
1531
1428
|
|
|
@@ -1539,16 +1436,12 @@ class RandomizedSearch(BaseSearch):
|
|
|
1539
1436
|
parameter for more details) and that `best_estimator_` exposes
|
|
1540
1437
|
`n_features_in_` when fit.
|
|
1541
1438
|
|
|
1542
|
-
.. versionadded:: 0.24
|
|
1543
|
-
|
|
1544
1439
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
1545
1440
|
Names of features seen during :term:`fit`. Only defined if
|
|
1546
1441
|
`best_estimator_` is defined (see the documentation for the `refit`
|
|
1547
1442
|
parameter for more details) and that `best_estimator_` exposes
|
|
1548
1443
|
`feature_names_in_` when fit.
|
|
1549
1444
|
|
|
1550
|
-
.. versionadded:: 1.0
|
|
1551
|
-
|
|
1552
1445
|
See Also
|
|
1553
1446
|
--------
|
|
1554
1447
|
GridSearch : Does exhaustive search over a grid of parameters.
|