panelsplit 2.0.4.dev0__tar.gz → 2.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/ci.yml +1 -1
  2. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/PKG-INFO +4 -4
  3. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/README.md +1 -1
  4. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/metrics.py +152 -37
  5. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/model_selection/model_selection.py +10 -117
  6. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/pipeline.py +57 -25
  7. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/pyproject.toml +2 -2
  8. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_metrics.py +8 -0
  9. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_pipeline.py +21 -0
  10. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_search.py +18 -0
  11. panelsplit-2.0.5/tests/test_sequentialcvpipeline_indices.py +148 -0
  12. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/uv.lock +1434 -1959
  13. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/lint.yml +0 -0
  14. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/pre-commit.yml +0 -0
  15. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.github/workflows/releases.yml +0 -0
  16. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.gitignore +0 -0
  17. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/.pre-commit-config.yaml +0 -0
  18. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CHANGELOG.md +0 -0
  19. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CITATION.cff +0 -0
  20. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CNAME +0 -0
  21. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/CODE_OF_CONDUCT.md +0 -0
  22. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/LICENSE +0 -0
  23. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/examples/An introduction to PanelSplit.ipynb +0 -0
  24. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/__init__.py +0 -0
  25. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/application.py +0 -0
  26. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/cross_validation.py +0 -0
  27. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/model_selection/__init__.py +0 -0
  28. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/model_selection/_validation.py +0 -0
  29. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/plot.py +0 -0
  30. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/__init__.py +0 -0
  31. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/_response.py +0 -0
  32. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/typing.py +0 -0
  33. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/utils.py +0 -0
  34. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/panelsplit/utils/validation.py +0 -0
  35. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/__init__.py +0 -0
  36. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/df_generation.py +0 -0
  37. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_PanelSplit.py +0 -0
  38. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_check_fitted_fix.py +0 -0
  39. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_cross_validation.py +0 -0
  40. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_edge_cases.py +0 -0
  41. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_issue_59_fix.py +0 -0
  42. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_narwhals_compatibility.py +0 -0
  43. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_plot.py +0 -0
  44. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_scorer.py +0 -0
  45. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_set_params.py +0 -0
  46. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_utils.py +0 -0
  47. {panelsplit-2.0.4.dev0 → panelsplit-2.0.5}/tests/test_validation_coverage.py +0 -0
@@ -14,7 +14,7 @@ jobs:
14
14
 
15
15
  strategy:
16
16
  matrix:
17
- python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
17
+ python-version: ["3.11", "3.12", "3.13", "3.14"]
18
18
  fail-fast: true
19
19
 
20
20
  steps:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelsplit
3
- Version: 2.0.4.dev0
3
+ Version: 2.0.5
4
4
  Summary: A tool for panel data analysis.
5
5
  Project-URL: Homepage, https://github.com/4Freye/panelsplit
6
6
  Project-URL: Repository, https://github.com/4Freye/panelsplit
@@ -11,13 +11,13 @@ License-File: LICENSE
11
11
  Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
- Requires-Python: >=3.10
14
+ Requires-Python: >=3.11
15
15
  Requires-Dist: joblib>=1.0.1
16
16
  Requires-Dist: matplotlib>=3.4.3
17
17
  Requires-Dist: narwhals>=1.42.1
18
18
  Requires-Dist: numpy>=1.21.0
19
19
  Requires-Dist: pandas>=1.3.0
20
- Requires-Dist: scikit-learn>=0.24.2
20
+ Requires-Dist: scikit-learn>=1.8.0
21
21
  Requires-Dist: scipy>=1.10.1
22
22
  Requires-Dist: tqdm>=4.67.1
23
23
  Requires-Dist: typing-extensions>=4.13.2
@@ -32,7 +32,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati
32
32
 
33
33
  ## Installation
34
34
 
35
- panelsplit is tested for compatibility with python versions >= 3.10. You can install panelsplit using pip:
35
+ panelsplit is tested for compatibility with python versions >= 3.11. You can install panelsplit using pip:
36
36
 
37
37
  ```bash
38
38
  pip install panelsplit
@@ -7,7 +7,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati
7
7
 
8
8
  ## Installation
9
9
 
10
- panelsplit is tested for compatibility with python versions >= 3.10. You can install panelsplit using pip:
10
+ panelsplit is tested for compatibility with python versions >= 3.11. You can install panelsplit using pip:
11
11
 
12
12
  ```bash
13
13
  pip install panelsplit
@@ -1,37 +1,42 @@
1
- from .utils.validation import _safe_indexing
1
+ """
2
+ Metrics that are equivalent to their sklearn counterparts, except that they work with SequentialCVPipeline.
3
+ """
4
+
5
+ # Standard library
6
+ import warnings
2
7
  from inspect import signature
3
8
  from collections.abc import Iterable
4
9
  from functools import partial
5
- from sklearn.metrics._scorer import _MultimetricScorer
6
- from sklearn.utils._param_validation import (
7
- validate_params,
8
- )
9
- from sklearn.metrics._scorer import _PassthroughScorer, _get_response_method_name
10
10
  from copy import deepcopy
11
- from sklearn.utils.validation import _check_response_method
12
- import warnings
13
- from sklearn.base import is_regressor
14
- from panelsplit.utils._response import _get_response_values
15
- from sklearn.utils.metadata_routing import (
16
- _MetadataRequester,
17
- _raise_for_params,
18
- _routing_enabled,
19
- MetadataRequest,
20
- )
21
- from .utils.typing import EstimatorLike, ArrayLike
22
- from numpy.typing import NDArray
23
11
  from typing import Callable, Optional, List, Union, Any, Dict
12
+
13
+ # Third-party / typing
24
14
  from typing_extensions import Self
15
+ from numpy.typing import NDArray
25
16
 
26
- # all the error scores:
17
+ # Local package utilities
18
+ from .utils.validation import _safe_indexing
19
+ from .utils.typing import EstimatorLike, ArrayLike
20
+ from panelsplit.utils._response import _get_response_values
21
+
22
+ # sklearn public metrics (single consolidated import)
27
23
  from sklearn.metrics import (
28
24
  accuracy_score,
25
+ adjusted_mutual_info_score,
26
+ adjusted_rand_score,
29
27
  average_precision_score,
30
28
  balanced_accuracy_score,
31
29
  brier_score_loss,
32
30
  class_likelihood_ratios,
31
+ completeness_score,
33
32
  d2_absolute_error_score,
33
+ d2_brier_score,
34
+ d2_log_loss_score,
34
35
  explained_variance_score,
36
+ f1_score,
37
+ fowlkes_mallows_score,
38
+ jaccard_score,
39
+ homogeneity_score,
35
40
  log_loss,
36
41
  matthews_corrcoef,
37
42
  max_error,
@@ -42,24 +47,37 @@ from sklearn.metrics import (
42
47
  mean_squared_error,
43
48
  mean_squared_log_error,
44
49
  median_absolute_error,
50
+ mutual_info_score,
51
+ normalized_mutual_info_score,
52
+ precision_score,
53
+ rand_score,
45
54
  r2_score,
55
+ recall_score,
46
56
  roc_auc_score,
47
57
  root_mean_squared_error,
48
58
  root_mean_squared_log_error,
49
59
  top_k_accuracy_score,
50
- )
51
- from sklearn.metrics.cluster import (
52
- adjusted_mutual_info_score,
53
- adjusted_rand_score,
54
- completeness_score,
55
- fowlkes_mallows_score,
56
- homogeneity_score,
57
- mutual_info_score,
58
- normalized_mutual_info_score,
59
- rand_score,
60
60
  v_measure_score,
61
61
  )
62
62
 
63
+ # sklearn internals / utilities (note: private APIs)
64
+ from sklearn.metrics._scorer import (
65
+ _MultimetricScorer,
66
+ _PassthroughScorer,
67
+ _get_response_method_name,
68
+ )
69
+ from sklearn.utils._param_validation import validate_params
70
+ from sklearn.utils.validation import _check_response_method
71
+ from sklearn.base import is_regressor
72
+
73
+ # metadata routing utilities (used by some sklearn internals)
74
+ from sklearn.utils.metadata_routing import (
75
+ _MetadataRequester,
76
+ _raise_for_params,
77
+ _routing_enabled,
78
+ MetadataRequest,
79
+ )
80
+
63
81
 
64
82
  def _get_idx_from_last_cv(estimator: EstimatorLike) -> Union[None, List[NDArray]]:
65
83
  """
@@ -88,14 +106,63 @@ def make_SequentialCV_scorer(
88
106
  greater_is_better: bool = True,
89
107
  **kwargs: Any,
90
108
  ) -> Callable[..., float]:
109
+ """
110
+ Make a SequentialCVPipeline-compatible scorer from a performance metric.
111
+
112
+ A scorer is a wrapper around an arbitrary metric or loss function that is called
113
+ with the signature `scorer(estimator, X, y_true, **kwargs)`.
114
+
115
+ The parameter `response_method` specifies which method of the estimator
116
+ should be used to feed the scoring/loss function.
117
+
118
+ Parameters
119
+ ----------
120
+ score_func : callable
121
+ Score function (or loss function) with signature
122
+ ``score_func(y, y_pred, **kwargs)``.
123
+
124
+ response_method : {"predict_proba", "decision_function", "predict"} or \
125
+ list/tuple of such str, default="predict"
126
+
127
+ Specifies the response method to use to get predictions from an estimator
128
+ (i.e. :term:`predict_proba`, :term:`decision_function` or
129
+ :term:`predict`). Possible choices are:
130
+
131
+ - if `str`, it corresponds to the name of the method to return;
132
+ - if a list or tuple of `str`, it provides the method names in order of
133
+ preference. The method returned is the first method in
134
+ the list that is implemented by `estimator`.
135
+
136
+ greater_is_better : bool, default=True
137
+ Whether `score_func` is a score function (default), meaning high is
138
+ good, or a loss function, meaning low is good. In the latter case, the
139
+ scorer object will sign-flip the outcome of the `score_func`.
140
+
141
+ **kwargs : additional arguments
142
+ Additional parameters to be passed to `score_func`.
143
+
144
+ Returns
145
+ -------
146
+ Callable
147
+ Callable object that returns a scalar score; greater is better.
148
+
149
+ Examples
150
+ --------
151
+ >>> from panelsplit.metrics import make_SequentialCV_scorer
152
+ >>> from sklearn.metrics import brier_score_loss
153
+ >>> brier_loss_scorer= make_SequentialCV_scorer(brier_score_loss, response_method='predict_proba', greater_is_better=False)
154
+
155
+ >>> from panelsplit.pipeline import SequentialCVPipeline
156
+ >>> from sklearn.ensemble import RandomForestClassifier
157
+ >>> from sklearn.datasets import load_iris
158
+ >>> X, y = load_iris(return_X_y=True)
159
+ >>> p = SequentialCVPipeline(steps = [('rf', RandomForestClassifier())], cv_steps = [None])
160
+ >>> p.fit(X, y)
161
+ >>> brier_loss_scorer(p, X, y)
162
+ """
91
163
  sign = 1 if greater_is_better else -1
92
164
 
93
165
  if response_method is None:
94
- warnings.warn(
95
- "response_method=None is deprecated in version 1.6 and will be removed "
96
- "in version 1.8. Leave it to its default value to avoid this warning.",
97
- FutureWarning,
98
- )
99
166
  response_method = "predict"
100
167
  elif response_method == "default":
101
168
  response_method = "predict"
@@ -158,7 +225,6 @@ class _BaseScorer(_MetadataRequester):
158
225
  self._sign = sign
159
226
  self._kwargs = kwargs
160
227
  self._response_method = response_method
161
- # TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)
162
228
  self._deprecation_msg = None
163
229
 
164
230
  def _get_pos_label(self) -> Optional[Any]:
@@ -170,7 +236,6 @@ class _BaseScorer(_MetadataRequester):
170
236
  return None
171
237
 
172
238
  def _accept_sample_weight(self) -> bool:
173
- # TODO(slep006): remove when metadata routing is the only way
174
239
  return "sample_weight" in signature(self._score_func).parameters
175
240
 
176
241
  def __repr__(self) -> str:
@@ -217,7 +282,6 @@ class _BaseScorer(_MetadataRequester):
217
282
  float
218
283
  Score function applied to prediction of estimator on X.
219
284
  """
220
- # TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)
221
285
  if self._deprecation_msg is not None:
222
286
  warnings.warn(
223
287
  self._deprecation_msg, category=DeprecationWarning, stacklevel=2
@@ -314,6 +378,7 @@ class _Scorer(_BaseScorer):
314
378
  X,
315
379
  pos_label=pos_label,
316
380
  )
381
+
317
382
  # make lookup dict for fast matching
318
383
  pred_dict = dict(zip(idx, y_pred))
319
384
 
@@ -340,6 +405,36 @@ class _Scorer(_BaseScorer):
340
405
  prefer_skip_nested_validation=True,
341
406
  )
342
407
  def get_scorer(scoring: Union[str, Callable]) -> Any:
408
+ """
409
+ Get a scorer from string.
410
+
411
+ `sklearn.metrics.get_scorer_names` can be used to retrieve the names
412
+ of all available scorers.
413
+
414
+ Parameters
415
+ ----------
416
+ scoring : str, callable or None
417
+ Scoring method as string. If callable it is returned as is.
418
+ If None, returns None.
419
+
420
+ Returns
421
+ -------
422
+ callable
423
+ The scorer.
424
+
425
+ Notes
426
+ -----
427
+ When passed a string, this function always returns a copy of the scorer
428
+ object. Calling `get_scorer` twice for the same scorer results in two
429
+ separate scorer objects.
430
+
431
+ Examples
432
+ --------
433
+ >>> from panelsplit.metrics import get_scorer
434
+ >>> accuracy = get_scorer("accuracy")
435
+ >>> accuracy(classifier, X, y)
436
+ """
437
+
343
438
  if isinstance(scoring, str):
344
439
  try:
345
440
  scorer = deepcopy(_SCORERS[scoring])
@@ -489,7 +584,11 @@ neg_mean_poisson_deviance_scorer = make_SequentialCV_scorer(
489
584
  neg_mean_gamma_deviance_scorer = make_SequentialCV_scorer(
490
585
  mean_gamma_deviance, greater_is_better=False
491
586
  )
587
+ # D^2 scorers (fraction of explained Brier / log-loss)
492
588
  d2_absolute_error_scorer = make_SequentialCV_scorer(d2_absolute_error_score)
589
+ d2_brier_scorer = make_SequentialCV_scorer(d2_brier_score)
590
+ d2_log_loss_scorer = make_SequentialCV_scorer(d2_log_loss_score)
591
+
493
592
 
494
593
  # Standard Classification Scores
495
594
  accuracy_scorer = make_SequentialCV_scorer(accuracy_score)
@@ -583,6 +682,8 @@ _SCORERS = dict(
583
682
  neg_mean_poisson_deviance=neg_mean_poisson_deviance_scorer,
584
683
  neg_mean_gamma_deviance=neg_mean_gamma_deviance_scorer,
585
684
  d2_absolute_error_score=d2_absolute_error_scorer,
685
+ d2_brier_score=d2_brier_scorer,
686
+ d2_log_loss_score=d2_log_loss_scorer,
586
687
  accuracy=accuracy_scorer,
587
688
  top_k_accuracy=top_k_accuracy_scorer,
588
689
  roc_auc=roc_auc_scorer,
@@ -607,3 +708,17 @@ _SCORERS = dict(
607
708
  normalized_mutual_info_score=normalized_mutual_info_scorer,
608
709
  fowlkes_mallows_score=fowlkes_mallows_scorer,
609
710
  )
711
+
712
+
713
+ for name, metric in [
714
+ ("precision", precision_score),
715
+ ("recall", recall_score),
716
+ ("f1", f1_score),
717
+ ("jaccard", jaccard_score),
718
+ ]:
719
+ _SCORERS[name] = make_SequentialCV_scorer(metric, average="binary")
720
+ for average in ["macro", "micro", "samples", "weighted"]:
721
+ qualified_name = "{0}_{1}".format(name, average)
722
+ _SCORERS[qualified_name] = make_SequentialCV_scorer(
723
+ metric, pos_label=None, average=average
724
+ )
@@ -970,8 +970,8 @@ class GridSearch(BaseSearch):
970
970
 
971
971
  If `scoring` represents a single score, one can use:
972
972
 
973
- - a single string (see :ref:`scoring_string_names`);
974
- - a callable (see :ref:`scoring_callable`) that returns a single value;
973
+ - a single string (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-string-names);
974
+ - a callable (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-callable) that returns a single value;
975
975
  - `None`, the `estimator`'s default evaluation criterion is used.
976
976
 
977
977
  If `scoring` represents multiple scores, one can use:
@@ -981,16 +981,13 @@ class GridSearch(BaseSearch):
981
981
  names and the values are the metric scores;
982
982
  - a dictionary with metric names as keys and callables as values.
983
983
 
984
- See :ref:`multimetric_grid_search` for an example.
984
+ See https://scikit-learn.org/stable/modules/grid_search.html#multimetric-grid-search for an example.
985
985
 
986
986
  n_jobs : int, default=None
987
987
  Number of jobs to run in parallel.
988
988
  ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
989
989
  ``-1`` means using all processors.
990
990
 
991
- .. versionchanged:: v0.20
992
- `n_jobs` default changed from 1 to None
993
-
994
991
  refit : bool, str, or callable, default=True
995
992
  Refit an estimator using the best found parameters on the whole
996
993
  dataset.
@@ -1054,67 +1051,20 @@ class GridSearch(BaseSearch):
1054
1051
  expensive and is not strictly required to select the parameters that
1055
1052
  yield the best generalization performance.
1056
1053
 
1057
- .. versionadded:: 0.19
1058
-
1059
- .. versionchanged:: 0.21
1060
- Default value was changed from ``True`` to ``False``
1061
-
1062
1054
  Attributes
1063
1055
  ----------
1064
1056
  cv_results_ : dict of numpy (masked) ndarrays
1065
1057
  A dict with keys as column headers and values as columns, that can be
1066
1058
  imported into a pandas ``DataFrame``.
1067
1059
 
1068
- For instance the below given table
1069
-
1070
- +------------+-----------+------------+-----------------+---+---------+
1071
- |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|
1072
- +============+===========+============+=================+===+=========+
1073
- | 'poly' | -- | 2 | 0.80 |...| 2 |
1074
- +------------+-----------+------------+-----------------+---+---------+
1075
- | 'poly' | -- | 3 | 0.70 |...| 4 |
1076
- +------------+-----------+------------+-----------------+---+---------+
1077
- | 'rbf' | 0.1 | -- | 0.80 |...| 3 |
1078
- +------------+-----------+------------+-----------------+---+---------+
1079
- | 'rbf' | 0.2 | -- | 0.93 |...| 1 |
1080
- +------------+-----------+------------+-----------------+---+---------+
1081
-
1082
- will be represented by a ``cv_results_`` dict of::
1083
-
1084
- {
1085
- 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],
1086
- mask = [False False False False]...)
1087
- 'param_gamma': masked_array(data = [-- -- 0.1 0.2],
1088
- mask = [ True True False False]...),
1089
- 'param_degree': masked_array(data = [2.0 3.0 -- --],
1090
- mask = [False False True True]...),
1091
- 'split0_test_score' : [0.80, 0.70, 0.80, 0.93],
1092
- 'split1_test_score' : [0.82, 0.50, 0.70, 0.78],
1093
- 'mean_test_score' : [0.81, 0.60, 0.75, 0.85],
1094
- 'std_test_score' : [0.01, 0.10, 0.05, 0.08],
1095
- 'rank_test_score' : [2, 4, 3, 1],
1096
- 'split0_train_score' : [0.80, 0.92, 0.70, 0.93],
1097
- 'split1_train_score' : [0.82, 0.55, 0.70, 0.87],
1098
- 'mean_train_score' : [0.81, 0.74, 0.70, 0.90],
1099
- 'std_train_score' : [0.01, 0.19, 0.00, 0.03],
1100
- 'mean_fit_time' : [0.73, 0.63, 0.43, 0.49],
1101
- 'std_fit_time' : [0.01, 0.02, 0.01, 0.01],
1102
- 'mean_score_time' : [0.01, 0.06, 0.04, 0.04],
1103
- 'std_score_time' : [0.00, 0.00, 0.00, 0.01],
1104
- 'params' : [{'kernel': 'poly', 'degree': 2}, ...],
1105
- }
1106
-
1107
1060
  For an example of visualization and interpretation of GridSearch results,
1108
- see :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py`.
1061
+ see https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#sphx-glr-auto-examples-model-selection-plot-grid-search-stats-py.
1109
1062
 
1110
1063
  NOTE
1111
1064
 
1112
1065
  The key ``'params'`` is used to store a list of parameter
1113
1066
  settings dicts for all the parameter candidates.
1114
1067
 
1115
- The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
1116
- ``std_score_time`` are all in seconds.
1117
-
1118
1068
  For multi-metric evaluation, the scores for all the scorers are
1119
1069
  available in the ``cv_results_`` dict at the keys ending with that
1120
1070
  scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown
@@ -1167,8 +1117,6 @@ class GridSearch(BaseSearch):
1167
1117
 
1168
1118
  This is present only if ``refit`` is not False.
1169
1119
 
1170
- .. versionadded:: 0.20
1171
-
1172
1120
  multimetric_ : bool
1173
1121
  Whether or not the scorers compute several metrics.
1174
1122
 
@@ -1182,16 +1130,12 @@ class GridSearch(BaseSearch):
1182
1130
  parameter for more details) and that `best_estimator_` exposes
1183
1131
  `n_features_in_` when fit.
1184
1132
 
1185
- .. versionadded:: 0.24
1186
-
1187
1133
  feature_names_in_ : ndarray of shape (`n_features_in_`,)
1188
1134
  Names of features seen during :term:`fit`. Only defined if
1189
1135
  `best_estimator_` is defined (see the documentation for the `refit`
1190
1136
  parameter for more details) and that `best_estimator_` exposes
1191
1137
  `feature_names_in_` when fit.
1192
1138
 
1193
- .. versionadded:: 1.0
1194
-
1195
1139
  See Also
1196
1140
  --------
1197
1141
  ParameterGrid : Generates all the combinations of a hyperparameter grid.
@@ -1226,11 +1170,11 @@ class GridSearch(BaseSearch):
1226
1170
  GridSearch(estimator=SVC(),
1227
1171
  param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})
1228
1172
  >>> sorted(clf.cv_results_.keys())
1229
- ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
1173
+ ['mean_test_score',...
1230
1174
  'param_C', 'param_kernel', 'params',...
1231
1175
  'rank_test_score', 'split0_test_score',...
1232
1176
  'split2_test_score', ...
1233
- 'std_fit_time', 'std_score_time', 'std_test_score']
1177
+ 'std_test_score']
1234
1178
  """
1235
1179
 
1236
1180
  _parameter_constraints: dict = {
@@ -1320,8 +1264,8 @@ class RandomizedSearch(BaseSearch):
1320
1264
 
1321
1265
  If `scoring` represents a single score, one can use:
1322
1266
 
1323
- - a single string (see :ref:`scoring_string_names`);
1324
- - a callable (see :ref:`scoring_callable`) that returns a single value;
1267
+ - a single string (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-string-names);
1268
+ - a callable (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-callable) that returns a single value;
1325
1269
  - `None`, the `estimator`'s default evaluation criterion is used.
1326
1270
 
1327
1271
  If `scoring` represents multiple scores, one can use:
@@ -1331,7 +1275,7 @@ class RandomizedSearch(BaseSearch):
1331
1275
  names and the values are the metric scores;
1332
1276
  - a dictionary with metric names as keys and callables as values.
1333
1277
 
1334
- See :ref:`multimetric_grid_search` for an example.
1278
+ See https://scikit-learn.org/stable/modules/grid_search.html#multimetric-grid-search for an example.
1335
1279
 
1336
1280
  If None, the estimator's score method is used.
1337
1281
 
@@ -1341,9 +1285,6 @@ class RandomizedSearch(BaseSearch):
1341
1285
  ``-1`` means using all processors.
1342
1286
  for more details.
1343
1287
 
1344
- .. versionchanged:: v0.20
1345
- `n_jobs` default changed from 1 to None
1346
-
1347
1288
  refit : bool, str, or callable, default=True
1348
1289
  Refit an estimator using the best found parameters on the whole
1349
1290
  dataset.
@@ -1413,62 +1354,20 @@ class RandomizedSearch(BaseSearch):
1413
1354
  expensive and is not strictly required to select the parameters that
1414
1355
  yield the best generalization performance.
1415
1356
 
1416
- .. versionadded:: 0.19
1417
-
1418
- .. versionchanged:: 0.21
1419
- Default value was changed from ``True`` to ``False``
1420
-
1421
1357
  Attributes
1422
1358
  ----------
1423
1359
  cv_results_ : dict of numpy (masked) ndarrays
1424
1360
  A dict with keys as column headers and values as columns, that can be
1425
1361
  imported into a pandas ``DataFrame``.
1426
1362
 
1427
- For instance the below given table
1428
-
1429
- +--------------+-------------+-------------------+---+---------------+
1430
- | param_kernel | param_gamma | split0_test_score |...|rank_test_score|
1431
- +==============+=============+===================+===+===============+
1432
- | 'rbf' | 0.1 | 0.80 |...| 1 |
1433
- +--------------+-------------+-------------------+---+---------------+
1434
- | 'rbf' | 0.2 | 0.84 |...| 3 |
1435
- +--------------+-------------+-------------------+---+---------------+
1436
- | 'rbf' | 0.3 | 0.70 |...| 2 |
1437
- +--------------+-------------+-------------------+---+---------------+
1438
-
1439
- will be represented by a ``cv_results_`` dict of::
1440
-
1441
- {
1442
- 'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],
1443
- mask = False),
1444
- 'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),
1445
- 'split0_test_score' : [0.80, 0.84, 0.70],
1446
- 'split1_test_score' : [0.82, 0.50, 0.70],
1447
- 'mean_test_score' : [0.81, 0.67, 0.70],
1448
- 'std_test_score' : [0.01, 0.24, 0.00],
1449
- 'rank_test_score' : [1, 3, 2],
1450
- 'split0_train_score' : [0.80, 0.92, 0.70],
1451
- 'split1_train_score' : [0.82, 0.55, 0.70],
1452
- 'mean_train_score' : [0.81, 0.74, 0.70],
1453
- 'std_train_score' : [0.01, 0.19, 0.00],
1454
- 'mean_fit_time' : [0.73, 0.63, 0.43],
1455
- 'std_fit_time' : [0.01, 0.02, 0.01],
1456
- 'mean_score_time' : [0.01, 0.06, 0.04],
1457
- 'std_score_time' : [0.00, 0.00, 0.00],
1458
- 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],
1459
- }
1460
-
1461
1363
  For an example of analysing ``cv_results_``,
1462
- see :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py`.
1364
+ see https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#sphx-glr-auto-examples-model-selection-plot-grid-search-stats-py.
1463
1365
 
1464
1366
  NOTE
1465
1367
 
1466
1368
  The key ``'params'`` is used to store a list of parameter
1467
1369
  settings dicts for all the parameter candidates.
1468
1370
 
1469
- The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
1470
- ``std_score_time`` are all in seconds.
1471
-
1472
1371
  For multi-metric evaluation, the scores for all the scorers are
1473
1372
  available in the ``cv_results_`` dict at the keys ending with that
1474
1373
  scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown
@@ -1524,8 +1423,6 @@ class RandomizedSearch(BaseSearch):
1524
1423
 
1525
1424
  This is present only if ``refit`` is not False.
1526
1425
 
1527
- .. versionadded:: 0.20
1528
-
1529
1426
  multimetric_ : bool
1530
1427
  Whether or not the scorers compute several metrics.
1531
1428
 
@@ -1539,16 +1436,12 @@ class RandomizedSearch(BaseSearch):
1539
1436
  parameter for more details) and that `best_estimator_` exposes
1540
1437
  `n_features_in_` when fit.
1541
1438
 
1542
- .. versionadded:: 0.24
1543
-
1544
1439
  feature_names_in_ : ndarray of shape (`n_features_in_`,)
1545
1440
  Names of features seen during :term:`fit`. Only defined if
1546
1441
  `best_estimator_` is defined (see the documentation for the `refit`
1547
1442
  parameter for more details) and that `best_estimator_` exposes
1548
1443
  `feature_names_in_` when fit.
1549
1444
 
1550
- .. versionadded:: 1.0
1551
-
1552
1445
  See Also
1553
1446
  --------
1554
1447
  GridSearch : Does exhaustive search over a grid of parameters.