panelsplit 2.0.5.dev0__tar.gz → 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/.github/workflows/ci.yml +1 -1
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/CODE_OF_CONDUCT.md +3 -2
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/PKG-INFO +29 -5
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/README.md +25 -1
- panelsplit-2.1.1/examples/An introduction to PanelSplit.ipynb +1027 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/cross_validation.py +83 -9
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/metrics.py +152 -37
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/model_selection/model_selection.py +10 -117
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/pipeline.py +57 -25
- panelsplit-2.1.1/panelsplit/plot.py +168 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/utils/validation.py +32 -9
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/pyproject.toml +3 -3
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_metrics.py +8 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_pipeline.py +21 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_search.py +18 -0
- panelsplit-2.1.1/tests/test_sequentialcvpipeline_indices.py +148 -0
- panelsplit-2.1.1/tests/test_spatial_cv.py +119 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/uv.lock +1434 -1959
- panelsplit-2.0.5.dev0/examples/An introduction to PanelSplit.ipynb +0 -1576
- panelsplit-2.0.5.dev0/panelsplit/plot.py +0 -66
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/.github/workflows/lint.yml +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/.github/workflows/pre-commit.yml +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/.github/workflows/releases.yml +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/.gitignore +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/.pre-commit-config.yaml +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/CHANGELOG.md +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/CITATION.cff +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/CNAME +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/LICENSE +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/__init__.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/application.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/model_selection/__init__.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/model_selection/_validation.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/utils/__init__.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/utils/_response.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/utils/typing.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/panelsplit/utils/utils.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/__init__.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/df_generation.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_PanelSplit.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_check_fitted_fix.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_cross_validation.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_edge_cases.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_issue_59_fix.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_narwhals_compatibility.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_plot.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_scorer.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_set_params.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_utils.py +0 -0
- {panelsplit-2.0.5.dev0 → panelsplit-2.1.1}/tests/test_validation_coverage.py +0 -0
|
@@ -31,8 +31,9 @@ from the organisation. Some examples of poor etiquette are:
|
|
|
31
31
|
|
|
32
32
|
For more context, you can check out this blog post on [The Cost of AI in Open Source Maintenance](https://adrin.info/the-cost-of-ai-in-open-source-maintenance.html).
|
|
33
33
|
|
|
34
|
-
If this happens to you and you believe it's been a mistake, you can reach out
|
|
35
|
-
|
|
34
|
+
If this happens to you and you believe it's been a mistake, you can reach out by opening a
|
|
35
|
+
[GitHub Discussion](https://github.com/4Freye/panelsplit/discussions) or contacting the maintainer
|
|
36
|
+
via [GitHub](https://github.com/4Freye).
|
|
36
37
|
|
|
37
38
|
*This code of conduct is adapted from scikit-learn's code of conduct, which itself follows the Python Software Foundation's Code of Conduct.
|
|
38
39
|
For the original version, see the scikit-learn repository on GitHub: <https://github.com/scikit-learn/scikit-learn/blob/main/CODE_OF_CONDUCT.md>
|
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: panelsplit
|
|
3
|
-
Version: 2.0.5.dev0
|
|
3
|
+
Version: 2.1.1
|
|
4
4
|
Summary: A tool for panel data analysis.
|
|
5
5
|
Project-URL: Homepage, https://github.com/4Freye/panelsplit
|
|
6
6
|
Project-URL: Repository, https://github.com/4Freye/panelsplit
|
|
7
7
|
Project-URL: Documentation, https://4freye.github.io/panelsplit/panelsplit.html
|
|
8
|
-
Author
|
|
8
|
+
Author: panelsplit developers
|
|
9
9
|
License: MIT
|
|
10
10
|
License-File: LICENSE
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
12
|
Classifier: Operating System :: OS Independent
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Requires-Python: >=3.
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
15
|
Requires-Dist: joblib>=1.0.1
|
|
16
16
|
Requires-Dist: matplotlib>=3.4.3
|
|
17
17
|
Requires-Dist: narwhals>=1.42.1
|
|
18
18
|
Requires-Dist: numpy>=1.21.0
|
|
19
19
|
Requires-Dist: pandas>=1.3.0
|
|
20
|
-
Requires-Dist: scikit-learn>=
|
|
20
|
+
Requires-Dist: scikit-learn>=1.8.0
|
|
21
21
|
Requires-Dist: scipy>=1.10.1
|
|
22
22
|
Requires-Dist: tqdm>=4.67.1
|
|
23
23
|
Requires-Dist: typing-extensions>=4.13.2
|
|
@@ -32,7 +32,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati
|
|
|
32
32
|
|
|
33
33
|
## Installation
|
|
34
34
|
|
|
35
|
-
panelsplit is tested for compatibility with python versions >= 3.
|
|
35
|
+
panelsplit is tested for compatibility with Python versions >= 3.11. You can install panelsplit using pip:
|
|
36
36
|
|
|
37
37
|
```bash
|
|
38
38
|
pip install panelsplit
|
|
@@ -73,6 +73,30 @@ for train_idx, test_idx in splits:
|
|
|
73
73
|
print("Test:"); display(panel_data.loc[test_idx])
|
|
74
74
|
```
|
|
75
75
|
|
|
76
|
+
### Spatio-Temporal Cross-Validation
|
|
77
|
+
|
|
78
|
+
panelsplit can also handle combined spatio-temporal holdouts by factoring in entity hierarchies (e.g., states or cities) to prevent cluster-level leakage. You can simultaneously validate on unobserved time periods *and* structurally unobserved groups:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from sklearn.model_selection import StratifiedGroupKFold
|
|
82
|
+
|
|
83
|
+
# Create spatial splits that evaluate cluster-level combinations robustly:
|
|
84
|
+
panel_split = PanelSplit(
|
|
85
|
+
periods=panel_data.year,
|
|
86
|
+
n_splits=2,
|
|
87
|
+
groups=panel_data["country_id"],
|
|
88
|
+
group_splitter=StratifiedGroupKFold(n_splits=3) # Use any valid Scikit-Learn group methodology!
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# You can also pass arbitrarily nested multi-column groups!
|
|
92
|
+
# PanelSplit will internally flatten them into a single composite group identifier for KFold slicing.
|
|
93
|
+
# e.g., groups = panel_data[["country_id", "city_id"]]
|
|
94
|
+
|
|
95
|
+
# X and y are propagated lazily through to the StratifiedGroupKFold group splitter.
|
|
96
|
+
splits = panel_split.split(X=panel_data, y=panel_data["y"])
|
|
97
|
+
# Yields 6 total sub-splits (2 temporal cuts x 3 spatial stratified holds)!
|
|
98
|
+
```
|
|
99
|
+
|
|
76
100
|
For more examples and detailed usage instructions, refer to the [examples](examples) directory in this repository. Also feel free to check out [an introductory article on panelsplit](https://towardsdatascience.com/how-to-cross-validate-your-panel-data-in-python-9ad981ddd043).
|
|
77
101
|
|
|
78
102
|
## Background
|
|
@@ -7,7 +7,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati
|
|
|
7
7
|
|
|
8
8
|
## Installation
|
|
9
9
|
|
|
10
|
-
panelsplit is tested for compatibility with python versions >= 3.
|
|
10
|
+
panelsplit is tested for compatibility with Python versions >= 3.11. You can install panelsplit using pip:
|
|
11
11
|
|
|
12
12
|
```bash
|
|
13
13
|
pip install panelsplit
|
|
@@ -48,6 +48,30 @@ for train_idx, test_idx in splits:
|
|
|
48
48
|
print("Test:"); display(panel_data.loc[test_idx])
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
+
### Spatio-Temporal Cross-Validation
|
|
52
|
+
|
|
53
|
+
panelsplit can also handle combined spatio-temporal holdouts by factoring in entity hierarchies (e.g., states or cities) to prevent cluster-level leakage. You can simultaneously validate on unobserved time periods *and* structurally unobserved groups:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from sklearn.model_selection import StratifiedGroupKFold
|
|
57
|
+
|
|
58
|
+
# Create spatial splits that evaluate cluster-level combinations robustly:
|
|
59
|
+
panel_split = PanelSplit(
|
|
60
|
+
periods=panel_data.year,
|
|
61
|
+
n_splits=2,
|
|
62
|
+
groups=panel_data["country_id"],
|
|
63
|
+
group_splitter=StratifiedGroupKFold(n_splits=3) # Use any valid Scikit-Learn group methodology!
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# You can also pass arbitrarily nested multi-column groups!
|
|
67
|
+
# PanelSplit will internally flatten them into a single composite group identifier for KFold slicing.
|
|
68
|
+
# e.g., groups = panel_data[["country_id", "city_id"]]
|
|
69
|
+
|
|
70
|
+
# X and y are propagated lazily through to the StratifiedGroupKFold group splitter.
|
|
71
|
+
splits = panel_split.split(X=panel_data, y=panel_data["y"])
|
|
72
|
+
# Yields 6 total sub-splits (2 temporal cuts x 3 spatial stratified holds)!
|
|
73
|
+
```
|
|
74
|
+
|
|
51
75
|
For more examples and detailed usage instructions, refer to the [examples](examples) directory in this repository. Also feel free to check out [an introductory article on panelsplit](https://towardsdatascience.com/how-to-cross-validate-your-panel-data-in-python-9ad981ddd043).
|
|
52
76
|
|
|
53
77
|
## Background
|