kfc-procedure 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kfc_procedure-0.1.0/LICENSE +21 -0
- kfc_procedure-0.1.0/PKG-INFO +169 -0
- kfc_procedure-0.1.0/README.md +129 -0
- kfc_procedure-0.1.0/pyproject.toml +85 -0
- kfc_procedure-0.1.0/setup.cfg +4 -0
- kfc_procedure-0.1.0/src/kfc_procedure/__init__.py +10 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/__init__.py +13 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/combined_classifier.py +512 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/__init__.py +106 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/adapters/__init__.py +32 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/adapters/base.py +141 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/adapters/one_parameter.py +60 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/adapters/two_parameter.py +85 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/aggregators/__init__.py +16 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/aggregators/base.py +147 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/aggregators/weighted_mean.py +62 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/aggregators/weighted_vote.py +125 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/cv/__init__.py +27 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/cv/base.py +94 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/cv/kfold.py +103 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/cv/stratified_kfold.py +71 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/cv/time_series.py +57 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/__init__.py +20 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/base.py +135 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/cosine.py +77 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/euclidean.py +75 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/hamming.py +109 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/manhattan.py +68 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/distances/minkowski.py +96 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/estimators/__init__.py +41 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/estimators/base.py +135 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/estimators/mean_regressor.py +83 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/estimators/sklearn.py +103 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/factory.py +377 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/__init__.py +49 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/base.py +181 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/biweight.py +27 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/cauchy.py +30 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/cobra.py +36 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/epanechnikov.py +29 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/exponential.py +36 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/naive.py +29 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/radial.py +32 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/reverse_cosh.py +42 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/triangular.py +27 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/kernels/triweight.py +27 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/__init__.py +31 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/base.py +82 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/hinge.py +25 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/huber.py +34 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/log_loss.py +25 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/mae.py +24 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/mse.py +25 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/losses/quantile.py +32 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/normalizers/__init__.py +30 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/normalizers/base.py +115 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/normalizers/minmax.py +72 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/normalizers/standard.py +81 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/__init__.py +50 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/_utils.py +256 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/base.py +108 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/gradient/__init__.py +35 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/gradient/adam.py +131 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/gradient/base.py +319 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/gradient/gd.py +101 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/gradient/momentum.py +105 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/search/__init__.py +28 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/search/base.py +196 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/optimizers/search/search.py +81 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/splitters/__init__.py +60 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/splitters/base.py +119 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/splitters/holdout.py +110 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/splitters/overlap.py +182 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/core/types.py +82 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/gradientcobra.py +396 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/mixcobra.py +690 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/superlearner.py +509 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/utils/__init__.py +99 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/utils/distance.py +20 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/utils/preprocessing.py +265 -0
- kfc_procedure-0.1.0/src/kfc_procedure/cobra/utils/resolve.py +466 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/__init__.py +0 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/__init__.py +51 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/bregman.py +406 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/divergences/__init__.py +30 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/divergences/base.py +412 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/divergences/euclidean.py +100 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/divergences/gkl.py +90 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/divergences/itakura_saito.py +82 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/clustering/divergences/logistic.py +91 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/__init__.py +94 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/base.py +92 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/classification/__init__.py +18 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/classification/combined_classifier.py +33 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/classification/majority_vote.py +39 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/classification/stacking.py +44 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/regression/__init__.py +24 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/regression/gradientcobra.py +28 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/regression/mean.py +47 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/regression/mixcobra.py +28 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/regression/stacking.py +53 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/combiner/regression/weighted_mean.py +49 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/factory.py +484 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/ml/__init__.py +26 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/ml/base.py +75 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/ml/sklearn.py +146 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/steps/__init__.py +77 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/steps/cstep.py +227 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/steps/fstep.py +233 -0
- kfc_procedure-0.1.0/src/kfc_procedure/core/steps/kstep.py +209 -0
- kfc_procedure-0.1.0/src/kfc_procedure/kfc.py +335 -0
- kfc_procedure-0.1.0/src/kfc_procedure/utils/__init__.py +3 -0
- kfc_procedure-0.1.0/src/kfc_procedure/utils/logger.py +131 -0
- kfc_procedure-0.1.0/src/kfc_procedure/utils/resolve.py +41 -0
- kfc_procedure-0.1.0/src/kfc_procedure.egg-info/PKG-INFO +169 -0
- kfc_procedure-0.1.0/src/kfc_procedure.egg-info/SOURCES.txt +117 -0
- kfc_procedure-0.1.0/src/kfc_procedure.egg-info/dependency_links.txt +1 -0
- kfc_procedure-0.1.0/src/kfc_procedure.egg-info/requires.txt +29 -0
- kfc_procedure-0.1.0/src/kfc_procedure.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Ougi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kfc-procedure
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Clusterwise predictive modeling library
|
|
5
|
+
Author-email: OUGI POV <ougipov113@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Ougi3ay/kfc-procedure
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
11
|
+
Requires-Python: >=3.11
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: pandas
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
Requires-Dist: matplotlib
|
|
18
|
+
Requires-Dist: xgboost
|
|
19
|
+
Provides-Extra: core
|
|
20
|
+
Requires-Dist: numpy; extra == "core"
|
|
21
|
+
Requires-Dist: scikit-learn; extra == "core"
|
|
22
|
+
Provides-Extra: cobra
|
|
23
|
+
Requires-Dist: numba; extra == "cobra"
|
|
24
|
+
Requires-Dist: faiss-cpu; extra == "cobra"
|
|
25
|
+
Requires-Dist: xgboost; extra == "cobra"
|
|
26
|
+
Requires-Dist: pandas; extra == "cobra"
|
|
27
|
+
Requires-Dist: numpy; extra == "cobra"
|
|
28
|
+
Requires-Dist: scikit-learn; extra == "cobra"
|
|
29
|
+
Requires-Dist: matplotlib; extra == "cobra"
|
|
30
|
+
Requires-Dist: plotly; extra == "cobra"
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest; extra == "dev"
|
|
33
|
+
Requires-Dist: build; extra == "dev"
|
|
34
|
+
Requires-Dist: twine; extra == "dev"
|
|
35
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
36
|
+
Provides-Extra: all
|
|
37
|
+
Requires-Dist: kfc-procedure[cobra]; extra == "all"
|
|
38
|
+
Requires-Dist: kfc-procedure[dev]; extra == "all"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# KFC-Model: A Python Implementation of the KFC Procedure
|
|
42
|
+
|
|
43
|
+
KFC-Model is a modular Python library for clusterwise predictive modeling using the KFC procedure (K-step, F-step, C-step). It combines multiple clustering divergences, local models, and aggregation strategies for regression and classification tasks.
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- KFC meta-estimator for clusterwise learning
|
|
48
|
+
- Modular `KStep`, `FStep`, and `CStep` components
|
|
49
|
+
- Support for Bregman K-Means divergences
|
|
50
|
+
- Local model factories for regression and classification
|
|
51
|
+
- Aggregation strategies including mean, stacking, and GradientCOBRA
|
|
52
|
+
- Easy extension with custom components
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
Requirements:
|
|
57
|
+
|
|
58
|
+
- Python 3.11+
|
|
59
|
+
- `numpy`
|
|
60
|
+
- `pandas`
|
|
61
|
+
- `scikit-learn`
|
|
62
|
+
- `xgboost`
|
|
63
|
+
- `matplotlib`
|
|
64
|
+
|
|
65
|
+
Create and activate a virtual environment:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
python3 -m venv .venv
|
|
69
|
+
source .venv/bin/activate
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Install dependencies:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
python3 -m pip install -r requirements.txt
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Install the package locally:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
python3 -m pip install -e .
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import numpy as np
|
|
88
|
+
from kfc_procedure.kfc import KFCRegressor, KFCClassifier
|
|
89
|
+
|
|
90
|
+
# Example data
|
|
91
|
+
X = np.random.randn(200, 5)
|
|
92
|
+
y_reg = X[:, 0] * 2 + np.random.randn(200) * 0.1
|
|
93
|
+
y_clf = (y_reg > 0).astype(int)
|
|
94
|
+
|
|
95
|
+
# Regression example
|
|
96
|
+
model = KFCRegressor(
|
|
97
|
+
divergences=["euclidean", "kl"],
|
|
98
|
+
local_model="linear",
|
|
99
|
+
aggregation="mean",
|
|
100
|
+
random_state=42,
|
|
101
|
+
)
|
|
102
|
+
model.fit(X, y_reg)
|
|
103
|
+
y_pred = model.predict(X)
|
|
104
|
+
|
|
105
|
+
# Classification example
|
|
106
|
+
clf = KFCClassifier(
|
|
107
|
+
divergences=["euclidean"],
|
|
108
|
+
local_model="logistic",
|
|
109
|
+
aggregation="majority_vote",
|
|
110
|
+
random_state=42,
|
|
111
|
+
)
|
|
112
|
+
clf.fit(X, y_clf)
|
|
113
|
+
y_pred_clf = clf.predict(X)
|
|
114
|
+
proba = clf.predict_proba(X)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Core Components
|
|
118
|
+
|
|
119
|
+
- `KStep`: fits clustering models using one or more Bregman divergences
|
|
120
|
+
- `FStep`: trains local models for each cluster and divergence
|
|
121
|
+
- `CStep`: aggregates local predictions into final outputs
|
|
122
|
+
- `KFCRegressor` / `KFCClassifier`: full meta-estimators exposing `fit`, `predict`, and `predict_proba`
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
### `divergences`
|
|
127
|
+
|
|
128
|
+
The divergences parameter accepts:
|
|
129
|
+
|
|
130
|
+
- a list of divergence names, e.g. `['euclidean', 'kl']`
|
|
131
|
+
- a list of config dictionaries, e.g. `[{ 'name': 'euclidean', 'n_clusters': 4 }]`
|
|
132
|
+
|
|
133
|
+
Available divergences: `'euclidean'`, `'kl'`, `'gkl'`, `'is'`, `'logistic'`
|
|
134
|
+
|
|
135
|
+
### `local_model`
|
|
136
|
+
|
|
137
|
+
The local_model parameter accepts:
|
|
138
|
+
|
|
139
|
+
- a model name string, e.g. `'linear'`, `'ridge'`
|
|
140
|
+
|
|
141
|
+
Supported regression models include: `linear`, `ridge`, `lasso`, `decision_tree`, `random_forest`.
|
|
142
|
+
Supported classification models include: `logistic`, `decision_tree`, `random_forest`.
|
|
143
|
+
|
|
144
|
+
### `aggregation`
|
|
145
|
+
|
|
146
|
+
The aggregation parameter accepts:
|
|
147
|
+
|
|
148
|
+
- an aggregation strategy name string, e.g. `'mean'`, `'stacking'`
|
|
149
|
+
|
|
150
|
+
Supported aggregators:
|
|
151
|
+
|
|
152
|
+
- Regression: `mean`, `weighted_mean`, `stacking`
|
|
153
|
+
- Classification: `majority_vote`, `stacking`, `combine_classifier`
|
|
154
|
+
|
|
155
|
+
## Project Structure
|
|
156
|
+
|
|
157
|
+
- `src/kfc_procedure/`: main package code
|
|
158
|
+
- `src/kfc_procedure/core/`: factories, clustering, ML wrappers, and aggregation strategies
|
|
159
|
+
- `src/kfc_procedure/core/steps/`: KFC step implementations (`KStep`, `FStep`, `CStep`)
|
|
160
|
+
- `src/kfc_procedure/utils/`: resolution, validation, and logging helpers
|
|
161
|
+
|
|
162
|
+
## Contributing
|
|
163
|
+
|
|
164
|
+
Contributions, bug reports, and improvements are welcome. Use `pytest` for testing and follow the existing package layout for new components.
|
|
165
|
+
|
|
166
|
+
## License
|
|
167
|
+
|
|
168
|
+
MIT License
|
|
169
|
+
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# KFC-Model: A Python Implementation of the KFC Procedure
|
|
2
|
+
|
|
3
|
+
KFC-Model is a modular Python library for clusterwise predictive modeling using the KFC procedure (K-step, F-step, C-step). It combines multiple clustering divergences, local models, and aggregation strategies for regression and classification tasks.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- KFC meta-estimator for clusterwise learning
|
|
8
|
+
- Modular `KStep`, `FStep`, and `CStep` components
|
|
9
|
+
- Support for Bregman K-Means divergences
|
|
10
|
+
- Local model factories for regression and classification
|
|
11
|
+
- Aggregation strategies including mean, stacking, and GradientCOBRA
|
|
12
|
+
- Easy extension with custom components
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
Requirements:
|
|
17
|
+
|
|
18
|
+
- Python 3.11+
|
|
19
|
+
- `numpy`
|
|
20
|
+
- `pandas`
|
|
21
|
+
- `scikit-learn`
|
|
22
|
+
- `xgboost`
|
|
23
|
+
- `matplotlib`
|
|
24
|
+
|
|
25
|
+
Create and activate a virtual environment:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
python3 -m venv .venv
|
|
29
|
+
source .venv/bin/activate
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Install dependencies:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
python3 -m pip install -r requirements.txt
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Install the package locally:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
python3 -m pip install -e .
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import numpy as np
|
|
48
|
+
from kfc_procedure.kfc import KFCRegressor, KFCClassifier
|
|
49
|
+
|
|
50
|
+
# Example data
|
|
51
|
+
X = np.random.randn(200, 5)
|
|
52
|
+
y_reg = X[:, 0] * 2 + np.random.randn(200) * 0.1
|
|
53
|
+
y_clf = (y_reg > 0).astype(int)
|
|
54
|
+
|
|
55
|
+
# Regression example
|
|
56
|
+
model = KFCRegressor(
|
|
57
|
+
divergences=["euclidean", "kl"],
|
|
58
|
+
local_model="linear",
|
|
59
|
+
aggregation="mean",
|
|
60
|
+
random_state=42,
|
|
61
|
+
)
|
|
62
|
+
model.fit(X, y_reg)
|
|
63
|
+
y_pred = model.predict(X)
|
|
64
|
+
|
|
65
|
+
# Classification example
|
|
66
|
+
clf = KFCClassifier(
|
|
67
|
+
divergences=["euclidean"],
|
|
68
|
+
local_model="logistic",
|
|
69
|
+
aggregation="majority_vote",
|
|
70
|
+
random_state=42,
|
|
71
|
+
)
|
|
72
|
+
clf.fit(X, y_clf)
|
|
73
|
+
y_pred_clf = clf.predict(X)
|
|
74
|
+
proba = clf.predict_proba(X)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Core Components
|
|
78
|
+
|
|
79
|
+
- `KStep`: fits clustering models using one or more Bregman divergences
|
|
80
|
+
- `FStep`: trains local models for each cluster and divergence
|
|
81
|
+
- `CStep`: aggregates local predictions into final outputs
|
|
82
|
+
- `KFCRegressor` / `KFCClassifier`: full meta-estimators exposing `fit`, `predict`, and `predict_proba`
|
|
83
|
+
|
|
84
|
+
## Configuration
|
|
85
|
+
|
|
86
|
+
### `divergences`
|
|
87
|
+
|
|
88
|
+
The divergences parameter accepts:
|
|
89
|
+
|
|
90
|
+
- a list of divergence names, e.g. `['euclidean', 'kl']`
|
|
91
|
+
- a list of config dictionaries, e.g. `[{ 'name': 'euclidean', 'n_clusters': 4 }]`
|
|
92
|
+
|
|
93
|
+
Available divergences: `'euclidean'`, `'kl'`, `'gkl'`, `'is'`, `'logistic'`
|
|
94
|
+
|
|
95
|
+
### `local_model`
|
|
96
|
+
|
|
97
|
+
The local_model parameter accepts:
|
|
98
|
+
|
|
99
|
+
- a model name string, e.g. `'linear'`, `'ridge'`
|
|
100
|
+
|
|
101
|
+
Supported regression models include: `linear`, `ridge`, `lasso`, `decision_tree`, `random_forest`.
|
|
102
|
+
Supported classification models include: `logistic`, `decision_tree`, `random_forest`.
|
|
103
|
+
|
|
104
|
+
### `aggregation`
|
|
105
|
+
|
|
106
|
+
The aggregation parameter accepts:
|
|
107
|
+
|
|
108
|
+
- an aggregation strategy name string, e.g. `'mean'`, `'stacking'`
|
|
109
|
+
|
|
110
|
+
Supported aggregators:
|
|
111
|
+
|
|
112
|
+
- Regression: `mean`, `weighted_mean`, `stacking`
|
|
113
|
+
- Classification: `majority_vote`, `stacking`, `combine_classifier`
|
|
114
|
+
|
|
115
|
+
## Project Structure
|
|
116
|
+
|
|
117
|
+
- `src/kfc_procedure/`: main package code
|
|
118
|
+
- `src/kfc_procedure/core/`: factories, clustering, ML wrappers, and aggregation strategies
|
|
119
|
+
- `src/kfc_procedure/core/steps/`: KFC step implementations (`KStep`, `FStep`, `CStep`)
|
|
120
|
+
- `src/kfc_procedure/utils/`: resolution, validation, and logging helpers
|
|
121
|
+
|
|
122
|
+
## Contributing
|
|
123
|
+
|
|
124
|
+
Contributions, bug reports, and improvements are welcome. Use `pytest` for testing and follow the existing package layout for new components.
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT License
|
|
129
|
+
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kfc-procedure"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Clusterwise predictive modeling library"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICEN[CS]E*"]
|
|
13
|
+
|
|
14
|
+
authors = [
|
|
15
|
+
{ name = "OUGI POV", email = "ougipov113@gmail.com" }
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
dependencies = [
|
|
19
|
+
"pandas",
|
|
20
|
+
"numpy",
|
|
21
|
+
"scikit-learn",
|
|
22
|
+
"matplotlib",
|
|
23
|
+
"xgboost"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Operating System :: OS Independent",
|
|
29
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence"
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/Ougi3ay/kfc-procedure"
|
|
34
|
+
|
|
35
|
+
# --------------------------
|
|
36
|
+
# Optional dependencies (extras)
|
|
37
|
+
# --------------------------
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
|
|
40
|
+
core = [
|
|
41
|
+
"numpy",
|
|
42
|
+
"scikit-learn"
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
cobra = [
|
|
46
|
+
"numba",
|
|
47
|
+
"faiss-cpu",
|
|
48
|
+
"xgboost",
|
|
49
|
+
"pandas",
|
|
50
|
+
"numpy",
|
|
51
|
+
"scikit-learn",
|
|
52
|
+
"matplotlib",
|
|
53
|
+
"plotly"
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
dev = [
|
|
57
|
+
"pytest",
|
|
58
|
+
"build",
|
|
59
|
+
"twine",
|
|
60
|
+
"jupyter"
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
all = [
|
|
64
|
+
"kfc-procedure[cobra]",
|
|
65
|
+
"kfc-procedure[dev]"
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
# --------------------------
|
|
69
|
+
# setuptools config (src layout)
|
|
70
|
+
# --------------------------
|
|
71
|
+
[tool.setuptools]
|
|
72
|
+
package-dir = {"" = "src"}
|
|
73
|
+
|
|
74
|
+
[tool.setuptools.packages.find]
|
|
75
|
+
where = ["src"]
|
|
76
|
+
include = ["kfc_procedure*"]
|
|
77
|
+
|
|
78
|
+
# --------------------------
|
|
79
|
+
# pytest config
|
|
80
|
+
# --------------------------
|
|
81
|
+
[tool.pytest.ini_options]
|
|
82
|
+
testpaths = ["tests"]
|
|
83
|
+
python_files = ["test_*.py"]
|
|
84
|
+
python_functions = ["test_*"]
|
|
85
|
+
addopts = "-vv --disable-warnings --strict-markers"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
kfc_procedure
|
|
3
|
+
|
|
4
|
+
Meta-estimators and modular pipeline components for the KFC algorithm.
|
|
5
|
+
|
|
6
|
+
This package implements the K-step, F-step, and C-step stages used to build
|
|
7
|
+
local-model ensembles and aggregate their predictions into final outputs.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .kfc import KFCProcedure, KFCRegressor, KFCClassifier
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
from .core import *
|
|
3
|
+
from .mixcobra import MixCOBRARegressor
|
|
4
|
+
from .gradientcobra import GradientCOBRA
|
|
5
|
+
from .combined_classifier import CombinedClassifier
|
|
6
|
+
from .superlearner import SuperLearner
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"MixCOBRARegressor",
|
|
10
|
+
"GradientCOBRA",
|
|
11
|
+
"CombinedClassifier",
|
|
12
|
+
"SuperLearner",
|
|
13
|
+
]
|