kfc-procedure 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. kfc_procedure/__init__.py +10 -0
  2. kfc_procedure/cobra/__init__.py +13 -0
  3. kfc_procedure/cobra/combined_classifier.py +512 -0
  4. kfc_procedure/cobra/core/__init__.py +106 -0
  5. kfc_procedure/cobra/core/adapters/__init__.py +32 -0
  6. kfc_procedure/cobra/core/adapters/base.py +141 -0
  7. kfc_procedure/cobra/core/adapters/one_parameter.py +60 -0
  8. kfc_procedure/cobra/core/adapters/two_parameter.py +85 -0
  9. kfc_procedure/cobra/core/aggregators/__init__.py +16 -0
  10. kfc_procedure/cobra/core/aggregators/base.py +147 -0
  11. kfc_procedure/cobra/core/aggregators/weighted_mean.py +62 -0
  12. kfc_procedure/cobra/core/aggregators/weighted_vote.py +125 -0
  13. kfc_procedure/cobra/core/cv/__init__.py +27 -0
  14. kfc_procedure/cobra/core/cv/base.py +94 -0
  15. kfc_procedure/cobra/core/cv/kfold.py +103 -0
  16. kfc_procedure/cobra/core/cv/stratified_kfold.py +71 -0
  17. kfc_procedure/cobra/core/cv/time_series.py +57 -0
  18. kfc_procedure/cobra/core/distances/__init__.py +20 -0
  19. kfc_procedure/cobra/core/distances/base.py +135 -0
  20. kfc_procedure/cobra/core/distances/cosine.py +77 -0
  21. kfc_procedure/cobra/core/distances/euclidean.py +75 -0
  22. kfc_procedure/cobra/core/distances/hamming.py +109 -0
  23. kfc_procedure/cobra/core/distances/manhattan.py +68 -0
  24. kfc_procedure/cobra/core/distances/minkowski.py +96 -0
  25. kfc_procedure/cobra/core/estimators/__init__.py +41 -0
  26. kfc_procedure/cobra/core/estimators/base.py +135 -0
  27. kfc_procedure/cobra/core/estimators/mean_regressor.py +83 -0
  28. kfc_procedure/cobra/core/estimators/sklearn.py +103 -0
  29. kfc_procedure/cobra/core/factory.py +377 -0
  30. kfc_procedure/cobra/core/kernels/__init__.py +49 -0
  31. kfc_procedure/cobra/core/kernels/base.py +181 -0
  32. kfc_procedure/cobra/core/kernels/biweight.py +27 -0
  33. kfc_procedure/cobra/core/kernels/cauchy.py +30 -0
  34. kfc_procedure/cobra/core/kernels/cobra.py +36 -0
  35. kfc_procedure/cobra/core/kernels/epanechnikov.py +29 -0
  36. kfc_procedure/cobra/core/kernels/exponential.py +36 -0
  37. kfc_procedure/cobra/core/kernels/naive.py +29 -0
  38. kfc_procedure/cobra/core/kernels/radial.py +32 -0
  39. kfc_procedure/cobra/core/kernels/reverse_cosh.py +42 -0
  40. kfc_procedure/cobra/core/kernels/triangular.py +27 -0
  41. kfc_procedure/cobra/core/kernels/triweight.py +27 -0
  42. kfc_procedure/cobra/core/losses/__init__.py +31 -0
  43. kfc_procedure/cobra/core/losses/base.py +82 -0
  44. kfc_procedure/cobra/core/losses/hinge.py +25 -0
  45. kfc_procedure/cobra/core/losses/huber.py +34 -0
  46. kfc_procedure/cobra/core/losses/log_loss.py +25 -0
  47. kfc_procedure/cobra/core/losses/mae.py +24 -0
  48. kfc_procedure/cobra/core/losses/mse.py +25 -0
  49. kfc_procedure/cobra/core/losses/quantile.py +32 -0
  50. kfc_procedure/cobra/core/normalizers/__init__.py +30 -0
  51. kfc_procedure/cobra/core/normalizers/base.py +115 -0
  52. kfc_procedure/cobra/core/normalizers/minmax.py +72 -0
  53. kfc_procedure/cobra/core/normalizers/standard.py +81 -0
  54. kfc_procedure/cobra/core/optimizers/__init__.py +50 -0
  55. kfc_procedure/cobra/core/optimizers/_utils.py +256 -0
  56. kfc_procedure/cobra/core/optimizers/base.py +108 -0
  57. kfc_procedure/cobra/core/optimizers/gradient/__init__.py +35 -0
  58. kfc_procedure/cobra/core/optimizers/gradient/adam.py +131 -0
  59. kfc_procedure/cobra/core/optimizers/gradient/base.py +319 -0
  60. kfc_procedure/cobra/core/optimizers/gradient/gd.py +101 -0
  61. kfc_procedure/cobra/core/optimizers/gradient/momentum.py +105 -0
  62. kfc_procedure/cobra/core/optimizers/search/__init__.py +28 -0
  63. kfc_procedure/cobra/core/optimizers/search/base.py +196 -0
  64. kfc_procedure/cobra/core/optimizers/search/search.py +81 -0
  65. kfc_procedure/cobra/core/splitters/__init__.py +60 -0
  66. kfc_procedure/cobra/core/splitters/base.py +119 -0
  67. kfc_procedure/cobra/core/splitters/holdout.py +110 -0
  68. kfc_procedure/cobra/core/splitters/overlap.py +182 -0
  69. kfc_procedure/cobra/core/types.py +82 -0
  70. kfc_procedure/cobra/gradientcobra.py +396 -0
  71. kfc_procedure/cobra/mixcobra.py +690 -0
  72. kfc_procedure/cobra/superlearner.py +509 -0
  73. kfc_procedure/cobra/utils/__init__.py +99 -0
  74. kfc_procedure/cobra/utils/distance.py +20 -0
  75. kfc_procedure/cobra/utils/preprocessing.py +265 -0
  76. kfc_procedure/cobra/utils/resolve.py +466 -0
  77. kfc_procedure/core/__init__.py +0 -0
  78. kfc_procedure/core/clustering/__init__.py +51 -0
  79. kfc_procedure/core/clustering/bregman.py +406 -0
  80. kfc_procedure/core/clustering/divergences/__init__.py +30 -0
  81. kfc_procedure/core/clustering/divergences/base.py +412 -0
  82. kfc_procedure/core/clustering/divergences/euclidean.py +100 -0
  83. kfc_procedure/core/clustering/divergences/gkl.py +90 -0
  84. kfc_procedure/core/clustering/divergences/itakura_saito.py +82 -0
  85. kfc_procedure/core/clustering/divergences/logistic.py +91 -0
  86. kfc_procedure/core/combiner/__init__.py +94 -0
  87. kfc_procedure/core/combiner/base.py +92 -0
  88. kfc_procedure/core/combiner/classification/__init__.py +18 -0
  89. kfc_procedure/core/combiner/classification/combined_classifier.py +33 -0
  90. kfc_procedure/core/combiner/classification/majority_vote.py +39 -0
  91. kfc_procedure/core/combiner/classification/stacking.py +44 -0
  92. kfc_procedure/core/combiner/regression/__init__.py +24 -0
  93. kfc_procedure/core/combiner/regression/gradientcobra.py +28 -0
  94. kfc_procedure/core/combiner/regression/mean.py +47 -0
  95. kfc_procedure/core/combiner/regression/mixcobra.py +28 -0
  96. kfc_procedure/core/combiner/regression/stacking.py +53 -0
  97. kfc_procedure/core/combiner/regression/weighted_mean.py +49 -0
  98. kfc_procedure/core/factory.py +484 -0
  99. kfc_procedure/core/ml/__init__.py +26 -0
  100. kfc_procedure/core/ml/base.py +75 -0
  101. kfc_procedure/core/ml/sklearn.py +146 -0
  102. kfc_procedure/core/steps/__init__.py +77 -0
  103. kfc_procedure/core/steps/cstep.py +227 -0
  104. kfc_procedure/core/steps/fstep.py +233 -0
  105. kfc_procedure/core/steps/kstep.py +209 -0
  106. kfc_procedure/kfc.py +335 -0
  107. kfc_procedure/utils/__init__.py +3 -0
  108. kfc_procedure/utils/logger.py +131 -0
  109. kfc_procedure/utils/resolve.py +41 -0
  110. kfc_procedure-0.1.0.dist-info/METADATA +169 -0
  111. kfc_procedure-0.1.0.dist-info/RECORD +114 -0
  112. kfc_procedure-0.1.0.dist-info/WHEEL +5 -0
  113. kfc_procedure-0.1.0.dist-info/licenses/LICENSE +21 -0
  114. kfc_procedure-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,10 @@
1
+ """
2
+ kfc_procedure
3
+
4
+ Meta-estimators and modular pipeline components for the KFC algorithm.
5
+
6
+ This package implements the K-step, F-step, and C-step stages used to build
7
+ local-model ensembles and aggregate their predictions into final outputs.
8
+ """
9
+
10
+ from .kfc import KFCProcedure, KFCRegressor, KFCClassifier
@@ -0,0 +1,13 @@
1
+
2
+ from .core import *
3
+ from .mixcobra import MixCOBRARegressor
4
+ from .gradientcobra import GradientCOBRA
5
+ from .combined_classifier import CombinedClassifier
6
+ from .superlearner import SuperLearner
7
+
8
+ __all__ = [
9
+ "MixCOBRARegressor",
10
+ "GradientCOBRA",
11
+ "CombinedClassifier",
12
+ "SuperLearner",
13
+ ]
@@ -0,0 +1,512 @@
1
+ """
2
+
3
+ Combined Classifier Module
4
+ ==========================
5
+
6
+ This module implements a hybrid ensemble aggregation framework based on
7
+ prediction-space similarity, kernel weighting, and optimized bandwidth
8
+ selection. The system integrates multiple base estimators, distance
9
+ functions, kernel transformations, and aggregation strategies into a unified
10
+ classifier.
11
+
12
+ Core Idea
13
+ ---------
14
+ The classifier transforms base estimator outputs into a prediction space,
15
+ computes pairwise similarity using a distance metric and kernel function,
16
+ and performs weighted aggregation of labels using locally optimized
17
+ bandwidth parameters.
18
+ Two variants are provided:
19
+ - CombinedClassifier: Full implementation with exact kernel computation.
20
+ - CombinedClassifierFast: Approximate version using FAISS nearest neighbor search.
21
+
22
+ Main Components
23
+ ---------------
24
+
25
+ - Base Estimators: Supervised learners producing intermediate predictions
26
+ - Distance Functions: Measure similarity between prediction vectors
27
+ - Kernel Functions: Transform distances into similarity weights
28
+ - Aggregators: Combine weighted labels into final predictions
29
+ - Loss Functions: Evaluate cross-validation performance
30
+ - Optimizers: Tune kernel bandwidth parameters
31
+ - Cross-Validation: Estimate generalization error during optimization
32
+
33
+ Author: COBRA Framework
34
+
35
+ """
36
+ from __future__ import annotations
37
+
38
+ from abc import ABC
39
+ from typing import Any, Dict, List, Union
40
+
41
+ import numpy as np
42
+ from sklearn.base import BaseEstimator as SkBaseEstimator
43
+ from sklearn.utils import check_array
44
+
45
+ from kfc_procedure.cobra.core.adapters.base import BaseKernelAdapter, KernelAdapterFactory
46
+ from kfc_procedure.cobra.core.aggregators.base import AggregatorFactory, BaseAggregator
47
+ from kfc_procedure.cobra.core.distances.base import BaseDistance, DistanceFactory
48
+ from kfc_procedure.cobra.core.estimators.base import BaseEstimator
49
+ from kfc_procedure.cobra.core.kernels.base import BaseKernel, KernelFactory
50
+ from kfc_procedure.cobra.core.losses.base import BaseLoss, LossFactory
51
+ from kfc_procedure.cobra.core.optimizers.base import OptimizerFactory
52
+ from kfc_procedure.cobra.core.cv.base import BaseCrossValidator, CVFactory
53
+ from kfc_procedure.cobra.utils.preprocessing import history_to_dataframe
54
+ from kfc_procedure.cobra.utils.resolve import fit_estimators, predict_estimators, resolve_training_context
55
+
56
+ try:
57
+ import faiss
58
+ HAS_FAISS = True
59
+ except ImportError:
60
+ HAS_FAISS = False
61
+
62
class CombinedClassifier(ABC, SkBaseEstimator):
    """
    CombinedClassifier
    ==================

    A kernel-based ensemble aggregation classifier that operates on
    prediction-space representations of base estimators.

    The model works in three main stages:

    1. Train base estimators on the kernel dataset ``(X_k, y_k)``
    2. Transform the aggregation dataset ``X_l`` into prediction space
    3. Perform kernel-weighted aggregation of ``y_l`` using an optimized
       bandwidth

    Parameters
    ----------
    estimators : list of str or BaseEstimator, optional
        List of base estimators used to generate prediction space.
        When omitted, a default set of four classifiers is used
        (see ``_fit_estimators``).

    estimators_params : dict, optional
        Hyperparameters for base estimators.

    distance : str, default="hamming"
        Distance metric used in prediction space.

    distance_params : dict, optional
        Parameters for distance function.

    kernel : str, default="rbf"
        Kernel function used to transform distances into similarities.

    kernel_params : dict, optional
        Parameters for kernel function.

    aggregator : str, default="weighted_vote"
        Aggregation strategy for combining weighted labels.

    aggregator_params : dict, optional
        Parameters for aggregator.

    loss : str, default="mse"
        Loss function used for cross-validation optimization.

    loss_params : dict, optional
        Parameters for loss function.

    optimizer : str, default="grid"
        Optimization strategy for bandwidth selection.

    optimizer_params : dict, optional
        Parameters for optimizer. The keys ``param_grid``, ``max_iter`` and
        ``random_state`` are always overwritten by this class.

    n_jobs : int, default=1
        Number of parallel jobs.

    bandwidth_list : array-like, optional
        Candidate bandwidth values for optimization. When omitted,
        ``max_iter`` evenly spaced values in ``[0.001, 10.0]`` are used.

    max_iter : int, default=300
        Maximum iterations for optimizer search.

    n_cv : int, default=5
        Number of cross-validation folds.

    random_state : int, optional
        Random seed for reproducibility.
    """

    def __init__(
        self,
        estimators: List[Union[str, BaseEstimator]] | None = None,
        estimators_params: Dict[str, Any] | None = None,
        distance: str = "hamming",
        distance_params: Dict[str, Any] | None = None,
        kernel: str = "rbf",
        kernel_params: Dict[str, Any] | None = None,
        aggregator: str = "weighted_vote",
        aggregator_params: Dict[str, Any] | None = None,
        loss: str = "mse",
        loss_params: Dict[str, Any] | None = None,
        optimizer: str = "grid",
        optimizer_params: Dict[str, Any] | None = None,
        n_jobs: int = 1,
        bandwidth_list: np.ndarray | None = None,
        max_iter: int = 300,
        n_cv: int = 5,
        random_state: int | None = None,
    ):
        # scikit-learn convention: __init__ only stores the arguments verbatim.
        self.estimators = estimators
        self.estimators_params = estimators_params

        self.distance = distance
        self.distance_params = distance_params

        self.kernel = kernel
        self.kernel_params = kernel_params

        self.aggregator = aggregator
        self.aggregator_params = aggregator_params

        self.loss = loss
        self.loss_params = loss_params

        self.optimizer = optimizer
        self.optimizer_params = optimizer_params

        self.n_jobs = n_jobs
        self.bandwidth_list = bandwidth_list
        self.max_iter = max_iter
        self.n_cv = n_cv
        self.random_state = random_state

    def _fit_estimators(self, X_k: np.ndarray, y_k: np.ndarray):
        """Fit the base estimators on ``(X_k, y_k)`` and return the result of
        ``fit_estimators``. Falls back to a default pool of classifiers when
        ``self.estimators`` is empty or ``None``."""
        default_estimators = [
            "logistic_regression",
            "decision_tree_classifier",
            "svc",
            "k_neighbors_classifier",
        ]

        estimators = self.estimators or default_estimators

        return fit_estimators(
            X=X_k,
            y=y_k,
            estimators_params=self.estimators_params,
            estimators=estimators,
            n_jobs=self.n_jobs,
        )

    def _load_predictions(self, X: np.ndarray) -> np.ndarray:
        """Map ``X`` into prediction space using the fitted base estimators."""
        return predict_estimators(
            X=X,
            estimators=self.estimators_,
            n_jobs=self.n_jobs,
        )

    def _resolve_components(self):
        """Instantiate distance, kernel, loss, CV splitter, kernel adapter and
        aggregator through their factories and store them as ``*_``
        attributes."""
        self.distance_: BaseDistance = DistanceFactory.create(
            self.distance,
            **(self.distance_params or {}),
        )

        self.kernel_: BaseKernel = KernelFactory.create(
            self.kernel,
            **(self.kernel_params or {}),
        )

        self.loss_: BaseLoss = LossFactory.create(
            self.loss,
            **(self.loss_params or {}),
        )

        self.cv_: BaseCrossValidator = CVFactory.create(
            "kfold",
            n_splits=self.n_cv,
            shuffle=True,
            random_state=self.random_state,
        )

        # The adapter starts at bandwidth 1.0; the optimizer overwrites it.
        self.adapter_: BaseKernelAdapter = KernelAdapterFactory.create(
            "one_parameter",
            bandwidth=1.0,
        )

        self.aggregator_: BaseAggregator = AggregatorFactory.create(
            self.aggregator,
            **(self.aggregator_params or {}),
        )

    def _optimize_hyperparameters(self):
        """Search for the bandwidth minimising the cross-validation error.

        Sets ``optimizer_``, ``bandwidth_`` and ``optimization_outputs_``.
        """
        if self.bandwidth_list is not None:
            bandwidth_candidates = np.asarray(self.bandwidth_list)
        else:
            bandwidth_candidates = np.linspace(0.001, 10.0, self.max_iter)

        # User-supplied optimizer parameters come first; the three keys below
        # always take precedence over them.
        params = dict(self.optimizer_params or {})
        params.update({
            "param_grid": {"bandwidth": bandwidth_candidates},
            "max_iter": self.max_iter,
            "random_state": self.random_state,
        })

        self.optimizer_ = OptimizerFactory.create(
            self.optimizer,
            **params,
        )

        result = self.optimizer_(self.kappa_cross_validation_error)

        self.bandwidth_ = float(np.atleast_1d(result["x"])[0])

        history_df = history_to_dataframe(
            result["history"],
            param_names=["bandwidth"],
        )
        # NOTE(review): "method" is hard-coded to "grid" even when a different
        # optimizer is configured; kept unchanged because downstream consumers
        # may rely on it -- confirm whether it should mirror self.optimizer.
        self.optimization_outputs_ = {
            "method": "grid",
            "optimizer": self.optimizer,
            "bandwidth": self.bandwidth_,
            "score": result["score"],
            "history": history_df,
        }

    def kappa_cross_validation_error(self, params):
        """Return the mean cross-validation loss for a candidate bandwidth.

        Parameters
        ----------
        params : scalar or array-like
            Candidate parameters; only the first element is used, as the
            bandwidth.

        Returns
        -------
        float
            Mean of the per-fold losses computed with ``self.loss_``.
        """
        bandwidth = float(np.atleast_1d(params)[0])

        self.adapter_.set_params(bandwidth=bandwidth)

        # The distance matrix is precomputed in fit(); only the bandwidth
        # transform and the kernel are re-evaluated per candidate.
        D = self.adapter_.transform(self.distance_matrix_)
        K = self.kernel_(D)

        errors = []

        for fold in self.cv_folds_:
            train_idx = fold.train_idx
            val_idx = fold.eval_idx

            # Rows: validation samples, columns: training samples of the fold.
            K_vt = K[np.ix_(val_idx, train_idx)]
            y_train = self.y_l_[train_idx]

            preds = []
            for i in range(len(val_idx)):
                w = K_vt[i]

                # No neighbour carries weight: fall back to the majority class.
                if np.sum(w) <= 0:
                    preds.append(self.global_majority_class_)
                else:
                    preds.append(self.aggregator_.aggregate(y_train, w))

            preds = np.array(preds)
            y_true = self.y_l_[val_idx]
            errors.append(self.loss_(y_true, preds))

        return np.mean(errors)

    def fit(self, X, y, X_l=None, y_l=None, split_ratio=0.5, overlap=False, as_predictions=False):
        """
        Fit the CombinedClassifier model.

        This method:
        - Splits or resolves training context
        - Fits base estimators (skipped when inputs are already predictions)
        - Constructs prediction space
        - Builds the pairwise distance matrix in prediction space
        - Optimizes bandwidth via cross-validation

        Parameters
        ----------
        X, y : array-like
            Training data and labels, forwarded to
            ``resolve_training_context``.
        X_l, y_l : array-like, optional
            Explicit aggregation set, forwarded to
            ``resolve_training_context``.
        split_ratio : float, default=0.5
            Forwarded to ``resolve_training_context``.
        overlap : bool, default=False
            Forwarded to ``resolve_training_context``.
        as_predictions : bool, default=False
            Forwarded to ``resolve_training_context``. When the resolved
            context reports ``as_predictions``, ``X_l`` is used directly as
            the prediction space and no base estimators are fitted.

        Returns
        -------
        self
        """
        ctx = resolve_training_context(
            X,
            y,
            X_l=X_l,
            y_l=y_l,
            as_predictions=as_predictions,
            split_ratio=split_ratio,
            overlap=overlap,
            random_state=self.random_state,
        )

        self.X_k_, self.y_k_ = ctx.X_k, ctx.y_k
        self.X_l_, self.y_l_ = ctx.X_l, ctx.y_l
        self.as_predictions_ = ctx.as_predictions

        if not self.as_predictions_:
            # Aggregated outputs and the majority fallback are drawn from
            # y_l_, so classes_ must cover labels from both subsets; otherwise
            # a label present only in y_l_ has no column in predict_proba.
            self.classes_ = np.union1d(self.y_k_, self.y_l_)
            self.estimators_ = self._fit_estimators(self.X_k_, self.y_k_)
            self.pred_l_ = self._load_predictions(self.X_l_)
        else:
            self.classes_ = np.unique(self.y_l_)
            self.pred_l_ = self.X_l_

        classes, counts = np.unique(self.y_l_, return_counts=True)
        self.global_majority_class_ = classes[np.argmax(counts)]

        self._resolve_components()

        self.distance_matrix_ = self.distance_.matrix(self.pred_l_, self.pred_l_)

        # Folds are materialised once so every bandwidth candidate is scored
        # on identical splits.
        self.cv_folds_ = list(self.cv_.split(self.X_l_, self.y_l_))

        self._optimize_hyperparameters()

        return self

    def _kernel_weights(self, X):
        """Return the kernel weight matrix between ``X`` and the aggregation
        set, using the optimized bandwidth (one row per sample in ``X``)."""
        X = check_array(X)

        if self.as_predictions_:
            preds_space = X
        else:
            preds_space = self._load_predictions(X)

        distance_matrix = self.distance_.matrix(preds_space, self.pred_l_)

        self.adapter_.set_params(bandwidth=self.bandwidth_)
        D = self.adapter_.transform(distance_matrix)

        return self.kernel_(D)

    def predict(self, X):
        """Predict one label per row of ``X`` by kernel-weighted aggregation
        of ``y_l_``.

        Rows whose kernel weights sum to zero (or less) receive the majority
        class of ``y_l_``.

        Returns
        -------
        numpy.ndarray
            Predicted labels.
        """
        K = self._kernel_weights(X)

        outputs = []
        for i in range(K.shape[0]):
            w = K[i]

            if np.sum(w) <= 0:
                outputs.append(self.global_majority_class_)
            else:
                outputs.append(self.aggregator_.aggregate(self.y_l_, w))

        return np.array(outputs)

    def predict_proba(self, X):
        """Return class-membership scores for each row of ``X``.

        Columns follow ``self.classes_``. Rows whose kernel weights sum to
        zero (or less) put all mass on the majority class of ``y_l_``.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_classes)
        """
        K = self._kernel_weights(X)

        classes = self.classes_
        proba = np.zeros((len(K), len(classes)))

        for i in range(len(K)):
            w = K[i]

            if np.sum(w) <= 0:
                proba[i, np.where(classes == self.global_majority_class_)[0][0]] = 1.0
                continue

            proba[i] = self.aggregator_.aggregate_proba(
                values=self.y_l_,
                weights=w,
                classes=classes,
            )

        return proba
423
+
424
class CombinedClassifierFast(CombinedClassifier):
    """
    CombinedClassifierFast
    ======================

    Variant of ``CombinedClassifier`` that can answer predictions from a
    FAISS nearest-neighbour index instead of the full kernel matrix.

    The FAISS path is used only when ``use_faiss`` is true, the ``faiss``
    package is importable and an index was built during ``fit``; otherwise
    every call is delegated to ``CombinedClassifier``.

    NOTE(review): on the FAISS path the weights are
    ``exp(-scale * faiss_distance)`` with ``scale`` taken from the kernel's
    ``gamma`` (or ``threshold``). The configured distance, the kernel
    function itself and the optimized ``bandwidth_`` are not applied there --
    confirm this approximation is intended.

    Parameters
    ----------
    use_faiss : bool, default=False
        Build and use a FAISS ``IndexFlatL2`` index over the prediction
        space.

    faiss_k : int, optional
        Number of neighbours retrieved per query. Defaults to 100 and is
        always capped at the number of indexed samples.

    estimators, estimators_params, distance, distance_params, kernel, \
kernel_params, aggregator, aggregator_params, loss, loss_params, optimizer, \
optimizer_params, n_jobs, bandwidth_list, max_iter, n_cv, random_state
        Keyword-only; forwarded unchanged to ``CombinedClassifier``.
    """

    def __init__(
        self,
        use_faiss: bool = False,
        faiss_k: int | None = None,
        *,
        estimators: List[Union[str, BaseEstimator]] | None = None,
        estimators_params: Dict[str, Any] | None = None,
        distance: str = "hamming",
        distance_params: Dict[str, Any] | None = None,
        kernel: str = "rbf",
        kernel_params: Dict[str, Any] | None = None,
        aggregator: str = "weighted_vote",
        aggregator_params: Dict[str, Any] | None = None,
        loss: str = "mse",
        loss_params: Dict[str, Any] | None = None,
        optimizer: str = "grid",
        optimizer_params: Dict[str, Any] | None = None,
        n_jobs: int = 1,
        bandwidth_list: np.ndarray | None = None,
        max_iter: int = 300,
        n_cv: int = 5,
        random_state: int | None = None,
    ):
        # Parameters are spelled out (instead of **kwargs) so that
        # scikit-learn's get_params()/clone()/repr can see and copy them.
        super().__init__(
            estimators=estimators,
            estimators_params=estimators_params,
            distance=distance,
            distance_params=distance_params,
            kernel=kernel,
            kernel_params=kernel_params,
            aggregator=aggregator,
            aggregator_params=aggregator_params,
            loss=loss,
            loss_params=loss_params,
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            n_jobs=n_jobs,
            bandwidth_list=bandwidth_list,
            max_iter=max_iter,
            n_cv=n_cv,
            random_state=random_state,
        )
        self.use_faiss = use_faiss
        self.faiss_k = faiss_k

    def fit(self, X, y, X_l=None, y_l=None, split_ratio=0.5, overlap=False, as_predictions=False):
        """Fit the base model, then build the FAISS index when requested.

        All arguments are forwarded unchanged to ``CombinedClassifier.fit``.

        Returns
        -------
        self
        """
        super().fit(X, y, X_l=X_l, y_l=y_l, split_ratio=split_ratio, overlap=overlap, as_predictions=as_predictions)

        # An index left over from a previous fit would no longer match
        # pred_l_ / y_l_, so it is discarded before (possibly) rebuilding.
        if hasattr(self, "faiss_index_"):
            del self.faiss_index_

        if self.use_faiss:
            if HAS_FAISS:
                # FAISS expects contiguous float32 input.
                preds = np.ascontiguousarray(self.pred_l_, dtype=np.float32)
                self.faiss_index_ = faiss.IndexFlatL2(preds.shape[1])
                self.faiss_index_.add(preds)
            else:
                import warnings

                warnings.warn(
                    "use_faiss=True but the 'faiss' package is not installed; "
                    "falling back to exact kernel computation.",
                    RuntimeWarning,
                    stacklevel=2,
                )

        return self

    def _faiss_active(self) -> bool:
        """Return True when predictions should go through the FAISS index."""
        return bool(self.use_faiss and HAS_FAISS and hasattr(self, 'faiss_index_'))

    def _faiss_neighbors(self, preds):
        """Query the FAISS index and turn distances into weights.

        Parameters
        ----------
        preds : array-like
            Query points in prediction space (one row per sample).

        Returns
        -------
        weights : numpy.ndarray
            ``exp(-scale * distance)`` for each retrieved neighbour.
        indices : numpy.ndarray
            Positions of the neighbours within ``pred_l_`` / ``y_l_``.

        Raises
        ------
        ValueError
            If the kernel exposes neither ``gamma`` nor ``threshold``.
        """
        # gamma takes precedence over threshold; resolved before searching so
        # a misconfigured kernel fails without doing the search.
        scale = getattr(self.kernel_, "gamma", None)
        if scale is None:
            scale = getattr(self.kernel_, "threshold", None)
        if scale is None:
            raise ValueError("Kernel must define either gamma or threshold.")

        queries = np.ascontiguousarray(preds, dtype=np.float32)

        # Never ask for more neighbours than are indexed: FAISS pads missing
        # results with index -1, which would silently select y_l_[-1].
        k = min(self.faiss_k or 100, len(self.pred_l_))
        distances, indices = self.faiss_index_.search(queries, k)

        return np.exp(-scale * distances), indices

    def predict(self, X):
        """Predict one label per row of ``X``.

        Uses the FAISS neighbours when the index is active, otherwise
        delegates to ``CombinedClassifier.predict``.

        Returns
        -------
        numpy.ndarray
            Predicted labels.
        """
        # Decide first, so base-estimator predictions are not computed twice
        # when falling back to the exact path.
        if not self._faiss_active():
            return super().predict(X)

        X = check_array(X)
        preds = X if self.as_predictions_ else self._load_predictions(X)

        weights, indices = self._faiss_neighbors(preds)

        outputs = []
        for i in range(weights.shape[0]):
            w = weights[i]
            idx = indices[i]

            # Same zero-weight fallback as the exact implementation.
            if np.sum(w) <= 0:
                outputs.append(self.global_majority_class_)
            else:
                outputs.append(self.aggregator_.aggregate(self.y_l_[idx], w))

        return np.array(outputs)

    def predict_proba(self, X, pred_X=None):
        """Return class-membership scores for each row of ``X``.

        Parameters
        ----------
        X : array-like
            Input samples (already in prediction space when the model was
            fitted with ``as_predictions``).
        pred_X : array-like, optional
            Precomputed prediction-space representation of ``X``. Only used
            on the FAISS path; the exact fallback recomputes it from ``X``.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_classes)
            Columns follow ``self.classes_``.
        """
        if not self._faiss_active():
            return super().predict_proba(X)

        X = check_array(X)
        if pred_X is not None:
            preds = pred_X
        elif self.as_predictions_:
            # No base estimators exist in this mode; X is the prediction space.
            preds = X
        else:
            preds = self._load_predictions(X)

        weights, indices = self._faiss_neighbors(preds)

        classes = self.classes_
        proba = np.zeros((len(weights), len(classes)))
        majority_col = np.flatnonzero(classes == self.global_majority_class_)

        for i in range(weights.shape[0]):
            w = weights[i]
            idx = indices[i]

            # Same zero-weight fallback as the exact implementation, applied
            # only when the majority class has a column in classes_.
            if np.sum(w) <= 0 and majority_col.size:
                proba[i, majority_col[0]] = 1.0
                continue

            proba[i] = self.aggregator_.aggregate_proba(
                values=self.y_l_[idx],
                weights=w,
                classes=classes,
            )

        return proba
511
+
512
+
@@ -0,0 +1,106 @@
1
+ """
2
+ COBRA core module.
3
+
4
+ This package contains the fundamental building blocks of the COBRA
5
+ and MIXCOBRA pipeline architecture.
6
+
7
+ Pipeline overview
8
+ -----------------
9
+ Input -> Splitter -> Estimators -> Normalize Constants -> Distance
10
+ -> Kernel Adapter -> Kernel -> Optimize + Loss -> Aggregation -> Output
11
+
12
+ Purpose
13
+ -------
14
+ The core module provides all reusable components required to build
15
+ end-to-end consensus learning systems.
16
+
17
+ It implements a fully modular, factory-driven architecture where each
18
+ stage of the pipeline can be independently replaced or extended.
19
+
20
+ Main design goals:
21
+
22
+ - modular pipeline construction
23
+ - consistent factory-based instantiation
24
+ - extensible research-friendly architecture
25
+ - clear separation of concerns between stages
26
+ - support for COBRA / GradientCOBRA / MIXCOBRA variants
27
+
28
+ Core components
29
+ ---------------
30
+
31
+ Adapters
32
+ ^^^^^^^^
33
+ Transform raw distance matrices using learnable or fixed parameters.
34
+
35
+ Aggregators
36
+ ^^^^^^^^^^^^
37
+ Combine neighbor predictions into a final consensus output.
38
+
39
+ Distances
40
+ ^^^^^^^^^
41
+ Compute pairwise distances between samples in feature space.
42
+
43
+ Estimators
44
+ ^^^^^^^^^^^
45
+ Base models used as experts in the prediction pool.
46
+
47
+ Kernels
48
+ ^^^^^^^
49
+ Transform distances into similarity / influence weights.
50
+
51
+ Losses
52
+ ^^^^^^
53
+ Evaluate prediction error for optimization objectives.
54
+
55
+ Optimizers
56
+ ^^^^^^^^^^
57
+ Search or refine model parameters (gradient-based or discrete).
58
+
59
+ Normalizers
60
+ ^^^^^^^^^^^
61
+ Normalize and align estimator outputs into a shared representation.
62
+
63
+ Splitters
64
+ ^^^^^^^^^
65
+ Partition data into training and calibration subsets.
66
+
67
+ Factory system
68
+ --------------
69
+
70
+ - ``BaseFactory``
71
+ Generic registry-based factory used across all components.
72
+
73
+ Design philosophy
74
+ -----------------
75
+ Each module is designed to be:
76
+
77
+ - independently replaceable
78
+ - configurable via string-based factory registration
79
+ - compatible with black-box optimization workflows
80
+ - suitable for ensemble learning research and experimentation
81
+
82
+ Example usage
83
+ -------------
84
+ >>> from kfc_procedure.cobra.core.splitters import SplitterFactory
85
+ >>> from kfc_procedure.cobra.core.estimators import EstimatorFactory
86
+ >>> from kfc_procedure.cobra.core.optimizers import GradientOptimizerFactory
87
+
88
+ >>> splitter = SplitterFactory.create("holdout")
89
+ >>> estimator = EstimatorFactory.create("ridge")
90
+ >>> optimizer = GradientOptimizerFactory.create("gradient_descent")
91
+ """
92
+
93
+ from .adapters import *
94
+ from .aggregators import *
95
+ from .distances import *
96
+ from .estimators import *
97
+ from .kernels import *
98
+ from .losses import *
99
+ from .optimizers import *
100
+ from .splitters import *
101
+
102
+ from .factory import BaseFactory
103
+
104
+ __all__ = [
105
+ "BaseFactory",
106
+ ]
@@ -0,0 +1,32 @@
1
+ """
2
+ Kernel Adapter module for COBRA framework.
3
+
4
+ This package provides a set of transformation layers that operate
5
+ on distance matrices before kernel construction and optimization.
6
+
7
+ Kernel adapters serve as a bridge between:
8
+ - Distance metrics (geometry space)
9
+ - Kernel functions (similarity mapping)
10
+ - Optimization procedures (parameter tuning)
11
+
12
+ These adapters allow COBRA to support:
13
+ - a single learnable parameter
14
+ - multi-parameter linear combinations
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from .base import (
20
+ BaseKernelAdapter,
21
+ KernelAdapterFactory,
22
+ )
23
+ from .one_parameter import OneParameterKernelAdapter
24
+ from .two_parameter import TwoParameterKernelAdapter
25
+
26
+
27
+ __all__ = [
28
+ "BaseKernelAdapter",
29
+ "OneParameterKernelAdapter",
30
+ "TwoParameterKernelAdapter",
31
+ "KernelAdapterFactory",
32
+ ]