mlquantify 0.1.20__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
  import numpy as np
  from abc import abstractmethod
+ from sklearn.metrics.pairwise import pairwise_kernels
+ from scipy.optimize import minimize
 
  from mlquantify.base import BaseQuantifier
  from mlquantify.base_aggregative import AggregationMixin, SoftLearnerQMixin, _get_learner_function
@@ -44,9 +46,9 @@ class AggregativeMixture(SoftLearnerQMixin, AggregationMixin, BaseMixture):
 
  if learner_fitted:
  train_predictions = getattr(self.learner, learner_function)(X)
- train_y_values = y
+ y_train = y
  else:
- train_predictions, train_y_values = apply_cross_validation(
+ train_predictions, y_train = apply_cross_validation(
  self.learner,
  X,
  y,
@@ -58,34 +60,34 @@ class AggregativeMixture(SoftLearnerQMixin, AggregationMixin, BaseMixture):
  )
 
  self.train_predictions = train_predictions
- self.train_y_values = train_y_values
+ self.y_train = y_train
 
- self._precompute_training(train_predictions, train_y_values)
+ self._precompute_training(train_predictions, y_train)
  return self
 
- def _precompute_training(self, train_predictions, train_y_values):
+ def _precompute_training(self, train_predictions, y_train):
  """
  Fit learner and store score distributions for positive and negative classes.
  """
  # Store scores for positive and negative classes
- self.pos_scores = train_predictions[train_y_values == self.classes_[1], 1]
- self.neg_scores = train_predictions[train_y_values == self.classes_[0], 1]
+ self.pos_scores = train_predictions[y_train == self.classes_[1], 1]
+ self.neg_scores = train_predictions[y_train == self.classes_[0], 1]
  self._precomputed = True
  return self
 
  def _predict(self, X):
  """Predict class prevalences for the given data."""
  predictions = getattr(self.learner, _get_learner_function(self))(X)
- prevalences = self.aggregate(predictions, self.train_predictions, self.train_y_values)
+ prevalences = self.aggregate(predictions, self.train_predictions, self.y_train)
 
  return prevalences
 
- def aggregate(self, predictions, train_predictions, train_y_values):
+ def aggregate(self, predictions, train_predictions, y_train):
  predictions = validate_predictions(self, predictions)
- self.classes_ = check_classes_attribute(self, np.unique(train_y_values))
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
 
  if not self._precomputed:
- self._precompute_training(train_predictions, train_y_values)
+ self._precompute_training(train_predictions, y_train)
  self._precomputed = True
 
  pos_test_scores = predictions[:, 1]
@@ -252,7 +254,7 @@ class HDy(AggregativeMixture):
  Distance corresponding to the best mixture weight.
  """
 
- bins_size = np.arange(10, 110, 11)
+ bins_size = np.linspace(10, 110, 11)
  alpha_values = np.round(np.linspace(0, 1, 101), 2)
 
  alphas, self.distances = [], []
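
Aside: the two defaults differ in both spacing and count. np.arange(10, 110, 11) steps by 11 and produces ten unevenly placed values ending at 109, while np.linspace(10, 110, 11) produces eleven evenly spaced values from 10 to 110; HDx below now uses the same default. A quick NumPy comparison:

import numpy as np
print(np.arange(10, 110, 11))    # [ 10  21  32  43  54  65  76  87  98 109]
print(np.linspace(10, 110, 11))  # [ 10.  20.  30.  40.  50.  60.  70.  80.  90. 100. 110.]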
@@ -409,7 +411,7 @@ class HDx(BaseMixture):
  def __init__(self, bins_size=None, strategy="ovr"):
  super().__init__()
  if bins_size is None:
- bins_size = np.append(np.linspace(2, 20, 10), 30)
+ bins_size = np.linspace(10, 110, 11)
 
  self.bins_size = bins_size
  self.neg_features = None
@@ -439,7 +441,7 @@ class HDx(BaseMixture):
  # For each feature, compute the Hellinger distance
  for feature_idx in range(X.shape[1]):
 
- for bins in self.bins_size:
+ for bins in self.bins_size:
 
  pos_feature = pos[:, feature_idx]
  neg_feature = neg[:, feature_idx]
@@ -457,4 +459,297 @@ class HDx(BaseMixture):
  self.distances.append(avg_distance)
  best_alpha = alpha_values[np.argmin(self.distances)]
  best_distance = np.min(self.distances)
- return best_alpha, best_distance
+ return best_alpha, best_distance
+
+
+
+ class MMD_RKHS(BaseMixture):
+ r"""
+ Maximum Mean Discrepancy in RKHS (MMD-RKHS) quantification method.
+
+ This method estimates class prevalences in an unlabeled test set by
+ matching the kernel mean embedding of the test distribution to a
+ convex combination of the class-conditional training embeddings.
+
+ Let :math:`\mathcal{X} \subseteq \mathbb{R}^d` be the input space and
+ :math:`\mathcal{Y} = \{0, \dots, C-1\}` the label set. Let
+ :math:`K` be a positive definite kernel with RKHS :math:`\mathcal{H}`
+ and feature map :math:`\phi`, so that
+ :math:`K(x, x') = \langle \phi(x), \phi(x') \rangle_{\mathcal{H}}`.
+
+ For each class :math:`y`, the class-conditional kernel mean embedding is
+
+ .. math::
+ \mu_y \;=\; \mathbb{E}_{x \sim P_{D}(x \mid y)}[\phi(x)] \in \mathcal{H},
+
+ and the test mean embedding is
+
+ .. math::
+ \mu_U \;=\; \mathbb{E}_{x \sim P_{U}(x)}[\phi(x)] \in \mathcal{H}.
+
+ Under prior probability shift, the test distribution satisfies
+
+ .. math::
+ P_U(x) = \sum_{y=0}^{C-1} \theta_y \, P_D(x \mid y),
+
+ which implies
+
+ .. math::
+ \mu_U = \sum_{y=0}^{C-1} \theta_y \, \mu_y,
+
+ where :math:`\theta \in \Delta^{C-1}` is the class prevalence vector.
+ The MMD-RKHS estimator solves
+
+ .. math::
+ \hat{\theta}
+ \;=\;
+ \arg\min_{\theta \in \Delta^{C-1}}
+ \big\lVert \textstyle\sum_{y=0}^{C-1} \theta_y \mu_y - \mu_U
+ \big\rVert_{\mathcal{H}}^2.
+
+ In practice, embeddings are approximated by empirical means. Using the
+ kernel trick, the objective can be written as a quadratic program
+
+ .. math::
+ \hat{\theta}
+ \;=\;
+ \arg\min_{\theta \in \Delta^{C-1}}
+ \big( \theta^\top G \, \theta - 2 \, h^\top \theta \big),
+
+ with
+
+ .. math::
+ G_{yy'} = \langle \hat{\mu}_y, \hat{\mu}_{y'} \rangle_{\mathcal{H}},
+ \qquad
+ h_y = \langle \hat{\mu}_y, \hat{\mu}_U \rangle_{\mathcal{H}}.
+
+ The solution :math:`\hat{\theta}` is the estimated prevalence vector.
+
+ Parameters
+ ----------
+ kernel : {'rbf', 'linear', 'poly', 'sigmoid', 'cosine'}, default='rbf'
+ Kernel used to build the RKHS where MMD is computed.
+ gamma : float or None, default=None
+ Kernel coefficient for 'rbf' and 'sigmoid'.
+ degree : int, default=3
+ Degree of the polynomial kernel.
+ coef0 : float, default=0.0
+ Independent term in 'poly' and 'sigmoid' kernels.
+ strategy : {'ovr', 'ovo'}, default='ovr'
+ Multiclass quantification strategy flag (for consistency with
+ other mixture-based quantifiers).
+
+ Attributes
+ ----------
+ classes_ : ndarray of shape (n_classes,)
+ Class labels seen during fitting.
+ X_train_ : ndarray of shape (n_train, n_features)
+ Training feature matrix.
+ y_train_ : ndarray of shape (n_train,)
+ Training labels.
+ class_means_ : ndarray of shape (n_classes, n_train)
+ Empirical class-wise kernel mean embeddings in the span of training
+ samples.
+ K_train_ : ndarray of shape (n_train, n_train)
+ Gram matrix of training samples under the chosen kernel.
+
+ References
+ ----------
+ [1] Iyer, A., Nath, S., & Sarawagi, S. (2014).
+ Maximum Mean Discrepancy for Class Ratio Estimation:
+ Convergence Bounds and Kernel Selection. ICML.
+
+ [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023).
+ Learning to Quantify. Springer.
+ """
+
+ _parameter_constraints = {
+ "kernel": [Options(["rbf", "linear", "poly", "sigmoid", "cosine"])],
+ "gamma": [Interval(0, None, inclusive_left=False), Options([None])],
+ "degree": [Interval(1, None, inclusive_left=True)],
+ "coef0": [Interval(0, None, inclusive_left=True)],
+ "strategy": [Options(["ovr", "ovo"])],
+ }
+
+ def __init__(self,
+ kernel="rbf",
+ gamma=None,
+ degree=3,
+ coef0=0.0,
+ strategy="ovr"):
+ super().__init__()
+ self.kernel = kernel
+ self.gamma = gamma
+ self.degree = degree
+ self.coef0 = coef0
+ self.strategy = strategy
+
+ self.X_train_ = None
+ self.y_train_ = None
+ self.class_means_ = None # class-wise kernel means
+ self.K_train_ = None # train Gram matrix
+
+
+ @_fit_context(prefer_skip_nested_validation=True)
+ def _fit(self, X, y, *args, **kwargs):
+ """
+ Store X, y, validate labels and precompute class-wise kernel means.
+ """
+ self.X_train_ = X
+ self.y_train_ = y
+
+ class_means, K_train = self._compute_class_means(X, y)
+ self.class_means_ = class_means
+ self.K_train_ = K_train
+
+ return self
+
+ def _predict(self, X, y_train) -> np.ndarray:
+ """
+ Estimate the prevalence vector on X using MMD.
+ """
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
+
+ theta, _ = self.best_mixture(X, self.X_train_, self.y_train_)
+ prevalence = validate_prevalences(self, theta, self.classes_)
+ return prevalence
+
+ def best_mixture(self, X_test, X_train, y_train):
+ """
+ Implements the MMD-based class ratio estimation:
+
+ min_theta || sum_y theta_y mu_y - mu_U ||^2
+
+ and returns (theta, objective_value).
+ """
+ # Use precomputed means if available
+ if self.class_means_ is None or self.X_train_ is None:
+ class_means, _ = self._compute_class_means(X_train, y_train)
+ else:
+ class_means = self.class_means_
+
+ mu_u = self._compute_unlabeled_mean(X_test)
+ G, h = self._build_QP_matrices(class_means, mu_u)
+
+ theta = self._solve_simplex_qp(G, h)
+ # Objective value: ||A theta - a||^2 = theta^T G theta - 2 h^T theta + const
+ obj = float(theta @ G @ theta - 2.0 * (h @ theta))
+
+ return theta, obj
+
+
+ def _kernel_kwargs(self):
+ params = {}
+ if self.kernel == "rbf" and self.gamma is not None:
+ params["gamma"] = self.gamma
+ if self.kernel == "poly":
+ params["degree"] = self.degree
+ params["coef0"] = self.coef0
+ if self.kernel == "sigmoid":
+ if self.gamma is not None:
+ params["gamma"] = self.gamma
+ params["coef0"] = self.coef0
+ return params
+
+ def _compute_class_means(self, X, y):
+ """
+ Compute kernel mean embeddings per class in the RKHS.
+
+ X: (n_train, d)
+ y: (n_train,)
+ Returns:
+ class_means: (n_classes, n_train)
+ K: (n_train, n_train)
+ """
+ classes = self.classes_
+ K = pairwise_kernels(X, X, metric=self.kernel, **self._kernel_kwargs())
+ means = []
+ for c in classes:
+ mask = (y == c)
+ Kc = K[mask] # rows of class c
+ mu_c = Kc.mean(axis=0) # mean over rows
+ means.append(mu_c)
+ means = np.vstack(means)
+ return means, K
+
+ def _compute_unlabeled_mean(self, X_test):
+ """
+ Compute the kernel mean embedding of the test set.
+
+ mu_U = E_{x in U} K(x, ·)
+ """
+ K_ut = pairwise_kernels(
+ X_test,
+ self.X_train_,
+ metric=self.kernel,
+ **self._kernel_kwargs()
+ )
+ mu_u = K_ut.mean(axis=0) # shape (n_train,)
+ return mu_u
+
+ def _build_QP_matrices(self, class_means, mu_u):
+ """
+ Build G and h for the objective
+
+ min_theta theta^T G theta - 2 h^T theta
+
+ with theta in the simplex (dimension = n_classes).
+
+ class_means: (n_classes, n_train)
+ mu_u: (n_train,)
+ """
+ # Gram of means in RKHS: G_ij = <mu_i, mu_j>
+ G = class_means @ class_means.T # (C, C)
+ # Inner products with mu_U: h_i = <mu_i, mu_U>
+ h = class_means @ mu_u # (C,)
+ return G, h
+
+ def _solve_simplex_qp(self, G, h):
+ """
+ Solve:
+
+ min_theta theta^T G theta - 2 h^T theta
+ s.t. theta >= 0, sum(theta) = 1
+
+ using SciPy's SLSQP solver.
+ """
+ C = G.shape[0]
+
+ def obj(theta):
+ return float(theta @ G @ theta - 2.0 * (h @ theta))
+
+ def grad(theta):
+ # gradient: 2 G theta - 2 h
+ return 2.0 * (G @ theta - h)
+
+ # equality constraint: sum(theta) = 1
+ cons = {
+ "type": "eq",
+ "fun": lambda t: np.sum(t) - 1.0,
+ "jac": lambda t: np.ones_like(t),
+ }
+
+ # bounds: theta_i >= 0
+ bounds = [(0.0, 1.0) for _ in range(C)]
+
+ # initial point: uniform distribution on the simplex
+ x0 = np.ones(C) / C
+
+ res = minimize(
+ obj,
+ x0,
+ method="SLSQP",
+ jac=grad,
+ bounds=bounds,
+ constraints=[cons],
+ options={"maxiter": 100, "ftol": 1e-9},
+ )
+
+ theta = res.x
+ theta = np.maximum(theta, 0)
+ s = theta.sum()
+ if s <= 0:
+ theta = np.ones_like(theta) / len(theta)
+ else:
+ theta /= s
+ return theta
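
Aside: a minimal usage sketch of the new MMD_RKHS quantifier. The import path and the public fit/predict wrappers around _fit/_predict are assumptions based on the other quantifiers in this diff, not confirmed API.

import numpy as np
from mlquantify.methods import MMD_RKHS  # import path is an assumption

rng = np.random.default_rng(0)
# two Gaussian classes in 5 dimensions
X_train = np.vstack([rng.normal(0.0, 1.0, (200, 5)), rng.normal(1.5, 1.0, (200, 5))])
y_train = np.array([0] * 200 + [1] * 200)
# unlabeled test set with true prevalences of roughly [0.3, 0.7]
X_test = np.vstack([rng.normal(0.0, 1.0, (60, 5)), rng.normal(1.5, 1.0, (140, 5))])

q = MMD_RKHS(kernel="rbf", gamma=0.5)
q.fit(X_train, y_train)
print(q.predict(X_test))  # estimated prevalence vector on the simplex, expected near [0.3, 0.7]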
@@ -107,7 +107,7 @@ class GridSearchQ(MetaquantifierMixin, BaseQuantifier):
  "scoring": [CallableConstraint()],
  "refit": [bool],
  "val_split": [Interval(0.0, 1.0, inclusive_left=False, inclusive_right=False)],
- "n_jobs": [Interval(1, None), None],
+ "n_jobs": [Interval(-1, None), None],
  "random_seed": [Interval(0, None), None],
  "timeout": [Interval(-1, None)],
  "verbose": [bool]
@@ -96,22 +96,22 @@ class BaseKDE(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
 
  if learner_fitted:
  train_predictions = getattr(self.learner, learner_function)(X)
- train_y_values = y
+ y_train = y
  else:
- train_predictions, train_y_values = apply_cross_validation(
+ train_predictions, y_train = apply_cross_validation(
  self.learner, X, y,
  function=learner_function, cv=5,
  stratified=True, shuffle=True
  )
 
  self.train_predictions = train_predictions
- self.train_y_values = train_y_values
- self._precompute_training(train_predictions, train_y_values)
+ self.y_train = y_train
+ self._precompute_training(train_predictions, y_train)
  return self
 
- def _fit_kde_models(self, train_predictions, train_y_values):
+ def _fit_kde_models(self, train_predictions, y_train):
  P = np.atleast_2d(train_predictions)
- y = np.asarray(train_y_values)
+ y = np.asarray(y_train)
  self._class_kdes = []
 
  for c in self.classes_:
@@ -126,18 +126,18 @@ class BaseKDE(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
 
  def predict(self, X):
  predictions = getattr(self.learner, _get_learner_function(self))(X)
- return self.aggregate(predictions, self.train_predictions, self.train_y_values)
+ return self.aggregate(predictions, self.train_predictions, self.y_train)
 
- def aggregate(self, predictions, train_predictions, train_y_values):
+ def aggregate(self, predictions, train_predictions, y_train):
  predictions = validate_predictions(self, predictions)
 
- if hasattr(self, "classes_") and len(np.unique(train_y_values)) != len(self.classes_):
+ if hasattr(self, "classes_") and len(np.unique(y_train)) != len(self.classes_):
  self._precomputed = False
 
- self.classes_ = check_classes_attribute(self, np.unique(train_y_values))
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
 
  if not self._precomputed:
- self._precompute_training(train_predictions, train_y_values)
+ self._precompute_training(train_predictions, y_train)
  self._precomputed = True
 
  prevalence, _ = self._solve_prevalences(predictions)
@@ -145,22 +145,22 @@ class BaseKDE(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
  prevalence = validate_prevalences(self, prevalence, self.classes_)
  return prevalence
 
- def best_distance(self, predictions, train_predictions, train_y_values):
+ def best_distance(self, predictions, train_predictions, y_train):
  """Return the best distance found during fitting."""
  if self.best_distance is not None:
  return self.best_distance
 
- self.classes_ = check_classes_attribute(self, np.unique(train_y_values))
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
 
  if not self._precomputed:
- self._precompute_training(train_predictions, train_y_values)
+ self._precompute_training(train_predictions, y_train)
  self._precomputed = True
 
  _, best_distance = self._solve_prevalences(predictions)
  return best_distance
 
  @abstractmethod
- def _precompute_training(self, train_predictions, train_y_values):
+ def _precompute_training(self, train_predictions, y_train):
  raise NotImplementedError
 
  @abstractmethod
@@ -63,7 +63,7 @@ class PWK(BaseQuantifier):
  "leaf_size": [Interval(1, None, inclusive_right=False)],
  "p": [Interval(1, None, inclusive_right=False)],
  "metric_params": [dict, type(None)],
- "n_jobs": [Interval(1, None, inclusive_right=False), type(None)],
+ "n_jobs": [Interval(-1, None, inclusive_right=False), type(None)],
  }
 
  def __init__(self,
@@ -111,7 +111,6 @@ class PWK(BaseQuantifier):
  The fitted instance.
  """
  X, y = validate_data(self, X, y, ensure_2d=True, ensure_min_samples=2)
- validate_y(self, y)
  self.classes_ = np.unique(y)
  self.cc = CC(self.learner)
  return self.cc.fit(X, y)
@@ -129,6 +128,7 @@ class PWK(BaseQuantifier):
  prevalences : array of shape (n_classes,)
  Predicted class prevalences.
  """
+ X = validate_data(self, X, ensure_2d=True)
  prevalences = self.cc.predict(X)
  prevalences = validate_prevalences(self, prevalences, self.classes_)
  return prevalences
@@ -78,11 +78,11 @@ class KDEyML(BaseKDE):
  approaches for distribution matching and maximum likelihood estimation.
  """
 
- def _precompute_training(self, train_predictions, train_y_values):
+ def _precompute_training(self, train_predictions, y_train):
  r"""
  Fit KDE models on class-specific training posterior predictions.
  """
- super()._fit_kde_models(train_predictions, train_y_values)
+ super()._fit_kde_models(train_predictions, y_train)
 
  def _solve_prevalences(self, predictions):
  r"""
@@ -156,11 +156,11 @@ class KDEyHD(BaseKDE):
  self.montecarlo_trials = montecarlo_trials
  self.random_state = random_state
 
- def _precompute_training(self, train_predictions, train_y_values):
+ def _precompute_training(self, train_predictions, y_train):
  """
  Precompute reference samples from class KDEs and their densities.
  """
- super()._fit_kde_models(train_predictions, train_y_values)
+ super()._fit_kde_models(train_predictions, y_train)
  n_class = len(self._class_kdes)
  trials = int(self.montecarlo_trials)
  rng = check_random_state(self.random_state)
@@ -222,12 +222,12 @@ class KDEyCS(BaseKDE):
  density representations, as discussed by Moreo et al. (2023).
  """
 
- def _precompute_training(self, train_predictions, train_y_values):
+ def _precompute_training(self, train_predictions, y_train):
  """
  Precompute kernel sums and Gram matrices needed for CS divergence evaluation.
  """
  P = np.atleast_2d(train_predictions)
- y = np.asarray(train_y_values)
+ y = np.asarray(y_train)
  centers = [P[y == c] for c in self.classes_]
  counts = np.array([len(x) if len(x) > 0 else 1 for x in centers])
  h_eff = np.sqrt(2) * self.bandwidth
@@ -1 +1 @@
- # TODO
+ from ._classes import QuaNet