mlquantify 0.0.11.2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. mlquantify/__init__.py +32 -6
  2. mlquantify/base.py +559 -257
  3. mlquantify/classification/__init__.py +1 -1
  4. mlquantify/classification/methods.py +160 -0
  5. mlquantify/evaluation/__init__.py +14 -2
  6. mlquantify/evaluation/measures.py +215 -0
  7. mlquantify/evaluation/protocol.py +647 -0
  8. mlquantify/methods/__init__.py +37 -40
  9. mlquantify/methods/aggregative.py +1030 -0
  10. mlquantify/methods/meta.py +472 -0
  11. mlquantify/methods/mixture_models.py +1003 -0
  12. mlquantify/methods/non_aggregative.py +136 -0
  13. mlquantify/methods/threshold_optimization.py +957 -0
  14. mlquantify/model_selection.py +377 -232
  15. mlquantify/plots.py +367 -0
  16. mlquantify/utils/__init__.py +2 -2
  17. mlquantify/utils/general.py +334 -0
  18. mlquantify/utils/method.py +449 -0
  19. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/METADATA +137 -122
  20. mlquantify-0.1.1.dist-info/RECORD +22 -0
  21. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/WHEEL +1 -1
  22. mlquantify/classification/pwkclf.py +0 -73
  23. mlquantify/evaluation/measures/__init__.py +0 -26
  24. mlquantify/evaluation/measures/ae.py +0 -11
  25. mlquantify/evaluation/measures/bias.py +0 -16
  26. mlquantify/evaluation/measures/kld.py +0 -8
  27. mlquantify/evaluation/measures/mse.py +0 -12
  28. mlquantify/evaluation/measures/nae.py +0 -16
  29. mlquantify/evaluation/measures/nkld.py +0 -13
  30. mlquantify/evaluation/measures/nrae.py +0 -16
  31. mlquantify/evaluation/measures/rae.py +0 -12
  32. mlquantify/evaluation/measures/se.py +0 -12
  33. mlquantify/evaluation/protocol/_Protocol.py +0 -202
  34. mlquantify/evaluation/protocol/__init__.py +0 -2
  35. mlquantify/evaluation/protocol/app.py +0 -146
  36. mlquantify/evaluation/protocol/npp.py +0 -34
  37. mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
  38. mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
  39. mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
  40. mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
  41. mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
  42. mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
  43. mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
  44. mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
  45. mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
  46. mlquantify/methods/aggregative/__init__.py +0 -9
  47. mlquantify/methods/aggregative/cc.py +0 -32
  48. mlquantify/methods/aggregative/emq.py +0 -86
  49. mlquantify/methods/aggregative/fm.py +0 -72
  50. mlquantify/methods/aggregative/gac.py +0 -96
  51. mlquantify/methods/aggregative/gpac.py +0 -87
  52. mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -81
  53. mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
  54. mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
  55. mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -89
  56. mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -46
  57. mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
  58. mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
  59. mlquantify/methods/aggregative/pcc.py +0 -33
  60. mlquantify/methods/aggregative/pwk.py +0 -38
  61. mlquantify/methods/meta/__init__.py +0 -1
  62. mlquantify/methods/meta/ensemble.py +0 -236
  63. mlquantify/methods/non_aggregative/__init__.py +0 -1
  64. mlquantify/methods/non_aggregative/hdx.py +0 -71
  65. mlquantify/plots/__init__.py +0 -2
  66. mlquantify/plots/distribution_plot.py +0 -109
  67. mlquantify/plots/protocol_plot.py +0 -193
  68. mlquantify/utils/general_purposes/__init__.py +0 -8
  69. mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
  70. mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
  71. mlquantify/utils/general_purposes/get_real_prev.py +0 -9
  72. mlquantify/utils/general_purposes/load_quantifier.py +0 -4
  73. mlquantify/utils/general_purposes/make_prevs.py +0 -23
  74. mlquantify/utils/general_purposes/normalize.py +0 -20
  75. mlquantify/utils/general_purposes/parallel.py +0 -10
  76. mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
  77. mlquantify/utils/method_purposes/__init__.py +0 -6
  78. mlquantify/utils/method_purposes/distances.py +0 -21
  79. mlquantify/utils/method_purposes/getHist.py +0 -13
  80. mlquantify/utils/method_purposes/get_scores.py +0 -33
  81. mlquantify/utils/method_purposes/moss.py +0 -16
  82. mlquantify/utils/method_purposes/ternary_search.py +0 -14
  83. mlquantify/utils/method_purposes/tprfpr.py +0 -42
  84. mlquantify-0.0.11.2.dist-info/RECORD +0 -73
  85. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,136 @@
1
+ import numpy as np
2
+
3
+ from ..base import NonAggregativeQuantifier
4
+ from ..utils.method import getHist, hellinger
5
+
6
+
7
+
8
+
9
class HDx(NonAggregativeQuantifier):
    """
    Hellinger Distance Minimization (HDx).

    This method estimates class prevalence by calculating the Hellinger
    distance for each feature in the dataset, as opposed to HDy, which
    computes the distance for classifier-generated scores.

    Only the first two entries of ``self.classes`` are used, so the method
    is binary: ``self.classes[0]`` is treated as the negative class and
    ``self.classes[1]`` as the positive class.

    Parameters
    ----------
    bins_size : np.ndarray, optional
        An array of bin sizes for histogram calculations. Defaults to an array
        combining linearly spaced values between 2 and 20 with an additional
        bin size of 30.

    Attributes
    ----------
    bins_size : np.ndarray
        An array of bin sizes for histogram calculations.
    neg_features : np.ndarray
        Features from the negative class.
    pos_features : np.ndarray
        Features from the positive class.

    References
    ----------
    .. [1] GONZÁLEZ-CASTRO, Víctor; ALAIZ-RODRÍGUEZ, Rocío; ALEGRE, Enrique. Class distribution estimation based on the Hellinger distance. Information Sciences, v. 218, p. 146-164, 2013. Available at https://www.sciencedirect.com/science/article/abs/pii/S0020025512004069?casa_token=W6UksOigmp4AAAAA:ap8FK5mtpAzG-s8k2ygfRVgdIBYDGWjEi70ueJ546coP9F-VNaCKE5W_gsAv0bWQiwzt2QoAuLjP

    Examples
    --------
    >>> from mlquantify.methods.non_aggregative import HDx
    >>> from mlquantify.utils.general import get_real_prev
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.model_selection import train_test_split
    >>>
    >>> features, target = load_breast_cancer(return_X_y=True)
    >>>
    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)
    >>>
    >>> model = HDx()
    >>> model.fit(X_train, y_train)
    >>>
    >>> predictions = model.predict(X_test)
    >>> predictions
    {0: 0.39, 1: 0.61}
    >>> get_real_prev(y_test)
    {0: 0.3684210526315789, 1: 0.631578947368421}
    """

    def __init__(self, bins_size: np.ndarray = None):
        if bins_size is None:
            bins_size = np.append(np.linspace(2, 20, 10), 30)

        self.bins_size = bins_size
        self.neg_features = None
        self.pos_features = None

    def _fit_method(self, X, y):
        """
        Fit the HDx model by separating the features into positive and negative classes.

        Parameters
        ----------
        X : array-like
            Feature matrix. Anything that is not a ``np.ndarray`` must expose
            ``to_numpy()`` (e.g. a pandas DataFrame).
        y : array-like
            Target labels.

        Returns
        -------
        self : HDx
            The fitted instance of the class.
        """
        pos_features = X[y == self.classes[1]]
        neg_features = X[y == self.classes[0]]

        # Both subsets come from X, so X's type alone decides whether they
        # need converting. Guarding the negative subset on y's type left it
        # unconverted for (DataFrame X, ndarray y) and called ``to_numpy`` on
        # an ndarray for (ndarray X, Series y).
        if not isinstance(X, np.ndarray):
            pos_features = pos_features.to_numpy()
            neg_features = neg_features.to_numpy()

        self.pos_features = pos_features
        self.neg_features = neg_features

        return self

    def _predict_method(self, X) -> np.ndarray:
        """
        Predict the prevalence of the positive and negative classes.

        Every candidate positive-class prevalence ``alpha`` in
        ``{0.00, 0.01, ..., 1.00}`` is scored by the mean Hellinger distance,
        over all (feature, bin size) pairs, between the test histogram and
        the ``alpha``-weighted mixture of the training histograms. The
        ``alpha`` with the smallest mean distance is returned.

        Parameters
        ----------
        X : array-like
            Feature matrix for the test data.

        Returns
        -------
        prevalence : np.ndarray
            A 2-element array representing the prevalence of the negative
            and positive classes, respectively.
        """
        if not isinstance(X, np.ndarray):
            X = X.to_numpy()

        alpha_values = np.round(np.linspace(0, 1, 101), 2)

        # None of the histogram inputs depend on alpha, so build them once per
        # (feature, bin size) pair rather than once per alpha. The pair order
        # matches the original nested loops.
        # NOTE(review): assumes getHist is a pure function of its arguments.
        densities = [
            (
                getHist(self.pos_features[:, i], bins),
                getHist(self.neg_features[:, i], bins),
                getHist(X[:, i], bins),
            )
            for i in range(X.shape[1])
            for bins in self.bins_size
        ]

        best_distances = {}

        # Iterate over alpha values to compute the prevalence
        for alpha in alpha_values:
            # Combine positive and negative densities using the mixture weight
            # (alpha) and measure the Hellinger distance to the test density.
            distances = [
                hellinger((dist_pos * alpha) + (dist_neg * (1 - alpha)), dist_test)
                for dist_pos, dist_neg, dist_test in densities
            ]

            # Store the mean distance for the current alpha
            best_distances[alpha] = np.mean(distances)

        # Find the alpha value that minimizes the mean Hellinger distance
        prevalence = min(best_distances, key=best_distances.get)

        return np.asarray([1 - prevalence, prevalence])