mlquantify 0.0.11.3__tar.gz → 0.0.11.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/PKG-INFO +10 -2
  2. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/README.md +1 -1
  3. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/bias.py +1 -1
  4. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/gac.py +16 -23
  5. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/gpac.py +17 -26
  6. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify.egg-info/PKG-INFO +10 -2
  7. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/setup.py +1 -1
  8. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/__init__.py +0 -0
  9. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/base.py +0 -0
  10. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/classification/__init__.py +0 -0
  11. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/classification/pwkclf.py +0 -0
  12. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/__init__.py +0 -0
  13. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/__init__.py +0 -0
  14. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/ae.py +0 -0
  15. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/kld.py +0 -0
  16. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/mse.py +0 -0
  17. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/nae.py +0 -0
  18. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/nkld.py +0 -0
  19. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/nrae.py +0 -0
  20. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/rae.py +0 -0
  21. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/se.py +0 -0
  22. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/protocol/_Protocol.py +0 -0
  23. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/protocol/__init__.py +0 -0
  24. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/protocol/app.py +0 -0
  25. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/protocol/npp.py +0 -0
  26. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/__init__.py +0 -0
  27. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -0
  28. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -0
  29. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -0
  30. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -0
  31. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -0
  32. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -0
  33. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -0
  34. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -0
  35. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -0
  36. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/__init__.py +0 -0
  37. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/cc.py +0 -0
  38. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/emq.py +0 -0
  39. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/fm.py +0 -0
  40. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -0
  41. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -0
  42. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/dys.py +0 -0
  43. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -0
  44. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -0
  45. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/smm.py +0 -0
  46. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/mixtureModels/sord.py +0 -0
  47. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/pcc.py +0 -0
  48. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/pwk.py +0 -0
  49. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/meta/__init__.py +0 -0
  50. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/meta/ensemble.py +0 -0
  51. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/non_aggregative/__init__.py +0 -0
  52. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/non_aggregative/hdx.py +0 -0
  53. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/model_selection.py +0 -0
  54. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/plots/__init__.py +0 -0
  55. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/plots/distribution_plot.py +0 -0
  56. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/plots/protocol_plot.py +0 -0
  57. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/__init__.py +0 -0
  58. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/__init__.py +0 -0
  59. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/convert_col_to_array.py +0 -0
  60. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -0
  61. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/get_real_prev.py +0 -0
  62. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/load_quantifier.py +0 -0
  63. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/make_prevs.py +0 -0
  64. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/normalize.py +0 -0
  65. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/parallel.py +0 -0
  66. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/general_purposes/round_protocol_df.py +0 -0
  67. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/__init__.py +0 -0
  68. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/distances.py +0 -0
  69. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/getHist.py +0 -0
  70. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/get_scores.py +0 -0
  71. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/moss.py +0 -0
  72. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/ternary_search.py +0 -0
  73. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/utils/method_purposes/tprfpr.py +0 -0
  74. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify.egg-info/SOURCES.txt +0 -0
  75. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify.egg-info/dependency_links.txt +0 -0
  76. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify.egg-info/requires.txt +0 -0
  77. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify.egg-info/top_level.txt +0 -0
  78. {mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlquantify
3
- Version: 0.0.11.3
3
+ Version: 0.0.11.5
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -12,6 +12,14 @@ Classifier: Operating System :: Unix
12
12
  Classifier: Operating System :: MacOS :: MacOS X
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
14
  Description-Content-Type: text/markdown
15
+ Requires-Dist: scikit-learn
16
+ Requires-Dist: numpy
17
+ Requires-Dist: scipy
18
+ Requires-Dist: joblib
19
+ Requires-Dist: tqdm
20
+ Requires-Dist: pandas
21
+ Requires-Dist: xlrd
22
+ Requires-Dist: matplotlib
15
23
 
16
24
  <h1 align="center">MLQuantify</h1>
17
25
  <h4 align="center">A Python Package for Quantification</h4>
@@ -45,7 +53,7 @@ ___
45
53
 
46
54
  | Section | Description |
47
55
  |---|---|
48
- | **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
56
+ | **21 Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
49
57
  | **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-Vs-All (OVA) is applied automatically. |
50
58
  | **Model Selection** | Criteria and processes used to select the best model, such as grid search adapted to quantification. |
51
59
  | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance (e.g., AE, BIAS, NAE, SE, KLD). |
@@ -30,7 +30,7 @@ ___
30
30
 
31
31
  | Section | Description |
32
32
  |---|---|
33
- | **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
33
+ | **21 Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
34
34
  | **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-Vs-All (OVA) is applied automatically. |
35
35
  | **Model Selection** | Criteria and processes used to select the best model, such as grid search adapted to quantification. |
36
36
  | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance (e.g., AE, BIAS, NAE, SE, KLD). |
@@ -11,6 +11,6 @@ def bias(prev_real:np.any, prev_pred:np.any):
11
11
  abs_errors = abs(prev_pred - prev_real)
12
12
 
13
13
  if classes:
14
- return {class_:abs_error for class_, abs_error in zip(classes, abs_errors)}
14
+ return {class_:float(abs_error) for class_, abs_error in zip(classes, abs_errors)}
15
15
 
16
16
  return abs_errors
@@ -2,7 +2,7 @@ import numpy as np
2
2
  import pandas as pd
3
3
  from sklearn.base import BaseEstimator
4
4
  from sklearn.metrics import confusion_matrix
5
- from sklearn.model_selection import StratifiedKFold
5
+ from sklearn.model_selection import train_test_split
6
6
 
7
7
  from ...base import AggregativeQuantifier
8
8
 
@@ -13,10 +13,12 @@ class GAC(AggregativeQuantifier):
13
13
  and solve it via constrained least-squares regression.
14
14
  """
15
15
 
16
- def __init__(self, learner: BaseEstimator):
16
+ def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
17
17
  assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
18
18
  self.learner = learner
19
19
  self.cond_prob_matrix = None
20
+ self.train_size = train_size
21
+ self.random_state = random_state
20
22
 
21
23
  def _fit_method(self, X, y):
22
24
  # Ensure X and y are DataFrames
@@ -29,26 +31,17 @@ class GAC(AggregativeQuantifier):
29
31
  y_pred = self.learner.predict(X)
30
32
  y_label = y
31
33
  else:
32
- # Cross-validation for generating predictions
33
- skf = StratifiedKFold(n_splits=self.cv_folds)
34
- y_pred = []
35
- y_label = []
34
+ X_train, X_val, y_train, y_val = train_test_split(
35
+ X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
36
+ )
36
37
 
37
- for train_index, valid_index in skf.split(X, y):
38
-
39
- train_data = pd.DataFrame(X.iloc[train_index])
40
- train_label = y.iloc[train_index]
41
-
42
- valid_data = pd.DataFrame(X.iloc[valid_index])
43
- valid_label = y.iloc[valid_index]
44
-
45
- self.learner.fit(train_data, train_label)
46
-
47
- y_pred.extend(self.learner.predict(valid_data))
48
- y_label.extend(valid_label)
38
+ self.learner.fit(X_train, y_train)
39
+
40
+ y_label = y_val
41
+ y_pred = self.learner.predict(X_val)
49
42
 
50
43
  # Compute conditional probability matrix
51
- self.cond_prob_matrix = self.get_cond_prob_matrix(self.classes, y, y_pred)
44
+ self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_label, y_pred)
52
45
 
53
46
  return self
54
47
 
@@ -66,11 +59,11 @@ class GAC(AggregativeQuantifier):
66
59
  return adjusted_prevalences
67
60
 
68
61
  @classmethod
69
- def get_cond_prob_matrix(cls, classes:list, true_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
62
+ def get_cond_prob_matrix(cls, classes:list, y_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
70
63
  """ Estimate the conditional probability matrix P(yi|yj)"""
71
64
 
72
- CM = confusion_matrix(true_labels, predictions, labels=classes).T
73
- CM = CM.astype(np.float32)
65
+ CM = confusion_matrix(y_labels, predictions, labels=classes).T
66
+ CM = CM.astype(float)
74
67
  class_counts = CM.sum(axis=0)
75
68
  for i, _ in enumerate(classes):
76
69
  if class_counts[i] == 0:
@@ -91,6 +84,6 @@ class GAC(AggregativeQuantifier):
91
84
  adjusted_prevalences = np.linalg.solve(A, B)
92
85
  adjusted_prevalences = np.clip(adjusted_prevalences, 0, 1)
93
86
  adjusted_prevalences /= adjusted_prevalences.sum()
94
- except (np.linalg.LinAlgError, ValueError):
87
+ except (np.linalg.LinAlgError):
95
88
  adjusted_prevalences = predicted_prevalences # No way to adjust them
96
89
  return adjusted_prevalences
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
3
  from sklearn.base import BaseEstimator
4
- from sklearn.model_selection import StratifiedKFold
4
+ from sklearn.model_selection import train_test_split
5
5
 
6
6
  from .gac import GAC
7
7
  from ...base import AggregativeQuantifier
@@ -14,10 +14,12 @@ class GPAC(AggregativeQuantifier):
14
14
  """
15
15
 
16
16
 
17
- def __init__(self, learner: BaseEstimator):
17
+ def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
18
18
  assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
19
19
  self.learner = learner
20
20
  self.cond_prob_matrix = None
21
+ self.train_size = train_size
22
+ self.random_state = random_state
21
23
 
22
24
  def _fit_method(self, X, y):
23
25
  # Convert X and y to DataFrames if they are numpy arrays
@@ -28,31 +30,20 @@ class GPAC(AggregativeQuantifier):
28
30
 
29
31
  if self.learner_fitted:
30
32
  # Use existing model to predict
31
- predictions = self.learner.predict(X)
32
- true_labels = y
33
+ y_pred = self.learner.predict(X)
34
+ y_labels = y
33
35
  else:
34
- # Perform cross-validation to generate predictions
35
- skf = StratifiedKFold(n_splits=self.cv_folds)
36
- predictions = []
37
- true_labels = []
36
+ X_train, X_val, y_train, y_val = train_test_split(
37
+ X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
38
+ )
38
39
 
39
- for train_index, valid_index in skf.split(X, y):
40
- # Split data into training and validation sets
41
- train_data = pd.DataFrame(X.iloc[train_index])
42
- train_labels = y.iloc[train_index]
43
-
44
- valid_data = pd.DataFrame(X.iloc[valid_index])
45
- valid_labels = y.iloc[valid_index]
46
-
47
- # Train the learner
48
- self.learner.fit(train_data, train_labels)
49
-
50
- # Predict and collect results
51
- predictions.extend(self.learner.predict(valid_data))
52
- true_labels.extend(valid_labels)
40
+ self.learner.fit(X_train, y_train)
41
+
42
+ y_labels = y_val
43
+ y_pred = self.learner.predict(X_val)
53
44
 
54
45
  # Compute conditional probability matrix using GAC
55
- self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, true_labels, predictions)
46
+ self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_labels, y_pred)
56
47
 
57
48
  return self
58
49
 
@@ -73,15 +64,15 @@ class GPAC(AggregativeQuantifier):
73
64
  return adjusted_prevalences
74
65
 
75
66
  @classmethod
76
- def get_cond_prob_matrix(cls, classes:list, true_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
67
+ def get_cond_prob_matrix(cls, classes:list, y_labels:np.ndarray, y_pred:np.ndarray) -> np.ndarray:
77
68
  """Estimate the matrix where entry (i,j) is the estimate of P(yi|yj)"""
78
69
 
79
70
  n_classes = len(classes)
80
71
  cond_prob_matrix = np.eye(n_classes)
81
72
 
82
73
  for i, class_ in enumerate(classes):
83
- class_indices = true_labels == class_
74
+ class_indices = y_labels == class_
84
75
  if class_indices.any():
85
- cond_prob_matrix[i] = predictions[class_indices].mean(axis=0)
76
+ cond_prob_matrix[i] = y_pred[class_indices].mean(axis=0)
86
77
 
87
78
  return cond_prob_matrix.T
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlquantify
3
- Version: 0.0.11.3
3
+ Version: 0.0.11.5
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -12,6 +12,14 @@ Classifier: Operating System :: Unix
12
12
  Classifier: Operating System :: MacOS :: MacOS X
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
14
  Description-Content-Type: text/markdown
15
+ Requires-Dist: scikit-learn
16
+ Requires-Dist: numpy
17
+ Requires-Dist: scipy
18
+ Requires-Dist: joblib
19
+ Requires-Dist: tqdm
20
+ Requires-Dist: pandas
21
+ Requires-Dist: xlrd
22
+ Requires-Dist: matplotlib
15
23
 
16
24
  <h1 align="center">MLQuantify</h1>
17
25
  <h4 align="center">A Python Package for Quantification</h4>
@@ -45,7 +53,7 @@ ___
45
53
 
46
54
  | Section | Description |
47
55
  |---|---|
48
- | **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
56
+ | **21 Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
49
57
  | **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-Vs-All (OVA) is applied automatically. |
50
58
  | **Model Selection** | Criteria and processes used to select the best model, such as grid search adapted to quantification. |
51
59
  | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance (e.g., AE, BIAS, NAE, SE, KLD). |
@@ -6,7 +6,7 @@ here = pathlib.Path(__file__).parent.resolve()
6
6
 
7
7
  long_description = (here / 'README.md').read_text(encoding='utf-8')
8
8
 
9
- VERSION = '0.0.11.3'
9
+ VERSION = '0.0.11.5'
10
10
  DESCRIPTION = 'Quantification Library'
11
11
 
12
12
  # Setting up
File without changes