mlquantify 0.0.1__tar.gz → 0.0.11.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. mlquantify-0.0.11.1/PKG-INFO +114 -0
  2. mlquantify-0.0.11.1/README.md +99 -0
  3. mlquantify-0.0.11.1/mlquantify/__init__.py +6 -0
  4. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/base.py +2 -1
  5. mlquantify-0.0.11.1/mlquantify/classification/pwkclf.py +73 -0
  6. mlquantify-0.0.11.1/mlquantify/evaluation/measures/__init__.py +26 -0
  7. mlquantify-0.0.11.1/mlquantify/evaluation/measures/ae.py +11 -0
  8. mlquantify-0.0.11.1/mlquantify/evaluation/measures/bias.py +16 -0
  9. mlquantify-0.0.11.1/mlquantify/evaluation/measures/kld.py +8 -0
  10. mlquantify-0.0.11.1/mlquantify/evaluation/measures/mse.py +12 -0
  11. mlquantify-0.0.11.1/mlquantify/evaluation/measures/nae.py +16 -0
  12. mlquantify-0.0.11.1/mlquantify/evaluation/measures/nkld.py +13 -0
  13. mlquantify-0.0.11.1/mlquantify/evaluation/measures/nrae.py +16 -0
  14. mlquantify-0.0.11.1/mlquantify/evaluation/measures/rae.py +12 -0
  15. mlquantify-0.0.11.1/mlquantify/evaluation/measures/se.py +12 -0
  16. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/_Protocol.py +202 -0
  17. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/__init__.py +2 -0
  18. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/app.py +146 -0
  19. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/npp.py +34 -0
  20. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +62 -0
  21. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +7 -0
  22. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/acc.py +27 -0
  23. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/max.py +23 -0
  24. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/ms.py +21 -0
  25. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +25 -0
  26. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +41 -0
  27. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/t50.py +21 -0
  28. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/x.py +23 -0
  29. mlquantify-0.0.11.1/mlquantify/methods/aggregative/__init__.py +9 -0
  30. mlquantify-0.0.11.1/mlquantify/methods/aggregative/cc.py +32 -0
  31. mlquantify-0.0.11.1/mlquantify/methods/aggregative/emq.py +86 -0
  32. mlquantify-0.0.11.1/mlquantify/methods/aggregative/fm.py +72 -0
  33. mlquantify-0.0.11.1/mlquantify/methods/aggregative/gac.py +96 -0
  34. mlquantify-0.0.11.1/mlquantify/methods/aggregative/gpac.py +87 -0
  35. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +81 -0
  36. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/__init__.py +5 -0
  37. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/dys.py +55 -0
  38. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +89 -0
  39. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/hdy.py +46 -0
  40. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/smm.py +27 -0
  41. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/sord.py +77 -0
  42. mlquantify-0.0.11.1/mlquantify/methods/aggregative/pcc.py +33 -0
  43. mlquantify-0.0.11.1/mlquantify/methods/aggregative/pwk.py +38 -0
  44. mlquantify-0.0.11.1/mlquantify/methods/meta/__init__.py +1 -0
  45. mlquantify-0.0.11.1/mlquantify/methods/meta/ensemble.py +236 -0
  46. mlquantify-0.0.11.1/mlquantify/methods/non_aggregative/__init__.py +1 -0
  47. mlquantify-0.0.11.1/mlquantify/methods/non_aggregative/hdx.py +71 -0
  48. mlquantify-0.0.11.1/mlquantify/plots/distribution_plot.py +109 -0
  49. mlquantify-0.0.11.1/mlquantify/plots/protocol_plot.py +157 -0
  50. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/__init__.py +8 -0
  51. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/convert_col_to_array.py +13 -0
  52. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/generate_artificial_indexes.py +29 -0
  53. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/get_real_prev.py +9 -0
  54. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/load_quantifier.py +4 -0
  55. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/make_prevs.py +23 -0
  56. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/normalize.py +20 -0
  57. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/parallel.py +10 -0
  58. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/round_protocol_df.py +14 -0
  59. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/__init__.py +6 -0
  60. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/distances.py +21 -0
  61. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/getHist.py +13 -0
  62. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/get_scores.py +33 -0
  63. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/moss.py +16 -0
  64. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/ternary_search.py +14 -0
  65. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/tprfpr.py +42 -0
  66. mlquantify-0.0.11.1/mlquantify.egg-info/PKG-INFO +114 -0
  67. mlquantify-0.0.11.1/mlquantify.egg-info/SOURCES.txt +76 -0
  68. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/setup.py +8 -1
  69. mlquantify-0.0.1/MANIFEST.in +0 -4
  70. mlquantify-0.0.1/PKG-INFO +0 -22
  71. mlquantify-0.0.1/README.md +0 -2
  72. mlquantify-0.0.1/mlquantify.egg-info/PKG-INFO +0 -22
  73. mlquantify-0.0.1/mlquantify.egg-info/SOURCES.txt +0 -15
  74. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/classification/__init__.py +0 -0
  75. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/evaluation/__init__.py +0 -0
  76. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/methods/__init__.py +0 -0
  77. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/model_selection.py +0 -0
  78. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/plots/__init__.py +0 -0
  79. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify/utils/__init__.py +0 -0
  80. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify.egg-info/dependency_links.txt +0 -0
  81. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify.egg-info/requires.txt +0 -0
  82. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/mlquantify.egg-info/top_level.txt +0 -0
  83. {mlquantify-0.0.1 → mlquantify-0.0.11.1}/setup.cfg +0 -0
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.1
2
+ Name: mlquantify
3
+ Version: 0.0.11.1
4
+ Summary: Quantification Library
5
+ Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
+ Maintainer: Luiz Fernando Luth Junior
7
+ Keywords: python,machine learning,quantification,quantify
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+
16
+ <h1 align="center">MLQuantify</h1>
17
+ <h4 align="center">A Python Package for Quantification</h4>
18
+
19
+ ___
20
+
21
+ **mlquantify** is a Python library for quantification, also known as supervised prevalence estimation, designed to estimate the distribution of classes within datasets. It offers a range of tools for various quantification methods, model selection tailored for quantification tasks, evaluation metrics, and protocols to assess quantification performance. Additionally, mlquantify includes popular datasets and visualization tools to help analyze and interpret results.
22
+
23
+ ___
24
+
25
+ ## Latest Release
26
+
27
+ - **Version 0.0.1**: Initial beta version. For a detailed list of changes, check the [changelog](#).
28
+ - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
29
+ - Explore the [API documentation](#) for detailed developer information.
30
+ - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
31
+
32
+ ___
33
+
34
+ ## Installation
35
+
36
+ To install mlquantify, run the following command:
37
+
38
+ ```bash
39
+ pip install mlquantify
40
+ ```
41
+
42
+ ___
43
+
44
+ ## Contents
45
+
46
+ | Section | Description |
47
+ |---|---|
48
+ | **Quantification Methods** | Methods for quantification, such as Classify & Count, correction methods, Threshold Optimization, Mixture Models and more.|
49
+ | **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-Vs-All (OVA) is applied automatically. |
50
+ | **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
51
+ | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance, (e.g., AE, BIAS, NAE, SE, KLD, etc.). |
52
+ | **Evaluation Protocols** | Evaluation protocols used, based on sampling generation (e.g., APP, NPP, etc.). |
53
+ | **Plotting Results** | Tools and techniques used to visualize results, such as the protocol results.|
54
+ | **Comprehensive Documentation** | Complete documentation of the project, including code, data, and results. |
55
+
56
+ ___
57
+
58
+ ## Quick example:
59
+
60
+ This code first loads the breast cancer dataset from _sklearn_, which is then split into training and testing sets. It uses the _Expectation Maximisation Quantifier (EMQ)_ with a RandomForest classifier to predict class prevalence. After training the model, it evaluates performance by calculating and printing the absolute error and bias between the real and predicted prevalences.
61
+
62
+ ```python
63
+ import mlquantify as mq
64
+ from sklearn.ensemble import RandomForestClassifier
65
+ from sklearn.datasets import load_breast_cancer
66
+ from sklearn.model_selection import train_test_split
67
+
68
+ # Loading dataset from sklearn
69
+ features, target = load_breast_cancer(return_X_y=True)
70
+
71
+ #Splitting into train and test
72
+ X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
73
+
74
+ #Create the model, here it is the Expectation Maximisation Quantifier (EMQ) with a classifier
75
+ model = mq.methods.EMQ(RandomForestClassifier())
76
+ model.fit(X_train, y_train)
77
+
78
+ #Predict the class prevalence for X_test
79
+ pred_prevalence = model.predict(X_test)
80
+ real_prevalence = mq.utils.get_real_prev(y_test)
81
+
82
+ #Get the error for the prediction
83
+ ae = mq.evaluation.absolute_error(real_prevalence, pred_prevalence)
84
+ bias = mq.evaluation.bias(real_prevalence, pred_prevalence)
85
+
86
+ print(f"Absolute Error (AE) -> {ae:.4f}")
87
+ print(f"Bias -> {bias}")
88
+ ```
89
+
90
+ ___
91
+
92
+ ## Requirements
93
+
94
+ - Scikit-learn
95
+ - pandas
96
+ - numpy
97
+ - joblib
98
+ - tqdm
99
+ - matplotlib
100
+ - xlrd
101
+
102
+ ___
103
+
104
+ ## Documentation
105
+
106
+ ##### API is available [here](#)
107
+
108
+ - [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
109
+ - [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
110
+ - [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
111
+ - [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
112
+
113
+
114
+ ___
@@ -0,0 +1,99 @@
1
+ <h1 align="center">MLQuantify</h1>
2
+ <h4 align="center">A Python Package for Quantification</h4>
3
+
4
+ ___
5
+
6
+ **mlquantify** is a Python library for quantification, also known as supervised prevalence estimation, designed to estimate the distribution of classes within datasets. It offers a range of tools for various quantification methods, model selection tailored for quantification tasks, evaluation metrics, and protocols to assess quantification performance. Additionally, mlquantify includes popular datasets and visualization tools to help analyze and interpret results.
7
+
8
+ ___
9
+
10
+ ## Latest Release
11
+
12
+ - **Version 0.0.1**: Initial beta version. For a detailed list of changes, check the [changelog](#).
13
+ - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
14
+ - Explore the [API documentation](#) for detailed developer information.
15
+ - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
16
+
17
+ ___
18
+
19
+ ## Installation
20
+
21
+ To install mlquantify, run the following command:
22
+
23
+ ```bash
24
+ pip install mlquantify
25
+ ```
26
+
27
+ ___
28
+
29
+ ## Contents
30
+
31
+ | Section | Description |
32
+ |---|---|
33
+ | **Quantification Methods** | Methods for quantification, such as Classify & Count, correction methods, Threshold Optimization, Mixture Models and more.|
34
+ | **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-Vs-All (OVA) is applied automatically. |
35
+ | **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
36
+ | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance, (e.g., AE, BIAS, NAE, SE, KLD, etc.). |
37
+ | **Evaluation Protocols** | Evaluation protocols used, based on sampling generation (e.g., APP, NPP, etc.). |
38
+ | **Plotting Results** | Tools and techniques used to visualize results, such as the protocol results.|
39
+ | **Comprehensive Documentation** | Complete documentation of the project, including code, data, and results. |
40
+
41
+ ___
42
+
43
+ ## Quick example:
44
+
45
+ This code first loads the breast cancer dataset from _sklearn_, which is then split into training and testing sets. It uses the _Expectation Maximisation Quantifier (EMQ)_ with a RandomForest classifier to predict class prevalence. After training the model, it evaluates performance by calculating and printing the absolute error and bias between the real and predicted prevalences.
46
+
47
+ ```python
48
+ import mlquantify as mq
49
+ from sklearn.ensemble import RandomForestClassifier
50
+ from sklearn.datasets import load_breast_cancer
51
+ from sklearn.model_selection import train_test_split
52
+
53
+ # Loading dataset from sklearn
54
+ features, target = load_breast_cancer(return_X_y=True)
55
+
56
+ #Splitting into train and test
57
+ X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
58
+
59
+ #Create the model, here it is the Expectation Maximisation Quantifier (EMQ) with a classifier
60
+ model = mq.methods.EMQ(RandomForestClassifier())
61
+ model.fit(X_train, y_train)
62
+
63
+ #Predict the class prevalence for X_test
64
+ pred_prevalence = model.predict(X_test)
65
+ real_prevalence = mq.utils.get_real_prev(y_test)
66
+
67
+ #Get the error for the prediction
68
+ ae = mq.evaluation.absolute_error(real_prevalence, pred_prevalence)
69
+ bias = mq.evaluation.bias(real_prevalence, pred_prevalence)
70
+
71
+ print(f"Absolute Error (AE) -> {ae:.4f}")
72
+ print(f"Bias -> {bias}")
73
+ ```
74
+
75
+ ___
76
+
77
+ ## Requirements
78
+
79
+ - Scikit-learn
80
+ - pandas
81
+ - numpy
82
+ - joblib
83
+ - tqdm
84
+ - matplotlib
85
+ - xlrd
86
+
87
+ ___
88
+
89
+ ## Documentation
90
+
91
+ ##### API is available [here](#)
92
+
93
+ - [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
94
+ - [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
95
+ - [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
96
+ - [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
97
+
98
+
99
+ ___
@@ -0,0 +1,6 @@
1
+ from .classification import *
2
+ from .evaluation import *
3
+ from .methods import *
4
+ from .utils import *
5
+ from .plots import *
6
+ from .model_selection import GridSearchQ
@@ -138,13 +138,14 @@ class AggregativeQuantifier(Quantifier, ABC):
138
138
  return self.learner.get_params()
139
139
 
140
140
  def set_params(self, **params):
141
+
141
142
  # Model Params
142
143
  for key, value in params.items():
143
144
  if hasattr(self, key):
144
145
  setattr(self, key, value)
145
146
 
146
147
  # Learner Params
147
- if self.learner:
148
+ if self.learner is not None:
148
149
  learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
149
150
  if learner_params:
150
151
  self.learner.set_params(**learner_params)
@@ -0,0 +1,73 @@
1
+ from sklearn.neighbors import NearestNeighbors
2
+ from sklearn.base import BaseEstimator
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ class PWKCLF(BaseEstimator):
7
+ """Learner based on k-Nearest Neighborst (KNN) to use on the method PWK,
8
+ that also is based on KNN.
9
+ """
10
+
11
+
12
+ def __init__(self,
13
+ alpha=1,
14
+ n_neighbors=10,
15
+ algorithm="auto",
16
+ metric="euclidean",
17
+ leaf_size=30,
18
+ p=2,
19
+ metric_params=None,
20
+ n_jobs=None):
21
+
22
+ if alpha < 1:
23
+ raise ValueError("alpha must not be smaller than 1")
24
+
25
+ self.alpha = alpha
26
+ self.n_neighbors = n_neighbors
27
+
28
+ self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
29
+ algorithm=algorithm,
30
+ leaf_size=leaf_size,
31
+ metric=metric,
32
+ p=p,
33
+ metric_params=metric_params,
34
+ n_jobs=n_jobs)
35
+
36
+ self.Y = None
37
+ self.Y_map = None
38
+ self.w = None
39
+ self.y = None
40
+
41
+ def fit(self, X, y):
42
+ n_samples = X.shape[0]
43
+ if n_samples < self.n_neighbors:
44
+ self.nbrs.set_params(n_neighbors=n_samples)
45
+
46
+ self.y = y
47
+
48
+ if isinstance(y, pd.DataFrame):
49
+ self.y = y.reset_index(drop=True)
50
+
51
+ Y_cts = np.unique(y, return_counts=True)
52
+ self.Y = Y_cts[0]
53
+ self.Y_map = dict(zip(self.Y, range(len(self.Y))))
54
+
55
+ min_class_count = np.min(Y_cts[1])
56
+ self.w = (Y_cts[1] / min_class_count) ** (-1.0 / self.alpha)
57
+ self.nbrs.fit(X)
58
+ return self
59
+
60
+ def predict(self, X):
61
+ n_samples = X.shape[0]
62
+ nn_indices = self.nbrs.kneighbors(X, return_distance=False)
63
+
64
+ CM = np.zeros((n_samples, len(self.Y)))
65
+
66
+ for i in range(n_samples):
67
+ for j in nn_indices[i]:
68
+ CM[i, self.Y_map[self.y[j]]] += 1
69
+
70
+ CM = np.multiply(CM, self.w)
71
+ predictions = np.apply_along_axis(np.argmax, axis=1, arr=CM)
72
+
73
+ return self.Y[predictions]
@@ -0,0 +1,26 @@
1
+ from .ae import absolute_error
2
+ from .kld import kullback_leibler_divergence
3
+ from .nkld import normalized_kullback_leibler_divergence
4
+ from .rae import relative_absolute_error
5
+ from .nae import normalized_absolute_error
6
+ from .bias import bias
7
+ from .nrae import normalized_relative_absolute_error
8
+ from .se import squared_error
9
+ from .mse import mean_squared_error
10
+
11
+
12
+
13
+ MEASURES = {
14
+ "ae": absolute_error,
15
+ "nae": normalized_absolute_error,
16
+ "kld": kullback_leibler_divergence,
17
+ "nkld": normalized_kullback_leibler_divergence,
18
+ "nrae": normalized_relative_absolute_error,
19
+ "rae": relative_absolute_error,
20
+ "se": squared_error,
21
+ "mse": mean_squared_error
22
+ }
23
+
24
+
25
+ def get_measure(measure:str):
26
+ return MEASURES.get(measure)
@@ -0,0 +1,11 @@
1
+ import numpy as np
2
+
3
+ def absolute_error(prev_real:np.any, prev_pred:np.any):
4
+ if isinstance(prev_real, dict):
5
+ prev_real = np.asarray(list(prev_real.values()))
6
+ if isinstance(prev_pred, dict):
7
+ prev_pred = np.asarray(list(prev_pred.values()))
8
+
9
+ abs_error = abs(prev_pred - prev_real).mean(axis=-1)
10
+
11
+ return abs_error
@@ -0,0 +1,16 @@
1
+ import numpy as np
2
+
3
+ def bias(prev_real:np.any, prev_pred:np.any):
4
+ classes = None
5
+ if isinstance(prev_real, dict):
6
+ classes = prev_real.keys()
7
+ prev_real = np.asarray(list(prev_real.values()))
8
+ if isinstance(prev_pred, dict):
9
+ prev_pred = np.asarray(list(prev_pred.values()))
10
+
11
+ abs_errors = abs(prev_pred - prev_real)
12
+
13
+ if classes:
14
+ return {class_:abs_error for class_, abs_error in zip(classes, abs_errors)}
15
+
16
+ return abs_errors
@@ -0,0 +1,8 @@
1
+ import numpy as np
2
+
3
+ def kullback_leibler_divergence(prev_real:np.any, prev_pred:np.any):
4
+ if isinstance(prev_real, dict):
5
+ prev_real = np.asarray(list(prev_real.values()))
6
+ if isinstance(prev_pred, dict):
7
+ prev_pred = np.asarray(list(prev_pred.values()))
8
+ return prev_real * abs(np.log((prev_real / prev_pred)))
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+ from .se import squared_error
3
+
4
+ def mean_squared_error(prev_real:np.any, prev_pred:np.any):
5
+ if isinstance(prev_real, dict):
6
+ prev_real = np.asarray(list(prev_real.values()))
7
+ if isinstance(prev_pred, dict):
8
+ prev_pred = np.asarray(list(prev_pred.values()))
9
+
10
+ mean_sq_error = squared_error(prev_real, prev_pred).mean()
11
+
12
+ return mean_sq_error
@@ -0,0 +1,16 @@
1
+ import numpy as np
2
+ from .ae import absolute_error
3
+
4
+ def normalized_absolute_error(prev_real:np.any, prev_pred:np.any):
5
+ if isinstance(prev_real, dict):
6
+ prev_real = np.asarray(list(prev_real.values()))
7
+ if isinstance(prev_pred, dict):
8
+ prev_pred = np.asarray(list(prev_pred.values()))
9
+
10
+ abs_error = absolute_error(prev_real, prev_pred)
11
+
12
+ z_abs_error = (2 * (1 - min(prev_real)))
13
+
14
+ normalized = abs_error / z_abs_error
15
+
16
+ return normalized
@@ -0,0 +1,13 @@
1
+ import numpy as np
2
+ from .kld import kullback_leibler_divergence
3
+
4
+ def normalized_kullback_leibler_divergence(prev_real:np.any, prev_pred:np.any):
5
+ if isinstance(prev_real, dict):
6
+ prev_real = np.asarray(list(prev_real.values()))
7
+ if isinstance(prev_pred, dict):
8
+ prev_pred = np.asarray(list(prev_pred.values()))
9
+
10
+ euler = np.exp(kullback_leibler_divergence(prev_real, prev_pred))
11
+ normalized = 2 * (euler / (euler + 1)) - 1
12
+
13
+ return normalized
@@ -0,0 +1,16 @@
1
+ import numpy as np
2
+ from .rae import relative_absolute_error
3
+
4
+ def normalized_relative_absolute_error(prev_real:np.any, prev_pred:np.any):
5
+ if isinstance(prev_real, dict):
6
+ prev_real = np.asarray(list(prev_real.values()))
7
+ if isinstance(prev_pred, dict):
8
+ prev_pred = np.asarray(list(prev_pred.values()))
9
+
10
+ relative = relative_absolute_error(prev_real, prev_pred)
11
+
12
+ z_relative = (len(prev_real) - 1 + ((1 - min(prev_real)) / min(prev_real))) / len(prev_real)
13
+
14
+ normalized = relative/z_relative
15
+
16
+ return normalized
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+ from . import absolute_error
3
+
4
+ def relative_absolute_error(prev_real:np.any, prev_pred:np.any):
5
+ if isinstance(prev_real, dict):
6
+ prev_real = np.asarray(list(prev_real.values()))
7
+ if isinstance(prev_pred, dict):
8
+ prev_pred = np.asarray(list(prev_pred.values()))
9
+
10
+ relative = (absolute_error(prev_real, prev_pred) / prev_real).mean(axis=-1)
11
+
12
+ return relative
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+ from .ae import absolute_error
3
+
4
+ def squared_error(prev_real:np.any, prev_pred:np.any):
5
+ if isinstance(prev_real, dict):
6
+ prev_real = np.asarray(list(prev_real.values()))
7
+ if isinstance(prev_pred, dict):
8
+ prev_pred = np.asarray(list(prev_pred.values()))
9
+
10
+ sq_abs_error = ((prev_pred - prev_real) ** 2).mean(axis=-1)
11
+
12
+ return sq_abs_error
@@ -0,0 +1,202 @@
1
+ from abc import ABC, abstractmethod
2
+ import numpy as np
3
+ import pandas as pd
4
+ from typing import Union, List
5
+ from sklearn.base import BaseEstimator
6
+ from time import time
7
+ from tqdm import tqdm
8
+
9
+ from ...methods import get_method, METHODS, AGGREGATIVE, NON_AGGREGATIVE
10
+ from ...utils import *
11
+ from ..measures import get_measure, MEASURES
12
+ from ...base import Quantifier, AggregativeQuantifier
13
+
14
+ class Protocol(ABC):
15
+ """Base class for implementing different quantification protocols.
16
+
17
+ This abstract class provides a structure for creating protocols that involve
18
+ fitting quantification models to training data and generating predictions on test data.
19
+ It supports parallel processing, multiple iterations, and different output formats.
20
+
21
+ Args:
22
+ models (Union[List[Union[str, Quantifier]], str, Quantifier]):
23
+ List of quantification models, a single model name, or 'all' for all models.
24
+ batch_size (Union[List[int], int]):
25
+ Size of the batches to be processed, or a list of sizes.
26
+ learner (BaseEstimator, optional):
27
+ Machine learning model to be used with the quantifiers. Required for model methods.
28
+ n_iterations (int, optional):
29
+ Number of iterations for the protocol. Default is 1.
30
+ n_jobs (int, optional):
31
+ Number of jobs to run in parallel. Default is 1.
32
+ random_state (int, optional):
33
+ Seed for random number generation. Default is 32.
34
+ verbose (bool, optional):
35
+ Whether to print progress messages. Default is False.
36
+ return_type (str, optional):
37
+ Type of return value ('predictions' or 'table'). Default is 'predictions'.
38
+ measures (List[str], optional):
39
+ List of error measures to calculate. Must be in MEASURES or None. Default is None.
40
+ """
41
+
42
+
43
+ def __init__(self,
44
+ models: Union[List[Union[str, Quantifier]], str, Quantifier],
45
+ batch_size: Union[List[int], int],
46
+ learner: BaseEstimator = None,
47
+ n_iterations: int = 1,
48
+ n_jobs: int = 1,
49
+ random_state: int = 32,
50
+ verbose: bool = False,
51
+ return_type: str = "predictions",
52
+ measures: List[str] = None):
53
+
54
+ assert not measures or all(m in MEASURES for m in measures), \
55
+ f"Invalid measure(s) provided. Valid options: {list(MEASURES.keys())} or None"
56
+ assert return_type in ["predictions", "table"], \
57
+ "Invalid return_type. Valid options: ['predictions', 'table']"
58
+
59
+ self.models = self._initialize_models(models, learner)
60
+ self.learner = learner
61
+ self.batch_size = batch_size
62
+ self.n_iterations = n_iterations
63
+ self.n_jobs = n_jobs
64
+ self.random_state = random_state
65
+ self.verbose = verbose
66
+ self.return_type = return_type
67
+ self.measures = measures
68
+
69
+ def _initialize_models(self, models, learner):
70
+ if isinstance(models, list):
71
+ if isinstance(models[0], Quantifier):
72
+ return models
73
+ assert learner is not None, "Learner is required for model methods."
74
+ return [get_method(model)(learner) for model in models]
75
+ if isinstance(models, Quantifier):
76
+ return [models]
77
+
78
+ assert learner is not None, "Learner is required for model methods."
79
+
80
+ if models == "all":
81
+ print(hasattr(list(AGGREGATIVE.values())[0], "learner"))
82
+ models = [model(learner) if hasattr(model, "learner") else model() for model in METHODS.values()]
83
+ return models
84
+ if models == "aggregative":
85
+ return [model(learner) for model in AGGREGATIVE.values()]
86
+ if models == "non_aggregative":
87
+ return [model() for model in NON_AGGREGATIVE.values()]
88
+
89
+ return [get_method(models)(learner)]
90
+
91
+
92
+ def sout(self, msg):
93
+ if self.verbose:
94
+ print('[APP]' + msg)
95
+
96
+
97
+ def fit(self, X_train, y_train):
98
+ """Fit all methods into the training data.
99
+
100
+ Args:
101
+ X_train (array-like): Features of training.
102
+ y_train (array-like): Labels of training.
103
+ """
104
+ self.sout("Fitting models")
105
+
106
+ args = ((model, X_train, y_train, self.verbose) for model in self.models)
107
+ self.models = parallel(
108
+ self._delayed_fit,
109
+ tqdm(args, desc="Fitting models", total=len(self.models)) if self.verbose else args,
110
+ self.n_jobs)
111
+
112
+ self.sout("Fit [Done]")
113
+ return self
114
+
115
+
116
+ def predict(self, X_test, y_test) -> np.any:
117
+ """Generate several samples with artificial prevalences, and sizes.
118
+ And for each method, predicts with this sample, aggregating all toguether
119
+ with a pandas dataframe if request, or else just the predictions.
120
+
121
+ Args:
122
+ X_test (array-like): Features of test.
123
+ y_test (array-like): Labels of test.
124
+
125
+ Returns:
126
+ tuple: tuple containing the model, real_prev and pred_prev, or.
127
+ DataFrame: table of results, along with error measures if requested.
128
+ """
129
+
130
+
131
+ predictions = self.predict_protocol(X_test, y_test)
132
+
133
+
134
+ predictions_df = pd.DataFrame(predictions)
135
+
136
+ if self.return_type == "table":
137
+ predictions_df.columns = ["QUANTIFIER", "REAL_PREVS", "PRED_PREVS", "BATCH_SIZE"]
138
+
139
+ if self.measures:
140
+
141
+ def smooth(values:np.ndarray) ->np.ndarray:
142
+ smoothed_factor = 1/(2 * len(X_test))
143
+
144
+ values = (values + smoothed_factor) / (smoothed_factor * len(values) + 1)
145
+
146
+ return values
147
+
148
+
149
+ for metric in self.measures:
150
+ predictions_df[metric] = predictions_df.apply(
151
+ lambda row: get_measure(metric)(smooth(row["REAL_PREVS"]), smooth(row["PRED_PREVS"])),
152
+ axis=1
153
+ )
154
+
155
+ return predictions_df
156
+
157
+ predictions_array = predictions_df.to_numpy()
158
+ return (
159
+ predictions_array[:, 0], # Model names
160
+ np.stack(predictions_array[:, 1]), # Prev
161
+ np.stack(predictions_array[:, 2]) # Prev_pred
162
+ )
163
+
164
+
165
+ @abstractmethod
166
+ def predict_protocol(self) -> np.ndarray:
167
+ """ Abstract method that every protocol has to implement """
168
+ ...
169
+
170
+ @abstractmethod
171
+ def _new_sample(self) -> tuple:
172
+ """ Abstract method of sample extraction for each protocol
173
+
174
+ Returns:
175
+ tuple: tuple containing the X_sample and the y_sample
176
+ """
177
+ ...
178
+
179
+
180
+ @abstractmethod
181
+ def _delayed_predict(self, args) -> tuple:
182
+ """abstract method for predicting in the extracted
183
+ samples, is delayed for running in parallel for
184
+ eficciency purposes.
185
+ """
186
+ ...
187
+
188
+
189
+
190
+ def _delayed_fit(self, args):
191
+ model, X_train, y_train, verbose = args
192
+
193
+ if verbose:
194
+ print(f"\tFitting {model.__class__.__name__}")
195
+ start = time()
196
+
197
+ model = model.fit(X=X_train, y=y_train)
198
+
199
+ if verbose:
200
+ end = time()
201
+ print(f"\t\\--Fit ended for {model.__class__.__name__} in {round(end - start, 3)} seconds")
202
+ return model
@@ -0,0 +1,2 @@
1
+ from .app import APP
2
+ from .npp import NPP