mlquantify 0.0.11__tar.gz → 0.0.11.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/PKG-INFO +4 -16
  2. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/README.md +3 -7
  3. mlquantify-0.0.11.1/mlquantify/__init__.py +6 -0
  4. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/base.py +2 -1
  5. mlquantify-0.0.11.1/mlquantify/classification/pwkclf.py +73 -0
  6. mlquantify-0.0.11.1/mlquantify/evaluation/measures/__init__.py +26 -0
  7. mlquantify-0.0.11.1/mlquantify/evaluation/measures/ae.py +11 -0
  8. mlquantify-0.0.11.1/mlquantify/evaluation/measures/bias.py +16 -0
  9. mlquantify-0.0.11.1/mlquantify/evaluation/measures/kld.py +8 -0
  10. mlquantify-0.0.11.1/mlquantify/evaluation/measures/mse.py +12 -0
  11. mlquantify-0.0.11.1/mlquantify/evaluation/measures/nae.py +16 -0
  12. mlquantify-0.0.11.1/mlquantify/evaluation/measures/nkld.py +13 -0
  13. mlquantify-0.0.11.1/mlquantify/evaluation/measures/nrae.py +16 -0
  14. mlquantify-0.0.11.1/mlquantify/evaluation/measures/rae.py +12 -0
  15. mlquantify-0.0.11.1/mlquantify/evaluation/measures/se.py +12 -0
  16. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/_Protocol.py +202 -0
  17. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/__init__.py +2 -0
  18. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/app.py +146 -0
  19. mlquantify-0.0.11.1/mlquantify/evaluation/protocol/npp.py +34 -0
  20. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +62 -0
  21. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +7 -0
  22. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/acc.py +27 -0
  23. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/max.py +23 -0
  24. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/ms.py +21 -0
  25. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +25 -0
  26. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +41 -0
  27. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/t50.py +21 -0
  28. mlquantify-0.0.11.1/mlquantify/methods/aggregative/ThreholdOptm/x.py +23 -0
  29. mlquantify-0.0.11.1/mlquantify/methods/aggregative/__init__.py +9 -0
  30. mlquantify-0.0.11.1/mlquantify/methods/aggregative/cc.py +32 -0
  31. mlquantify-0.0.11.1/mlquantify/methods/aggregative/emq.py +86 -0
  32. mlquantify-0.0.11.1/mlquantify/methods/aggregative/fm.py +72 -0
  33. mlquantify-0.0.11.1/mlquantify/methods/aggregative/gac.py +96 -0
  34. mlquantify-0.0.11.1/mlquantify/methods/aggregative/gpac.py +87 -0
  35. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +81 -0
  36. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/__init__.py +5 -0
  37. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/dys.py +55 -0
  38. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +89 -0
  39. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/hdy.py +46 -0
  40. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/smm.py +27 -0
  41. mlquantify-0.0.11.1/mlquantify/methods/aggregative/mixtureModels/sord.py +77 -0
  42. mlquantify-0.0.11.1/mlquantify/methods/aggregative/pcc.py +33 -0
  43. mlquantify-0.0.11.1/mlquantify/methods/aggregative/pwk.py +38 -0
  44. mlquantify-0.0.11.1/mlquantify/methods/meta/__init__.py +1 -0
  45. mlquantify-0.0.11.1/mlquantify/methods/meta/ensemble.py +236 -0
  46. mlquantify-0.0.11.1/mlquantify/methods/non_aggregative/__init__.py +1 -0
  47. mlquantify-0.0.11.1/mlquantify/methods/non_aggregative/hdx.py +71 -0
  48. mlquantify-0.0.11.1/mlquantify/plots/distribution_plot.py +109 -0
  49. mlquantify-0.0.11.1/mlquantify/plots/protocol_plot.py +157 -0
  50. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/__init__.py +8 -0
  51. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/convert_col_to_array.py +13 -0
  52. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/generate_artificial_indexes.py +29 -0
  53. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/get_real_prev.py +9 -0
  54. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/load_quantifier.py +4 -0
  55. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/make_prevs.py +23 -0
  56. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/normalize.py +20 -0
  57. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/parallel.py +10 -0
  58. mlquantify-0.0.11.1/mlquantify/utils/general_purposes/round_protocol_df.py +14 -0
  59. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/__init__.py +6 -0
  60. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/distances.py +21 -0
  61. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/getHist.py +13 -0
  62. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/get_scores.py +33 -0
  63. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/moss.py +16 -0
  64. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/ternary_search.py +14 -0
  65. mlquantify-0.0.11.1/mlquantify/utils/method_purposes/tprfpr.py +42 -0
  66. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify.egg-info/PKG-INFO +4 -16
  67. mlquantify-0.0.11.1/mlquantify.egg-info/SOURCES.txt +76 -0
  68. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/setup.py +1 -1
  69. mlquantify-0.0.11/MANIFEST.in +0 -4
  70. mlquantify-0.0.11/mlquantify.egg-info/SOURCES.txt +0 -15
  71. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/classification/__init__.py +0 -0
  72. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/evaluation/__init__.py +0 -0
  73. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/methods/__init__.py +0 -0
  74. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/model_selection.py +0 -0
  75. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/plots/__init__.py +0 -0
  76. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/utils/__init__.py +0 -0
  77. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify.egg-info/dependency_links.txt +0 -0
  78. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify.egg-info/requires.txt +0 -0
  79. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify.egg-info/top_level.txt +0 -0
  80. {mlquantify-0.0.11 → mlquantify-0.0.11.1}/setup.cfg +0 -0
{mlquantify-0.0.11 → mlquantify-0.0.11.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlquantify
-Version: 0.0.11
+Version: 0.0.11.1
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior
@@ -12,14 +12,6 @@ Classifier: Operating System :: Unix
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Description-Content-Type: text/markdown
-Requires-Dist: scikit-learn
-Requires-Dist: numpy
-Requires-Dist: scipy
-Requires-Dist: joblib
-Requires-Dist: tqdm
-Requires-Dist: pandas
-Requires-Dist: xlrd
-Requires-Dist: matplotlib
 
 <h1 align="center">MLQuantify</h1>
 <h4 align="center">A Python Package for Quantification</h4>
@@ -114,13 +106,9 @@ ___
 ##### API is avaliable [here](#)
 
 - [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
-- [Model Selection](#)
-- [Evaluation](#)
-- [Plotting](#)
+- [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
+- [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
+- [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
 
 
 ___
-
-### See the References in the pdf below
-
-...
{mlquantify-0.0.11 → mlquantify-0.0.11.1}/README.md
@@ -91,13 +91,9 @@ ___
 ##### API is avaliable [here](#)
 
 - [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
-- [Model Selection](#)
-- [Evaluation](#)
-- [Plotting](#)
+- [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
+- [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
+- [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
 
 
 ___
-
-### See the References in the pdf below
-
-...
mlquantify-0.0.11.1/mlquantify/__init__.py (new file)
@@ -0,0 +1,6 @@
+from .classification import *
+from .evaluation import *
+from .methods import *
+from .utils import *
+from .plots import *
+from .model_selection import GridSearchQ
{mlquantify-0.0.11 → mlquantify-0.0.11.1}/mlquantify/base.py
@@ -138,13 +138,14 @@ class AggregativeQuantifier(Quantifier, ABC):
         return self.learner.get_params()
 
     def set_params(self, **params):
+
        # Model Params
        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)
 
        # Learner Params
-        if self.learner:
+        if self.learner is not None:
            learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
            if learner_params:
                self.learner.set_params(**learner_params)
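
The switch to `is not None` avoids relying on estimator truthiness, and the `learner__` prefix mirrors scikit-learn's nested-parameter convention. A hypothetical usage sketch (the `CC` quantifier name and import path are assumed from `methods/aggregative/cc.py`, not confirmed by this hunk):

```python
from sklearn.ensemble import RandomForestClassifier
from mlquantify.methods.aggregative.cc import CC  # hypothetical import path

quantifier = CC(learner=RandomForestClassifier())

# Plain keys set attributes on the quantifier itself; "learner__"-prefixed
# keys are stripped of the prefix and forwarded to the wrapped estimator's
# set_params(), exactly as the diff above implements.
quantifier.set_params(learner__n_estimators=200, learner__max_depth=8)
```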
mlquantify-0.0.11.1/mlquantify/classification/pwkclf.py (new file)
@@ -0,0 +1,73 @@
+from sklearn.neighbors import NearestNeighbors
+from sklearn.base import BaseEstimator
+import numpy as np
+import pandas as pd
+
+class PWKCLF(BaseEstimator):
+    """Learner based on k-Nearest Neighbors (KNN) for use with the PWK method,
+    which is also based on KNN.
+    """
+
+    def __init__(self,
+                 alpha=1,
+                 n_neighbors=10,
+                 algorithm="auto",
+                 metric="euclidean",
+                 leaf_size=30,
+                 p=2,
+                 metric_params=None,
+                 n_jobs=None):
+
+        if alpha < 1:
+            raise ValueError("alpha must not be smaller than 1")
+
+        self.alpha = alpha
+        self.n_neighbors = n_neighbors
+
+        self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
+                                     algorithm=algorithm,
+                                     leaf_size=leaf_size,
+                                     metric=metric,
+                                     p=p,
+                                     metric_params=metric_params,
+                                     n_jobs=n_jobs)
+
+        self.Y = None
+        self.Y_map = None
+        self.w = None
+        self.y = None
+
+    def fit(self, X, y):
+        n_samples = X.shape[0]
+        if n_samples < self.n_neighbors:
+            self.nbrs.set_params(n_neighbors=n_samples)
+
+        self.y = y
+
+        if isinstance(y, pd.DataFrame):
+            self.y = y.reset_index(drop=True)
+
+        Y_cts = np.unique(y, return_counts=True)
+        self.Y = Y_cts[0]
+        self.Y_map = dict(zip(self.Y, range(len(self.Y))))
+
+        min_class_count = np.min(Y_cts[1])
+        self.w = (Y_cts[1] / min_class_count) ** (-1.0 / self.alpha)
+        self.nbrs.fit(X)
+        return self
+
+    def predict(self, X):
+        n_samples = X.shape[0]
+        nn_indices = self.nbrs.kneighbors(X, return_distance=False)
+
+        CM = np.zeros((n_samples, len(self.Y)))
+
+        for i in range(n_samples):
+            for j in nn_indices[i]:
+                CM[i, self.Y_map[self.y[j]]] += 1
+
+        CM = np.multiply(CM, self.w)
+        predictions = np.apply_along_axis(np.argmax, axis=1, arr=CM)
+
+        return self.Y[predictions]
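
The weights `w` scale each class's neighbor votes by `(count / min_count) ** (-1 / alpha)`, so minority classes are not drowned out by majority-class neighbors. A quick usage sketch on imbalanced toy data (the import path follows the new file's location; the package may also re-export the class):

```python
import numpy as np
from sklearn.datasets import make_classification
from mlquantify.classification.pwkclf import PWKCLF

# Imbalanced toy data: the inverse class-frequency weighting is where
# PWKCLF differs from a plain KNN majority vote.
X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)

clf = PWKCLF(alpha=2, n_neighbors=15)
clf.fit(X, y)
print(np.unique(clf.predict(X), return_counts=True))
```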
mlquantify-0.0.11.1/mlquantify/evaluation/measures/__init__.py (new file)
@@ -0,0 +1,26 @@
+from .ae import absolute_error
+from .kld import kullback_leibler_divergence
+from .nkld import normalized_kullback_leibler_divergence
+from .rae import relative_absolute_error
+from .nae import normalized_absolute_error
+from .bias import bias
+from .nrae import normalized_relative_absolute_error
+from .se import squared_error
+from .mse import mean_squared_error
+
+
+
+MEASURES = {
+    "ae": absolute_error,
+    "nae": normalized_absolute_error,
+    "kld": kullback_leibler_divergence,
+    "nkld": normalized_kullback_leibler_divergence,
+    "nrae": normalized_relative_absolute_error,
+    "rae": relative_absolute_error,
+    "se": squared_error,
+    "mse": mean_squared_error
+}
+
+
+def get_measure(measure: str):
+    return MEASURES.get(measure)
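
Every measure below accepts either a dict mapping class to prevalence or an array. A minimal lookup sketch using the registry:

```python
from mlquantify.evaluation.measures import get_measure

ae = get_measure("ae")  # resolves to absolute_error via the MEASURES dict
# Dicts are converted to arrays of their values before the computation.
print(ae({"pos": 0.7, "neg": 0.3}, {"pos": 0.6, "neg": 0.4}))  # -> 0.1
```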
mlquantify-0.0.11.1/mlquantify/evaluation/measures/ae.py (new file)
@@ -0,0 +1,11 @@
+import numpy as np
+
+def absolute_error(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    abs_error = abs(prev_pred - prev_real).mean(axis=-1)
+
+    return abs_error
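
For reference, this computes the standard quantification absolute error, averaged over the classes:

```latex
\mathrm{AE}(p, \hat{p}) = \frac{1}{|\mathcal{C}|} \sum_{c \in \mathcal{C}} \lvert \hat{p}_c - p_c \rvert
```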
mlquantify-0.0.11.1/mlquantify/evaluation/measures/bias.py (new file)
@@ -0,0 +1,16 @@
+import numpy as np
+
+def bias(prev_real:np.any, prev_pred:np.any):
+    classes = None
+    if isinstance(prev_real, dict):
+        classes = prev_real.keys()
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    abs_errors = abs(prev_pred - prev_real)
+
+    if classes:
+        return {class_:abs_error for class_, abs_error in zip(classes, abs_errors)}
+
+    return abs_errors
mlquantify-0.0.11.1/mlquantify/evaluation/measures/kld.py (new file)
@@ -0,0 +1,8 @@
+import numpy as np
+
+def kullback_leibler_divergence(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+    return prev_real * abs(np.log((prev_real / prev_pred)))
mlquantify-0.0.11.1/mlquantify/evaluation/measures/mse.py (new file)
@@ -0,0 +1,12 @@
+import numpy as np
+from .se import squared_error
+
+def mean_squared_error(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    mean_sq_error = squared_error(prev_real, prev_pred).mean()
+
+    return mean_sq_error
mlquantify-0.0.11.1/mlquantify/evaluation/measures/nae.py (new file)
@@ -0,0 +1,16 @@
+import numpy as np
+from .ae import absolute_error
+
+def normalized_absolute_error(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    abs_error = absolute_error(prev_real, prev_pred)
+
+    z_abs_error = (2 * (1 - min(prev_real)))
+
+    normalized = abs_error / z_abs_error
+
+    return normalized
mlquantify-0.0.11.1/mlquantify/evaluation/measures/nkld.py (new file)
@@ -0,0 +1,13 @@
+import numpy as np
+from .kld import kullback_leibler_divergence
+
+def normalized_kullback_leibler_divergence(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    euler = np.exp(kullback_leibler_divergence(prev_real, prev_pred))
+    normalized = 2 * (euler / (euler + 1)) - 1
+
+    return normalized
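
The exponential step squashes each KLD term from [0, ∞) into [0, 1); note the implementation applies it elementwise, since `kullback_leibler_divergence` here returns a per-class vector rather than a summed scalar:

```latex
\mathrm{NKLD}_c = 2 \cdot \frac{e^{\mathrm{KLD}_c}}{e^{\mathrm{KLD}_c} + 1} - 1,
\qquad \mathrm{KLD}_c = p_c \left\lvert \log \frac{p_c}{\hat{p}_c} \right\rvert
```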
mlquantify-0.0.11.1/mlquantify/evaluation/measures/nrae.py (new file)
@@ -0,0 +1,16 @@
+import numpy as np
+from .rae import relative_absolute_error
+
+def normalized_relative_absolute_error(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    relative = relative_absolute_error(prev_real, prev_pred)
+
+    z_relative = (len(prev_real) - 1 + ((1 - min(prev_real)) / min(prev_real))) / len(prev_real)
+
+    normalized = relative/z_relative
+
+    return normalized
mlquantify-0.0.11.1/mlquantify/evaluation/measures/rae.py (new file)
@@ -0,0 +1,12 @@
+import numpy as np
+from . import absolute_error
+
+def relative_absolute_error(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    relative = (absolute_error(prev_real, prev_pred) / prev_real).mean(axis=-1)
+
+    return relative
mlquantify-0.0.11.1/mlquantify/evaluation/measures/se.py (new file)
@@ -0,0 +1,12 @@
+import numpy as np
+from .ae import absolute_error
+
+def squared_error(prev_real:np.any, prev_pred:np.any):
+    if isinstance(prev_real, dict):
+        prev_real = np.asarray(list(prev_real.values()))
+    if isinstance(prev_pred, dict):
+        prev_pred = np.asarray(list(prev_pred.values()))
+
+    sq_abs_error = ((prev_pred - prev_real) ** 2).mean(axis=-1)
+
+    return sq_abs_error
mlquantify-0.0.11.1/mlquantify/evaluation/protocol/_Protocol.py (new file)
@@ -0,0 +1,202 @@
+from abc import ABC, abstractmethod
+import numpy as np
+import pandas as pd
+from typing import Union, List
+from sklearn.base import BaseEstimator
+from time import time
+from tqdm import tqdm
+
+from ...methods import get_method, METHODS, AGGREGATIVE, NON_AGGREGATIVE
+from ...utils import *
+from ..measures import get_measure, MEASURES
+from ...base import Quantifier, AggregativeQuantifier
+
+class Protocol(ABC):
+    """Base class for implementing different quantification protocols.
+
+    This abstract class provides a structure for creating protocols that involve
+    fitting quantification models to training data and generating predictions on test data.
+    It supports parallel processing, multiple iterations, and different output formats.
+
+    Args:
+        models (Union[List[Union[str, Quantifier]], str, Quantifier]):
+            List of quantification models, a single model name, or 'all' for all models.
+        batch_size (Union[List[int], int]):
+            Size of the batches to be processed, or a list of sizes.
+        learner (BaseEstimator, optional):
+            Machine learning model to be used with the quantifiers. Required for model methods.
+        n_iterations (int, optional):
+            Number of iterations for the protocol. Default is 1.
+        n_jobs (int, optional):
+            Number of jobs to run in parallel. Default is 1.
+        random_state (int, optional):
+            Seed for random number generation. Default is 32.
+        verbose (bool, optional):
+            Whether to print progress messages. Default is False.
+        return_type (str, optional):
+            Type of return value ('predictions' or 'table'). Default is 'predictions'.
+        measures (List[str], optional):
+            List of error measures to calculate. Must be in MEASURES or None. Default is None.
+    """
+
+    def __init__(self,
+                 models: Union[List[Union[str, Quantifier]], str, Quantifier],
+                 batch_size: Union[List[int], int],
+                 learner: BaseEstimator = None,
+                 n_iterations: int = 1,
+                 n_jobs: int = 1,
+                 random_state: int = 32,
+                 verbose: bool = False,
+                 return_type: str = "predictions",
+                 measures: List[str] = None):
+
+        assert not measures or all(m in MEASURES for m in measures), \
+            f"Invalid measure(s) provided. Valid options: {list(MEASURES.keys())} or None"
+        assert return_type in ["predictions", "table"], \
+            "Invalid return_type. Valid options: ['predictions', 'table']"
+
+        self.models = self._initialize_models(models, learner)
+        self.learner = learner
+        self.batch_size = batch_size
+        self.n_iterations = n_iterations
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+        self.verbose = verbose
+        self.return_type = return_type
+        self.measures = measures
+
+    def _initialize_models(self, models, learner):
+        if isinstance(models, list):
+            if isinstance(models[0], Quantifier):
+                return models
+            assert learner is not None, "Learner is required for model methods."
+            return [get_method(model)(learner) for model in models]
+        if isinstance(models, Quantifier):
+            return [models]
+
+        assert learner is not None, "Learner is required for model methods."
+
+        if models == "all":
+            print(hasattr(list(AGGREGATIVE.values())[0], "learner"))
+            models = [model(learner) if hasattr(model, "learner") else model() for model in METHODS.values()]
+            return models
+        if models == "aggregative":
+            return [model(learner) for model in AGGREGATIVE.values()]
+        if models == "non_aggregative":
+            return [model() for model in NON_AGGREGATIVE.values()]
+
+        return [get_method(models)(learner)]
+
+    def sout(self, msg):
+        if self.verbose:
+            print('[APP]' + msg)
+
+    def fit(self, X_train, y_train):
+        """Fit all methods to the training data.
+
+        Args:
+            X_train (array-like): Training features.
+            y_train (array-like): Training labels.
+        """
+        self.sout("Fitting models")
+
+        args = ((model, X_train, y_train, self.verbose) for model in self.models)
+        self.models = parallel(
+            self._delayed_fit,
+            tqdm(args, desc="Fitting models", total=len(self.models)) if self.verbose else args,
+            self.n_jobs)
+
+        self.sout("Fit [Done]")
+        return self
+
+    def predict(self, X_test, y_test) -> np.any:
+        """Generates several samples with artificial prevalences and sizes.
+        For each method, predicts on these samples, aggregating everything
+        into a pandas DataFrame if requested, or else returning just the predictions.
+
+        Args:
+            X_test (array-like): Test features.
+            y_test (array-like): Test labels.
+
+        Returns:
+            tuple: tuple containing the model, real_prev and pred_prev, or
+            DataFrame: table of results, along with error measures if requested.
+        """
+
+        predictions = self.predict_protocol(X_test, y_test)
+
+        predictions_df = pd.DataFrame(predictions)
+
+        if self.return_type == "table":
+            predictions_df.columns = ["QUANTIFIER", "REAL_PREVS", "PRED_PREVS", "BATCH_SIZE"]
+
+            if self.measures:
+
+                def smooth(values: np.ndarray) -> np.ndarray:
+                    smoothed_factor = 1 / (2 * len(X_test))
+
+                    values = (values + smoothed_factor) / (smoothed_factor * len(values) + 1)
+
+                    return values
+
+                for metric in self.measures:
+                    predictions_df[metric] = predictions_df.apply(
+                        lambda row: get_measure(metric)(smooth(row["REAL_PREVS"]), smooth(row["PRED_PREVS"])),
+                        axis=1
+                    )
+
+            return predictions_df
+
+        predictions_array = predictions_df.to_numpy()
+        return (
+            predictions_array[:, 0],            # Model names
+            np.stack(predictions_array[:, 1]),  # Prev
+            np.stack(predictions_array[:, 2])   # Prev_pred
+        )
+
+    @abstractmethod
+    def predict_protocol(self) -> np.ndarray:
+        """Abstract method that every protocol has to implement."""
+        ...
+
+    @abstractmethod
+    def _new_sample(self) -> tuple:
+        """Abstract method of sample extraction for each protocol.
+
+        Returns:
+            tuple: tuple containing the X_sample and the y_sample
+        """
+        ...
+
+    @abstractmethod
+    def _delayed_predict(self, args) -> tuple:
+        """Abstract method for predicting on the extracted
+        samples; delayed so it can run in parallel for
+        efficiency purposes.
+        """
+        ...
+
+    def _delayed_fit(self, args):
+        model, X_train, y_train, verbose = args
+
+        if verbose:
+            print(f"\tFitting {model.__class__.__name__}")
+            start = time()
+
+        model = model.fit(X=X_train, y=y_train)
+
+        if verbose:
+            end = time()
+            print(f"\t\\--Fit ended for {model.__class__.__name__} in {round(end - start, 3)} seconds")
+        return model
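
The inner `smooth` helper applies additive smoothing with factor 1/(2·|test set|) to both real and predicted prevalences before computing measures, which keeps divergence-based metrics such as KLD finite when a predicted prevalence is exactly zero. A standalone sketch of the same arithmetic:

```python
import numpy as np

def smooth(values, test_size):
    # Additive smoothing as in Protocol.predict: eps = 1 / (2 * |test set|);
    # the denominator renormalizes so the result still sums to 1.
    eps = 1 / (2 * test_size)
    return (values + eps) / (eps * len(values) + 1)

print(smooth(np.array([0.0, 1.0]), test_size=100))  # no exact zeros remain
```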
mlquantify-0.0.11.1/mlquantify/evaluation/protocol/__init__.py (new file)
@@ -0,0 +1,2 @@
+from .app import APP
+from .npp import NPP
mlquantify-0.0.11.1/mlquantify/evaluation/protocol/app.py (new file)
@@ -0,0 +1,146 @@
+import numpy as np
+import pandas as pd
+from typing import Union, List
+from sklearn.base import BaseEstimator
+import itertools
+from tqdm import tqdm
+
+from ...utils import generate_artificial_indexes, parallel
+from ...base import Quantifier
+from ._Protocol import Protocol
+
+class APP(Protocol):
+    """Artificial Prevalence Protocol. It splits a test set into several
+    samples of varying prevalence and size, over n iterations.
+    For a list of Quantifiers, it computes training and testing
+    for each one and returns either a table of results with error measures
+    or just the predictions.
+    """
+
+    def __init__(self,
+                 models: Union[List[Union[str, Quantifier]], str, Quantifier],
+                 batch_size: Union[List[int], int],
+                 learner: BaseEstimator = None,
+                 n_prevs: int = 100,
+                 n_iterations: int = 1,
+                 n_jobs: int = 1,
+                 random_state: int = 32,
+                 verbose: bool = False,
+                 return_type: str = "predictions",
+                 measures: List[str] = None):
+
+        super().__init__(models, batch_size, learner, n_iterations, n_jobs, random_state, verbose, return_type, measures)
+        self.n_prevs = n_prevs
+
+    def predict_protocol(self, X_test, y_test) -> tuple:
+        """Generates several samples with artificial prevalences and sizes.
+        For each model, predicts with this sample, aggregating all together
+        with a pandas dataframe if requested, or else just the predictions.
+
+        Args:
+            X_test (array-like): Features of the test set.
+            y_test (array-like): Labels of the test set.
+
+        Returns:
+            tuple: predictions containing the model name, real prev, pred prev, and batch size
+        """
+
+        n_dim = len(np.unique(y_test))
+        prevs = self._generate_artificial_prevalences(n_dim, self.n_prevs, self.n_iterations)
+
+        args = self._generate_args(X_test, y_test, prevs)
+        batch_size = 1
+
+        if isinstance(self.batch_size, list):
+            batch_size = len(self.batch_size)
+
+        size = len(prevs) * len(self.models) * batch_size * self.n_iterations
+
+        predictions = parallel(
+            self._delayed_predict,
+            tqdm(args, desc="Running APP", total=size) if self.verbose else args,
+            n_jobs=self.n_jobs
+        )
+
+        return predictions
+
+    def _new_sample(self, X, y, prev: List[float], batch_size: int) -> tuple:
+        """Generates a new sample with a specified prevalence and size.
+
+        Args:
+            X (array-like): Features from which to take the new sample.
+            y (array-like): Labels from which to take the new sample.
+            prev (List[float]): The specified prevalences.
+            batch_size (int): Sample size.
+
+        Returns:
+            tuple: New sample's features and labels.
+        """
+        sample_index = generate_artificial_indexes(y, prev, batch_size, np.unique(y))
+        return np.take(X, sample_index, axis=0), np.take(y, sample_index, axis=0)
+
+    def _delayed_predict(self, args) -> tuple:
+        """Predicts on the new sample; delayed so it can run
+        in parallel for efficiency purposes.
+
+        Args:
+            args (Any): arguments to use
+
+        Returns:
+            tuple: returns the (method name, real_prev, pred_prev and sample_size)
+        """
+
+        X, y, model, prev, batch_size, verbose = args
+
+        if verbose:
+            print(f'\t {model.__class__.__name__} with {str(batch_size)} instances and prev {str(prev)}')
+
+        X_sample, _ = self._new_sample(X, y, prev, batch_size)
+        prev_pred = np.asarray(list(model.predict(X=X_sample).values()))
+
+        if verbose:
+            print(f'\t \\--Ending {model.__class__.__name__} with {str(batch_size)} instances and prev {str(prev)} \n')
+
+        return [model.__class__.__name__, prev, prev_pred, batch_size]
+
+    def _generate_artificial_prevalences(self, n_dim: int, n_prev: int, n_iter: int) -> np.ndarray:
+        """Generates n artificial prevalences with n dimensions.
+
+        Args:
+            n_dim (int): Number of dimensions for the artificial prevalence.
+            n_prev (int): Number of prevalence points to generate.
+            n_iter (int): Number of iterations.
+
+        Returns:
+            np.ndarray: Generated artificial prevalences.
+        """
+        s = np.linspace(0., 1., n_prev, endpoint=True)
+        prevs = np.array([p + (1 - sum(p),) for p in itertools.product(*(s,) * (n_dim - 1)) if sum(p) <= 1])
+
+        return np.repeat(prevs, n_iter, axis=0) if n_iter > 1 else prevs
+
+    def _generate_args(self, X_test, y_test, prevs):
+        """Generates arguments for parallel processing based on the model, prevalence, and batch size.
+
+        Args:
+            X_test (array-like): Features of the test set.
+            y_test (array-like): Labels of the test set.
+            prevs (np.ndarray): Artificial prevalences generated.
+
+        Returns:
+            List[tuple]: List of arguments for parallel processing.
+        """
+        if isinstance(self.batch_size, list):
+            return [(X_test, y_test, model, prev, bs, self.verbose)
+                    for prev in prevs for bs in self.batch_size for model in self.models]
+        else:
+            return [(X_test, y_test, model, prev, self.batch_size, self.verbose)
+                    for prev in prevs for model in self.models]
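
Taken together, APP fits each quantifier once, then resamples the test set to every prevalence on a simplex grid (`_generate_artificial_prevalences`) and records real versus predicted prevalences per sample. A usage sketch, assuming method-name strings are resolvable by `get_method` (the "cc" name is illustrative, not confirmed by this diff):

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlquantify.evaluation.protocol import APP

X, y = make_classification(n_samples=2000, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

# "cc" is an assumed method name; any Quantifier instance also works.
app = APP(models=["cc"],
          batch_size=100,
          learner=LogisticRegression(),
          n_prevs=21,              # grid of 21 prevalence points on [0, 1]
          return_type="table",
          measures=["ae", "mse"])  # error columns added to the table

app.fit(X_tr, y_tr)
results = app.predict(X_te, y_te)  # DataFrame: QUANTIFIER, REAL/PRED_PREVS, ae, mse
print(results.head())
```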