mlquantify 0.0.1__py3-none-any.whl → 0.0.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/base.py +2 -1
- mlquantify/plots/protocol_plot.py +38 -2
- mlquantify-0.0.11.2.dist-info/METADATA +122 -0
- {mlquantify-0.0.1.dist-info → mlquantify-0.0.11.2.dist-info}/RECORD +6 -6
- mlquantify-0.0.1.dist-info/METADATA +0 -23
- {mlquantify-0.0.1.dist-info → mlquantify-0.0.11.2.dist-info}/WHEEL +0 -0
- {mlquantify-0.0.1.dist-info → mlquantify-0.0.11.2.dist-info}/top_level.txt +0 -0
mlquantify/base.py
CHANGED
|
@@ -138,13 +138,14 @@ class AggregativeQuantifier(Quantifier, ABC):
|
|
|
138
138
|
return self.learner.get_params()
|
|
139
139
|
|
|
140
140
|
def set_params(self, **params):
|
|
141
|
+
|
|
141
142
|
# Model Params
|
|
142
143
|
for key, value in params.items():
|
|
143
144
|
if hasattr(self, key):
|
|
144
145
|
setattr(self, key, value)
|
|
145
146
|
|
|
146
147
|
# Learner Params
|
|
147
|
-
if self.learner:
|
|
148
|
+
if self.learner is not None:
|
|
148
149
|
learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
|
|
149
150
|
if learner_params:
|
|
150
151
|
self.learner.set_params(**learner_params)
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import matplotlib.pyplot as plt
|
|
2
|
+
import matplotlib.colors as mcolors
|
|
2
3
|
import matplotlib.patches as mpatches
|
|
3
4
|
import pandas as pd
|
|
4
5
|
from typing import List, Optional, Dict, Any, Union
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
7
8
|
plt.rcParams.update({
|
|
9
|
+
'lines.markersize': 6,
|
|
8
10
|
'axes.facecolor': "#F8F8F8",
|
|
9
11
|
'figure.facecolor': "#F8F8F8",
|
|
10
12
|
'font.family': 'sans-serif',
|
|
@@ -41,6 +43,28 @@ COLORS = [
|
|
|
41
43
|
|
|
42
44
|
MARKERS = ["o", "s", "^", "D", "p", "*", "+", "x", "H", "1", "2", "3", "4", "|", "_"]
|
|
43
45
|
|
|
46
|
+
def adjust_color_saturation(color: str, saturation_factor: float = 5) -> str:
|
|
47
|
+
"""
|
|
48
|
+
Adjusts the saturation of a given color.
|
|
49
|
+
|
|
50
|
+
Parameters:
|
|
51
|
+
- color (str): The original color in hexadecimal format.
|
|
52
|
+
- saturation_factor (float): The factor by which to adjust the saturation.
|
|
53
|
+
Values > 1 will increase saturation,
|
|
54
|
+
values < 1 will decrease it. Default is 5.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
- str: The color with adjusted saturation in hexadecimal format.
|
|
58
|
+
"""
|
|
59
|
+
# Convert color to HSV (Hue, Saturation, Value)
|
|
60
|
+
h, s, v = mcolors.rgb_to_hsv(mcolors.to_rgb(color))
|
|
61
|
+
|
|
62
|
+
# Adjust saturation
|
|
63
|
+
s = min(1, s * saturation_factor)
|
|
64
|
+
|
|
65
|
+
# Convert back to RGB and then to hex
|
|
66
|
+
return mcolors.to_hex(mcolors.hsv_to_rgb((h, s, v)))
|
|
67
|
+
|
|
44
68
|
|
|
45
69
|
|
|
46
70
|
def protocol_boxplot(
|
|
@@ -100,6 +124,10 @@ def protocol_boxplot(
|
|
|
100
124
|
|
|
101
125
|
|
|
102
126
|
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
|
|
103
131
|
def protocol_lineplot(
|
|
104
132
|
table_protocol: pd.DataFrame,
|
|
105
133
|
methods: Union[List[str], str, None],
|
|
@@ -135,10 +163,16 @@ def protocol_lineplot(
|
|
|
135
163
|
|
|
136
164
|
# Create plot with custom figsize
|
|
137
165
|
fig, ax = plt.subplots(figsize=figsize)
|
|
138
|
-
for i, (method, marker) in enumerate(zip(methods, MARKERS[:len(methods)])):
|
|
166
|
+
for i, (method, marker) in enumerate(zip(methods, MARKERS[:len(methods)+1])):
|
|
139
167
|
method_data = table[table['QUANTIFIER'] == method]
|
|
140
168
|
y_data = real if y == "ALPHA" else method_data[y]
|
|
141
|
-
|
|
169
|
+
color = adjust_color_saturation(COLORS[i % len(COLORS)]) # Aumenta a saturação das cores
|
|
170
|
+
ax.plot(method_data[x],
|
|
171
|
+
y_data, color=color,
|
|
172
|
+
marker=marker,
|
|
173
|
+
label=method,
|
|
174
|
+
alpha=1.0,
|
|
175
|
+
**plot_params)
|
|
142
176
|
|
|
143
177
|
# Add legend
|
|
144
178
|
if legend:
|
|
@@ -155,3 +189,5 @@ def protocol_lineplot(
|
|
|
155
189
|
if save_path:
|
|
156
190
|
plt.savefig(save_path, bbox_inches='tight')
|
|
157
191
|
plt.show()
|
|
192
|
+
|
|
193
|
+
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: mlquantify
|
|
3
|
+
Version: 0.0.11.2
|
|
4
|
+
Summary: Quantification Library
|
|
5
|
+
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
|
+
Maintainer: Luiz Fernando Luth Junior
|
|
7
|
+
Keywords: python,machine learning,quantification,quantify
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Operating System :: Unix
|
|
12
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: scikit-learn
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: scipy
|
|
18
|
+
Requires-Dist: joblib
|
|
19
|
+
Requires-Dist: tqdm
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Requires-Dist: xlrd
|
|
22
|
+
Requires-Dist: matplotlib
|
|
23
|
+
|
|
24
|
+
<h1 align="center">MLQuantify</h1>
|
|
25
|
+
<h4 align="center">A Python Package for Quantification</h4>
|
|
26
|
+
|
|
27
|
+
___
|
|
28
|
+
|
|
29
|
+
**mlquantify** is a Python library for quantification, also known as supervised prevalence estimation, designed to estimate the distribution of classes within datasets. It offers a range of tools for various quantification methods, model selection tailored for quantification tasks, evaluation metrics, and protocols to assess quantification performance. Additionally, mlquantify includes popular datasets and visualization tools to help analyze and interpret results.
|
|
30
|
+
|
|
31
|
+
___
|
|
32
|
+
|
|
33
|
+
## Latest Release
|
|
34
|
+
|
|
35
|
+
- **Version 0.0.1**: Initial beta version. For a detailed list of changes, check the [changelog](#).
|
|
36
|
+
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
37
|
+
- Explore the [API documentation](#) for detailed developer information.
|
|
38
|
+
- See also the library's page on PyPI: [mlquantify](https://pypi.org/project/mlquantify/).
|
|
39
|
+
|
|
40
|
+
___
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
To install mlquantify, run the following command:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install mlquantify
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
___
|
|
51
|
+
|
|
52
|
+
## Contents
|
|
53
|
+
|
|
54
|
+
| Section | Description |
|
|
55
|
+
|---|---|
|
|
56
|
+
| **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
|
|
57
|
+
| **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-Vs-All (OVA) is applied automatically. |
|
|
58
|
+
| **Model Selection** | Criteria and processes used to select the best model, such as grid search adapted to quantification. |
|
|
59
|
+
| **Evaluation Metrics** | Specific metrics used to evaluate quantification performance (e.g., AE, BIAS, NAE, SE, KLD). |
|
|
60
|
+
| **Evaluation Protocols** | Evaluation protocols used, based on sampling generation (e.g., APP, NPP). |
|
|
61
|
+
| **Plotting Results** | Tools and techniques used to visualize results, such as the protocol results.|
|
|
62
|
+
| **Comprehensive Documentation** | Complete documentation of the project, including code, data, and results. |
|
|
63
|
+
|
|
64
|
+
___
|
|
65
|
+
|
|
66
|
+
## Quick example:
|
|
67
|
+
|
|
68
|
+
This code first loads the breast cancer dataset from _sklearn_, which is then split into training and testing sets. It uses the _Expectation Maximisation Quantifier (EMQ)_ with a RandomForest classifier to predict class prevalence. After training the model, it evaluates performance by calculating and printing the absolute error and bias between the real and predicted prevalences.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import mlquantify as mq
|
|
72
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
73
|
+
from sklearn.datasets import load_breast_cancer
|
|
74
|
+
from sklearn.model_selection import train_test_split
|
|
75
|
+
|
|
76
|
+
# Loading dataset from sklearn
|
|
77
|
+
features, target = load_breast_cancer(return_X_y=True)
|
|
78
|
+
|
|
79
|
+
#Splitting into train and test
|
|
80
|
+
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
|
|
81
|
+
|
|
82
|
+
#Create the model, here it is the Expectation Maximisation Quantifier (EMQ) with a classifier
|
|
83
|
+
model = mq.methods.EMQ(RandomForestClassifier())
|
|
84
|
+
model.fit(X_train, y_train)
|
|
85
|
+
|
|
86
|
+
#Predict the class prevalence for X_test
|
|
87
|
+
pred_prevalence = model.predict(X_test)
|
|
88
|
+
real_prevalence = mq.utils.get_real_prev(y_test)
|
|
89
|
+
|
|
90
|
+
#Get the error for the prediction
|
|
91
|
+
ae = mq.evaluation.absolute_error(real_prevalence, pred_prevalence)
|
|
92
|
+
bias = mq.evaluation.bias(real_prevalence, pred_prevalence)
|
|
93
|
+
|
|
94
|
+
print(f"Absolute Error (AE) -> {ae:.4f}")
|
|
95
|
+
print(f"Bias -> {bias}")
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
___
|
|
99
|
+
|
|
100
|
+
## Requirements
|
|
101
|
+
|
|
102
|
+
- Scikit-learn
|
|
103
|
+
- pandas
|
|
104
|
+
- numpy
|
|
105
|
+
- joblib
|
|
106
|
+
- tqdm
|
|
107
|
+
- matplotlib
|
|
108
|
+
- xlrd
|
|
109
|
+
|
|
110
|
+
___
|
|
111
|
+
|
|
112
|
+
## Documentation
|
|
113
|
+
|
|
114
|
+
##### API is available [here](#)
|
|
115
|
+
|
|
116
|
+
- [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
|
|
117
|
+
- [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
|
|
118
|
+
- [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
|
|
119
|
+
- [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
___
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mlquantify/__init__.py,sha256=BGxGCeZhlNsTYZkLoJJ-zcpVDRU4jBFVsz9ZmEZCtvs,166
|
|
2
|
-
mlquantify/base.py,sha256=
|
|
2
|
+
mlquantify/base.py,sha256=przWKpQtoTT6ekbyTrAU3Kp_JsQ1982rAXWX_oAoSQM,8393
|
|
3
3
|
mlquantify/model_selection.py,sha256=zN-qkrCe35GoJ8u1_Ab3EKXvrnAppk68Fj5kQVi61nU,8617
|
|
4
4
|
mlquantify/classification/__init__.py,sha256=zxE6_ouh3kJVL56aJAhpFh36EBWKq9U4IcID4noROPQ,26
|
|
5
5
|
mlquantify/classification/pwkclf.py,sha256=K1XBgI9FIh1Yu571bzdw8PM-TJXFGOQAzP4ewDV9b3Y,2323
|
|
@@ -49,7 +49,7 @@ mlquantify/methods/non_aggregative/__init__.py,sha256=HD34lwNmAkw26qlEA5PDHay9V6
|
|
|
49
49
|
mlquantify/methods/non_aggregative/hdx.py,sha256=vwodV_Z89_be8HBOSriX3lsFhzmPZVLdOgKewa6la-E,2420
|
|
50
50
|
mlquantify/plots/__init__.py,sha256=IDnv_KVALIsVuohiudclnIMSM5F1dNRxUFpjygLblBI,118
|
|
51
51
|
mlquantify/plots/distribution_plot.py,sha256=8l22Lq6LhlOd3yv6gHX6wg2p1MTozYASyNzPcLlPHzk,4038
|
|
52
|
-
mlquantify/plots/protocol_plot.py,sha256=
|
|
52
|
+
mlquantify/plots/protocol_plot.py,sha256=DfkCSHELr9lyyxMqeGNJgx0iywODQmH8EL5tlMjeAFQ,6618
|
|
53
53
|
mlquantify/utils/__init__.py,sha256=VKplOsrL4ONf0-9anhcWFOxXjvGSQTH0Kh922s_hGb0,63
|
|
54
54
|
mlquantify/utils/general_purposes/__init__.py,sha256=-Pwx1t2M_rGCEHxIc2TyomSe92w6U5Bdp6wtpsdcMZI,380
|
|
55
55
|
mlquantify/utils/general_purposes/convert_col_to_array.py,sha256=Pq6_U6BdsIAl4Vr_836_vM-g2iRQAJ1b4kGOsYoFukI,560
|
|
@@ -67,7 +67,7 @@ mlquantify/utils/method_purposes/get_scores.py,sha256=qdIVYUS8xd8Vt86k19yETDNfib
|
|
|
67
67
|
mlquantify/utils/method_purposes/moss.py,sha256=CVDDMHxPBnl_U2hz7Aqvne7jhB2mBUsVzTTsaiLQhOc,352
|
|
68
68
|
mlquantify/utils/method_purposes/ternary_search.py,sha256=JpNrfJsA5kWuanVW_hyMucy7rQ9UzTSgazFpTRi9jMI,416
|
|
69
69
|
mlquantify/utils/method_purposes/tprfpr.py,sha256=VKniG5aK8IwAA2fXEhkdHtwnx1zHH12qhwS4kKW5Dlo,1181
|
|
70
|
-
mlquantify-0.0.
|
|
71
|
-
mlquantify-0.0.
|
|
72
|
-
mlquantify-0.0.
|
|
73
|
-
mlquantify-0.0.
|
|
70
|
+
mlquantify-0.0.11.2.dist-info/METADATA,sha256=CQ6GvMVgM4JfD6KW4vXoDAixa35QM1zOFs5Erdm3EEM,4717
|
|
71
|
+
mlquantify-0.0.11.2.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
72
|
+
mlquantify-0.0.11.2.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
|
|
73
|
+
mlquantify-0.0.11.2.dist-info/RECORD,,
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: mlquantify
|
|
3
|
-
Version: 0.0.1
|
|
4
|
-
Summary: Quantification Library
|
|
5
|
-
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
|
-
Maintainer: Luiz Fernando Luth Junior
|
|
7
|
-
Keywords: python,machine learning,quantification,quantify
|
|
8
|
-
Classifier: Development Status :: 4 - Beta
|
|
9
|
-
Classifier: Intended Audience :: Science/Research
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Operating System :: Unix
|
|
12
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Description-Content-Type: text/markdown
|
|
15
|
-
Requires-Dist: scikit-learn
|
|
16
|
-
Requires-Dist: numpy
|
|
17
|
-
Requires-Dist: scipy
|
|
18
|
-
Requires-Dist: joblib
|
|
19
|
-
Requires-Dist: tqdm
|
|
20
|
-
Requires-Dist: pandas
|
|
21
|
-
Requires-Dist: xlrd
|
|
22
|
-
Requires-Dist: matplotlib
|
|
23
|
-
|
|
File without changes
|
|
File without changes
|