mlquantify 0.0.11.2__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +32 -6
- mlquantify/base.py +559 -257
- mlquantify/classification/__init__.py +1 -1
- mlquantify/classification/methods.py +160 -0
- mlquantify/evaluation/__init__.py +14 -2
- mlquantify/evaluation/measures.py +215 -0
- mlquantify/evaluation/protocol.py +647 -0
- mlquantify/methods/__init__.py +37 -40
- mlquantify/methods/aggregative.py +1030 -0
- mlquantify/methods/meta.py +472 -0
- mlquantify/methods/mixture_models.py +1003 -0
- mlquantify/methods/non_aggregative.py +136 -0
- mlquantify/methods/threshold_optimization.py +959 -0
- mlquantify/model_selection.py +377 -232
- mlquantify/plots.py +367 -0
- mlquantify/utils/__init__.py +2 -2
- mlquantify/utils/general.py +334 -0
- mlquantify/utils/method.py +449 -0
- {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.0.dist-info}/METADATA +137 -122
- mlquantify-0.1.0.dist-info/RECORD +22 -0
- {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.0.dist-info}/WHEEL +1 -1
- mlquantify/classification/pwkclf.py +0 -73
- mlquantify/evaluation/measures/__init__.py +0 -26
- mlquantify/evaluation/measures/ae.py +0 -11
- mlquantify/evaluation/measures/bias.py +0 -16
- mlquantify/evaluation/measures/kld.py +0 -8
- mlquantify/evaluation/measures/mse.py +0 -12
- mlquantify/evaluation/measures/nae.py +0 -16
- mlquantify/evaluation/measures/nkld.py +0 -13
- mlquantify/evaluation/measures/nrae.py +0 -16
- mlquantify/evaluation/measures/rae.py +0 -12
- mlquantify/evaluation/measures/se.py +0 -12
- mlquantify/evaluation/protocol/_Protocol.py +0 -202
- mlquantify/evaluation/protocol/__init__.py +0 -2
- mlquantify/evaluation/protocol/app.py +0 -146
- mlquantify/evaluation/protocol/npp.py +0 -34
- mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
- mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
- mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
- mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
- mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
- mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
- mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
- mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
- mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
- mlquantify/methods/aggregative/__init__.py +0 -9
- mlquantify/methods/aggregative/cc.py +0 -32
- mlquantify/methods/aggregative/emq.py +0 -86
- mlquantify/methods/aggregative/fm.py +0 -72
- mlquantify/methods/aggregative/gac.py +0 -96
- mlquantify/methods/aggregative/gpac.py +0 -87
- mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -81
- mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
- mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
- mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -89
- mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -46
- mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
- mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
- mlquantify/methods/aggregative/pcc.py +0 -33
- mlquantify/methods/aggregative/pwk.py +0 -38
- mlquantify/methods/meta/__init__.py +0 -1
- mlquantify/methods/meta/ensemble.py +0 -236
- mlquantify/methods/non_aggregative/__init__.py +0 -1
- mlquantify/methods/non_aggregative/hdx.py +0 -71
- mlquantify/plots/__init__.py +0 -2
- mlquantify/plots/distribution_plot.py +0 -109
- mlquantify/plots/protocol_plot.py +0 -193
- mlquantify/utils/general_purposes/__init__.py +0 -8
- mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
- mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
- mlquantify/utils/general_purposes/get_real_prev.py +0 -9
- mlquantify/utils/general_purposes/load_quantifier.py +0 -4
- mlquantify/utils/general_purposes/make_prevs.py +0 -23
- mlquantify/utils/general_purposes/normalize.py +0 -20
- mlquantify/utils/general_purposes/parallel.py +0 -10
- mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
- mlquantify/utils/method_purposes/__init__.py +0 -6
- mlquantify/utils/method_purposes/distances.py +0 -21
- mlquantify/utils/method_purposes/getHist.py +0 -13
- mlquantify/utils/method_purposes/get_scores.py +0 -33
- mlquantify/utils/method_purposes/moss.py +0 -16
- mlquantify/utils/method_purposes/ternary_search.py +0 -14
- mlquantify/utils/method_purposes/tprfpr.py +0 -42
- mlquantify-0.0.11.2.dist-info/RECORD +0 -73
- {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,122 +1,137 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
2
|
-
Name: mlquantify
|
|
3
|
-
Version: 0.0
|
|
4
|
-
Summary: Quantification Library
|
|
5
|
-
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
|
-
Maintainer: Luiz Fernando Luth Junior
|
|
7
|
-
Keywords: python,machine learning,quantification,quantify
|
|
8
|
-
Classifier: Development Status :: 4 - Beta
|
|
9
|
-
Classifier: Intended Audience :: Science/Research
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Operating System :: Unix
|
|
12
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Description-Content-Type: text/markdown
|
|
15
|
-
Requires-Dist: scikit-learn
|
|
16
|
-
Requires-Dist: numpy
|
|
17
|
-
Requires-Dist: scipy
|
|
18
|
-
Requires-Dist: joblib
|
|
19
|
-
Requires-Dist: tqdm
|
|
20
|
-
Requires-Dist: pandas
|
|
21
|
-
Requires-Dist: xlrd
|
|
22
|
-
Requires-Dist: matplotlib
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
___
|
|
65
|
-
|
|
66
|
-
##
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
#
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
-
|
|
117
|
-
-
|
|
118
|
-
-
|
|
119
|
-
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: mlquantify
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Quantification Library
|
|
5
|
+
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
|
+
Maintainer: Luiz Fernando Luth Junior
|
|
7
|
+
Keywords: python,machine learning,quantification,quantify
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Operating System :: Unix
|
|
12
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
13
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: scikit-learn
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: scipy
|
|
18
|
+
Requires-Dist: joblib
|
|
19
|
+
Requires-Dist: tqdm
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Requires-Dist: xlrd
|
|
22
|
+
Requires-Dist: matplotlib
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: keywords
|
|
28
|
+
Dynamic: maintainer
|
|
29
|
+
Dynamic: requires-dist
|
|
30
|
+
Dynamic: summary
|
|
31
|
+
|
|
32
|
+
<h1 align="center">MLQuantify</h1>
|
|
33
|
+
<h4 align="center">A Python Package for Quantification</h4>
|
|
34
|
+
|
|
35
|
+
___
|
|
36
|
+
|
|
37
|
+
**mlquantify** is a Python library for quantification, also known as supervised prevalence estimation, designed to estimate the distribution of classes within datasets. It offers a range of tools for various quantification methods, model selection tailored for quantification tasks, evaluation metrics, and protocols to assess quantification performance. Additionally, mlquantify includes popular datasets and visualization tools to help analyze and interpret results.
|
|
38
|
+
|
|
39
|
+
___
|
|
40
|
+
|
|
41
|
+
## Latest Release
|
|
42
|
+
|
|
43
|
+
- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
|
|
44
|
+
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
45
|
+
- Explore the [API documentation](#) for detailed developer information.
|
|
46
|
+
- See also the library on PyPI: [mlquantify on PyPI](https://pypi.org/project/mlquantify/)
|
|
47
|
+
|
|
48
|
+
___
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
To install mlquantify, run the following command:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install mlquantify
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
If you only want to update, run the code below:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install --upgrade mlquantify
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
___
|
|
65
|
+
|
|
66
|
+
## Contents
|
|
67
|
+
|
|
68
|
+
| Section | Description |
|
|
69
|
+
|---|---|
|
|
70
|
+
| **21 Quantification Methods** | Methods for quantification, such as Classify & Count, correction methods, Threshold Optimization, Mixture Models and more.|
|
|
71
|
+
| **Dynamic class management** | All methods are dynamic and handle both multiclass and binary problems; in the binary case, One-vs-All (OVA) is applied automatically. |
|
|
72
|
+
| **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
|
|
73
|
+
| **Evaluation Metrics** | Specific metrics used to evaluate quantification performance (e.g., AE, BIAS, NAE, SE, KLD, etc.). |
|
|
74
|
+
| **Evaluation Protocols** | Evaluation protocols used, based on sampling generation (e.g., APP, NPP, etc.). |
|
|
75
|
+
| **Plotting Results** | Tools and techniques used to visualize results, such as the protocol results.|
|
|
76
|
+
| **Comprehensive Documentation** | Complete documentation of the project, including code, data, and results. |
|
|
77
|
+
|
|
78
|
+
___
|
|
79
|
+
|
|
80
|
+
## Quick example:
|
|
81
|
+
|
|
82
|
+
This code first loads the breast cancer dataset from _sklearn_, which is then split into training and testing sets. It uses the _Expectation Maximisation Quantifier (EMQ)_ with a RandomForest classifier to predict class prevalence. After training the model, it evaluates performance by calculating and printing the absolute error and bias between the real and predicted prevalences.
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import mlquantify as mq
|
|
86
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
87
|
+
from sklearn.datasets import load_breast_cancer
|
|
88
|
+
from sklearn.model_selection import train_test_split
|
|
89
|
+
|
|
90
|
+
# Loading dataset from sklearn
|
|
91
|
+
features, target = load_breast_cancer(return_X_y=True)
|
|
92
|
+
|
|
93
|
+
#Splitting into train and test
|
|
94
|
+
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
|
|
95
|
+
|
|
96
|
+
#Create the model, here it is the Expectation Maximisation Quantifier (EMQ) with a classifier
|
|
97
|
+
model = mq.methods.EMQ(RandomForestClassifier())
|
|
98
|
+
model.fit(X_train, y_train)
|
|
99
|
+
|
|
100
|
+
#Predict the class prevalence for X_test
|
|
101
|
+
pred_prevalence = model.predict(X_test)
|
|
102
|
+
real_prevalence = mq.utils.get_real_prev(y_test)
|
|
103
|
+
|
|
104
|
+
#Get the error for the prediction
|
|
105
|
+
ae = mq.evaluation.absolute_error(real_prevalence, pred_prevalence)
|
|
106
|
+
bias = mq.evaluation.bias(real_prevalence, pred_prevalence)
|
|
107
|
+
|
|
108
|
+
print(f"Absolute Error (AE) -> {ae:.4f}")
|
|
109
|
+
print(f"Bias -> {bias}")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
___
|
|
113
|
+
|
|
114
|
+
## Requirements
|
|
115
|
+
|
|
116
|
+
- Scikit-learn
|
|
117
|
+
- pandas
|
|
118
|
+
- numpy
|
|
119
|
+
- joblib
|
|
120
|
+
- tqdm
|
|
121
|
+
- matplotlib
|
|
122
|
+
- xlrd
|
|
123
|
+
|
|
124
|
+
___
|
|
125
|
+
|
|
126
|
+
## Documentation
|
|
127
|
+
|
|
128
|
+
##### API is available [here](#)
|
|
129
|
+
|
|
130
|
+
- [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
|
|
131
|
+
- [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
|
|
132
|
+
- [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
|
|
133
|
+
- [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
|
|
134
|
+
- [Utilities](https://github.com/luizfernandolj/mlquantify/wiki/Utilities)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
___
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
mlquantify/__init__.py,sha256=Q9jCEkG0EJoHXukrxh194mhO_Yfu-BZPRfjpQ4T1XlQ,978
|
|
2
|
+
mlquantify/base.py,sha256=hJ9FYYNGeO5-WJlpJpsUiu_LQL1fimvZPPNsKptxN7w,19196
|
|
3
|
+
mlquantify/model_selection.py,sha256=deMdTKarMu4DtfSy6ucTIxQKj1yrC-x3nhBT77kMOtI,12679
|
|
4
|
+
mlquantify/plots.py,sha256=9XOhx4QXkN9RkkiErLuL90FWIBUV2YTEJNT4Jwfy0ac,12380
|
|
5
|
+
mlquantify/classification/__init__.py,sha256=3FGf-F4SOM3gByUPsWdnBzjyC_31B3MtzuolEuocPls,22
|
|
6
|
+
mlquantify/classification/methods.py,sha256=yDSbpoqM3hfF0a9ATzKqfG9S-44x-0Rq0lkAVJKTIEs,5006
|
|
7
|
+
mlquantify/evaluation/__init__.py,sha256=x1grng0n_QeZpVBU8-pwagYdBMkbMRILtrp1qk_bLvk,447
|
|
8
|
+
mlquantify/evaluation/measures.py,sha256=fIKyxxlD8em3oaj4u_BeXmNyUQG_A0vXWY8APPgNoJ0,6579
|
|
9
|
+
mlquantify/evaluation/protocol.py,sha256=OsOXm_vf7sYlw9pQv08WxAvvgzo10bAqiDM-1cpz7nQ,24020
|
|
10
|
+
mlquantify/methods/__init__.py,sha256=ya3Mn7bcz2r3oaIT7yVR4iJkAfgEAwF4xDK54C0rZ7U,536
|
|
11
|
+
mlquantify/methods/aggregative.py,sha256=rL_xlX2nYECrxFSjBJNlxj6h3b-iIs7l_XgxIRSYHpw,34164
|
|
12
|
+
mlquantify/methods/meta.py,sha256=sZWQHUGkm6iiqujmIpHDL_8tDdKQ161bzD5mcpXLWEY,19066
|
|
13
|
+
mlquantify/methods/mixture_models.py,sha256=si2Pzaka5Kbva4QKBzLolvb_8V0ZEjp68UBAiOwl49s,35166
|
|
14
|
+
mlquantify/methods/non_aggregative.py,sha256=xaBu21TUtiYkOEUKO16NaNMwdNa6-SNjfBsc5PpIMyI,4815
|
|
15
|
+
mlquantify/methods/threshold_optimization.py,sha256=P88VXG-czZiaHSHTGnzFmZVzm3SoJHnrmi60Zvv7IJU,33726
|
|
16
|
+
mlquantify/utils/__init__.py,sha256=logWrL6B6mukP8tvYm_UPEdO9eNA-J-ySILr7-syDoc,44
|
|
17
|
+
mlquantify/utils/general.py,sha256=Li5ix_dy19dUhYNgiUsNHdqqnSVYvznUBUuyr-zYSPI,7554
|
|
18
|
+
mlquantify/utils/method.py,sha256=RL4vBJGl5_6DZ59Bs62hdNXI_hnoDIWilMMyMPiOjBg,12631
|
|
19
|
+
mlquantify-0.1.0.dist-info/METADATA,sha256=6ud6gvzxxaQr7oZLD3fu3piid1ZHjJuHAQZzZeUw7Rs,4939
|
|
20
|
+
mlquantify-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
21
|
+
mlquantify-0.1.0.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
|
|
22
|
+
mlquantify-0.1.0.dist-info/RECORD,,
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
from sklearn.neighbors import NearestNeighbors
|
|
2
|
-
from sklearn.base import BaseEstimator
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd
|
|
5
|
-
|
|
6
|
-
class PWKCLF(BaseEstimator):
|
|
7
|
-
"""Learner based on k-Nearest Neighborst (KNN) to use on the method PWK,
|
|
8
|
-
that also is based on KNN.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def __init__(self,
|
|
13
|
-
alpha=1,
|
|
14
|
-
n_neighbors=10,
|
|
15
|
-
algorithm="auto",
|
|
16
|
-
metric="euclidean",
|
|
17
|
-
leaf_size=30,
|
|
18
|
-
p=2,
|
|
19
|
-
metric_params=None,
|
|
20
|
-
n_jobs=None):
|
|
21
|
-
|
|
22
|
-
if alpha < 1:
|
|
23
|
-
raise ValueError("alpha must not be smaller than 1")
|
|
24
|
-
|
|
25
|
-
self.alpha = alpha
|
|
26
|
-
self.n_neighbors = n_neighbors
|
|
27
|
-
|
|
28
|
-
self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
|
|
29
|
-
algorithm=algorithm,
|
|
30
|
-
leaf_size=leaf_size,
|
|
31
|
-
metric=metric,
|
|
32
|
-
p=p,
|
|
33
|
-
metric_params=metric_params,
|
|
34
|
-
n_jobs=n_jobs)
|
|
35
|
-
|
|
36
|
-
self.Y = None
|
|
37
|
-
self.Y_map = None
|
|
38
|
-
self.w = None
|
|
39
|
-
self.y = None
|
|
40
|
-
|
|
41
|
-
def fit(self, X, y):
|
|
42
|
-
n_samples = X.shape[0]
|
|
43
|
-
if n_samples < self.n_neighbors:
|
|
44
|
-
self.nbrs.set_params(n_neighbors=n_samples)
|
|
45
|
-
|
|
46
|
-
self.y = y
|
|
47
|
-
|
|
48
|
-
if isinstance(y, pd.DataFrame):
|
|
49
|
-
self.y = y.reset_index(drop=True)
|
|
50
|
-
|
|
51
|
-
Y_cts = np.unique(y, return_counts=True)
|
|
52
|
-
self.Y = Y_cts[0]
|
|
53
|
-
self.Y_map = dict(zip(self.Y, range(len(self.Y))))
|
|
54
|
-
|
|
55
|
-
min_class_count = np.min(Y_cts[1])
|
|
56
|
-
self.w = (Y_cts[1] / min_class_count) ** (-1.0 / self.alpha)
|
|
57
|
-
self.nbrs.fit(X)
|
|
58
|
-
return self
|
|
59
|
-
|
|
60
|
-
def predict(self, X):
|
|
61
|
-
n_samples = X.shape[0]
|
|
62
|
-
nn_indices = self.nbrs.kneighbors(X, return_distance=False)
|
|
63
|
-
|
|
64
|
-
CM = np.zeros((n_samples, len(self.Y)))
|
|
65
|
-
|
|
66
|
-
for i in range(n_samples):
|
|
67
|
-
for j in nn_indices[i]:
|
|
68
|
-
CM[i, self.Y_map[self.y[j]]] += 1
|
|
69
|
-
|
|
70
|
-
CM = np.multiply(CM, self.w)
|
|
71
|
-
predictions = np.apply_along_axis(np.argmax, axis=1, arr=CM)
|
|
72
|
-
|
|
73
|
-
return self.Y[predictions]
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from .ae import absolute_error
|
|
2
|
-
from .kld import kullback_leibler_divergence
|
|
3
|
-
from .nkld import normalized_kullback_leibler_divergence
|
|
4
|
-
from .rae import relative_absolute_error
|
|
5
|
-
from .nae import normalized_absolute_error
|
|
6
|
-
from .bias import bias
|
|
7
|
-
from .nrae import normalized_relative_absolute_error
|
|
8
|
-
from .se import squared_error
|
|
9
|
-
from .mse import mean_squared_error
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
MEASURES = {
|
|
14
|
-
"ae": absolute_error,
|
|
15
|
-
"nae": normalized_absolute_error,
|
|
16
|
-
"kld": kullback_leibler_divergence,
|
|
17
|
-
"nkld": normalized_kullback_leibler_divergence,
|
|
18
|
-
"nrae": normalized_relative_absolute_error,
|
|
19
|
-
"rae": relative_absolute_error,
|
|
20
|
-
"se": squared_error,
|
|
21
|
-
"mse": mean_squared_error
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def get_measure(measure:str):
|
|
26
|
-
return MEASURES.get(measure)
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def absolute_error(prev_real:np.any, prev_pred:np.any):
|
|
4
|
-
if isinstance(prev_real, dict):
|
|
5
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
6
|
-
if isinstance(prev_pred, dict):
|
|
7
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
8
|
-
|
|
9
|
-
abs_error = abs(prev_pred - prev_real).mean(axis=-1)
|
|
10
|
-
|
|
11
|
-
return abs_error
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def bias(prev_real:np.any, prev_pred:np.any):
|
|
4
|
-
classes = None
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
classes = prev_real.keys()
|
|
7
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
8
|
-
if isinstance(prev_pred, dict):
|
|
9
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
10
|
-
|
|
11
|
-
abs_errors = abs(prev_pred - prev_real)
|
|
12
|
-
|
|
13
|
-
if classes:
|
|
14
|
-
return {class_:abs_error for class_, abs_error in zip(classes, abs_errors)}
|
|
15
|
-
|
|
16
|
-
return abs_errors
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def kullback_leibler_divergence(prev_real:np.any, prev_pred:np.any):
|
|
4
|
-
if isinstance(prev_real, dict):
|
|
5
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
6
|
-
if isinstance(prev_pred, dict):
|
|
7
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
8
|
-
return prev_real * abs(np.log((prev_real / prev_pred)))
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from .se import squared_error
|
|
3
|
-
|
|
4
|
-
def mean_squared_error(prev_real:np.any, prev_pred:np.any):
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
7
|
-
if isinstance(prev_pred, dict):
|
|
8
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
9
|
-
|
|
10
|
-
mean_sq_error = squared_error(prev_real, prev_pred).mean()
|
|
11
|
-
|
|
12
|
-
return mean_sq_error
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from .ae import absolute_error
|
|
3
|
-
|
|
4
|
-
def normalized_absolute_error(prev_real:np.any, prev_pred:np.any):
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
7
|
-
if isinstance(prev_pred, dict):
|
|
8
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
9
|
-
|
|
10
|
-
abs_error = absolute_error(prev_real, prev_pred)
|
|
11
|
-
|
|
12
|
-
z_abs_error = (2 * (1 - min(prev_real)))
|
|
13
|
-
|
|
14
|
-
normalized = abs_error / z_abs_error
|
|
15
|
-
|
|
16
|
-
return normalized
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from .kld import kullback_leibler_divergence
|
|
3
|
-
|
|
4
|
-
def normalized_kullback_leibler_divergence(prev_real:np.any, prev_pred:np.any):
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
7
|
-
if isinstance(prev_pred, dict):
|
|
8
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
9
|
-
|
|
10
|
-
euler = np.exp(kullback_leibler_divergence(prev_real, prev_pred))
|
|
11
|
-
normalized = 2 * (euler / (euler + 1)) - 1
|
|
12
|
-
|
|
13
|
-
return normalized
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from .rae import relative_absolute_error
|
|
3
|
-
|
|
4
|
-
def normalized_relative_absolute_error(prev_real:np.any, prev_pred:np.any):
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
7
|
-
if isinstance(prev_pred, dict):
|
|
8
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
9
|
-
|
|
10
|
-
relative = relative_absolute_error(prev_real, prev_pred)
|
|
11
|
-
|
|
12
|
-
z_relative = (len(prev_real) - 1 + ((1 - min(prev_real)) / min(prev_real))) / len(prev_real)
|
|
13
|
-
|
|
14
|
-
normalized = relative/z_relative
|
|
15
|
-
|
|
16
|
-
return normalized
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from . import absolute_error
|
|
3
|
-
|
|
4
|
-
def relative_absolute_error(prev_real:np.any, prev_pred:np.any):
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
7
|
-
if isinstance(prev_pred, dict):
|
|
8
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
9
|
-
|
|
10
|
-
relative = (absolute_error(prev_real, prev_pred) / prev_real).mean(axis=-1)
|
|
11
|
-
|
|
12
|
-
return relative
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from .ae import absolute_error
|
|
3
|
-
|
|
4
|
-
def squared_error(prev_real:np.any, prev_pred:np.any):
|
|
5
|
-
if isinstance(prev_real, dict):
|
|
6
|
-
prev_real = np.asarray(list(prev_real.values()))
|
|
7
|
-
if isinstance(prev_pred, dict):
|
|
8
|
-
prev_pred = np.asarray(list(prev_pred.values()))
|
|
9
|
-
|
|
10
|
-
sq_abs_error = ((prev_pred - prev_real) ** 2).mean(axis=-1)
|
|
11
|
-
|
|
12
|
-
return sq_abs_error
|