data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +177 -8
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +30 -18
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
- dmu/generic/hashing.py +44 -0
- dmu/generic/utilities.py +14 -1
- dmu/generic/version_management.py +3 -5
- dmu/ml/cv_diagnostics.py +221 -0
- dmu/ml/train_mva.py +124 -31
- dmu/pdataframe/utilities.py +36 -3
- dmu/plotting/fwhm.py +64 -0
- dmu/plotting/plotter.py +2 -0
- dmu/plotting/plotter_1d.py +87 -6
- dmu/stats/fitter.py +1 -1
- dmu/stats/model_factory.py +189 -25
- dmu/stats/zfit_models.py +68 -0
- dmu/stats/zfit_plotter.py +29 -21
- dmu/testing/utilities.py +31 -4
- dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
- dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
- dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
- dmu_data/ml/tests/train_mva.yaml +15 -9
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
- dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
- dmu_data/plotting/tests/plug_stats.yaml +19 -0
- dmu_data/plotting/tests/simple.yaml +4 -3
- dmu_data/plotting/tests/styling.yaml +11 -0
- {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
output : /tmp/tests/dmu/ml/cv_diagnostics/overlay
|
2
|
+
# Will assume that the target is already in the input dataframe
|
3
|
+
# and will use it, instead of evaluating models
|
4
|
+
score_from_rdf : w
|
5
|
+
correlations:
|
6
|
+
# Variables with respect to which the correlations with the features will be measured
|
7
|
+
target :
|
8
|
+
name : z
|
9
|
+
overlay :
|
10
|
+
wp :
|
11
|
+
- 0.2
|
12
|
+
- 0.5
|
13
|
+
- 0.7
|
14
|
+
- 0.9
|
15
|
+
general:
|
16
|
+
size : [12, 10]
|
17
|
+
saving:
|
18
|
+
plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/overlay
|
19
|
+
plots:
|
20
|
+
z :
|
21
|
+
binning : [-4, 4, 10]
|
22
|
+
yscale : 'linear'
|
23
|
+
labels : ['$z$', 'Entries']
|
24
|
+
normalized : true
|
25
|
+
styling :
|
26
|
+
linestyle: '-'
|
27
|
+
methods:
|
28
|
+
- Pearson
|
29
|
+
- Kendall-$\tau$
|
30
|
+
figure:
|
31
|
+
title : Scores from file
|
32
|
+
size : [12, 10]
|
33
|
+
xlabelsize: 30
|
dmu_data/ml/tests/train_mva.yaml
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
dataset:
|
2
|
+
define :
|
3
|
+
r : z + x
|
2
4
|
nan :
|
3
5
|
x : -3
|
4
|
-
y : -3
|
6
|
+
y : -3
|
5
7
|
training :
|
6
8
|
nfold : 3
|
7
|
-
features : [x, y,
|
9
|
+
features : [x, y, r]
|
8
10
|
rdm_stat : 1
|
9
11
|
hyper :
|
10
12
|
loss : log_loss
|
@@ -13,7 +15,7 @@ training :
|
|
13
15
|
learning_rate : 0.1
|
14
16
|
min_samples_split : 2
|
15
17
|
saving:
|
16
|
-
path : '/tmp/dmu/ml/
|
18
|
+
path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
|
17
19
|
plotting:
|
18
20
|
roc :
|
19
21
|
min : [0.0, 0.0]
|
@@ -29,24 +31,28 @@ plotting:
|
|
29
31
|
title : 'Correlation matrix'
|
30
32
|
size : [10, 10]
|
31
33
|
mask_value : 0
|
32
|
-
val_dir : '/tmp/dmu/ml/
|
34
|
+
val_dir : '/tmp/tests/dmu/ml/train_mva'
|
33
35
|
features:
|
34
36
|
saving:
|
35
|
-
plt_dir : '/tmp/dmu/ml/
|
37
|
+
plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
|
36
38
|
plots:
|
39
|
+
r :
|
40
|
+
binning : [-6, 6, 100]
|
41
|
+
yscale : 'linear'
|
42
|
+
labels : ['$r$', '']
|
37
43
|
w :
|
38
44
|
binning : [-4, 4, 100]
|
39
45
|
yscale : 'linear'
|
40
|
-
labels : ['w', '']
|
46
|
+
labels : ['$w$', '']
|
41
47
|
x :
|
42
48
|
binning : [-4, 4, 100]
|
43
49
|
yscale : 'linear'
|
44
|
-
labels : ['x', '']
|
50
|
+
labels : ['$x$', '']
|
45
51
|
y :
|
46
52
|
binning : [-4, 4, 100]
|
47
53
|
yscale : 'linear'
|
48
|
-
labels : ['y', '']
|
54
|
+
labels : ['$y$', '']
|
49
55
|
z :
|
50
56
|
binning : [-4, 4, 100]
|
51
57
|
yscale : 'linear'
|
52
|
-
labels : ['z', '']
|
58
|
+
labels : ['$z$', '']
|
@@ -0,0 +1,82 @@
|
|
1
|
+
dataset:
|
2
|
+
define :
|
3
|
+
r : z + x
|
4
|
+
nan :
|
5
|
+
x : -3
|
6
|
+
y : -3
|
7
|
+
training :
|
8
|
+
nfold : 3
|
9
|
+
features : [x, y, r]
|
10
|
+
rdm_stat : 1
|
11
|
+
hyper :
|
12
|
+
loss : log_loss
|
13
|
+
n_estimators : 100
|
14
|
+
max_depth : 3
|
15
|
+
learning_rate : 0.1
|
16
|
+
min_samples_split : 2
|
17
|
+
saving:
|
18
|
+
path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
|
19
|
+
plotting:
|
20
|
+
roc :
|
21
|
+
min : [0.0, 0.0]
|
22
|
+
max : [1.2, 1.2]
|
23
|
+
annotate:
|
24
|
+
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
25
|
+
form : '{:.2f}'
|
26
|
+
color: 'green'
|
27
|
+
xoff : -15
|
28
|
+
yoff : -15
|
29
|
+
size : 10
|
30
|
+
correlation:
|
31
|
+
title : 'Correlation matrix'
|
32
|
+
size : [10, 10]
|
33
|
+
mask_value : 0
|
34
|
+
val_dir : '/tmp/tests/dmu/ml/train_mva'
|
35
|
+
features:
|
36
|
+
saving:
|
37
|
+
plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
|
38
|
+
plots:
|
39
|
+
r :
|
40
|
+
binning : [-6, 6, 100]
|
41
|
+
yscale : 'linear'
|
42
|
+
labels : ['$r$', '']
|
43
|
+
w :
|
44
|
+
binning : [-4, 4, 100]
|
45
|
+
yscale : 'linear'
|
46
|
+
labels : ['$w$', '']
|
47
|
+
x :
|
48
|
+
binning : [-4, 4, 100]
|
49
|
+
yscale : 'linear'
|
50
|
+
labels : ['$x$', '']
|
51
|
+
y :
|
52
|
+
binning : [-4, 4, 100]
|
53
|
+
yscale : 'linear'
|
54
|
+
labels : ['$y$', '']
|
55
|
+
z :
|
56
|
+
binning : [-4, 4, 100]
|
57
|
+
yscale : 'linear'
|
58
|
+
labels : ['$z$', '']
|
59
|
+
diagnostics:
|
60
|
+
output : /tmp/tests/dmu/ml/train_mva/diagnostics
|
61
|
+
correlations:
|
62
|
+
target :
|
63
|
+
name : z
|
64
|
+
overlay :
|
65
|
+
general:
|
66
|
+
size : [20, 10]
|
67
|
+
saving:
|
68
|
+
plt_dir : /tmp/tests/dmu/ml/train_mva/diagnostics
|
69
|
+
plots:
|
70
|
+
z :
|
71
|
+
binning : [-4, +4, 30]
|
72
|
+
yscale : 'linear'
|
73
|
+
labels : ['z', 'Entries']
|
74
|
+
normalized : true
|
75
|
+
styling :
|
76
|
+
linestyle: '-'
|
77
|
+
methods:
|
78
|
+
- Pearson
|
79
|
+
- Kendall-$\tau$
|
80
|
+
figure:
|
81
|
+
title: Training diagnostics
|
82
|
+
size : [10, 8]
|
@@ -0,0 +1,24 @@
|
|
1
|
+
saving:
|
2
|
+
plt_dir : plotting/pluggins/fwhm
|
3
|
+
plots:
|
4
|
+
x :
|
5
|
+
binning : [-5.0, 8.0, 40]
|
6
|
+
title : x distribution
|
7
|
+
y :
|
8
|
+
binning : [-5.0, 8.0, 40]
|
9
|
+
title : y distribution
|
10
|
+
plugin:
|
11
|
+
fwhm:
|
12
|
+
x :
|
13
|
+
plot : true
|
14
|
+
obs : [-2, 4]
|
15
|
+
plot : true
|
16
|
+
format : FWHM={:.3f}
|
17
|
+
add_std: True
|
18
|
+
y :
|
19
|
+
plot : true
|
20
|
+
obs : [-4, 8]
|
21
|
+
plot : true
|
22
|
+
format : FWHM={:.3f}
|
23
|
+
add_std: True
|
24
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
saving:
|
2
|
+
plt_dir : plotting/pluggins/stats
|
3
|
+
plots:
|
4
|
+
x :
|
5
|
+
binning : [-5.0, 8.0, 40]
|
6
|
+
title : x distribution
|
7
|
+
styling:
|
8
|
+
linestyle : '-'
|
9
|
+
y :
|
10
|
+
binning : [-5.0, 8.0, 40]
|
11
|
+
title : y distribution
|
12
|
+
styling:
|
13
|
+
linestyle : '-'
|
14
|
+
plugin:
|
15
|
+
stats:
|
16
|
+
x :
|
17
|
+
mean : $\mu$={:.2f}
|
18
|
+
rms : $\sigma$={:.2f}
|
19
|
+
sum : $\Sigma$={:.0f}
|
{data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish
RENAMED
File without changes
|
File without changes
|
File without changes
|