data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +177 -8
  2. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +30 -18
  3. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
  4. dmu/generic/hashing.py +44 -0
  5. dmu/generic/utilities.py +14 -1
  6. dmu/generic/version_management.py +3 -5
  7. dmu/ml/cv_diagnostics.py +221 -0
  8. dmu/ml/train_mva.py +124 -31
  9. dmu/pdataframe/utilities.py +36 -3
  10. dmu/plotting/fwhm.py +64 -0
  11. dmu/plotting/plotter.py +2 -0
  12. dmu/plotting/plotter_1d.py +87 -6
  13. dmu/stats/fitter.py +1 -1
  14. dmu/stats/model_factory.py +189 -25
  15. dmu/stats/zfit_models.py +68 -0
  16. dmu/stats/zfit_plotter.py +29 -21
  17. dmu/testing/utilities.py +31 -4
  18. dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
  19. dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
  20. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
  21. dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
  22. dmu_data/ml/tests/train_mva.yaml +15 -9
  23. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
  24. dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
  25. dmu_data/plotting/tests/plug_stats.yaml +19 -0
  26. dmu_data/plotting/tests/simple.yaml +4 -3
  27. dmu_data/plotting/tests/styling.yaml +11 -0
  28. {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
  29. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
  30. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,10 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/multiple_methods
2
+ correlations:
3
+ # Variables with respect to which the correlations with the features will be measured
4
+ target :
5
+ name : z
6
+ methods:
7
+ - Pearson
8
+ - Kendall-$\tau$
9
+ figure:
10
+ size : [10, 8]
@@ -0,0 +1,33 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/overlay
2
+ # Assumes the score is already a column of the input dataframe
3
+ # and uses it directly, instead of evaluating models
4
+ score_from_rdf : w
5
+ correlations:
6
+ # Variables with respect to which the correlations with the features will be measured
7
+ target :
8
+ name : z
9
+ overlay :
10
+ wp :
11
+ - 0.2
12
+ - 0.5
13
+ - 0.7
14
+ - 0.9
15
+ general:
16
+ size : [12, 10]
17
+ saving:
18
+ plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/overlay
19
+ plots:
20
+ z :
21
+ binning : [-4, 4, 10]
22
+ yscale : 'linear'
23
+ labels : ['$z$', 'Entries']
24
+ normalized : true
25
+ styling :
26
+ linestyle: '-'
27
+ methods:
28
+ - Pearson
29
+ - Kendall-$\tau$
30
+ figure:
31
+ title : Scores from file
32
+ size : [12, 10]
33
+ xlabelsize: 30
@@ -1,10 +1,12 @@
1
1
  dataset:
2
+ define :
3
+ r : z + x
2
4
  nan :
3
5
  x : -3
4
- y : -3
6
+ y : -3
5
7
  training :
6
8
  nfold : 3
7
- features : [x, y, z]
9
+ features : [x, y, r]
8
10
  rdm_stat : 1
9
11
  hyper :
10
12
  loss : log_loss
@@ -13,7 +15,7 @@ training :
13
15
  learning_rate : 0.1
14
16
  min_samples_split : 2
15
17
  saving:
16
- path : '/tmp/dmu/ml/tests/train_mva/model.pkl'
18
+ path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
17
19
  plotting:
18
20
  roc :
19
21
  min : [0.0, 0.0]
@@ -29,24 +31,28 @@ plotting:
29
31
  title : 'Correlation matrix'
30
32
  size : [10, 10]
31
33
  mask_value : 0
32
- val_dir : '/tmp/dmu/ml/tests/train_mva'
34
+ val_dir : '/tmp/tests/dmu/ml/train_mva'
33
35
  features:
34
36
  saving:
35
- plt_dir : '/tmp/dmu/ml/tests/train_mva/features'
37
+ plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
36
38
  plots:
39
+ r :
40
+ binning : [-6, 6, 100]
41
+ yscale : 'linear'
42
+ labels : ['$r$', '']
37
43
  w :
38
44
  binning : [-4, 4, 100]
39
45
  yscale : 'linear'
40
- labels : ['w', '']
46
+ labels : ['$w$', '']
41
47
  x :
42
48
  binning : [-4, 4, 100]
43
49
  yscale : 'linear'
44
- labels : ['x', '']
50
+ labels : ['$x$', '']
45
51
  y :
46
52
  binning : [-4, 4, 100]
47
53
  yscale : 'linear'
48
- labels : ['y', '']
54
+ labels : ['$y$', '']
49
55
  z :
50
56
  binning : [-4, 4, 100]
51
57
  yscale : 'linear'
52
- labels : ['z', '']
58
+ labels : ['$z$', '']
@@ -0,0 +1,82 @@
1
+ dataset:
2
+ define :
3
+ r : z + x
4
+ nan :
5
+ x : -3
6
+ y : -3
7
+ training :
8
+ nfold : 3
9
+ features : [x, y, r]
10
+ rdm_stat : 1
11
+ hyper :
12
+ loss : log_loss
13
+ n_estimators : 100
14
+ max_depth : 3
15
+ learning_rate : 0.1
16
+ min_samples_split : 2
17
+ saving:
18
+ path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
19
+ plotting:
20
+ roc :
21
+ min : [0.0, 0.0]
22
+ max : [1.2, 1.2]
23
+ annotate:
24
+ sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
25
+ form : '{:.2f}'
26
+ color: 'green'
27
+ xoff : -15
28
+ yoff : -15
29
+ size : 10
30
+ correlation:
31
+ title : 'Correlation matrix'
32
+ size : [10, 10]
33
+ mask_value : 0
34
+ val_dir : '/tmp/tests/dmu/ml/train_mva'
35
+ features:
36
+ saving:
37
+ plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
38
+ plots:
39
+ r :
40
+ binning : [-6, 6, 100]
41
+ yscale : 'linear'
42
+ labels : ['$r$', '']
43
+ w :
44
+ binning : [-4, 4, 100]
45
+ yscale : 'linear'
46
+ labels : ['$w$', '']
47
+ x :
48
+ binning : [-4, 4, 100]
49
+ yscale : 'linear'
50
+ labels : ['$x$', '']
51
+ y :
52
+ binning : [-4, 4, 100]
53
+ yscale : 'linear'
54
+ labels : ['$y$', '']
55
+ z :
56
+ binning : [-4, 4, 100]
57
+ yscale : 'linear'
58
+ labels : ['$z$', '']
59
+ diagnostics:
60
+ output : /tmp/tests/dmu/ml/train_mva/diagnostics
61
+ correlations:
62
+ target :
63
+ name : z
64
+ overlay :
65
+ general:
66
+ size : [20, 10]
67
+ saving:
68
+ plt_dir : /tmp/tests/dmu/ml/train_mva/diagnostics
69
+ plots:
70
+ z :
71
+ binning : [-4, +4, 30]
72
+ yscale : 'linear'
73
+ labels : ['z', 'Entries']
74
+ normalized : true
75
+ styling :
76
+ linestyle: '-'
77
+ methods:
78
+ - Pearson
79
+ - Kendall-$\tau$
80
+ figure:
81
+ title: Training diagnostics
82
+ size : [10, 8]
@@ -0,0 +1,24 @@
1
+ saving:
2
+ plt_dir : plotting/pluggins/fwhm
3
+ plots:
4
+ x :
5
+ binning : [-5.0, 8.0, 40]
6
+ title : x distribution
7
+ y :
8
+ binning : [-5.0, 8.0, 40]
9
+ title : y distribution
10
+ plugin:
11
+ fwhm:
12
+ x :
13
+ plot : true
14
+ obs : [-2, 4]
15
+ plot : true
16
+ format : FWHM={:.3f}
17
+ add_std: True
18
+ y :
19
+ plot : true
20
+ obs : [-4, 8]
21
+ plot : true
22
+ format : FWHM={:.3f}
23
+ add_std: True
24
+
@@ -0,0 +1,19 @@
1
+ saving:
2
+ plt_dir : plotting/pluggins/stats
3
+ plots:
4
+ x :
5
+ binning : [-5.0, 8.0, 40]
6
+ title : x distribution
7
+ styling:
8
+ linestyle : '-'
9
+ y :
10
+ binning : [-5.0, 8.0, 40]
11
+ title : y distribution
12
+ styling:
13
+ linestyle : '-'
14
+ plugin:
15
+ stats:
16
+ x :
17
+ mean : $\mu$={:.2f}
18
+ rms : $\sigma$={:.2f}
19
+ sum : $\Sigma$={:.0f}
@@ -1,8 +1,9 @@
1
1
  saving:
2
2
  plt_dir : tests/plotting/simple
3
-
4
3
  plots:
5
4
  x :
6
- binning : [-5.0, 8.0, 40]
5
+ binning : [-5.0, 8.0, 40]
6
+ title : x distribution
7
7
  y :
8
- binning : [-5.0, 8.0, 40]
8
+ binning : [-5.0, 8.0, 40]
9
+ title : y distribution
@@ -0,0 +1,11 @@
1
+ saving:
2
+ plt_dir : tests/plotting/styling
3
+ plots:
4
+ x :
5
+ binning : [-5.0, 8.0, 40]
6
+ title : x distribution
7
+ styling :
8
+ histtype : step
9
+ y :
10
+ binning : [-5.0, 8.0, 40]
11
+ title : y distribution