data-manipulation-utilities 0.2.7__py3-none-any.whl → 0.2.8.dev720__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {data_manipulation_utilities-0.2.7.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/METADATA +669 -42
  2. data_manipulation_utilities-0.2.8.dev720.dist-info/RECORD +45 -0
  3. {data_manipulation_utilities-0.2.7.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/WHEEL +1 -2
  4. data_manipulation_utilities-0.2.8.dev720.dist-info/entry_points.txt +8 -0
  5. dmu/generic/hashing.py +34 -8
  6. dmu/generic/utilities.py +164 -11
  7. dmu/logging/log_store.py +34 -2
  8. dmu/logging/messages.py +96 -0
  9. dmu/ml/cv_classifier.py +3 -3
  10. dmu/ml/cv_diagnostics.py +3 -0
  11. dmu/ml/cv_performance.py +58 -0
  12. dmu/ml/cv_predict.py +149 -46
  13. dmu/ml/train_mva.py +482 -100
  14. dmu/ml/utilities.py +29 -10
  15. dmu/pdataframe/utilities.py +28 -3
  16. dmu/plotting/fwhm.py +2 -2
  17. dmu/plotting/matrix.py +1 -1
  18. dmu/plotting/plotter.py +23 -3
  19. dmu/plotting/plotter_1d.py +96 -32
  20. dmu/plotting/plotter_2d.py +5 -0
  21. dmu/rdataframe/utilities.py +54 -3
  22. dmu/rfile/ddfgetter.py +102 -0
  23. dmu/stats/fit_stats.py +129 -0
  24. dmu/stats/fitter.py +55 -22
  25. dmu/stats/gof_calculator.py +7 -0
  26. dmu/stats/model_factory.py +153 -62
  27. dmu/stats/parameters.py +100 -0
  28. dmu/stats/utilities.py +443 -12
  29. dmu/stats/wdata.py +187 -0
  30. dmu/stats/zfit.py +17 -0
  31. dmu/stats/zfit_plotter.py +147 -36
  32. dmu/testing/utilities.py +102 -24
  33. dmu/workflow/__init__.py +0 -0
  34. dmu/workflow/cache.py +266 -0
  35. data_manipulation_utilities-0.2.7.data/scripts/publish +0 -89
  36. data_manipulation_utilities-0.2.7.dist-info/RECORD +0 -69
  37. data_manipulation_utilities-0.2.7.dist-info/entry_points.txt +0 -6
  38. data_manipulation_utilities-0.2.7.dist-info/top_level.txt +0 -3
  39. dmu_data/ml/tests/diagnostics_from_file.yaml +0 -13
  40. dmu_data/ml/tests/diagnostics_from_model.yaml +0 -10
  41. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +0 -10
  42. dmu_data/ml/tests/diagnostics_overlay.yaml +0 -33
  43. dmu_data/ml/tests/train_mva.yaml +0 -58
  44. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +0 -82
  45. dmu_data/plotting/tests/2d.yaml +0 -24
  46. dmu_data/plotting/tests/fig_size.yaml +0 -13
  47. dmu_data/plotting/tests/high_stat.yaml +0 -22
  48. dmu_data/plotting/tests/legend.yaml +0 -12
  49. dmu_data/plotting/tests/name.yaml +0 -14
  50. dmu_data/plotting/tests/no_bounds.yaml +0 -12
  51. dmu_data/plotting/tests/normalized.yaml +0 -9
  52. dmu_data/plotting/tests/plug_fwhm.yaml +0 -24
  53. dmu_data/plotting/tests/plug_stats.yaml +0 -19
  54. dmu_data/plotting/tests/simple.yaml +0 -9
  55. dmu_data/plotting/tests/stats.yaml +0 -9
  56. dmu_data/plotting/tests/styling.yaml +0 -11
  57. dmu_data/plotting/tests/title.yaml +0 -14
  58. dmu_data/plotting/tests/weights.yaml +0 -13
  59. dmu_data/text/transform.toml +0 -4
  60. dmu_data/text/transform.txt +0 -6
  61. dmu_data/text/transform_set.toml +0 -8
  62. dmu_data/text/transform_set.txt +0 -6
  63. dmu_data/text/transform_trf.txt +0 -12
  64. dmu_scripts/git/publish +0 -89
  65. dmu_scripts/physics/check_truth.py +0 -121
  66. dmu_scripts/rfile/compare_root_files.py +0 -299
  67. dmu_scripts/rfile/print_trees.py +0 -35
  68. dmu_scripts/ssh/coned.py +0 -168
  69. dmu_scripts/text/transform_text.py +0 -46
  70. {dmu_data → dmu}/__init__.py +0 -0
@@ -1,22 +0,0 @@
1
- selection:
2
- max_ran_entries : 50000
3
- cuts:
4
- z : 'z > 0'
5
- saving:
6
- plt_dir : tests/plotting/high_stat
7
- definitions:
8
- z : 'x + y'
9
- plots:
10
- x :
11
- binning : [-5.0, 8.0, 40]
12
- yscale : 'linear'
13
- labels : ['x', 'Entries']
14
- y :
15
- binning : [-5.0, 8.0, 40]
16
- yscale : 'linear'
17
- labels : ['y', 'Entries']
18
- z :
19
- binning : [-5.0, 8.0, 40]
20
- yscale : 'linear'
21
- labels : ['x + y', 'Normalized']
22
- normalized : true
@@ -1,12 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/legend
3
- general:
4
- size : [20, 10]
5
- plots:
6
- x :
7
- binning : [-5.0, 8.0, 40]
8
- y :
9
- binning : [-5.0, 8.0, 40]
10
- style:
11
- legend:
12
- bbox_to_anchor : [1.2, 1]
@@ -1,14 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/name
3
-
4
- plots:
5
- x :
6
- binning : [-5.0, 8.0, 40]
7
- yscale : 'linear'
8
- labels : ['x', 'Entries']
9
- name : 'xvar'
10
- y :
11
- binning : [-5.0, 8.0, 40]
12
- yscale : 'linear'
13
- labels : ['y', 'Entries']
14
- name : 'yvar'
@@ -1,12 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/no_bounds
3
-
4
- plots:
5
- x :
6
- binning : [1, 1, 40]
7
- yscale : 'linear'
8
- labels : ['x', 'Entries']
9
- y :
10
- binning : [1, 1, 40]
11
- yscale : 'linear'
12
- labels : ['y', 'Entries']
@@ -1,9 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/normalized
3
- plots:
4
- x :
5
- normalized : true
6
- binning : [-5.0, 8.0, 40]
7
- y :
8
- normalized : false
9
- binning : [-5.0, 8.0, 40]
@@ -1,24 +0,0 @@
1
- saving:
2
- plt_dir : plotting/pluggins/fwhm
3
- plots:
4
- x :
5
- binning : [-5.0, 8.0, 40]
6
- title : x distribution
7
- y :
8
- binning : [-5.0, 8.0, 40]
9
- title : y distribution
10
- plugin:
11
- fwhm:
12
- x :
13
- plot : true
14
- obs : [-2, 4]
15
- plot : true
16
- format : FWHM={:.3f}
17
- add_std: True
18
- y :
19
- plot : true
20
- obs : [-4, 8]
21
- plot : true
22
- format : FWHM={:.3f}
23
- add_std: True
24
-
@@ -1,19 +0,0 @@
1
- saving:
2
- plt_dir : plotting/pluggins/stats
3
- plots:
4
- x :
5
- binning : [-5.0, 8.0, 40]
6
- title : x distribution
7
- styling:
8
- linestyle : '-'
9
- y :
10
- binning : [-5.0, 8.0, 40]
11
- title : y distribution
12
- styling:
13
- linestyle : '-'
14
- plugin:
15
- stats:
16
- x :
17
- mean : $\mu$={:.2f}
18
- rms : $\sigma$={:.2f}
19
- sum : $\Sigma$={:.0f}
@@ -1,9 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/simple
3
- plots:
4
- x :
5
- binning : [-5.0, 8.0, 40]
6
- title : x distribution
7
- y :
8
- binning : [-5.0, 8.0, 40]
9
- title : y distribution
@@ -1,9 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/stats
3
- plots:
4
- x :
5
- binning : [-5.0, 8.0, 40]
6
- y :
7
- binning : [-5.0, 8.0, 40]
8
- stats:
9
- nentries : '{:.2e}'
@@ -1,11 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/styling
3
- plots:
4
- x :
5
- binning : [-5.0, 8.0, 40]
6
- title : x distribution
7
- styling :
8
- histtype : step
9
- y :
10
- binning : [-5.0, 8.0, 40]
11
- title : y distribution
@@ -1,14 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/title
3
-
4
- plots:
5
- x :
6
- binning : [-5.0, 8.0, 40]
7
- yscale : 'linear'
8
- labels : ['x', 'Entries']
9
- title : 'Title for X plot'
10
- y :
11
- binning : [-5.0, 8.0, 40]
12
- yscale : 'linear'
13
- labels : ['y', 'Entries']
14
- title : 'Title for Y plot'
@@ -1,13 +0,0 @@
1
- saving:
2
- plt_dir : tests/plotting/weights
3
- plots:
4
- x :
5
- weights : weights
6
- binning : [-5.0, 8.0, 40]
7
- yscale : 'linear'
8
- labels : ['x', 'Entries']
9
- y :
10
- weights : weights
11
- binning : [-5.0, 8.0, 40]
12
- yscale : 'linear'
13
- labels : ['y', 'Entries']
@@ -1,4 +0,0 @@
1
- [trf]
2
- [trf.append]
3
- 'primes are'=['2', '3', '5']
4
- 'days are'=['Monday', 'Tuesday', 'Wednesday']
@@ -1,6 +0,0 @@
1
- the
2
- first
3
- primes are
4
- and
5
- the first
6
- days are
@@ -1,8 +0,0 @@
1
- [settings]
2
- as_substring=true
3
- format ='--> {} <--'
4
-
5
- [trf]
6
- [trf.append]
7
- 'primes are'=['2', '3', '5']
8
- 'days are'=['Monday', 'Tuesday', 'Wednesday']
@@ -1,6 +0,0 @@
1
- the
2
- first
3
- primes are:
4
- and
5
- the first
6
- days are:
@@ -1,12 +0,0 @@
1
- the
2
- first
3
- primes are
4
- 2
5
- 3
6
- 5
7
- and
8
- the first
9
- days are
10
- Monday
11
- Tuesday
12
- Wednesday
dmu_scripts/git/publish DELETED
@@ -1,89 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # --------------------------
4
- display_help()
5
- {
6
- echo "Script meant to:"
7
- echo ""
8
- echo "1. Check if version in pyproject.toml has been modified"
9
- echo "2. If it has create new tag following version name"
10
- echo "3. Push to remote "
11
- }
12
- # --------------------------
13
- get_opts()
14
- {
15
- while getopts :hf: option; do
16
- case "${option}" in
17
- h)
18
- display_help
19
- exit 0
20
- ;;
21
- \?) echo "Invalid option: -${OPTARG}"
22
- display_help
23
- exit 1
24
- ;;
25
- :) echo "$0: Arguments needed"
26
- display_help
27
- exit 1
28
- ;;
29
- esac
30
- done
31
- }
32
- # --------------------------
33
- # Picks up version from pyproject.toml
34
- get_version()
35
- {
36
- if [[ ! -f pyproject.toml ]];then
37
- echo "Cannot find pyproject.toml"
38
- exit 1
39
- fi
40
-
41
- VERSION_LINE=$(grep version pyproject.toml)
42
-
43
- if [[ $? -ne 0 ]];then
44
- ehco "Could not extract version from pyproject.toml"
45
- exit 1
46
- fi
47
-
48
- if [[ "$VERSION_LINE" =~ .*([0-9]\.[0-9]\.[0-9]).* ]];then
49
- VERSION=${BASH_REMATCH[1]}
50
- echo "Using version: $VERSION"
51
- return
52
- fi
53
-
54
- echo "Could not extract version from: $VERSION_LINE"
55
- exit 1
56
- }
57
- # --------------------------
58
- create_tag()
59
- {
60
- git tag -n | grep $VERSION
61
-
62
- if [[ $? -eq 0 ]];then
63
- echo "Version found among tags, not tagging"
64
- return
65
- fi
66
-
67
- echo "Version $VERSION not found among tags, creating new tag"
68
-
69
- git tag -a $VERSION
70
- }
71
- # --------------------------
72
- push_all()
73
- {
74
- for REMOTE in $(git remote);do
75
- echo "Pushing tags and commits to remote: $REMOTE"
76
- git add pyproject.toml
77
- git commit -m "Publication commit"
78
-
79
- git pull $REMOTE HEAD
80
- git push -u $REMOTE HEAD
81
- git push $REMOTE --tags
82
- done
83
- }
84
- # --------------------------
85
- get_opts "$@"
86
-
87
- get_version
88
- create_tag
89
- push_all
@@ -1,121 +0,0 @@
1
- '''
2
- Script meant to do truth matching checks
3
- '''
4
- import os
5
- import copy
6
- import argparse
7
-
8
- import yaml
9
- import mplhep
10
- import matplotlib.pyplot as plt
11
-
12
- from ROOT import RDataFrame
13
-
14
- from dmu.logging.log_store import LogStore
15
- from dmu.plotting.plotter_1d import Plotter1D as Plotter
16
-
17
- log=LogStore.add_logger('dmu:physics:check_truth')
18
- # ----------------------------------
19
- def _set_logs() -> None:
20
- LogStore.set_level('dmu:plotting:Plotter' , 30)
21
- LogStore.set_level('dmu:plotting:Plotter1D', 30)
22
- # ----------------------------------
23
- def _get_args() -> argparse.Namespace:
24
- '''
25
- Parse args
26
- '''
27
- parser = argparse.ArgumentParser(description='Script used to carry out checks on truth matching mechanisms for MC')
28
- parser.add_argument('-c', '--conf' , type=str, help='Path to config file', required=True)
29
- args = parser.parse_args()
30
-
31
- return args
32
- # ----------------------------------
33
- def _get_config(args : argparse.Namespace) -> dict:
34
- path = args.conf
35
- if not os.path.isfile(path):
36
- raise FileNotFoundError(f'Cannot find {path}')
37
-
38
- with open(path, encoding='utf-8') as ifile:
39
- cfg = yaml.safe_load(ifile)
40
-
41
- return cfg
42
- # ----------------------------------
43
- def _get_rdf(file_path : str, tree_path : str) -> RDataFrame:
44
- log.debug(f'Picking inputs from: {file_path}/{tree_path}')
45
- rdf = RDataFrame(tree_path, file_path)
46
-
47
- nentries = rdf.Count().GetValue()
48
- log.debug(f'Found {nentries} entries')
49
-
50
- return rdf
51
- # ----------------------------------
52
- def _preprocess_rdf(rdf : RDataFrame, cfg : dict) -> RDataFrame:
53
- if 'max_entries' in cfg:
54
- max_entries = cfg['max_entries']
55
- rdf = rdf.Range(max_entries)
56
-
57
- return rdf
58
- # ----------------------------------
59
- def _check(cfg : dict) -> None:
60
- log.info(110 * '-')
61
- log.info(f'{"Sample":<20}{"Method":<20}{"Initial":<15}{"":<15}{"Final":<15}{"":15}{"Efficiency":<10}')
62
- log.info(110 * '-')
63
-
64
- for sample_name in cfg['samples']:
65
- file_path = cfg['samples'][sample_name]['file_path']
66
- tree_path = cfg['samples'][sample_name]['tree_path']
67
- rdf = _get_rdf(file_path, tree_path)
68
- rdf = _preprocess_rdf(rdf, cfg)
69
-
70
- d_cut_true = {}
71
- d_cut_fake = {}
72
- for method, cut in cfg['samples'][sample_name]['methods'].items():
73
- _check_kind(rdf, sample_name, method, cut)
74
-
75
- d_cut_true[method] = cut
76
- d_cut_fake[method] = f'({cut}) == 0'
77
- log.info('')
78
-
79
- _plot_distributions(cfg, sample_name, rdf, d_cut_true, kind='matched')
80
- _plot_distributions(cfg, sample_name, rdf, d_cut_fake, kind='anti_matched')
81
- # ----------------------------------
82
- def _plot_distributions(cfg : dict, sample_name : str, rdf : RDataFrame, d_cut : dict[str,str], kind : str) -> None:
83
- cfg = copy.deepcopy(cfg)
84
- cfg_plt = cfg['samples'][sample_name]['plot']
85
- cfg_plt = _add_suffix(cfg_plt, sample_name, kind)
86
- d_rdf = { method : rdf.Filter(cut) for method, cut in d_cut.items() }
87
-
88
- ptr=Plotter(d_rdf=d_rdf, cfg=cfg_plt)
89
- ptr.run()
90
- # ----------------------------------
91
- def _add_suffix(cfg : dict, sample_name : str, kind : str) -> dict:
92
- d_var = cfg['plots']
93
- for var in d_var:
94
- d_var[var]['name'] = f'{var}_{kind}'
95
- d_var[var]['title'] = f'{sample_name}; {kind}'
96
-
97
- cfg['plots'] = d_var
98
-
99
- return cfg
100
- # ----------------------------------
101
- def _check_kind(rdf : RDataFrame, sample : str, name : str, cut : str) -> RDataFrame:
102
- nini = rdf.Count().GetValue()
103
- rdf = rdf.Filter(cut, name)
104
- nfnl = rdf.Count().GetValue()
105
- eff = nfnl / nini * 100
106
-
107
- log.info(f'{sample:<20}{name:<20}{nini:<15}{"":<15}{nfnl:<15}{"-->":15}{eff:10.2f}')
108
- # ----------------------------------
109
- def main():
110
- '''
111
- Script starts here
112
- '''
113
- _set_logs()
114
- args = _get_args()
115
- cfg = _get_config(args)
116
- plt.style.use(mplhep.style.LHCb2)
117
-
118
- _check(cfg)
119
- # ----------------------------------
120
- if __name__ == '__main__':
121
- main()