data-manipulation-utilities 0.2.8.dev714__py3-none-any.whl → 0.2.8.dev720__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/METADATA +33 -3
  2. data_manipulation_utilities-0.2.8.dev720.dist-info/RECORD +45 -0
  3. {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/WHEEL +1 -2
  4. data_manipulation_utilities-0.2.8.dev720.dist-info/entry_points.txt +8 -0
  5. data_manipulation_utilities-0.2.8.dev714.data/scripts/publish +0 -89
  6. data_manipulation_utilities-0.2.8.dev714.dist-info/RECORD +0 -93
  7. data_manipulation_utilities-0.2.8.dev714.dist-info/entry_points.txt +0 -7
  8. data_manipulation_utilities-0.2.8.dev714.dist-info/top_level.txt +0 -3
  9. dmu_data/__init__.py +0 -0
  10. dmu_data/ml/tests/diagnostics_from_file.yaml +0 -13
  11. dmu_data/ml/tests/diagnostics_from_model.yaml +0 -10
  12. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +0 -10
  13. dmu_data/ml/tests/diagnostics_overlay.yaml +0 -33
  14. dmu_data/ml/tests/train_mva.yaml +0 -60
  15. dmu_data/ml/tests/train_mva_def.yaml +0 -75
  16. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +0 -87
  17. dmu_data/ml/tests/train_mva_with_preffix.yaml +0 -58
  18. dmu_data/plotting/tests/2d.yaml +0 -24
  19. dmu_data/plotting/tests/fig_size.yaml +0 -13
  20. dmu_data/plotting/tests/high_stat.yaml +0 -22
  21. dmu_data/plotting/tests/legend.yaml +0 -12
  22. dmu_data/plotting/tests/line.yaml +0 -15
  23. dmu_data/plotting/tests/name.yaml +0 -14
  24. dmu_data/plotting/tests/no_bounds.yaml +0 -12
  25. dmu_data/plotting/tests/normalized.yaml +0 -9
  26. dmu_data/plotting/tests/plug_fwhm.yaml +0 -24
  27. dmu_data/plotting/tests/plug_stats.yaml +0 -19
  28. dmu_data/plotting/tests/simple.yaml +0 -9
  29. dmu_data/plotting/tests/stats.yaml +0 -9
  30. dmu_data/plotting/tests/styling.yaml +0 -18
  31. dmu_data/plotting/tests/title.yaml +0 -14
  32. dmu_data/plotting/tests/weights.yaml +0 -13
  33. dmu_data/rfile/friends.yaml +0 -13
  34. dmu_data/stats/fitter/test_simple.yaml +0 -28
  35. dmu_data/stats/kde_optimizer/control.json +0 -1
  36. dmu_data/stats/kde_optimizer/signal.json +0 -1
  37. dmu_data/stats/parameters/data.yaml +0 -178
  38. dmu_data/tests/config.json +0 -6
  39. dmu_data/tests/config.yaml +0 -4
  40. dmu_data/tests/pdf_to_tex.txt +0 -34
  41. dmu_data/text/transform.toml +0 -4
  42. dmu_data/text/transform.txt +0 -6
  43. dmu_data/text/transform_set.toml +0 -8
  44. dmu_data/text/transform_set.txt +0 -6
  45. dmu_data/text/transform_trf.txt +0 -12
  46. dmu_scripts/git/publish +0 -89
  47. dmu_scripts/kerberos/check_expiration +0 -21
  48. dmu_scripts/kerberos/convert_certificate +0 -22
  49. dmu_scripts/ml/compare_classifiers.py +0 -85
  50. dmu_scripts/physics/check_truth.py +0 -121
  51. dmu_scripts/rfile/compare_root_files.py +0 -299
  52. dmu_scripts/rfile/print_trees.py +0 -35
  53. dmu_scripts/ssh/coned.py +0 -168
  54. dmu_scripts/text/transform_text.py +0 -46
@@ -1,178 +0,0 @@
1
- exp:
2
- c:
3
- val : -0.010
4
- low : -0.020
5
- high: -0.0001
6
- hypexp:
7
- mu:
8
- val : 5000
9
- low : 4000
10
- high: 6000
11
- ap:
12
- val : 0.020
13
- low : 0
14
- high: 0.10
15
- bt:
16
- val : 0.002
17
- low : 0.0001
18
- high: 0.003
19
- modexp:
20
- mu:
21
- val : 4250
22
- low : 4250
23
- high: 4500
24
- ap:
25
- val : 0.002
26
- low : 0.002
27
- high: 0.026
28
- bt:
29
- val : 0.002
30
- low : 0.002
31
- high: 0.020
32
- pol1:
33
- a:
34
- val : -0.005
35
- low : -0.95
36
- high: 0.00
37
- pol2:
38
- a:
39
- val : -0.005
40
- low : -0.95
41
- high: 0.00
42
- b:
43
- val : 0.000
44
- low : -0.95
45
- high: 0.95
46
- pol3:
47
- a:
48
- val : -0.005
49
- low : -0.95
50
- high: 0.00
51
- b:
52
- val : 0.000
53
- low : -0.95
54
- high: 0.95
55
- c:
56
- val : 0.000
57
- low : -0.95
58
- high: 0.95
59
- cbr:
60
- mu:
61
- val : 5300
62
- low : 5100
63
- high: 5500
64
- sg:
65
- val : 50
66
- low : 2
67
- high: 300
68
- ac:
69
- val : -2
70
- low : -14.
71
- high: -0.1
72
- nc:
73
- val : 1
74
- low : 0.5
75
- high: 150
76
- suj:
77
- mu:
78
- val : 4000
79
- low : 3500
80
- high: 5000
81
- sg:
82
- val : 10
83
- low : 2
84
- high: 5000
85
- gm:
86
- val : 1
87
- low : -10
88
- high: 10
89
- dl:
90
- val : 1
91
- low : 0.1
92
- high: 40
93
- cbl:
94
- mu:
95
- val : 5300
96
- low : 5100
97
- high: 5500
98
- sg:
99
- val : 50
100
- low : 2
101
- high: 300
102
- ac:
103
- val : 2
104
- low : 0.0
105
- high: 14.
106
- nc:
107
- val : 1
108
- low : 0.5
109
- high: 150
110
- gauss:
111
- mu:
112
- val : 5300
113
- low : 5100
114
- high: 5500
115
- sg:
116
- val : 50
117
- low : 2
118
- high: 300
119
- dscb:
120
- mu:
121
- val : 5300
122
- low : 5000
123
- high: 5400
124
- sg:
125
- val : 50
126
- low : 5
127
- high: 500
128
- ar:
129
- val : 1
130
- low : 0
131
- high: 5
132
- al:
133
- val : 1
134
- low : 0
135
- high: 5
136
- nr:
137
- val : 2
138
- low : 1
139
- high: 150
140
- nl:
141
- val : 2
142
- low : 0
143
- high: 150
144
- voigt:
145
- mu:
146
- val : 5280
147
- low : 5040
148
- high: 5500
149
- sg:
150
- val : 20
151
- low : 10
152
- high: 400
153
- gm:
154
- val : 4
155
- low : 0.1
156
- high: 100
157
- qgauss:
158
- mu:
159
- val : 5280
160
- low : 5040
161
- high: 5500
162
- sg:
163
- val : 20
164
- low : 10
165
- high: 400
166
- q:
167
- val : 1
168
- low : 1
169
- high: 3
170
- cauchy:
171
- mu:
172
- val : 5280
173
- low : 5040
174
- high: 5500
175
- gm:
176
- val : 150
177
- low : 50
178
- high: 500
@@ -1,6 +0,0 @@
1
- {
2
- "key" : [
3
- "value1",
4
- "value2",
5
- "value3"]
6
- }
@@ -1,4 +0,0 @@
1
- key:
2
- - value1
3
- - value2
4
- - value3
@@ -1,34 +0,0 @@
1
- --------------------
2
- PDF: SumPDF
3
- OBS: <zfit Space obs=('B_M_brem_track_2',), axes=(0,), limits=(array([[4500.]]), array([[7000.]])), binned=False>
4
- Name Value Low HighFloating Constraint
5
- --------------------
6
- ar_dscb_Signal_002_1_reso_flt 1.000e+00 2.000e-01 5.000e+00 1 9.062e-01___1.910e-02
7
- c_exp_cmb_1 -1.000e-02 -2.000e-02 -1.000e-04 1 none
8
- frac_brem_001 3.000e-01 0.000e+00 1.000e+00 1 3.273e-01___1.157e-05
9
- mu_Signal_001_scale_flt 0.000e+00 -1.000e+02 1.000e+02 1 -1.876e+01___7.881e-01
10
- mu_Signal_002_scale_flt 0.000e+00 -1.000e+02 1.000e+02 1 -1.228e+01___4.835e-01
11
- nBu_JpsiK_ee_eq_DPC 0.000e+00 0.000e+00 1.000e+04 1 none
12
- ncmb 0.000e+00 0.000e+00 2.000e+04 1 none
13
- nl_dscb_Signal_001_1_reso_flt 1.000e+00 2.000e-01 5.000e+00 1 8.314e+00___1.204e+00
14
- nr_dscb_Signal_002_1_reso_flt 1.000e+00 2.000e-01 5.000e+00 1 1.770e+00___5.338e-02
15
- nsig 0.000e+00 0.000e+00 1.000e+04 1 none
16
- sBd_Kstee_eq_btosllball05_DPC 0.000e+00 0.000e+00 1.000e+01 1 1.547e-01___1.552e-02
17
- sBu_Kstee_Kpi0_eq_btosllball05_DPC 0.000e+00 0.000e+00 1.000e+01 1 1.698e-01___2.266e-02
18
- sg_Signal_001_reso_flt 1.000e+00 2.000e-01 5.000e+00 1 1.399e+00___2.266e-02
19
- sg_Signal_002_reso_flt 1.000e+00 2.000e-01 5.000e+00 1 1.327e+00___1.401e-02
20
-
21
- al_dscb_Signal_001_1 3.297e-01 0.000e+00 5.000e+00 0 none
22
- al_dscb_Signal_002_1 3.684e-01 0.000e+00 5.000e+00 0 none
23
- ar_dscb_Signal_001_1 1.274e+00 0.000e+00 5.000e+00 0 none
24
- ar_dscb_Signal_002_1 8.425e-01 0.000e+00 5.000e+00 0 none
25
- frc_Signal_001_1 2.911e-02 0.000e+00 1.000e+00 0 none
26
- frc_Signal_002_1 1.074e-01 0.000e+00 1.000e+00 0 none
27
- mu_Signal_001 5.231e+03 5.100e+03 5.500e+03 0 none
28
- mu_Signal_002 5.230e+03 5.100e+03 5.500e+03 0 none
29
- nl_dscb_Signal_001_1 9.130e+00 0.000e+00 1.500e+02 0 none
30
- nl_dscb_Signal_002_1 5.152e+01 0.000e+00 1.500e+02 0 none
31
- nr_dscb_Signal_001_1 2.483e+00 1.000e+00 1.500e+02 0 none
32
- nr_dscb_Signal_002_1 3.801e+00 1.000e+00 1.500e+02 0 none
33
- sg_Signal_001 5.155e+01 2.000e+00 3.000e+02 0 none
34
- sg_Signal_002 7.496e+01 2.000e+00 3.000e+02 0 none
@@ -1,4 +0,0 @@
1
- [trf]
2
- [trf.append]
3
- 'primes are'=['2', '3', '5']
4
- 'days are'=['Monday', 'Tuesday', 'Wednesday']
@@ -1,6 +0,0 @@
1
- the
2
- first
3
- primes are
4
- and
5
- the first
6
- days are
@@ -1,8 +0,0 @@
1
- [settings]
2
- as_substring=true
3
- format ='--> {} <--'
4
-
5
- [trf]
6
- [trf.append]
7
- 'primes are'=['2', '3', '5']
8
- 'days are'=['Monday', 'Tuesday', 'Wednesday']
@@ -1,6 +0,0 @@
1
- the
2
- first
3
- primes are:
4
- and
5
- the first
6
- days are:
@@ -1,12 +0,0 @@
1
- the
2
- first
3
- primes are
4
- 2
5
- 3
6
- 5
7
- and
8
- the first
9
- days are
10
- Monday
11
- Tuesday
12
- Wednesday
dmu_scripts/git/publish DELETED
@@ -1,89 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # --------------------------
4
- display_help()
5
- {
6
- echo "Script meant to:"
7
- echo ""
8
- echo "1. Check if version in pyproject.toml has been modified"
9
- echo "2. If it has create new tag following version name"
10
- echo "3. Push to remote "
11
- }
12
- # --------------------------
13
- get_opts()
14
- {
15
- while getopts :hf: option; do
16
- case "${option}" in
17
- h)
18
- display_help
19
- exit 0
20
- ;;
21
- \?) echo "Invalid option: -${OPTARG}"
22
- display_help
23
- exit 1
24
- ;;
25
- :) echo "$0: Arguments needed"
26
- display_help
27
- exit 1
28
- ;;
29
- esac
30
- done
31
- }
32
- # --------------------------
33
- # Picks up version from pyproject.toml
34
- get_version()
35
- {
36
- if [[ ! -f pyproject.toml ]];then
37
- echo "Cannot find pyproject.toml"
38
- exit 1
39
- fi
40
-
41
- VERSION_LINE=$(grep version pyproject.toml)
42
-
43
- if [[ $? -ne 0 ]];then
44
- ehco "Could not extract version from pyproject.toml"
45
- exit 1
46
- fi
47
-
48
- if [[ "$VERSION_LINE" =~ .*([0-9]\.[0-9]\.[0-9]).* ]];then
49
- VERSION=${BASH_REMATCH[1]}
50
- echo "Using version: $VERSION"
51
- return
52
- fi
53
-
54
- echo "Could not extract version from: $VERSION_LINE"
55
- exit 1
56
- }
57
- # --------------------------
58
- create_tag()
59
- {
60
- git tag -n | grep $VERSION
61
-
62
- if [[ $? -eq 0 ]];then
63
- echo "Version found among tags, not tagging"
64
- return
65
- fi
66
-
67
- echo "Version $VERSION not found among tags, creating new tag"
68
-
69
- git tag -a $VERSION
70
- }
71
- # --------------------------
72
- push_all()
73
- {
74
- for REMOTE in $(git remote);do
75
- echo "Pushing tags and commits to remote: $REMOTE"
76
- git add pyproject.toml
77
- git commit -m "Publication commit"
78
-
79
- git pull $REMOTE HEAD
80
- git push -u $REMOTE HEAD
81
- git push $REMOTE --tags
82
- done
83
- }
84
- # --------------------------
85
- get_opts "$@"
86
-
87
- get_version
88
- create_tag
89
- push_all
@@ -1,21 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- : '
4
- This script is meant to check the expiration date of a grid certificate
5
-
6
- Usage:
7
-
8
- ./check_expiration
9
- '
10
-
11
- check()
12
- {
13
- PEMFILE=$1
14
- if [[ ! -f $PEMFILE ]];then
15
- echo "Cannot find PEM file: $PEMFILE"
16
- fi
17
-
18
- openssl x509 -enddate -noout -in $PEMFILE
19
- }
20
-
21
- check usercert.pem
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- : '
4
- This script is used to convert p12 grid certificate files into PEM files
5
-
6
- Usage:
7
-
8
- ./convert_certificate cert.p12
9
- '
10
-
11
- CERTIFICATE=$1
12
-
13
- if [[ ! -f $CERTIFICATE ]];then
14
- echo "ERROR::Certificate \"$CERTIFICATE\" does not exist."
15
- kill -INT $$
16
- fi
17
-
18
- openssl pkcs12 -in $CERTIFICATE -clcerts -nokeys -out usercert.pem
19
- openssl pkcs12 -in $CERTIFICATE -nocerts -out userkey.pem
20
- chmod 400 userkey.pem
21
- chmod 444 usercert.pem
22
-
@@ -1,85 +0,0 @@
1
- '''
2
- Script used to compare performance of classifiers
3
- '''
4
- import os
5
- import argparse
6
- import yaml
7
- import mplhep
8
- import matplotlib.pyplot as plt
9
- import pandas as pnd
10
-
11
- from sklearn.metrics import auc
12
- from dmu.logging.log_store import LogStore
13
-
14
- log=LogStore.add_logger('dmu:ml:compare_classifiers')
15
- # ------------------------------
16
- class Data:
17
- '''
18
- Data class
19
- '''
20
- out_path : str
21
- cfg_path : str
22
- logl : int
23
- cfg : dict
24
-
25
- plt.style.use(mplhep.style.LHCb2)
26
- # ------------------------------
27
- def _initialize() -> None:
28
- log.info(f'Loading settings from: {Data.cfg_path}')
29
- with open(Data.cfg_path, encoding='utf-8') as ifile:
30
- Data.cfg = yaml.safe_load(ifile)
31
-
32
- Data.out_path = Data.cfg['out_dir']
33
- os.makedirs(Data.out_path, exist_ok=True)
34
- # ------------------------------
35
- def _parse_args():
36
- parser = argparse.ArgumentParser(description='Used to perform comparisons of classifier performances')
37
- parser.add_argument('-c', '--conf' , help='Path to configuration path', required=True)
38
- parser.add_argument('-l', '--logl' , help='Logging level', choices=[10, 20, 30], default=20)
39
- args = parser.parse_args()
40
-
41
- Data.cfg_path = args.conf
42
- Data.logl = args.logl
43
- # ------------------------------
44
- def _plot_roc(name : str, path : str) -> None:
45
- roc_path = f'{path}/fold_all/roc.json'
46
- df = pnd.read_json(roc_path)
47
-
48
- plt.figure(num='ROC')
49
- xval = df['x'].to_numpy()
50
- yval = df['y'].to_numpy()
51
- area = auc(xval, yval)
52
-
53
- plt.plot(xval, yval, label=f'{name}: {area:.3f}')
54
- # ------------------------------
55
- def _compare():
56
- for name, cls_path in Data.cfg['classifiers'].items():
57
- _plot_roc(name=name, path=cls_path)
58
-
59
- _save_roc()
60
- # ------------------------------
61
- def _save_roc():
62
- d_set = Data.cfg['roc']
63
- if 'xrange' in d_set:
64
- plt.xlim(d_set['xrange'])
65
-
66
- if 'yrange' in d_set:
67
- plt.ylim(d_set['yrange'])
68
-
69
- plt.figure(num='ROC')
70
- plt.legend()
71
- plt.grid()
72
- plt.xlabel('Signal Efficiency')
73
- plt.ylabel('Background Rejection')
74
- plt.savefig(f'{Data.out_path}/roc.png')
75
- # ------------------------------
76
- def main():
77
- '''
78
- Start here
79
- '''
80
- _parse_args()
81
- _initialize()
82
- _compare()
83
- # ------------------------------
84
- if __name__ == '__main__':
85
- main()
@@ -1,121 +0,0 @@
1
- '''
2
- Script meant to do truth matching checks
3
- '''
4
- import os
5
- import copy
6
- import argparse
7
-
8
- import yaml
9
- import mplhep
10
- import matplotlib.pyplot as plt
11
-
12
- from ROOT import RDataFrame
13
-
14
- from dmu.logging.log_store import LogStore
15
- from dmu.plotting.plotter_1d import Plotter1D as Plotter
16
-
17
- log=LogStore.add_logger('dmu:physics:check_truth')
18
- # ----------------------------------
19
- def _set_logs() -> None:
20
- LogStore.set_level('dmu:plotting:Plotter' , 30)
21
- LogStore.set_level('dmu:plotting:Plotter1D', 30)
22
- # ----------------------------------
23
- def _get_args() -> argparse.Namespace:
24
- '''
25
- Parse args
26
- '''
27
- parser = argparse.ArgumentParser(description='Script used to carry out checks on truth matching mechanisms for MC')
28
- parser.add_argument('-c', '--conf' , type=str, help='Path to config file', required=True)
29
- args = parser.parse_args()
30
-
31
- return args
32
- # ----------------------------------
33
- def _get_config(args : argparse.Namespace) -> dict:
34
- path = args.conf
35
- if not os.path.isfile(path):
36
- raise FileNotFoundError(f'Cannot find {path}')
37
-
38
- with open(path, encoding='utf-8') as ifile:
39
- cfg = yaml.safe_load(ifile)
40
-
41
- return cfg
42
- # ----------------------------------
43
- def _get_rdf(file_path : str, tree_path : str) -> RDataFrame:
44
- log.debug(f'Picking inputs from: {file_path}/{tree_path}')
45
- rdf = RDataFrame(tree_path, file_path)
46
-
47
- nentries = rdf.Count().GetValue()
48
- log.debug(f'Found {nentries} entries')
49
-
50
- return rdf
51
- # ----------------------------------
52
- def _preprocess_rdf(rdf : RDataFrame, cfg : dict) -> RDataFrame:
53
- if 'max_entries' in cfg:
54
- max_entries = cfg['max_entries']
55
- rdf = rdf.Range(max_entries)
56
-
57
- return rdf
58
- # ----------------------------------
59
- def _check(cfg : dict) -> None:
60
- log.info(110 * '-')
61
- log.info(f'{"Sample":<20}{"Method":<20}{"Initial":<15}{"":<15}{"Final":<15}{"":15}{"Efficiency":<10}')
62
- log.info(110 * '-')
63
-
64
- for sample_name in cfg['samples']:
65
- file_path = cfg['samples'][sample_name]['file_path']
66
- tree_path = cfg['samples'][sample_name]['tree_path']
67
- rdf = _get_rdf(file_path, tree_path)
68
- rdf = _preprocess_rdf(rdf, cfg)
69
-
70
- d_cut_true = {}
71
- d_cut_fake = {}
72
- for method, cut in cfg['samples'][sample_name]['methods'].items():
73
- _check_kind(rdf, sample_name, method, cut)
74
-
75
- d_cut_true[method] = cut
76
- d_cut_fake[method] = f'({cut}) == 0'
77
- log.info('')
78
-
79
- _plot_distributions(cfg, sample_name, rdf, d_cut_true, kind='matched')
80
- _plot_distributions(cfg, sample_name, rdf, d_cut_fake, kind='anti_matched')
81
- # ----------------------------------
82
- def _plot_distributions(cfg : dict, sample_name : str, rdf : RDataFrame, d_cut : dict[str,str], kind : str) -> None:
83
- cfg = copy.deepcopy(cfg)
84
- cfg_plt = cfg['samples'][sample_name]['plot']
85
- cfg_plt = _add_suffix(cfg_plt, sample_name, kind)
86
- d_rdf = { method : rdf.Filter(cut) for method, cut in d_cut.items() }
87
-
88
- ptr=Plotter(d_rdf=d_rdf, cfg=cfg_plt)
89
- ptr.run()
90
- # ----------------------------------
91
- def _add_suffix(cfg : dict, sample_name : str, kind : str) -> dict:
92
- d_var = cfg['plots']
93
- for var in d_var:
94
- d_var[var]['name'] = f'{var}_{kind}'
95
- d_var[var]['title'] = f'{sample_name}; {kind}'
96
-
97
- cfg['plots'] = d_var
98
-
99
- return cfg
100
- # ----------------------------------
101
- def _check_kind(rdf : RDataFrame, sample : str, name : str, cut : str) -> RDataFrame:
102
- nini = rdf.Count().GetValue()
103
- rdf = rdf.Filter(cut, name)
104
- nfnl = rdf.Count().GetValue()
105
- eff = nfnl / nini * 100
106
-
107
- log.info(f'{sample:<20}{name:<20}{nini:<15}{"":<15}{nfnl:<15}{"-->":15}{eff:10.2f}')
108
- # ----------------------------------
109
- def main():
110
- '''
111
- Script starts here
112
- '''
113
- _set_logs()
114
- args = _get_args()
115
- cfg = _get_config(args)
116
- plt.style.use(mplhep.style.LHCb2)
117
-
118
- _check(cfg)
119
- # ----------------------------------
120
- if __name__ == '__main__':
121
- main()