data-manipulation-utilities 0.2.8.dev714__py3-none-any.whl → 0.2.8.dev725__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev725.dist-info}/METADATA +33 -3
- {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev725.dist-info}/RECORD +5 -16
- {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev725.dist-info}/WHEEL +1 -2
- data_manipulation_utilities-0.2.8.dev725.dist-info/entry_points.txt +8 -0
- dmu/plotting/plotter.py +23 -11
- data_manipulation_utilities-0.2.8.dev714.data/scripts/publish +0 -89
- data_manipulation_utilities-0.2.8.dev714.dist-info/entry_points.txt +0 -7
- data_manipulation_utilities-0.2.8.dev714.dist-info/top_level.txt +0 -3
- dmu_scripts/git/publish +0 -89
- dmu_scripts/kerberos/check_expiration +0 -21
- dmu_scripts/kerberos/convert_certificate +0 -22
- dmu_scripts/ml/compare_classifiers.py +0 -85
- dmu_scripts/physics/check_truth.py +0 -121
- dmu_scripts/rfile/compare_root_files.py +0 -299
- dmu_scripts/rfile/print_trees.py +0 -35
- dmu_scripts/ssh/coned.py +0 -168
- dmu_scripts/text/transform_text.py +0 -46
@@ -1,7 +1,36 @@
|
|
1
|
-
Metadata-Version: 2.
|
2
|
-
Name:
|
3
|
-
Version: 0.2.8.
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: data-manipulation-utilities
|
3
|
+
Version: 0.2.8.dev725
|
4
4
|
Summary: Project storing utilities needed to reduce boilerplate code when analyzing data
|
5
|
+
Requires-Python: >=3.10,<3.13
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
7
|
+
Classifier: Programming Language :: Python :: 3.10
|
8
|
+
Classifier: Programming Language :: Python :: 3.11
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
10
|
+
Provides-Extra: dev
|
11
|
+
Provides-Extra: fit
|
12
|
+
Provides-Extra: ml
|
13
|
+
Requires-Dist: PyYAML
|
14
|
+
Requires-Dist: awkward
|
15
|
+
Requires-Dist: awkward-pandas
|
16
|
+
Requires-Dist: dask[dataframe,distributed]
|
17
|
+
Requires-Dist: hist[plot]
|
18
|
+
Requires-Dist: joblib ; extra == "ml"
|
19
|
+
Requires-Dist: logzero
|
20
|
+
Requires-Dist: matplotlib
|
21
|
+
Requires-Dist: mplhep
|
22
|
+
Requires-Dist: numpy
|
23
|
+
Requires-Dist: omegaconf
|
24
|
+
Requires-Dist: optuna ; extra == "ml"
|
25
|
+
Requires-Dist: pandas
|
26
|
+
Requires-Dist: pytest ; extra == "dev"
|
27
|
+
Requires-Dist: scikit-learn ; extra == "ml"
|
28
|
+
Requires-Dist: scipy
|
29
|
+
Requires-Dist: tensorflow
|
30
|
+
Requires-Dist: toml
|
31
|
+
Requires-Dist: tqdm
|
32
|
+
Requires-Dist: uproot
|
33
|
+
Requires-Dist: zfit (==0.26.0) ; extra == "fit"
|
5
34
|
Description-Content-Type: text/markdown
|
6
35
|
|
7
36
|
[TOC]
|
@@ -1793,3 +1822,4 @@ lxplus:
|
|
1793
1822
|
```
|
1794
1823
|
|
1795
1824
|
and should be placed in `$HOME/.config/dmu/ssh/servers.yaml`
|
1825
|
+
|
@@ -1,4 +1,3 @@
|
|
1
|
-
data_manipulation_utilities-0.2.8.dev714.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
|
2
1
|
dmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
2
|
dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
|
4
3
|
dmu/generic/hashing.py,sha256=QR5Gbv6-ANvi5hL232UNMrw9DONpU27BWTynXGxQLGU,1806
|
@@ -15,7 +14,7 @@ dmu/ml/utilities.py,sha256=A9j3tBh-jfaFdwwLUleo1QnttfawN7XDiQRh4VTvqVY,4597
|
|
15
14
|
dmu/pdataframe/utilities.py,sha256=xl6iLVKUccqVXYjuHsDUZ6UrCKQPw1k8D-f6407Yq30,2742
|
16
15
|
dmu/plotting/fwhm.py,sha256=4e8n6624pxWLcOOtayCQ_hDSSMKU21-3UsdmbkX1ojk,1949
|
17
16
|
dmu/plotting/matrix.py,sha256=s_5W8O3yXF3u8OX3f4J4hCoxIVZt1TF8S-qJsFBh2Go,5005
|
18
|
-
dmu/plotting/plotter.py,sha256=
|
17
|
+
dmu/plotting/plotter.py,sha256=5N5mLdQAqOUs43ukX5mT9nRaYD5dkn_sED5NoZJV5A0,8483
|
19
18
|
dmu/plotting/plotter_1d.py,sha256=Kyoyh-QyZLXXqX19wqEDUWCD1nJEvEonGp9nlgEaoZE,10936
|
20
19
|
dmu/plotting/plotter_2d.py,sha256=dXC-7Rsquibe5cn7622ryoKpuv7KCAmouIIXwQ_VEFM,3172
|
21
20
|
dmu/plotting/utilities.py,sha256=SI9dvtZq2gr-PXVz71KE4o0i09rZOKgqJKD1jzf6KXk,1167
|
@@ -77,17 +76,7 @@ dmu_data/text/transform.txt,sha256=EX760da6Vkf-_EPxnQlC5hGSkfFhJCCGCD19NU-1Qto,4
|
|
77
76
|
dmu_data/text/transform_set.toml,sha256=Jeh7BTz82idqvbOQJtl9-ur56mZkzDn5WtvmIb48LoE,150
|
78
77
|
dmu_data/text/transform_set.txt,sha256=1KivMoP9LxPn9955QrRmOzjEqduEjhTetQ9MXykO5LY,46
|
79
78
|
dmu_data/text/transform_trf.txt,sha256=zxBRTgcSmX7RdqfmWF88W1YqbyNHa4Ccruf1MmnYv2A,74
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
dmu_scripts/physics/check_truth.py,sha256=b1P_Pa9ef6VcFtyY6Y9KS9Om9L-QrCBjDKp4dqca0PQ,3964
|
85
|
-
dmu_scripts/rfile/compare_root_files.py,sha256=T8lDnQxsRNMr37x1Y7YvWD8ySHrJOWZki7ZQynxXX9Q,9540
|
86
|
-
dmu_scripts/rfile/print_trees.py,sha256=Ze4Ccl_iUldl4eVEDVnYBoe4amqBT1fSBR1zN5WSztk,941
|
87
|
-
dmu_scripts/ssh/coned.py,sha256=lhilYNHWRCGxC-jtyJ3LQ4oUgWW33B2l1tYCcyHHsR0,4858
|
88
|
-
dmu_scripts/text/transform_text.py,sha256=9akj1LB0HAyopOvkLjNOJiptZw5XoOQLe17SlcrGMD0,1456
|
89
|
-
data_manipulation_utilities-0.2.8.dev714.dist-info/METADATA,sha256=M5n-tPUt3o_0kY4viuQj6lbP4JQxWhpxkSnWCW29PFg,50263
|
90
|
-
data_manipulation_utilities-0.2.8.dev714.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
91
|
-
data_manipulation_utilities-0.2.8.dev714.dist-info/entry_points.txt,sha256=-02cr8ibY6L_reX-_Owz2N7OUQyTAwydRIvLr9kKZK0,332
|
92
|
-
data_manipulation_utilities-0.2.8.dev714.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
|
93
|
-
data_manipulation_utilities-0.2.8.dev714.dist-info/RECORD,,
|
79
|
+
data_manipulation_utilities-0.2.8.dev725.dist-info/METADATA,sha256=_4bxAW7aoKgPY3H1rKhp626lTEYrD2UWrwZX9avU750,51153
|
80
|
+
data_manipulation_utilities-0.2.8.dev725.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
81
|
+
data_manipulation_utilities-0.2.8.dev725.dist-info/entry_points.txt,sha256=M0C8_u9B_xSmyfemdPwdIBh9QuPIkjhEpG060Y5_Pjw,321
|
82
|
+
data_manipulation_utilities-0.2.8.dev725.dist-info/RECORD,,
|
@@ -0,0 +1,8 @@
|
|
1
|
+
[console_scripts]
|
2
|
+
check_truth=dmu_scripts.physics.check_truth:main
|
3
|
+
compare_classifiers=dmu_scripts.ml.compare_classifiers:main
|
4
|
+
compare_root_files=dmu_scripts.rfile.compare_root_files:main
|
5
|
+
coned=dmu_scripts.ssh.coned:main
|
6
|
+
print_trees=dmu_scripts.rfile.print_trees:main
|
7
|
+
transform_text=dmu_scripts.text.transform_text:main
|
8
|
+
|
dmu/plotting/plotter.py
CHANGED
@@ -5,12 +5,12 @@ Module containing plotter class
|
|
5
5
|
import os
|
6
6
|
import json
|
7
7
|
import math
|
8
|
-
from typing import Union
|
9
8
|
|
10
9
|
import numpy
|
11
10
|
import matplotlib.pyplot as plt
|
12
11
|
|
13
|
-
from ROOT import RDataFrame
|
12
|
+
from ROOT import RDataFrame, RDF
|
13
|
+
from omegaconf import DictConfig
|
14
14
|
from dmu.logging.log_store import LogStore
|
15
15
|
|
16
16
|
log = LogStore.add_logger('dmu:plotting:Plotter')
|
@@ -20,16 +20,28 @@ class Plotter:
|
|
20
20
|
Base class of Plotter1D and Plotter2D
|
21
21
|
'''
|
22
22
|
#-------------------------------------
|
23
|
-
def __init__(
|
24
|
-
|
23
|
+
def __init__(
|
24
|
+
self,
|
25
|
+
d_rdf: dict|None =None,
|
26
|
+
cfg : dict|DictConfig|None =None):
|
27
|
+
'''
|
28
|
+
Parameters
|
29
|
+
--------------
|
30
|
+
d_rdf: Dictionary where
|
31
|
+
key : Identifier of dataset
|
32
|
+
value: ROOT dataframe representing dataset
|
33
|
+
|
34
|
+
cfg : Dictionary or DictConfig instance holding configuration
|
35
|
+
'''
|
36
|
+
if not isinstance( cfg, (dict,DictConfig)):
|
25
37
|
raise ValueError('Config dictionary not passed')
|
26
38
|
|
27
39
|
if not isinstance(d_rdf, dict):
|
28
40
|
raise ValueError('Dataframe dictionary not passed')
|
29
41
|
|
30
42
|
self._d_cfg = cfg
|
31
|
-
self._d_rdf : dict[str,
|
32
|
-
self._d_wgt :
|
43
|
+
self._d_rdf : dict[str, RDF.RNode] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
|
44
|
+
self._d_wgt : dict[str, numpy.ndarray|None] | None
|
33
45
|
|
34
46
|
self._title : str = ''
|
35
47
|
#-------------------------------------
|
@@ -68,9 +80,9 @@ class Plotter:
|
|
68
80
|
|
69
81
|
return minx, maxx
|
70
82
|
#-------------------------------------
|
71
|
-
def _preprocess_rdf(self, rdf :
|
83
|
+
def _preprocess_rdf(self, rdf : RDF.RNode) -> RDF.RNode:
|
72
84
|
'''
|
73
|
-
rdf (
|
85
|
+
rdf (RDF.RNode): ROOT dataframe
|
74
86
|
|
75
87
|
returns preprocessed dataframe
|
76
88
|
'''
|
@@ -146,7 +158,7 @@ class Plotter:
|
|
146
158
|
|
147
159
|
return rdf
|
148
160
|
# --------------------------------------------
|
149
|
-
def _print_weights(self, arr_wgt :
|
161
|
+
def _print_weights(self, arr_wgt : numpy.ndarray|None, var : str, sample : str) -> None:
|
150
162
|
if arr_wgt is None:
|
151
163
|
log.debug(f'Not using weights for {sample}:{var}')
|
152
164
|
return
|
@@ -171,7 +183,7 @@ class Plotter:
|
|
171
183
|
|
172
184
|
return fig_size
|
173
185
|
#-------------------------------------
|
174
|
-
def _get_weights(self, var) ->
|
186
|
+
def _get_weights(self, var) -> dict[str, numpy.ndarray|None]| None:
|
175
187
|
d_cfg = self._d_cfg['plots'][var]
|
176
188
|
if 'weights' not in d_cfg:
|
177
189
|
return None
|
@@ -186,7 +198,7 @@ class Plotter:
|
|
186
198
|
|
187
199
|
return d_weight
|
188
200
|
# --------------------------------------------
|
189
|
-
def _read_weights(self, name : str, rdf :
|
201
|
+
def _read_weights(self, name : str, rdf : RDF.RNode) -> numpy.ndarray:
|
190
202
|
v_col = rdf.GetColumnNames()
|
191
203
|
l_col = [ col.c_str() for col in v_col ]
|
192
204
|
|
@@ -1,89 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
# --------------------------
|
4
|
-
display_help()
|
5
|
-
{
|
6
|
-
echo "Script meant to:"
|
7
|
-
echo ""
|
8
|
-
echo "1. Check if version in pyproject.toml has been modified"
|
9
|
-
echo "2. If it has create new tag following version name"
|
10
|
-
echo "3. Push to remote "
|
11
|
-
}
|
12
|
-
# --------------------------
|
13
|
-
get_opts()
|
14
|
-
{
|
15
|
-
while getopts :hf: option; do
|
16
|
-
case "${option}" in
|
17
|
-
h)
|
18
|
-
display_help
|
19
|
-
exit 0
|
20
|
-
;;
|
21
|
-
\?) echo "Invalid option: -${OPTARG}"
|
22
|
-
display_help
|
23
|
-
exit 1
|
24
|
-
;;
|
25
|
-
:) echo "$0: Arguments needed"
|
26
|
-
display_help
|
27
|
-
exit 1
|
28
|
-
;;
|
29
|
-
esac
|
30
|
-
done
|
31
|
-
}
|
32
|
-
# --------------------------
|
33
|
-
# Picks up version from pyproject.toml
|
34
|
-
get_version()
|
35
|
-
{
|
36
|
-
if [[ ! -f pyproject.toml ]];then
|
37
|
-
echo "Cannot find pyproject.toml"
|
38
|
-
exit 1
|
39
|
-
fi
|
40
|
-
|
41
|
-
VERSION_LINE=$(grep version pyproject.toml)
|
42
|
-
|
43
|
-
if [[ $? -ne 0 ]];then
|
44
|
-
ehco "Could not extract version from pyproject.toml"
|
45
|
-
exit 1
|
46
|
-
fi
|
47
|
-
|
48
|
-
if [[ "$VERSION_LINE" =~ .*([0-9]\.[0-9]\.[0-9]).* ]];then
|
49
|
-
VERSION=${BASH_REMATCH[1]}
|
50
|
-
echo "Using version: $VERSION"
|
51
|
-
return
|
52
|
-
fi
|
53
|
-
|
54
|
-
echo "Could not extract version from: $VERSION_LINE"
|
55
|
-
exit 1
|
56
|
-
}
|
57
|
-
# --------------------------
|
58
|
-
create_tag()
|
59
|
-
{
|
60
|
-
git tag -n | grep $VERSION
|
61
|
-
|
62
|
-
if [[ $? -eq 0 ]];then
|
63
|
-
echo "Version found among tags, not tagging"
|
64
|
-
return
|
65
|
-
fi
|
66
|
-
|
67
|
-
echo "Version $VERSION not found among tags, creating new tag"
|
68
|
-
|
69
|
-
git tag -a $VERSION
|
70
|
-
}
|
71
|
-
# --------------------------
|
72
|
-
push_all()
|
73
|
-
{
|
74
|
-
for REMOTE in $(git remote);do
|
75
|
-
echo "Pushing tags and commits to remote: $REMOTE"
|
76
|
-
git add pyproject.toml
|
77
|
-
git commit -m "Publication commit"
|
78
|
-
|
79
|
-
git pull $REMOTE HEAD
|
80
|
-
git push -u $REMOTE HEAD
|
81
|
-
git push $REMOTE --tags
|
82
|
-
done
|
83
|
-
}
|
84
|
-
# --------------------------
|
85
|
-
get_opts "$@"
|
86
|
-
|
87
|
-
get_version
|
88
|
-
create_tag
|
89
|
-
push_all
|
@@ -1,7 +0,0 @@
|
|
1
|
-
[console_scripts]
|
2
|
-
check_truth = dmu_scripts.physics.check_truth:main
|
3
|
-
compare_classifiers = dmu_scripts.ml.compare_classifiers:main
|
4
|
-
compare_root_files = dmu_scripts.rfile.compare_root_files:main
|
5
|
-
coned = dmu_scripts.ssh.coned:main
|
6
|
-
print_trees = dmu_scripts.rfile.print_trees:main
|
7
|
-
transform_text = dmu_scripts.text.transform_text:main
|
dmu_scripts/git/publish
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
# --------------------------
|
4
|
-
display_help()
|
5
|
-
{
|
6
|
-
echo "Script meant to:"
|
7
|
-
echo ""
|
8
|
-
echo "1. Check if version in pyproject.toml has been modified"
|
9
|
-
echo "2. If it has create new tag following version name"
|
10
|
-
echo "3. Push to remote "
|
11
|
-
}
|
12
|
-
# --------------------------
|
13
|
-
get_opts()
|
14
|
-
{
|
15
|
-
while getopts :hf: option; do
|
16
|
-
case "${option}" in
|
17
|
-
h)
|
18
|
-
display_help
|
19
|
-
exit 0
|
20
|
-
;;
|
21
|
-
\?) echo "Invalid option: -${OPTARG}"
|
22
|
-
display_help
|
23
|
-
exit 1
|
24
|
-
;;
|
25
|
-
:) echo "$0: Arguments needed"
|
26
|
-
display_help
|
27
|
-
exit 1
|
28
|
-
;;
|
29
|
-
esac
|
30
|
-
done
|
31
|
-
}
|
32
|
-
# --------------------------
|
33
|
-
# Picks up version from pyproject.toml
|
34
|
-
get_version()
|
35
|
-
{
|
36
|
-
if [[ ! -f pyproject.toml ]];then
|
37
|
-
echo "Cannot find pyproject.toml"
|
38
|
-
exit 1
|
39
|
-
fi
|
40
|
-
|
41
|
-
VERSION_LINE=$(grep version pyproject.toml)
|
42
|
-
|
43
|
-
if [[ $? -ne 0 ]];then
|
44
|
-
ehco "Could not extract version from pyproject.toml"
|
45
|
-
exit 1
|
46
|
-
fi
|
47
|
-
|
48
|
-
if [[ "$VERSION_LINE" =~ .*([0-9]\.[0-9]\.[0-9]).* ]];then
|
49
|
-
VERSION=${BASH_REMATCH[1]}
|
50
|
-
echo "Using version: $VERSION"
|
51
|
-
return
|
52
|
-
fi
|
53
|
-
|
54
|
-
echo "Could not extract version from: $VERSION_LINE"
|
55
|
-
exit 1
|
56
|
-
}
|
57
|
-
# --------------------------
|
58
|
-
create_tag()
|
59
|
-
{
|
60
|
-
git tag -n | grep $VERSION
|
61
|
-
|
62
|
-
if [[ $? -eq 0 ]];then
|
63
|
-
echo "Version found among tags, not tagging"
|
64
|
-
return
|
65
|
-
fi
|
66
|
-
|
67
|
-
echo "Version $VERSION not found among tags, creating new tag"
|
68
|
-
|
69
|
-
git tag -a $VERSION
|
70
|
-
}
|
71
|
-
# --------------------------
|
72
|
-
push_all()
|
73
|
-
{
|
74
|
-
for REMOTE in $(git remote);do
|
75
|
-
echo "Pushing tags and commits to remote: $REMOTE"
|
76
|
-
git add pyproject.toml
|
77
|
-
git commit -m "Publication commit"
|
78
|
-
|
79
|
-
git pull $REMOTE HEAD
|
80
|
-
git push -u $REMOTE HEAD
|
81
|
-
git push $REMOTE --tags
|
82
|
-
done
|
83
|
-
}
|
84
|
-
# --------------------------
|
85
|
-
get_opts "$@"
|
86
|
-
|
87
|
-
get_version
|
88
|
-
create_tag
|
89
|
-
push_all
|
@@ -1,21 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
: '
|
4
|
-
This script is meant to check the expiration date of a grid certificate
|
5
|
-
|
6
|
-
Usage:
|
7
|
-
|
8
|
-
./check_expiration
|
9
|
-
'
|
10
|
-
|
11
|
-
check()
|
12
|
-
{
|
13
|
-
PEMFILE=$1
|
14
|
-
if [[ ! -f $PEMFILE ]];then
|
15
|
-
echo "Cannot find PEM file: $PEMFILE"
|
16
|
-
fi
|
17
|
-
|
18
|
-
openssl x509 -enddate -noout -in $PEMFILE
|
19
|
-
}
|
20
|
-
|
21
|
-
check usercert.pem
|
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
: '
|
4
|
-
This script is used to convert p12 grid certificate files into PEM files
|
5
|
-
|
6
|
-
Usage:
|
7
|
-
|
8
|
-
./convert_certificate cert.p12
|
9
|
-
'
|
10
|
-
|
11
|
-
CERTIFICATE=$1
|
12
|
-
|
13
|
-
if [[ ! -f $CERTIFICATE ]];then
|
14
|
-
echo "ERROR::Certificate \"$CERTIFICATE\" does not exist."
|
15
|
-
kill -INT $$
|
16
|
-
fi
|
17
|
-
|
18
|
-
openssl pkcs12 -in $CERTIFICATE -clcerts -nokeys -out usercert.pem
|
19
|
-
openssl pkcs12 -in $CERTIFICATE -nocerts -out userkey.pem
|
20
|
-
chmod 400 userkey.pem
|
21
|
-
chmod 444 usercert.pem
|
22
|
-
|
@@ -1,85 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Script used to compare performance of classifiers
|
3
|
-
'''
|
4
|
-
import os
|
5
|
-
import argparse
|
6
|
-
import yaml
|
7
|
-
import mplhep
|
8
|
-
import matplotlib.pyplot as plt
|
9
|
-
import pandas as pnd
|
10
|
-
|
11
|
-
from sklearn.metrics import auc
|
12
|
-
from dmu.logging.log_store import LogStore
|
13
|
-
|
14
|
-
log=LogStore.add_logger('dmu:ml:compare_classifiers')
|
15
|
-
# ------------------------------
|
16
|
-
class Data:
|
17
|
-
'''
|
18
|
-
Data class
|
19
|
-
'''
|
20
|
-
out_path : str
|
21
|
-
cfg_path : str
|
22
|
-
logl : int
|
23
|
-
cfg : dict
|
24
|
-
|
25
|
-
plt.style.use(mplhep.style.LHCb2)
|
26
|
-
# ------------------------------
|
27
|
-
def _initialize() -> None:
|
28
|
-
log.info(f'Loading settings from: {Data.cfg_path}')
|
29
|
-
with open(Data.cfg_path, encoding='utf-8') as ifile:
|
30
|
-
Data.cfg = yaml.safe_load(ifile)
|
31
|
-
|
32
|
-
Data.out_path = Data.cfg['out_dir']
|
33
|
-
os.makedirs(Data.out_path, exist_ok=True)
|
34
|
-
# ------------------------------
|
35
|
-
def _parse_args():
|
36
|
-
parser = argparse.ArgumentParser(description='Used to perform comparisons of classifier performances')
|
37
|
-
parser.add_argument('-c', '--conf' , help='Path to configuration path', required=True)
|
38
|
-
parser.add_argument('-l', '--logl' , help='Logging level', choices=[10, 20, 30], default=20)
|
39
|
-
args = parser.parse_args()
|
40
|
-
|
41
|
-
Data.cfg_path = args.conf
|
42
|
-
Data.logl = args.logl
|
43
|
-
# ------------------------------
|
44
|
-
def _plot_roc(name : str, path : str) -> None:
|
45
|
-
roc_path = f'{path}/fold_all/roc.json'
|
46
|
-
df = pnd.read_json(roc_path)
|
47
|
-
|
48
|
-
plt.figure(num='ROC')
|
49
|
-
xval = df['x'].to_numpy()
|
50
|
-
yval = df['y'].to_numpy()
|
51
|
-
area = auc(xval, yval)
|
52
|
-
|
53
|
-
plt.plot(xval, yval, label=f'{name}: {area:.3f}')
|
54
|
-
# ------------------------------
|
55
|
-
def _compare():
|
56
|
-
for name, cls_path in Data.cfg['classifiers'].items():
|
57
|
-
_plot_roc(name=name, path=cls_path)
|
58
|
-
|
59
|
-
_save_roc()
|
60
|
-
# ------------------------------
|
61
|
-
def _save_roc():
|
62
|
-
d_set = Data.cfg['roc']
|
63
|
-
if 'xrange' in d_set:
|
64
|
-
plt.xlim(d_set['xrange'])
|
65
|
-
|
66
|
-
if 'yrange' in d_set:
|
67
|
-
plt.ylim(d_set['yrange'])
|
68
|
-
|
69
|
-
plt.figure(num='ROC')
|
70
|
-
plt.legend()
|
71
|
-
plt.grid()
|
72
|
-
plt.xlabel('Signal Efficiency')
|
73
|
-
plt.ylabel('Background Rejection')
|
74
|
-
plt.savefig(f'{Data.out_path}/roc.png')
|
75
|
-
# ------------------------------
|
76
|
-
def main():
|
77
|
-
'''
|
78
|
-
Start here
|
79
|
-
'''
|
80
|
-
_parse_args()
|
81
|
-
_initialize()
|
82
|
-
_compare()
|
83
|
-
# ------------------------------
|
84
|
-
if __name__ == '__main__':
|
85
|
-
main()
|
@@ -1,121 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Script meant to do truth matching checks
|
3
|
-
'''
|
4
|
-
import os
|
5
|
-
import copy
|
6
|
-
import argparse
|
7
|
-
|
8
|
-
import yaml
|
9
|
-
import mplhep
|
10
|
-
import matplotlib.pyplot as plt
|
11
|
-
|
12
|
-
from ROOT import RDataFrame
|
13
|
-
|
14
|
-
from dmu.logging.log_store import LogStore
|
15
|
-
from dmu.plotting.plotter_1d import Plotter1D as Plotter
|
16
|
-
|
17
|
-
log=LogStore.add_logger('dmu:physics:check_truth')
|
18
|
-
# ----------------------------------
|
19
|
-
def _set_logs() -> None:
|
20
|
-
LogStore.set_level('dmu:plotting:Plotter' , 30)
|
21
|
-
LogStore.set_level('dmu:plotting:Plotter1D', 30)
|
22
|
-
# ----------------------------------
|
23
|
-
def _get_args() -> argparse.Namespace:
|
24
|
-
'''
|
25
|
-
Parse args
|
26
|
-
'''
|
27
|
-
parser = argparse.ArgumentParser(description='Script used to carry out checks on truth matching mechanisms for MC')
|
28
|
-
parser.add_argument('-c', '--conf' , type=str, help='Path to config file', required=True)
|
29
|
-
args = parser.parse_args()
|
30
|
-
|
31
|
-
return args
|
32
|
-
# ----------------------------------
|
33
|
-
def _get_config(args : argparse.Namespace) -> dict:
|
34
|
-
path = args.conf
|
35
|
-
if not os.path.isfile(path):
|
36
|
-
raise FileNotFoundError(f'Cannot find {path}')
|
37
|
-
|
38
|
-
with open(path, encoding='utf-8') as ifile:
|
39
|
-
cfg = yaml.safe_load(ifile)
|
40
|
-
|
41
|
-
return cfg
|
42
|
-
# ----------------------------------
|
43
|
-
def _get_rdf(file_path : str, tree_path : str) -> RDataFrame:
|
44
|
-
log.debug(f'Picking inputs from: {file_path}/{tree_path}')
|
45
|
-
rdf = RDataFrame(tree_path, file_path)
|
46
|
-
|
47
|
-
nentries = rdf.Count().GetValue()
|
48
|
-
log.debug(f'Found {nentries} entries')
|
49
|
-
|
50
|
-
return rdf
|
51
|
-
# ----------------------------------
|
52
|
-
def _preprocess_rdf(rdf : RDataFrame, cfg : dict) -> RDataFrame:
|
53
|
-
if 'max_entries' in cfg:
|
54
|
-
max_entries = cfg['max_entries']
|
55
|
-
rdf = rdf.Range(max_entries)
|
56
|
-
|
57
|
-
return rdf
|
58
|
-
# ----------------------------------
|
59
|
-
def _check(cfg : dict) -> None:
|
60
|
-
log.info(110 * '-')
|
61
|
-
log.info(f'{"Sample":<20}{"Method":<20}{"Initial":<15}{"":<15}{"Final":<15}{"":15}{"Efficiency":<10}')
|
62
|
-
log.info(110 * '-')
|
63
|
-
|
64
|
-
for sample_name in cfg['samples']:
|
65
|
-
file_path = cfg['samples'][sample_name]['file_path']
|
66
|
-
tree_path = cfg['samples'][sample_name]['tree_path']
|
67
|
-
rdf = _get_rdf(file_path, tree_path)
|
68
|
-
rdf = _preprocess_rdf(rdf, cfg)
|
69
|
-
|
70
|
-
d_cut_true = {}
|
71
|
-
d_cut_fake = {}
|
72
|
-
for method, cut in cfg['samples'][sample_name]['methods'].items():
|
73
|
-
_check_kind(rdf, sample_name, method, cut)
|
74
|
-
|
75
|
-
d_cut_true[method] = cut
|
76
|
-
d_cut_fake[method] = f'({cut}) == 0'
|
77
|
-
log.info('')
|
78
|
-
|
79
|
-
_plot_distributions(cfg, sample_name, rdf, d_cut_true, kind='matched')
|
80
|
-
_plot_distributions(cfg, sample_name, rdf, d_cut_fake, kind='anti_matched')
|
81
|
-
# ----------------------------------
|
82
|
-
def _plot_distributions(cfg : dict, sample_name : str, rdf : RDataFrame, d_cut : dict[str,str], kind : str) -> None:
|
83
|
-
cfg = copy.deepcopy(cfg)
|
84
|
-
cfg_plt = cfg['samples'][sample_name]['plot']
|
85
|
-
cfg_plt = _add_suffix(cfg_plt, sample_name, kind)
|
86
|
-
d_rdf = { method : rdf.Filter(cut) for method, cut in d_cut.items() }
|
87
|
-
|
88
|
-
ptr=Plotter(d_rdf=d_rdf, cfg=cfg_plt)
|
89
|
-
ptr.run()
|
90
|
-
# ----------------------------------
|
91
|
-
def _add_suffix(cfg : dict, sample_name : str, kind : str) -> dict:
|
92
|
-
d_var = cfg['plots']
|
93
|
-
for var in d_var:
|
94
|
-
d_var[var]['name'] = f'{var}_{kind}'
|
95
|
-
d_var[var]['title'] = f'{sample_name}; {kind}'
|
96
|
-
|
97
|
-
cfg['plots'] = d_var
|
98
|
-
|
99
|
-
return cfg
|
100
|
-
# ----------------------------------
|
101
|
-
def _check_kind(rdf : RDataFrame, sample : str, name : str, cut : str) -> RDataFrame:
|
102
|
-
nini = rdf.Count().GetValue()
|
103
|
-
rdf = rdf.Filter(cut, name)
|
104
|
-
nfnl = rdf.Count().GetValue()
|
105
|
-
eff = nfnl / nini * 100
|
106
|
-
|
107
|
-
log.info(f'{sample:<20}{name:<20}{nini:<15}{"":<15}{nfnl:<15}{"-->":15}{eff:10.2f}')
|
108
|
-
# ----------------------------------
|
109
|
-
def main():
|
110
|
-
'''
|
111
|
-
Script starts here
|
112
|
-
'''
|
113
|
-
_set_logs()
|
114
|
-
args = _get_args()
|
115
|
-
cfg = _get_config(args)
|
116
|
-
plt.style.use(mplhep.style.LHCb2)
|
117
|
-
|
118
|
-
_check(cfg)
|
119
|
-
# ----------------------------------
|
120
|
-
if __name__ == '__main__':
|
121
|
-
main()
|
@@ -1,299 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Script used to compare ROOT files
|
3
|
-
'''
|
4
|
-
|
5
|
-
import re
|
6
|
-
import os
|
7
|
-
from dataclasses import dataclass
|
8
|
-
from typing import ClassVar
|
9
|
-
|
10
|
-
import argparse
|
11
|
-
|
12
|
-
import yaml
|
13
|
-
import numpy
|
14
|
-
from dmu.logging.log_store import LogStore
|
15
|
-
|
16
|
-
from ROOT import TFile, TTree, RDataFrame
|
17
|
-
|
18
|
-
import dmu.rfile.utilities as rfut
|
19
|
-
|
20
|
-
|
21
|
-
log=LogStore.add_logger('rx_scripts:compare_files')
|
22
|
-
#------------------
|
23
|
-
@dataclass
|
24
|
-
class Data:
|
25
|
-
'''
|
26
|
-
Class used to store shared attributes
|
27
|
-
'''
|
28
|
-
max_entries : int
|
29
|
-
max_trees : int
|
30
|
-
l_exclude : list[str]
|
31
|
-
raise_if_diff : bool
|
32
|
-
file_name_1 : str
|
33
|
-
file_name_2 : str
|
34
|
-
|
35
|
-
d_summary : ClassVar[dict]= {}
|
36
|
-
#------------------
|
37
|
-
def _print_trees_difference(l_val_1 : list[str], l_val_2 : list[str]) -> None:
|
38
|
-
s_val_1 = set(l_val_1)
|
39
|
-
s_val_2 = set(l_val_2)
|
40
|
-
|
41
|
-
s_only_1 = s_val_1 - s_val_2
|
42
|
-
s_only_2 = s_val_2 - s_val_1
|
43
|
-
|
44
|
-
Data.d_summary[f'Trees only in {Data.file_name_1}'] = list(s_only_1)
|
45
|
-
Data.d_summary[f'Trees only in {Data.file_name_2}'] = list(s_only_2)
|
46
|
-
|
47
|
-
nonly_1 = len(s_only_1)
|
48
|
-
nonly_2 = len(s_only_2)
|
49
|
-
|
50
|
-
if nonly_1 > 0:
|
51
|
-
log.info(f'Found {nonly_1} trees in first file but not second')
|
52
|
-
for name in s_only_1:
|
53
|
-
log.info(f'{"":<4}{name:<20}')
|
54
|
-
|
55
|
-
if nonly_2 > 0:
|
56
|
-
log.info(f'Found {nonly_2} trees in second file but not first')
|
57
|
-
for name in s_only_2:
|
58
|
-
log.info(f'{"":<4}{name:<20}')
|
59
|
-
#------------------
|
60
|
-
def _check_trees(d_tree_1 : dict[str, TTree], d_tree_2 : dict[str, TTree]):
|
61
|
-
'''
|
62
|
-
Check if dictionaries have same trees
|
63
|
-
For corresponding trees, check if number of entries is the same
|
64
|
-
'''
|
65
|
-
l_treename_1 = list(d_tree_1.keys())
|
66
|
-
l_treename_2 = list(d_tree_2.keys())
|
67
|
-
|
68
|
-
if l_treename_1 != l_treename_2:
|
69
|
-
log.warning('Files contain different trees')
|
70
|
-
_print_trees_difference(l_treename_1, l_treename_2)
|
71
|
-
|
72
|
-
s_treename_1 = set(l_treename_1)
|
73
|
-
s_treename_2 = set(l_treename_2)
|
74
|
-
s_treename = s_treename_1 & s_treename_2
|
75
|
-
|
76
|
-
for treename in s_treename:
|
77
|
-
if treename in Data.l_exclude:
|
78
|
-
continue
|
79
|
-
|
80
|
-
tree_1 = d_tree_1[treename]
|
81
|
-
tree_2 = d_tree_2[treename]
|
82
|
-
|
83
|
-
entries_1 = tree_1.GetEntries()
|
84
|
-
entries_2 = tree_2.GetEntries()
|
85
|
-
|
86
|
-
if entries_1 != entries_2:
|
87
|
-
raise ValueError(f'Tree {treename} differs in entries {entries_1}/{entries_2}')
|
88
|
-
|
89
|
-
return list(s_treename)
|
90
|
-
#------------------
|
91
|
-
def _get_data(tree : TTree) -> dict[str, numpy.ndarray]:
|
92
|
-
rdf = RDataFrame(tree)
|
93
|
-
if Data.max_entries > 0:
|
94
|
-
log.warning(f'Limiting to {Data.max_entries} entries')
|
95
|
-
rdf = rdf.Range(Data.max_entries)
|
96
|
-
|
97
|
-
d_data = rdf.AsNumpy(exclude=[])
|
98
|
-
|
99
|
-
return d_data
|
100
|
-
#------------------
|
101
|
-
def _check_branches(tree_name : str, l_branch_1 : list[str], l_branch_2 : list[str]) -> None:
|
102
|
-
'''
|
103
|
-
Takes lists of branch names
|
104
|
-
Checks if they are the same, if not print differences
|
105
|
-
|
106
|
-
if raise_if_diff is True, will raise exception if branches are not the same
|
107
|
-
'''
|
108
|
-
if l_branch_1 == l_branch_2:
|
109
|
-
return
|
110
|
-
|
111
|
-
s_branch_1 = set(l_branch_1)
|
112
|
-
s_branch_2 = set(l_branch_2)
|
113
|
-
|
114
|
-
s_branch_1_m_2 = s_branch_1.difference(s_branch_2)
|
115
|
-
log.info(f'Found len({s_branch_1_m_2}) branches in first tree but not second')
|
116
|
-
for branch_name in s_branch_1_m_2:
|
117
|
-
log.debug(f'{"":<4}{branch_name:<20}')
|
118
|
-
|
119
|
-
s_branch_2_m_1 = s_branch_2.difference(s_branch_1)
|
120
|
-
log.info(f'Found len({s_branch_2_m_1}) branches in second tree but not first')
|
121
|
-
for branch_name in s_branch_2_m_1:
|
122
|
-
log.debug(f'{"":<4}{branch_name:<20}')
|
123
|
-
|
124
|
-
Data.d_summary[tree_name] = {
|
125
|
-
f'Only {Data.file_name_1}' : list(s_branch_1_m_2),
|
126
|
-
f'Only {Data.file_name_2}' : list(s_branch_2_m_1),
|
127
|
-
}
|
128
|
-
|
129
|
-
if Data.raise_if_diff:
|
130
|
-
raise ValueError('Branches differ')
|
131
|
-
#------------------
|
132
|
-
def _compare_branches(tree_name : str, d_data_1 : dict[str, list], d_data_2 : dict[str, list]) -> list[str]:
|
133
|
-
'''
|
134
|
-
Will check for different branches in trees
|
135
|
-
Will return list of branch names for common branches
|
136
|
-
'''
|
137
|
-
l_branch_1 = list(d_data_1.keys())
|
138
|
-
l_branch_2 = list(d_data_2.keys())
|
139
|
-
|
140
|
-
l_branch_1.sort()
|
141
|
-
l_branch_2.sort()
|
142
|
-
_check_branches(tree_name, l_branch_1, l_branch_2)
|
143
|
-
|
144
|
-
s_branch_1 = set(l_branch_1)
|
145
|
-
s_branch_2 = set(l_branch_2)
|
146
|
-
|
147
|
-
s_branch = s_branch_1.intersection(s_branch_2)
|
148
|
-
|
149
|
-
return list(s_branch)
|
150
|
-
#------------------
|
151
|
-
def _compare(tree_name : str, d_data_1, d_data_2) -> None:
|
152
|
-
log.info('')
|
153
|
-
log.debug('Comparing branches')
|
154
|
-
l_branch_name = _compare_branches(tree_name, d_data_1, d_data_2)
|
155
|
-
|
156
|
-
log.debug('Comparing contents of branches')
|
157
|
-
l_diff_branch = []
|
158
|
-
for branch_name in l_branch_name:
|
159
|
-
arr_val_1 = d_data_1[branch_name]
|
160
|
-
arr_val_2 = d_data_2[branch_name]
|
161
|
-
|
162
|
-
if _contents_differ(tree_name, branch_name, arr_val_1, arr_val_2):
|
163
|
-
l_diff_branch.append(branch_name)
|
164
|
-
|
165
|
-
ndiff = len(l_diff_branch)
|
166
|
-
ntot = len(l_branch_name)
|
167
|
-
|
168
|
-
Data.d_summary[f'Branches that differ for tree: {tree_name}'] = l_diff_branch
|
169
|
-
|
170
|
-
if ndiff == 0:
|
171
|
-
log.debug(f'Trees {tree_name} have same contents')
|
172
|
-
return
|
173
|
-
|
174
|
-
log.warning(f'{ndiff:<10}{"differing branches out of":<20}{ntot:<10}{"in":<10}{tree_name:<50}')
|
175
|
-
for branch_name in l_diff_branch:
|
176
|
-
log.debug(f'{"":<4}{branch_name:<20}')
|
177
|
-
#------------------
|
178
|
-
def _contents_differ(tree_name : str, branch_name : str, arr_val_1 : numpy.ndarray, arr_val_2 : numpy.ndarray) -> bool:
|
179
|
-
is_different = False
|
180
|
-
str_type = str(arr_val_1.dtype)
|
181
|
-
if str_type == 'object':
|
182
|
-
return is_different
|
183
|
-
|
184
|
-
if str_type not in ['bool', 'int32', 'uint32', 'uint64', 'float64', 'float32']:
|
185
|
-
log.info(f'Skipping {branch_name}, {str_type}')
|
186
|
-
return is_different
|
187
|
-
|
188
|
-
if not numpy.array_equal(arr_val_1, arr_val_2):
|
189
|
-
is_different = True
|
190
|
-
|
191
|
-
log.debug(20 * '-')
|
192
|
-
log.debug(f'Branch {branch_name} in tree {tree_name} differ')
|
193
|
-
log.debug(20 * '-')
|
194
|
-
log.debug(arr_val_1)
|
195
|
-
log.debug(arr_val_2)
|
196
|
-
log.debug(20 * '-')
|
197
|
-
|
198
|
-
return is_different
|
199
|
-
#------------------
|
200
|
-
def _update_keys(d_tree):
|
201
|
-
d_out = {}
|
202
|
-
|
203
|
-
for key, val in d_tree.items():
|
204
|
-
#Remove everything before .root/ and use it as new key
|
205
|
-
new_key = re.sub(r'^.*\.root/', '', key)
|
206
|
-
d_out[new_key] = val
|
207
|
-
|
208
|
-
return d_out
|
209
|
-
#------------------
|
210
|
-
def _check_file_existence(path : str) -> None:
|
211
|
-
if not os.path.isfile(path):
|
212
|
-
raise FileNotFoundError(f'Cannot find {path}')
|
213
|
-
#------------------
|
214
|
-
def _validate(file_1 : str, file_2 : str) -> None:
    '''
    Opens both ROOT files and compares, tree by tree, the trees selected by `_check_trees`

    Parameters
    ------------------
    file_1: Path to the first file, called reference in the log messages
    file_2: Path to the second file, called new in the log messages

    Raises
    ------------------
    FileNotFoundError: If any of the paths does not point to a file
    '''
    _check_file_existence(file_1)
    _check_file_existence(file_2)

    ifile_1 = TFile(file_1)
    ifile_2 = TFile(file_2)

    # The comparison can raise, the finally block makes sure
    # that the files are closed in that case too
    try:
        d_tree_1 = rfut.get_trees_from_file(ifile_1)
        d_tree_1 = _update_keys(d_tree_1)

        d_tree_2 = rfut.get_trees_from_file(ifile_2)
        d_tree_2 = _update_keys(d_tree_2)

        l_tree_name = _check_trees(d_tree_1, d_tree_2)

        # Negative values of max_trees mean no limit
        if Data.max_trees > -1:
            log.warning(f'Limiting to {Data.max_trees} trees')
            l_tree_name = l_tree_name[:Data.max_trees]

        ncommon = len(l_tree_name)
        log.debug(f'Found common {ncommon} trees')
        for name in l_tree_name:
            log.debug(f'{"":<4}{name}')

        log.info('Checking trees')
        for treename in l_tree_name:
            if treename in Data.l_exclude:
                log.debug(f'Skipping {treename}')
                continue

            log.debug(f'{"":<4}{treename}')

            tree_1 = d_tree_1[treename]
            tree_2 = d_tree_2[treename]

            log.debug('Getting data from reference')
            d_data_1= _get_data(tree_1)

            log.debug('Getting data from new')
            d_data_2= _get_data(tree_2)

            log.debug(f'Comparing {treename}')
            _compare(treename, d_data_1, d_data_2)
    finally:
        ifile_1.Close()
        ifile_2.Close()
|
260
|
-
#------------------
|
261
|
-
def _save_summary() -> None:
    '''
    Writes Data.d_summary, in block style YAML, to summary.yaml in the current directory
    '''
    with open('summary.yaml', 'w', encoding='utf-8') as summary_file:
        yaml.dump(Data.d_summary, summary_file, default_flow_style=False, indent=2)
|
268
|
-
#------------------
|
269
|
-
def main():
    '''
    Script starts here

    Parses the command line arguments, stores them in Data,
    runs the comparison of the two files and saves the summary.
    '''
    parser = argparse.ArgumentParser(description='Used to validate versions of code that produce potentially different files')
    # Required: without it args.files would be None and the unpacking below would fail with a TypeError
    parser.add_argument('-f', '--files'        , nargs= 2  , help='List of files to compare', required=True)
    parser.add_argument('-n', '--max_entries'  , type=int  , help='Limit running over this number of entries. By default will run over everything', default=-1)
    parser.add_argument('-t', '--max_trees'    , type=int  , help='Limit running over this number of trees. By default will run over everything', default=-1)
    parser.add_argument('-l', '--log_level'    , type=int  , help='Logging level', default=20, choices=[10, 20, 30, 40])
    parser.add_argument('-e', '--exclude'      , nargs='+' , help='List of trees that should not be compared', default=[])
    parser.add_argument('-r', '--raise_if_diff', help='If used, will fail as soon as it finds trees with different branches.', action='store_true')

    args = parser.parse_args()

    # NOTE(review): confirm that this name matches the one used to create this script's logger
    LogStore.set_level('rx_scripts:compare_files', args.log_level)

    Data.max_entries   = args.max_entries
    Data.max_trees     = args.max_trees
    Data.l_exclude     = args.exclude
    Data.raise_if_diff = args.raise_if_diff

    [file_1, file_2] = args.files

    Data.file_name_1 = file_1
    Data.file_name_2 = file_2

    _validate(file_1, file_2)
    _save_summary()
#------------------
if __name__ == '__main__':
    main()
|
dmu_scripts/rfile/print_trees.py
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Script used to print contents of root files
|
3
|
-
'''
|
4
|
-
|
5
|
-
import argparse
|
6
|
-
|
7
|
-
from dmu.rfile.rfprinter import RFPrinter
|
8
|
-
|
9
|
-
# -----------------------------
|
10
|
-
class Data:
    '''
    Container for the settings shared by the functions of this script
    '''
    # Value of the -p/--path argument
    path   : str
    # Value of the -s/--screen flag
    screen : bool
|
16
|
-
# -----------------------------
|
17
|
-
def _get_args() -> None:
    '''
    Parses the command line arguments and stores them in Data
    '''
    parser = argparse.ArgumentParser(description='Script used to print information about ROOT files and dump it to text')
    # Required: otherwise None would be passed as the path to RFPrinter
    parser.add_argument('-p', '--path'  , type=str, help='Path to ROOT file', required=True)
    parser.add_argument('-s', '--screen', help='If used, will dump output to screen', action='store_true')
    args = parser.parse_args()

    Data.path  = args.path
    Data.screen= args.screen
|
25
|
-
# -----------------------------
|
26
|
-
def main():
    '''
    Entry point, reads the arguments and hands the file over to RFPrinter
    '''
    _get_args()

    RFPrinter(path = Data.path).save(to_screen = Data.screen)
# -----------------------------
if __name__ == '__main__':
    main()
|
dmu_scripts/ssh/coned.py
DELETED
@@ -1,168 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Script used to implement connection to servers
|
3
|
-
'''
|
4
|
-
|
5
|
-
import os
|
6
|
-
import copy
|
7
|
-
import argparse
|
8
|
-
|
9
|
-
import yaml
|
10
|
-
from dmu.logging.log_store import LogStore
|
11
|
-
|
12
|
-
log = LogStore.add_logger('dmu:scripts:coned')
|
13
|
-
#---------------------------------------
|
14
|
-
class Data:
    '''
    Holds the settings and configuration shared by the functions of this script
    '''
    # Logging level, from -l/--log_lvl
    logl : int
    # Dry run flag, from -d/--dry
    dry  : bool
    # Print and exit flag, from -p/--print
    prnt : bool
    # Contents of the YAML config file
    cfg  : dict
    # Arguments of -a/--add, empty if not used
    l_ad : list[str]
    # Arguments of -r/--rem, empty if not used
    l_rm : list[str]
|
24
|
-
#----------------------------
|
25
|
-
def _print_configs():
    '''
    Shows Data.cfg on screen as block style YAML
    '''
    print(yaml.dump(Data.cfg, default_flow_style=False))
|
32
|
-
#----------------------------
|
33
|
-
def _initialize():
    '''
    Loads the config and sets the level of the logger used by this script
    '''
    _load_config()

    level = Data.logl
    LogStore.set_level('dmu:scripts:coned', level)

    log.debug(f'Running at {level} logging level')
|
39
|
-
#----------------------------
|
40
|
-
def _get_args() -> None:
    '''
    Will parse arguments and store them in Data
    '''
    # The path in the description is the one that _load_config and _dump_config actually use
    parser = argparse.ArgumentParser(description='Used to edit and print server list specified by ~/.config/dmu/ssh/servers.yaml')
    parser.add_argument('-p', '--print'  , help ='Prints config settings and exits', action='store_true')
    parser.add_argument('-l', '--log_lvl', type =int, help='Logging level', default=20, choices=[10,20,30])
    parser.add_argument('-a', '--add'    , nargs=3  , help='Adds task to given server, e.g. task 123 server'    , default=[])
    parser.add_argument('-r', '--rem'    , nargs=3  , help='Removes task from given server, e.g. task 123 server', default=[])
    parser.add_argument('-d', '--dry'    , help='Run dry run, for adding and removing entries', action='store_true')
    args = parser.parse_args()

    Data.prnt = args.print
    Data.logl = args.log_lvl
    Data.l_ad = args.add
    Data.l_rm = args.rem
    Data.dry  = args.dry
|
57
|
-
#---------------------------------------
|
58
|
-
def _load_config():
    '''
    Loads $HOME/.config/dmu/ssh/servers.yaml into Data.cfg

    Raises
    ------------------
    KeyError         : If the HOME environment variable is not set
    FileNotFoundError: If the config file does not exist
    '''
    home_dir    = os.environ['HOME']
    config_path = f'{home_dir}/.config/dmu/ssh/servers.yaml'
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f'Config not found: {config_path}')

    with open(config_path, encoding='utf-8') as ifile:
        cfg = yaml.safe_load(ifile)

    # An empty file is loaded as None, which would make
    # the membership checks done when adding/removing tasks fail
    Data.cfg = {} if cfg is None else cfg
|
66
|
-
#---------------------------------------
|
67
|
-
def _dump_config(cfg : dict):
    '''
    Saves `cfg` to the config file, unless it is equal to Data.cfg.
    For dry runs the YAML is printed instead of being written.
    '''
    if cfg == Data.cfg:
        log.debug('Config was not modified, will not save it')
        return

    home_dir = os.environ['HOME']
    out_path = f'{home_dir}/.config/dmu/ssh/servers.yaml'
    if not os.path.isfile(out_path):
        raise FileNotFoundError(f'Config not found: {out_path}')

    if Data.dry:
        print(yaml.dump(cfg, default_flow_style=False))
        return

    with open(out_path, 'w', encoding='utf-8') as cfg_file:
        yaml.dump(cfg, cfg_file, default_flow_style=False)
|
84
|
-
#---------------------------------------
|
85
|
-
def _get_updated_config() -> dict:
    '''
    Returns a deep copy of Data.cfg after applying the requested additions and removals
    '''
    log.debug('Getting updated config')

    return _remove_task(_add_task(copy.deepcopy(Data.cfg)))
|
93
|
-
#---------------------------------------
|
94
|
-
def _add_task(cfg : dict) -> dict:
    '''
    Appends the task in Data.l_ad, given as [task, machine, server], to `cfg`.
    Missing server and machine entries are created. Returns `cfg`.
    '''
    if not Data.l_ad:
        log.debug('No task added')
        return cfg

    task, machine, server = Data.l_ad

    if server not in cfg:
        cfg[server] = {}

    d_machine = cfg[server]
    if machine not in d_machine:
        d_machine[machine] = []

    d_machine[machine].append(task)

    log.info(f'{"Added":<10}{server:<20}{machine:<10}{task:<20}')

    return cfg
|
111
|
-
#---------------------------------------
|
112
|
-
def _remove_task(cfg : dict) -> dict:
    '''
    Drops the task in Data.l_rm, given as [task, machine, server], from `cfg`.
    If the server, machine or task cannot be found, a warning is shown and
    `cfg` is returned as it was. Entries left empty are trimmed.
    '''
    if not Data.l_rm:
        log.debug('No task removed')
        return cfg

    task, machine, server = Data.l_rm

    if server not in cfg:
        log.warning(f'Server {server} not found')
        return cfg

    d_machine = cfg[server]
    if machine not in d_machine:
        log.warning(f'Machine {machine} not found in server {server}')
        return cfg

    l_task = d_machine[machine]
    if task not in l_task:
        log.warning(f'Task {task} not found in {server}:{machine}')
        return cfg

    # Only the first occurrence is removed
    l_task.remove(task)

    log.info(f'{"Removed":<10}{server:<20}{machine:<10}{task:<20}')

    return _trim_config(cfg, machine, server)
|
140
|
-
#---------------------------------------
|
141
|
-
def _trim_config(cfg : dict, machine : str, server : str) -> dict:
|
142
|
-
if cfg[server][machine] == []:
|
143
|
-
log.debug(f'Trimming {server}:{machine}')
|
144
|
-
del cfg[server][machine]
|
145
|
-
|
146
|
-
if cfg[server] == {}:
|
147
|
-
log.debug(f'Trimming {server}')
|
148
|
-
del cfg[server]
|
149
|
-
|
150
|
-
return cfg
|
151
|
-
#---------------------------------------
|
152
|
-
def main():
    '''
    Entry point: either prints the config or updates it and saves it
    '''
    _get_args()
    _initialize()

    if not Data.prnt:
        _dump_config(_get_updated_config())
        return

    log.debug('Printing and returning')
    _print_configs()
#---------------------------------------
if __name__ == '__main__':
    main()
|
@@ -1,46 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
|
3
|
-
from dmu.text.transformer import transformer as txt_trf
|
4
|
-
|
5
|
-
import argparse
|
6
|
-
import logging
|
7
|
-
|
8
|
-
log = logging.getLogger('dmu_scripts:text:transformer')
|
9
|
-
#---------------------------------
|
10
|
-
class data:
    '''
    Stores the command line arguments, all None until get_args runs
    '''
    txt = None # Path to input file
    out = None # Path to output file
    cfg = None # Path to config file
    lvl = None # Log level
|
15
|
-
#---------------------------------
|
16
|
-
def get_args():
    '''
    Reads the command line arguments and stores them as attributes of `data`
    '''
    arg_parser = argparse.ArgumentParser(description='Will transform a text file following a set of rules')
    arg_parser.add_argument('-i', '--input' , type=str, help='Path to input file', required=True)
    arg_parser.add_argument('-o', '--output', type=str, help='Path to output file, if not passed, it will be same as input, but with trf before extension')
    arg_parser.add_argument('-c', '--config', type=str, help='Path to config file', required=True)
    arg_parser.add_argument('-l', '--loglvl', type=int, help='Log level', default=20, choices=[10, 20, 30, 40])
    parsed = arg_parser.parse_args()

    data.txt, data.out = parsed.input , parsed.output
    data.cfg, data.lvl = parsed.config, parsed.loglvl
|
28
|
-
#---------------------------------
|
29
|
-
def set_logs():
    '''
    Configures logging and applies data.lvl to the transformer logger and to this script's logger
    '''
    logging.basicConfig()

    for logger in (logging.getLogger('dmu:text:transformer'), log):
        logger.setLevel(data.lvl)
|
36
|
-
#---------------------------------
|
37
|
-
def main():
    '''
    Entry point: reads arguments, sets up logging and saves the transformed text
    '''
    get_args()
    set_logs()

    txt_trf(txt_path=data.txt, cfg_path=data.cfg).save_as(data.out)
#---------------------------------
if __name__ == '__main__':
    main()
|
46
|
-
|