data-manipulation-utilities 0.2.7__py3-none-any.whl → 0.2.8.dev720__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.7.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/METADATA +669 -42
- data_manipulation_utilities-0.2.8.dev720.dist-info/RECORD +45 -0
- {data_manipulation_utilities-0.2.7.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/WHEEL +1 -2
- data_manipulation_utilities-0.2.8.dev720.dist-info/entry_points.txt +8 -0
- dmu/generic/hashing.py +34 -8
- dmu/generic/utilities.py +164 -11
- dmu/logging/log_store.py +34 -2
- dmu/logging/messages.py +96 -0
- dmu/ml/cv_classifier.py +3 -3
- dmu/ml/cv_diagnostics.py +3 -0
- dmu/ml/cv_performance.py +58 -0
- dmu/ml/cv_predict.py +149 -46
- dmu/ml/train_mva.py +482 -100
- dmu/ml/utilities.py +29 -10
- dmu/pdataframe/utilities.py +28 -3
- dmu/plotting/fwhm.py +2 -2
- dmu/plotting/matrix.py +1 -1
- dmu/plotting/plotter.py +23 -3
- dmu/plotting/plotter_1d.py +96 -32
- dmu/plotting/plotter_2d.py +5 -0
- dmu/rdataframe/utilities.py +54 -3
- dmu/rfile/ddfgetter.py +102 -0
- dmu/stats/fit_stats.py +129 -0
- dmu/stats/fitter.py +55 -22
- dmu/stats/gof_calculator.py +7 -0
- dmu/stats/model_factory.py +153 -62
- dmu/stats/parameters.py +100 -0
- dmu/stats/utilities.py +443 -12
- dmu/stats/wdata.py +187 -0
- dmu/stats/zfit.py +17 -0
- dmu/stats/zfit_plotter.py +147 -36
- dmu/testing/utilities.py +102 -24
- dmu/workflow/__init__.py +0 -0
- dmu/workflow/cache.py +266 -0
- data_manipulation_utilities-0.2.7.data/scripts/publish +0 -89
- data_manipulation_utilities-0.2.7.dist-info/RECORD +0 -69
- data_manipulation_utilities-0.2.7.dist-info/entry_points.txt +0 -6
- data_manipulation_utilities-0.2.7.dist-info/top_level.txt +0 -3
- dmu_data/ml/tests/diagnostics_from_file.yaml +0 -13
- dmu_data/ml/tests/diagnostics_from_model.yaml +0 -10
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +0 -10
- dmu_data/ml/tests/diagnostics_overlay.yaml +0 -33
- dmu_data/ml/tests/train_mva.yaml +0 -58
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +0 -82
- dmu_data/plotting/tests/2d.yaml +0 -24
- dmu_data/plotting/tests/fig_size.yaml +0 -13
- dmu_data/plotting/tests/high_stat.yaml +0 -22
- dmu_data/plotting/tests/legend.yaml +0 -12
- dmu_data/plotting/tests/name.yaml +0 -14
- dmu_data/plotting/tests/no_bounds.yaml +0 -12
- dmu_data/plotting/tests/normalized.yaml +0 -9
- dmu_data/plotting/tests/plug_fwhm.yaml +0 -24
- dmu_data/plotting/tests/plug_stats.yaml +0 -19
- dmu_data/plotting/tests/simple.yaml +0 -9
- dmu_data/plotting/tests/stats.yaml +0 -9
- dmu_data/plotting/tests/styling.yaml +0 -11
- dmu_data/plotting/tests/title.yaml +0 -14
- dmu_data/plotting/tests/weights.yaml +0 -13
- dmu_data/text/transform.toml +0 -4
- dmu_data/text/transform.txt +0 -6
- dmu_data/text/transform_set.toml +0 -8
- dmu_data/text/transform_set.txt +0 -6
- dmu_data/text/transform_trf.txt +0 -12
- dmu_scripts/git/publish +0 -89
- dmu_scripts/physics/check_truth.py +0 -121
- dmu_scripts/rfile/compare_root_files.py +0 -299
- dmu_scripts/rfile/print_trees.py +0 -35
- dmu_scripts/ssh/coned.py +0 -168
- dmu_scripts/text/transform_text.py +0 -46
- {dmu_data → dmu}/__init__.py +0 -0
dmu/workflow/cache.py
ADDED
@@ -0,0 +1,266 @@
|
|
1
|
+
'''
|
2
|
+
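This module contains the Cache class, meant to be inherited by classes whose outputs should be cached according to a hash of their inputs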
|
3
|
+
'''
|
4
|
+
import os
|
5
|
+
import sys
|
6
|
+
import shutil
|
7
|
+
from types import NoneType
|
8
|
+
from pathlib import Path
|
9
|
+
from contextlib import contextmanager
|
10
|
+
|
11
|
+
from dmu.generic import hashing
|
12
|
+
from dmu.logging.log_store import LogStore
|
13
|
+
|
14
|
+
# Module level logger, shared by every method of the classes defined below
log = LogStore.add_logger('dmu:workflow:cache')
|
15
|
+
# ---------------------------
|
16
|
+
class Cache:
|
17
|
+
'''
|
18
|
+
Class meant to wrap other classes in order to
|
19
|
+
|
20
|
+
- Keep track of the inputs through hashes
|
21
|
+
- Load cached data, if found, and prevent calculations
|
22
|
+
|
23
|
+
The following directories will be important:
|
24
|
+
|
25
|
+
out_dir : Directory where the outputs will go, specified by the user
|
26
|
+
cache_dir: Subdirectory of out_dir, ${out_dir}/.cache
|
27
|
+
hash_dir : Subdirectory of out_dir, ${out_dir}/.cache/{hash}
|
28
|
+
Where {hash} is a 10 alphanumeric representing the has of the inputs
|
29
|
+
|
30
|
+
# On skipping caching
|
31
|
+
|
32
|
+
This is controlled by `_l_skip_class` which is a list of class names:
|
33
|
+
|
34
|
+
- These classes will have the caching turned off
|
35
|
+
- If the list is empty, caching runs for everything
|
36
|
+
- If the list is None, caching is turned off for everything
|
37
|
+
'''
|
38
|
+
_cache_root : str|None = None
|
39
|
+
_l_skip_class : list[str]|None = []
|
40
|
+
# ---------------------------
|
41
|
+
def __init__(self, out_path : str, **kwargs):
|
42
|
+
'''
|
43
|
+
Parameters
|
44
|
+
---------------
|
45
|
+
out_path: Path to directory where outputs will go
|
46
|
+
kwargs : Key word arguments symbolizing identity of inputs, used for hashing
|
47
|
+
'''
|
48
|
+
if Cache._cache_root is None:
|
49
|
+
raise ValueError('Caching directory not set')
|
50
|
+
|
51
|
+
log.debug(f'Using {Cache._cache_root} root directory for caching')
|
52
|
+
if 'code' in kwargs:
|
53
|
+
raise ValueError('Cannot append hashing data with key "code", already used')
|
54
|
+
|
55
|
+
kwargs['code'] = self._get_code_hash()
|
56
|
+
|
57
|
+
self._out_path = os.path.normpath(f'{Cache._cache_root}/{out_path}')
|
58
|
+
log.debug(f'Using {self._out_path} output path')
|
59
|
+
os.makedirs(self._out_path, exist_ok=True)
|
60
|
+
|
61
|
+
self._dat_hash = kwargs
|
62
|
+
|
63
|
+
self._cache_dir = self._get_dir(kind='cache')
|
64
|
+
self._hash_dir : str
|
65
|
+
# ---------------------------
|
66
|
+
@classmethod
|
67
|
+
def set_cache_root(cls, root : str) -> None:
|
68
|
+
'''
|
69
|
+
Sets the path to the directory WRT which the _out_path_
|
70
|
+
will be placed
|
71
|
+
'''
|
72
|
+
if cls._cache_root is not None:
|
73
|
+
raise ValueError(f'Trying to set {root}, but already found {cls._cache_root}')
|
74
|
+
|
75
|
+
os.makedirs(root, exist_ok=True)
|
76
|
+
|
77
|
+
cls._cache_root = root
|
78
|
+
# ---------------------------
|
79
|
+
def _get_code_hash(self) -> str:
|
80
|
+
'''
|
81
|
+
If `MyTool` inherits from `Cache`. `mytool.py` git commit hash
|
82
|
+
should be returned
|
83
|
+
'''
|
84
|
+
cls = self.__class__
|
85
|
+
mod = sys.modules.get(cls.__module__)
|
86
|
+
if mod is None:
|
87
|
+
raise ValueError(f'Module not found: {cls.__module__}')
|
88
|
+
|
89
|
+
if mod.__file__ is None:
|
90
|
+
raise ValueError(f'Cannot extract file path for module: {cls.__module__}')
|
91
|
+
|
92
|
+
fname = mod.__file__
|
93
|
+
fpath = os.path.abspath(fname)
|
94
|
+
val = hashing.hash_file(path=fpath)
|
95
|
+
|
96
|
+
log.debug(f'Using hash for: {fpath} = {val}')
|
97
|
+
|
98
|
+
return val
|
99
|
+
# ---------------------------
|
100
|
+
def _get_dir(
|
101
|
+
self,
|
102
|
+
kind : str,
|
103
|
+
make : bool = True) -> str:
|
104
|
+
'''
|
105
|
+
Parameters
|
106
|
+
--------------
|
107
|
+
kind : Kind of directory, cash, hash
|
108
|
+
make : If True (default) will try to make directory
|
109
|
+
'''
|
110
|
+
if kind == 'cache':
|
111
|
+
dir_path = f'{self._out_path}/.cache'
|
112
|
+
elif kind == 'hash':
|
113
|
+
cache_dir = self._get_dir(kind='cache')
|
114
|
+
hsh = hashing.hash_object(self._dat_hash)
|
115
|
+
dir_path = f'{cache_dir}/{hsh}'
|
116
|
+
else:
|
117
|
+
raise ValueError(f'Invalid directory kind: {kind}')
|
118
|
+
|
119
|
+
if make:
|
120
|
+
os.makedirs(dir_path, exist_ok=True)
|
121
|
+
|
122
|
+
return dir_path
|
123
|
+
# ---------------------------
|
124
|
+
def _cache(self) -> None:
|
125
|
+
'''
|
126
|
+
Meant to be called after all the calculations finish
|
127
|
+
It will copy all the outputs of the processing
|
128
|
+
to a hashed directory
|
129
|
+
'''
|
130
|
+
self._hash_dir = self._get_dir(kind= 'hash')
|
131
|
+
log.info(f'Caching outputs to: {self._hash_dir}')
|
132
|
+
|
133
|
+
for source in Path(self._out_path).glob('*'):
|
134
|
+
if str(source) == self._cache_dir:
|
135
|
+
continue
|
136
|
+
|
137
|
+
log.debug(str(source))
|
138
|
+
log.debug('-->')
|
139
|
+
log.debug(self._hash_dir)
|
140
|
+
log.debug('')
|
141
|
+
|
142
|
+
if source.is_dir():
|
143
|
+
shutil.copytree(source, self._hash_dir+'/'+source.name, dirs_exist_ok=True)
|
144
|
+
else:
|
145
|
+
shutil.copy2(source, self._hash_dir)
|
146
|
+
|
147
|
+
self._delete_from_output(only_links=False)
|
148
|
+
self._copy_from_hashdir()
|
149
|
+
# ---------------------------
|
150
|
+
def _delete_from_output(self, only_links : bool) -> None:
|
151
|
+
'''
|
152
|
+
Delete all objects from _out_path directory, except for `.cache`
|
153
|
+
|
154
|
+
only_links: If true will only delete links
|
155
|
+
'''
|
156
|
+
for path in Path(self._out_path).iterdir():
|
157
|
+
if str(path) == self._cache_dir:
|
158
|
+
log.debug(f'Skipping cache dir: {self._cache_dir}')
|
159
|
+
continue
|
160
|
+
|
161
|
+
# These will always be symbolic links
|
162
|
+
if only_links and not path.is_symlink():
|
163
|
+
log.warning(f'Found a non-symlink not deleting: {path}')
|
164
|
+
continue
|
165
|
+
|
166
|
+
log.debug(f'Deleting {path}')
|
167
|
+
if path.is_dir() and not path.is_symlink():
|
168
|
+
shutil.rmtree(path)
|
169
|
+
else:
|
170
|
+
path.unlink()
|
171
|
+
# ---------------------------
|
172
|
+
def _copy_from_hashdir(self) -> None:
|
173
|
+
'''
|
174
|
+
Copies all the objects from _hash_dir to _out_path
|
175
|
+
'''
|
176
|
+
for source in Path(self._hash_dir).iterdir():
|
177
|
+
target = f'{self._out_path}/{source.name}'
|
178
|
+
log.debug(f'{str(source):<50}{"-->"}{target}')
|
179
|
+
|
180
|
+
os.symlink(source, target)
|
181
|
+
# ---------------------------
|
182
|
+
def _dont_cache(self) -> bool:
|
183
|
+
'''
|
184
|
+
Returns
|
185
|
+
---------------
|
186
|
+
Flag that if:
|
187
|
+
|
188
|
+
True : Will stop the derived class from using caching (i.e. caching is off)
|
189
|
+
False: Cache
|
190
|
+
'''
|
191
|
+
if Cache._l_skip_class is None:
|
192
|
+
log.info('No class will be cached')
|
193
|
+
return True
|
194
|
+
|
195
|
+
if len(Cache._l_skip_class) == 0:
|
196
|
+
log.debug('All classes will be cached')
|
197
|
+
return False
|
198
|
+
|
199
|
+
class_name = self.__class__.__name__
|
200
|
+
|
201
|
+
skip = class_name in Cache._l_skip_class
|
202
|
+
|
203
|
+
if skip:
|
204
|
+
log.warning(f'Caching turned off for {class_name}')
|
205
|
+
else:
|
206
|
+
log.debug(f'Caching turned on for {class_name}')
|
207
|
+
|
208
|
+
return skip
|
209
|
+
# ---------------------------
|
210
|
+
def _copy_from_cache(self) -> bool:
|
211
|
+
'''
|
212
|
+
Checks if hash directory exists:
|
213
|
+
|
214
|
+
No : Returns False
|
215
|
+
Yes:
|
216
|
+
- Removes contents of `out_path`, except for .cache
|
217
|
+
- Copies the contents of `hash_dir` to `out_dir`
|
218
|
+
|
219
|
+
Returns
|
220
|
+
---------------
|
221
|
+
True if the object, cached was found, false otherwise.
|
222
|
+
'''
|
223
|
+
if self._dont_cache():
|
224
|
+
# If not copying from cache, will need to remove what is
|
225
|
+
# in the output directory, so that it gets replaced with
|
226
|
+
# new outputs
|
227
|
+
self._delete_from_output(only_links=False)
|
228
|
+
log.info('Not picking already cached outputs, remaking them')
|
229
|
+
return False
|
230
|
+
|
231
|
+
hash_dir = self._get_dir(kind='hash', make=False)
|
232
|
+
if not os.path.isdir(hash_dir):
|
233
|
+
log.debug(f'Hash directory {hash_dir} not found, not caching')
|
234
|
+
self._delete_from_output(only_links=False)
|
235
|
+
return False
|
236
|
+
|
237
|
+
self._hash_dir = hash_dir
|
238
|
+
log.debug(f'Data found in hash directory: {self._hash_dir}')
|
239
|
+
|
240
|
+
self._delete_from_output(only_links=False)
|
241
|
+
self._copy_from_hashdir()
|
242
|
+
|
243
|
+
return True
|
244
|
+
# ---------------------------
|
245
|
+
@contextmanager
|
246
|
+
@staticmethod
|
247
|
+
def turn_off_cache(val : list[str]|None):
|
248
|
+
'''
|
249
|
+
Parameters
|
250
|
+
------------------
|
251
|
+
val: List of names of classes that inherit from `Cache`.
|
252
|
+
If None, will not cache for any class.
|
253
|
+
By default this is an empty list and it will cache for every class
|
254
|
+
'''
|
255
|
+
if not isinstance(val, (NoneType, list)):
|
256
|
+
log.error('This manager expects: list[str]|None')
|
257
|
+
raise ValueError(f'Invalid value: {val}')
|
258
|
+
|
259
|
+
old_val = Cache._l_skip_class
|
260
|
+
|
261
|
+
Cache._l_skip_class = val
|
262
|
+
try:
|
263
|
+
yield
|
264
|
+
finally:
|
265
|
+
Cache._l_skip_class = old_val
|
266
|
+
# ---------------------------
|
@@ -1,89 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
# --------------------------
|
4
|
-
display_help()
{
    # Prints, one line per argument, the description of what this script does
    printf '%s\n' \
        "Script meant to:" \
        "" \
        "1. Check if version in pyproject.toml has been modified" \
        "2. If it has create new tag following version name" \
        "3. Push to remote "
}
|
12
|
-
# --------------------------
|
13
|
-
get_opts()
{
    # Parses the command line flags:
    #
    # -h     : Prints help and exits with status 0
    # -f ARG : Accepted by the option string, but not used anywhere in this function
    #
    # Any other flag, or a flag missing its argument, prints help and exits with status 1
    while getopts :hf: option; do
        if   [[ "${option}" == "h" ]]; then
            display_help
            exit 0
        elif [[ "${option}" == "?" ]]; then
            echo "Invalid option: -${OPTARG}"
            display_help
            exit 1
        elif [[ "${option}" == ":" ]]; then
            echo "$0: Arguments needed"
            display_help
            exit 1
        fi
    done
}
|
32
|
-
# --------------------------
|
33
|
-
# Picks up version from pyproject.toml and stores it in the global VERSION
#
# Exits with status 1 if:
# - pyproject.toml is not in the current directory
# - No line of the file contains the word version
# - No MAJOR.MINOR.PATCH number is found in the matching lines
get_version()
{
    if [[ ! -f pyproject.toml ]];then
        echo "Cannot find pyproject.toml"
        exit 1
    fi

    # Checking the status of the assignment directly, instead of through $?
    if ! VERSION_LINE=$(grep version pyproject.toml);then
        echo "Could not extract version from pyproject.toml"
        exit 1
    fi

    # - Each component may have several digits, e.g. 0.12.7
    # - No leading .* is used, it is greedy and would swallow the first
    #   digits of a multi digit major number, e.g. 10.2.7 would become 0.2.7
    if [[ "$VERSION_LINE" =~ ([0-9]+\.[0-9]+\.[0-9]+) ]];then
        VERSION=${BASH_REMATCH[1]}
        echo "Using version: $VERSION"
        return
    fi

    echo "Could not extract version from: $VERSION_LINE"
    exit 1
}
|
57
|
-
# --------------------------
|
58
|
-
create_tag()
{
    # Creates an annotated tag named after VERSION, unless a tag with exactly that name exists
    #
    # -F : VERSION is taken as a fixed string, otherwise the dots would match any character
    # -x : The whole tag name has to match, otherwise 0.2.7 would be found in 0.2.70
    # -q : Only the exit status is needed
    if git tag --list | grep -Fxq -- "$VERSION";then
        echo "Version found among tags, not tagging"
        return
    fi

    echo "Version $VERSION not found among tags, creating new tag"

    git tag -a "$VERSION"
}
|
71
|
-
# --------------------------
|
72
|
-
push_all()
{
    # Commits pyproject.toml and, for each remote, pulls and pushes the current branch and the tags
    #
    # The commit does not depend on the remote, thus it is done once, before the loop.
    # Inside the loop it would fail, with nothing to commit, for every remote after the first
    git add pyproject.toml
    git commit -m "Publication commit"

    for REMOTE in $(git remote);do
        echo "Pushing tags and commits to remote: $REMOTE"

        git pull    "$REMOTE" HEAD
        git push -u "$REMOTE" HEAD
        git push    "$REMOTE" --tags
    done
}
|
84
|
-
# --------------------------
|
85
|
-
main()
{
    # Entry point, runs the steps of the publication in order
    get_opts "$@"

    get_version
    create_tag
    push_all
}

main "$@"
|
@@ -1,69 +0,0 @@
|
|
1
|
-
data_manipulation_utilities-0.2.7.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
|
2
|
-
dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
|
3
|
-
dmu/generic/hashing.py,sha256=Mxwr88jyGuEP5Bhqtw1gDva9fNO-l11jb2qA0ceOysw,1167
|
4
|
-
dmu/generic/utilities.py,sha256=dBvrucgdM3vyyETharLvPAC7qEbgF9cL1mLVQ0nxNJM,2020
|
5
|
-
dmu/generic/version_management.py,sha256=j0ImlAq6SVNjTh3xRsF6G7DSoyr1w8kTRY84dNriGRE,3750
|
6
|
-
dmu/logging/log_store.py,sha256=umdvjNDuV3LdezbG26b0AiyTglbvkxST19CQu9QATbA,4184
|
7
|
-
dmu/ml/cv_classifier.py,sha256=ZbzEm_jW9yoTC7k_xBA7hFpc1bDNayiVR3tbaj1_ieE,4228
|
8
|
-
dmu/ml/cv_diagnostics.py,sha256=T6wkNcuLkdLrZ9z1YtAYMeb-M_bxnSChRHtdPeQv_To,7507
|
9
|
-
dmu/ml/cv_predict.py,sha256=4wwYL_jcUExDqLJVfClxEUWSd_QAx8yKHO3rX-mx4vw,6711
|
10
|
-
dmu/ml/train_mva.py,sha256=tkdSODahXG9R77wmcckKFxYceMmc-3O-Y4pJXMe8WAI,21137
|
11
|
-
dmu/ml/utilities.py,sha256=PK_61fW7gBV9aGZyez3PI8zAT7_Fc6IlQzDB7f8iBTM,4133
|
12
|
-
dmu/pdataframe/utilities.py,sha256=kwv9PuSmccz518e5bkrASacewCkMLTSPLtLiMH3olBc,2011
|
13
|
-
dmu/plotting/fwhm.py,sha256=Vn0TWlxgNMNk_wb2CD_At-ltVmzI49fyhoJXa0CT0I4,1924
|
14
|
-
dmu/plotting/matrix.py,sha256=pXuUJn-LgOvrI9qGkZQw16BzLjOjeikYQ_ll2VIcIXU,4978
|
15
|
-
dmu/plotting/plotter.py,sha256=f3hoZF1n3G-yjHsQ_2huxqz5cqFelZGs0RiaHYjF0JM,7391
|
16
|
-
dmu/plotting/plotter_1d.py,sha256=zygY6XUk4_wmaAK-bietjYdhkA03TZyft5IYX5acoYI,9142
|
17
|
-
dmu/plotting/plotter_2d.py,sha256=mZhp3D5I-JodOnFTEF1NqHtcLtuI-2WNpCQsrsoXNtw,3017
|
18
|
-
dmu/plotting/utilities.py,sha256=SI9dvtZq2gr-PXVz71KE4o0i09rZOKgqJKD1jzf6KXk,1167
|
19
|
-
dmu/rdataframe/atr_mgr.py,sha256=FdhaQWVpsm4OOe1IRbm7rfrq8VenTNdORyI-lZ2Bs1M,2386
|
20
|
-
dmu/rdataframe/utilities.py,sha256=pNcQARMP7txMhy6k27UnDcYf0buNy5U2fshaJDl_h8o,3661
|
21
|
-
dmu/rfile/rfprinter.py,sha256=mp5jd-oCJAnuokbdmGyL9i6tK2lY72jEfROuBIZ_ums,3941
|
22
|
-
dmu/rfile/utilities.py,sha256=XuYY7HuSBj46iSu3c60UYBHtI6KIPoJU_oofuhb-be0,945
|
23
|
-
dmu/stats/fitter.py,sha256=gmSejQmoqsHmg6QPypbNb0MTfDFkSMy-DsDLKk1H3KE,18387
|
24
|
-
dmu/stats/function.py,sha256=yzi_Fvp_ASsFzbWFivIf-comquy21WoeY7is6dgY0Go,9491
|
25
|
-
dmu/stats/gof_calculator.py,sha256=4EN6OhULcztFvsAZ00rxgohJemnjtDNB5o0IBcv6kbk,4657
|
26
|
-
dmu/stats/minimizers.py,sha256=db9R2G0SOV-k0BKi6m4EyB_yp6AtZdP23_28B0315oo,7094
|
27
|
-
dmu/stats/model_factory.py,sha256=N0Q5R7GDfy2r_08xGduzydYO-ALvNTIRE1g0KKcRFyc,15872
|
28
|
-
dmu/stats/utilities.py,sha256=LQy4kd3xSXqpApcWuYfZxkGQyjowaXv2Wr1c4Bj-4ys,4523
|
29
|
-
dmu/stats/zfit_models.py,sha256=SI61KJ-OG1UAabDICU1iTh6JPKM3giR2ErDraRjkCV8,1842
|
30
|
-
dmu/stats/zfit_plotter.py,sha256=bnkqFKyxhOEb5hVv4mxh4gpg-MrGeTrCh_5QKIT9hL0,19928
|
31
|
-
dmu/testing/utilities.py,sha256=c91g_PEzIZfS3Kxj2EOh05mc5ia7vFpZM1VEhjh4DA0,4661
|
32
|
-
dmu/text/transformer.py,sha256=4lrGknbAWRm0-rxbvgzOO-eR1-9bkYk61boJUEV3cQ0,6100
|
33
|
-
dmu_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
|
-
dmu_data/ml/tests/diagnostics_from_file.yaml,sha256=quvXOPkRducnBsctyape_Rn5_aqMEpPo6nO_UweMORo,404
|
35
|
-
dmu_data/ml/tests/diagnostics_from_model.yaml,sha256=rtCQlmGS9ld2xoQJEE35nA07yfRMklEfQEW0w3gRv2A,261
|
36
|
-
dmu_data/ml/tests/diagnostics_multiple_methods.yaml,sha256=w8Fpmr7kX1Jsb_h6LL2hiuYKf5lYpckFCpYKzWetbA0,265
|
37
|
-
dmu_data/ml/tests/diagnostics_overlay.yaml,sha256=ZVOsxLL8_JQtf41n8Ct-M9Ch10xBwHK54q1fttWPDlE,866
|
38
|
-
dmu_data/ml/tests/train_mva.yaml,sha256=AFUJV5yrXIdHHzA9TzfJv3RWhvk0GUB_CFubePCzfxE,1437
|
39
|
-
dmu_data/ml/tests/train_mva_with_diagnostics.yaml,sha256=ZyKU7W_mDGVvg8xP_wQX0NtO6N_tLSnKO1CyHAaYhuk,2036
|
40
|
-
dmu_data/plotting/tests/2d.yaml,sha256=HSAtER-8CEqIGBY_jdcIdSVOHMfYPYhmgeZghTpVYh8,516
|
41
|
-
dmu_data/plotting/tests/fig_size.yaml,sha256=7ROq49nwZ1A2EbPiySmu6n3G-Jq6YAOkc3d2X3YNZv0,294
|
42
|
-
dmu_data/plotting/tests/high_stat.yaml,sha256=bLglBLCZK6ft0xMhQ5OltxE76cWsBMPMjO6GG0OkDr8,522
|
43
|
-
dmu_data/plotting/tests/legend.yaml,sha256=wGpj58ig-GOlqbWoN894zrCet2Fj9f5QtY0rig_UC-c,213
|
44
|
-
dmu_data/plotting/tests/name.yaml,sha256=mkcPAVg8wBAmlSbSRQ1bcaMl4vOS6LXMtpqQeDrrtO4,312
|
45
|
-
dmu_data/plotting/tests/no_bounds.yaml,sha256=8e1QdphBjz-suDr857DoeUC2DXiy6SE-gvkORJQYv80,257
|
46
|
-
dmu_data/plotting/tests/normalized.yaml,sha256=Y0eKtyV5pvlSxvqfsLjytYtv8xYF3HZ5WEdCJdeHGQI,193
|
47
|
-
dmu_data/plotting/tests/plug_fwhm.yaml,sha256=xl5LXc9Nt66anM-HOXAxCtlaxWNM7zzIXf1Y6U8M4Wg,449
|
48
|
-
dmu_data/plotting/tests/plug_stats.yaml,sha256=ROO8soYXBbZIFYZcGngA_K5XHgIAFCmuAGfZCJgMmd0,384
|
49
|
-
dmu_data/plotting/tests/simple.yaml,sha256=Xc59Pjfb3BKMicLVBxODVqomHFupcb5GvefKbKHCQWQ,195
|
50
|
-
dmu_data/plotting/tests/stats.yaml,sha256=fSZjoV-xPnukpCH2OAXsz_SNPjI113qzDg8Ln3spaaA,165
|
51
|
-
dmu_data/plotting/tests/styling.yaml,sha256=yUV75HewT2tFofS-5i-mU-3s4PSHcpZOZRn5R_iSRAw,218
|
52
|
-
dmu_data/plotting/tests/title.yaml,sha256=bawKp9aGpeRrHzv69BOCbFX8sq9bb3Es9tdsPTE7jIk,333
|
53
|
-
dmu_data/plotting/tests/weights.yaml,sha256=RWQ1KxbCq-uO62WJ2AoY4h5Umc37zG35s-TpKnNMABI,312
|
54
|
-
dmu_data/text/transform.toml,sha256=R-832BZalzHZ6c5gD6jtT_Hj8BCsM5vxa1v6oeiwaP4,94
|
55
|
-
dmu_data/text/transform.txt,sha256=EX760da6Vkf-_EPxnQlC5hGSkfFhJCCGCD19NU-1Qto,44
|
56
|
-
dmu_data/text/transform_set.toml,sha256=Jeh7BTz82idqvbOQJtl9-ur56mZkzDn5WtvmIb48LoE,150
|
57
|
-
dmu_data/text/transform_set.txt,sha256=1KivMoP9LxPn9955QrRmOzjEqduEjhTetQ9MXykO5LY,46
|
58
|
-
dmu_data/text/transform_trf.txt,sha256=zxBRTgcSmX7RdqfmWF88W1YqbyNHa4Ccruf1MmnYv2A,74
|
59
|
-
dmu_scripts/git/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
|
60
|
-
dmu_scripts/physics/check_truth.py,sha256=b1P_Pa9ef6VcFtyY6Y9KS9Om9L-QrCBjDKp4dqca0PQ,3964
|
61
|
-
dmu_scripts/rfile/compare_root_files.py,sha256=T8lDnQxsRNMr37x1Y7YvWD8ySHrJOWZki7ZQynxXX9Q,9540
|
62
|
-
dmu_scripts/rfile/print_trees.py,sha256=Ze4Ccl_iUldl4eVEDVnYBoe4amqBT1fSBR1zN5WSztk,941
|
63
|
-
dmu_scripts/ssh/coned.py,sha256=lhilYNHWRCGxC-jtyJ3LQ4oUgWW33B2l1tYCcyHHsR0,4858
|
64
|
-
dmu_scripts/text/transform_text.py,sha256=9akj1LB0HAyopOvkLjNOJiptZw5XoOQLe17SlcrGMD0,1456
|
65
|
-
data_manipulation_utilities-0.2.7.dist-info/METADATA,sha256=aaUGi8PUMDuJCdslarv5Rn10iI-Jvbu4WkWbEOwmtzw,34464
|
66
|
-
data_manipulation_utilities-0.2.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
67
|
-
data_manipulation_utilities-0.2.7.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
|
68
|
-
data_manipulation_utilities-0.2.7.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
|
69
|
-
data_manipulation_utilities-0.2.7.dist-info/RECORD,,
|
@@ -1,6 +0,0 @@
|
|
1
|
-
[console_scripts]
|
2
|
-
check_truth = dmu_scripts.physics.check_truth:main
|
3
|
-
compare_root_files = dmu_scripts.rfile.compare_root_files:main
|
4
|
-
coned = dmu_scripts.ssh.coned:main
|
5
|
-
print_trees = dmu_scripts.rfile.print_trees:main
|
6
|
-
transform_text = dmu_scripts.text.transform_text:main
|
@@ -1,13 +0,0 @@
|
|
1
|
-
output : /tmp/tests/dmu/ml/cv_diagnostics/from_rdf
|
2
|
-
# Will assume that the target is already in the input dataframe
|
3
|
-
# and will use it, instead of evaluating models
|
4
|
-
score_from_rdf : w
|
5
|
-
correlations:
|
6
|
-
# Variables with respect to which the correlations with the features will be measured
|
7
|
-
target :
|
8
|
-
name : z
|
9
|
-
methods:
|
10
|
-
- Pearson
|
11
|
-
figure:
|
12
|
-
title: Scores from file
|
13
|
-
size : [10, 8]
|
@@ -1,33 +0,0 @@
|
|
1
|
-
output : /tmp/tests/dmu/ml/cv_diagnostics/overlay
|
2
|
-
# Will assume that the target is already in the input dataframe
|
3
|
-
# and will use it, instead of evaluating models
|
4
|
-
score_from_rdf : w
|
5
|
-
correlations:
|
6
|
-
# Variables with respect to which the correlations with the features will be measured
|
7
|
-
target :
|
8
|
-
name : z
|
9
|
-
overlay :
|
10
|
-
wp :
|
11
|
-
- 0.2
|
12
|
-
- 0.5
|
13
|
-
- 0.7
|
14
|
-
- 0.9
|
15
|
-
general:
|
16
|
-
size : [12, 10]
|
17
|
-
saving:
|
18
|
-
plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/overlay
|
19
|
-
plots:
|
20
|
-
z :
|
21
|
-
binning : [-4, 4, 10]
|
22
|
-
yscale : 'linear'
|
23
|
-
labels : ['$z$', 'Entries']
|
24
|
-
normalized : true
|
25
|
-
styling :
|
26
|
-
linestyle: '-'
|
27
|
-
methods:
|
28
|
-
- Pearson
|
29
|
-
- Kendall-$\tau$
|
30
|
-
figure:
|
31
|
-
title : Scores from file
|
32
|
-
size : [12, 10]
|
33
|
-
xlabelsize: 30
|
dmu_data/ml/tests/train_mva.yaml
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
dataset:
|
2
|
-
define :
|
3
|
-
r : z + x
|
4
|
-
nan :
|
5
|
-
x : -3
|
6
|
-
y : -3
|
7
|
-
training :
|
8
|
-
nfold : 3
|
9
|
-
features : [x, y, r]
|
10
|
-
rdm_stat : 1
|
11
|
-
hyper :
|
12
|
-
loss : log_loss
|
13
|
-
n_estimators : 100
|
14
|
-
max_depth : 3
|
15
|
-
learning_rate : 0.1
|
16
|
-
min_samples_split : 2
|
17
|
-
saving:
|
18
|
-
path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
|
19
|
-
plotting:
|
20
|
-
roc :
|
21
|
-
min : [0.0, 0.0]
|
22
|
-
max : [1.2, 1.2]
|
23
|
-
annotate:
|
24
|
-
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
25
|
-
form : '{:.2f}'
|
26
|
-
color: 'green'
|
27
|
-
xoff : -15
|
28
|
-
yoff : -15
|
29
|
-
size : 10
|
30
|
-
correlation:
|
31
|
-
title : 'Correlation matrix'
|
32
|
-
size : [10, 10]
|
33
|
-
mask_value : 0
|
34
|
-
val_dir : '/tmp/tests/dmu/ml/train_mva'
|
35
|
-
features:
|
36
|
-
saving:
|
37
|
-
plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
|
38
|
-
plots:
|
39
|
-
r :
|
40
|
-
binning : [-6, 6, 100]
|
41
|
-
yscale : 'linear'
|
42
|
-
labels : ['$r$', '']
|
43
|
-
w :
|
44
|
-
binning : [-4, 4, 100]
|
45
|
-
yscale : 'linear'
|
46
|
-
labels : ['$w$', '']
|
47
|
-
x :
|
48
|
-
binning : [-4, 4, 100]
|
49
|
-
yscale : 'linear'
|
50
|
-
labels : ['$x$', '']
|
51
|
-
y :
|
52
|
-
binning : [-4, 4, 100]
|
53
|
-
yscale : 'linear'
|
54
|
-
labels : ['$y$', '']
|
55
|
-
z :
|
56
|
-
binning : [-4, 4, 100]
|
57
|
-
yscale : 'linear'
|
58
|
-
labels : ['$z$', '']
|
@@ -1,82 +0,0 @@
|
|
1
|
-
dataset:
|
2
|
-
define :
|
3
|
-
r : z + x
|
4
|
-
nan :
|
5
|
-
x : -3
|
6
|
-
y : -3
|
7
|
-
training :
|
8
|
-
nfold : 3
|
9
|
-
features : [x, y, r]
|
10
|
-
rdm_stat : 1
|
11
|
-
hyper :
|
12
|
-
loss : log_loss
|
13
|
-
n_estimators : 100
|
14
|
-
max_depth : 3
|
15
|
-
learning_rate : 0.1
|
16
|
-
min_samples_split : 2
|
17
|
-
saving:
|
18
|
-
path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
|
19
|
-
plotting:
|
20
|
-
roc :
|
21
|
-
min : [0.0, 0.0]
|
22
|
-
max : [1.2, 1.2]
|
23
|
-
annotate:
|
24
|
-
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
25
|
-
form : '{:.2f}'
|
26
|
-
color: 'green'
|
27
|
-
xoff : -15
|
28
|
-
yoff : -15
|
29
|
-
size : 10
|
30
|
-
correlation:
|
31
|
-
title : 'Correlation matrix'
|
32
|
-
size : [10, 10]
|
33
|
-
mask_value : 0
|
34
|
-
val_dir : '/tmp/tests/dmu/ml/train_mva'
|
35
|
-
features:
|
36
|
-
saving:
|
37
|
-
plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
|
38
|
-
plots:
|
39
|
-
r :
|
40
|
-
binning : [-6, 6, 100]
|
41
|
-
yscale : 'linear'
|
42
|
-
labels : ['$r$', '']
|
43
|
-
w :
|
44
|
-
binning : [-4, 4, 100]
|
45
|
-
yscale : 'linear'
|
46
|
-
labels : ['$w$', '']
|
47
|
-
x :
|
48
|
-
binning : [-4, 4, 100]
|
49
|
-
yscale : 'linear'
|
50
|
-
labels : ['$x$', '']
|
51
|
-
y :
|
52
|
-
binning : [-4, 4, 100]
|
53
|
-
yscale : 'linear'
|
54
|
-
labels : ['$y$', '']
|
55
|
-
z :
|
56
|
-
binning : [-4, 4, 100]
|
57
|
-
yscale : 'linear'
|
58
|
-
labels : ['$z$', '']
|
59
|
-
diagnostics:
|
60
|
-
output : /tmp/tests/dmu/ml/train_mva/diagnostics
|
61
|
-
correlations:
|
62
|
-
target :
|
63
|
-
name : z
|
64
|
-
overlay :
|
65
|
-
general:
|
66
|
-
size : [20, 10]
|
67
|
-
saving:
|
68
|
-
plt_dir : /tmp/tests/dmu/ml/train_mva/diagnostics
|
69
|
-
plots:
|
70
|
-
z :
|
71
|
-
binning : [-4, +4, 30]
|
72
|
-
yscale : 'linear'
|
73
|
-
labels : ['z', 'Entries']
|
74
|
-
normalized : true
|
75
|
-
styling :
|
76
|
-
linestyle: '-'
|
77
|
-
methods:
|
78
|
-
- Pearson
|
79
|
-
- Kendall-$\tau$
|
80
|
-
figure:
|
81
|
-
title: Training diagnostics
|
82
|
-
size : [10, 8]
|
dmu_data/plotting/tests/2d.yaml
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
saving:
|
2
|
-
plt_dir : /tmp/dmu/tests/plotting/2d_weighted
|
3
|
-
selection:
|
4
|
-
cuts:
|
5
|
-
xlow : x > -1.5
|
6
|
-
definitions:
|
7
|
-
z : x + y
|
8
|
-
general:
|
9
|
-
size : [20, 10]
|
10
|
-
plots_2d:
|
11
|
-
- [x, y, weights, 'xy_wgt', false]
|
12
|
-
- [x, y, null, 'xy_raw', false]
|
13
|
-
- [x, z, null, 'xz_raw', false]
|
14
|
-
- [x, z, null, 'xz_log', true]
|
15
|
-
axes:
|
16
|
-
x :
|
17
|
-
binning : [-3.0, 3.0, 40]
|
18
|
-
label : 'x'
|
19
|
-
y :
|
20
|
-
binning : [-5.0, 8.0, 40]
|
21
|
-
label : 'y'
|
22
|
-
z :
|
23
|
-
binning : [-5.0, 16.0, 40]
|
24
|
-
label : 'z'
|