data-manipulation-utilities 0.2.8.dev714__py3-none-any.whl → 0.2.8.dev720__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/METADATA +33 -3
  2. data_manipulation_utilities-0.2.8.dev720.dist-info/RECORD +45 -0
  3. {data_manipulation_utilities-0.2.8.dev714.dist-info → data_manipulation_utilities-0.2.8.dev720.dist-info}/WHEEL +1 -2
  4. data_manipulation_utilities-0.2.8.dev720.dist-info/entry_points.txt +8 -0
  5. data_manipulation_utilities-0.2.8.dev714.data/scripts/publish +0 -89
  6. data_manipulation_utilities-0.2.8.dev714.dist-info/RECORD +0 -93
  7. data_manipulation_utilities-0.2.8.dev714.dist-info/entry_points.txt +0 -7
  8. data_manipulation_utilities-0.2.8.dev714.dist-info/top_level.txt +0 -3
  9. dmu_data/__init__.py +0 -0
  10. dmu_data/ml/tests/diagnostics_from_file.yaml +0 -13
  11. dmu_data/ml/tests/diagnostics_from_model.yaml +0 -10
  12. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +0 -10
  13. dmu_data/ml/tests/diagnostics_overlay.yaml +0 -33
  14. dmu_data/ml/tests/train_mva.yaml +0 -60
  15. dmu_data/ml/tests/train_mva_def.yaml +0 -75
  16. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +0 -87
  17. dmu_data/ml/tests/train_mva_with_preffix.yaml +0 -58
  18. dmu_data/plotting/tests/2d.yaml +0 -24
  19. dmu_data/plotting/tests/fig_size.yaml +0 -13
  20. dmu_data/plotting/tests/high_stat.yaml +0 -22
  21. dmu_data/plotting/tests/legend.yaml +0 -12
  22. dmu_data/plotting/tests/line.yaml +0 -15
  23. dmu_data/plotting/tests/name.yaml +0 -14
  24. dmu_data/plotting/tests/no_bounds.yaml +0 -12
  25. dmu_data/plotting/tests/normalized.yaml +0 -9
  26. dmu_data/plotting/tests/plug_fwhm.yaml +0 -24
  27. dmu_data/plotting/tests/plug_stats.yaml +0 -19
  28. dmu_data/plotting/tests/simple.yaml +0 -9
  29. dmu_data/plotting/tests/stats.yaml +0 -9
  30. dmu_data/plotting/tests/styling.yaml +0 -18
  31. dmu_data/plotting/tests/title.yaml +0 -14
  32. dmu_data/plotting/tests/weights.yaml +0 -13
  33. dmu_data/rfile/friends.yaml +0 -13
  34. dmu_data/stats/fitter/test_simple.yaml +0 -28
  35. dmu_data/stats/kde_optimizer/control.json +0 -1
  36. dmu_data/stats/kde_optimizer/signal.json +0 -1
  37. dmu_data/stats/parameters/data.yaml +0 -178
  38. dmu_data/tests/config.json +0 -6
  39. dmu_data/tests/config.yaml +0 -4
  40. dmu_data/tests/pdf_to_tex.txt +0 -34
  41. dmu_data/text/transform.toml +0 -4
  42. dmu_data/text/transform.txt +0 -6
  43. dmu_data/text/transform_set.toml +0 -8
  44. dmu_data/text/transform_set.txt +0 -6
  45. dmu_data/text/transform_trf.txt +0 -12
  46. dmu_scripts/git/publish +0 -89
  47. dmu_scripts/kerberos/check_expiration +0 -21
  48. dmu_scripts/kerberos/convert_certificate +0 -22
  49. dmu_scripts/ml/compare_classifiers.py +0 -85
  50. dmu_scripts/physics/check_truth.py +0 -121
  51. dmu_scripts/rfile/compare_root_files.py +0 -299
  52. dmu_scripts/rfile/print_trees.py +0 -35
  53. dmu_scripts/ssh/coned.py +0 -168
  54. dmu_scripts/text/transform_text.py +0 -46
@@ -1,299 +0,0 @@
1
- '''
2
- Script used to compare ROOT files
3
- '''
4
-
5
- import re
6
- import os
7
- from dataclasses import dataclass
8
- from typing import ClassVar
9
-
10
- import argparse
11
-
12
- import yaml
13
- import numpy
14
- from dmu.logging.log_store import LogStore
15
-
16
- from ROOT import TFile, TTree, RDataFrame
17
-
18
- import dmu.rfile.utilities as rfut
19
-
20
-
21
- log=LogStore.add_logger('rx_scripts:compare_files')
22
- #------------------
23
@dataclass
class Data:
    '''
    Namespace for the settings shared by the functions of this script

    In the code of this script the attributes are assigned on the class
    itself by main(), no instance is created.
    '''
    # Value of -n/--max_entries, entries are limited only when it is positive
    max_entries: int
    # Value of -t/--max_trees, trees are limited only when it is above -1
    max_trees: int
    # Names of trees that must not be compared, from -e/--exclude
    l_exclude: list[str]
    # If true, trees with different branches make the script raise
    raise_if_diff: bool
    # Paths to the two files under comparison, used to label the summary
    file_name_1: str
    file_name_2: str

    # Differences found so far, dumped to summary.yaml by _save_summary
    d_summary: ClassVar[dict] = {}
36
- #------------------
37
def _print_trees_difference(l_val_1 : list[str], l_val_2 : list[str]) -> None:
    '''
    Stores in Data.d_summary and logs the names found in only one of the lists

    l_val_1: Names of the trees in the first file
    l_val_2: Names of the trees in the second file
    '''
    s_first  = set(l_val_1)
    s_second = set(l_val_2)

    s_first_only  = s_first.difference(s_second)
    s_second_only = s_second.difference(s_first)

    Data.d_summary[f'Trees only in {Data.file_name_1}'] = list(s_first_only)
    Data.d_summary[f'Trees only in {Data.file_name_2}'] = list(s_second_only)

    if len(s_first_only) > 0:
        log.info(f'Found {len(s_first_only)} trees in first file but not second')
        for tree_name in s_first_only:
            log.info(f'{"":<4}{tree_name:<20}')

    if len(s_second_only) > 0:
        log.info(f'Found {len(s_second_only)} trees in second file but not first')
        for tree_name in s_second_only:
            log.info(f'{"":<4}{tree_name:<20}')
59
- #------------------
60
def _check_trees(d_tree_1 : dict[str, TTree], d_tree_2 : dict[str, TTree]) -> list[str]:
    '''
    Check if dictionaries have same trees
    For corresponding trees, check if number of entries is the same

    d_tree_1: Dictionary mapping tree name to tree, for first file
    d_tree_2: Same, for second file

    Returns sorted list with the names of the trees present in both dictionaries.
    Trees in Data.l_exclude are not checked for entries, but they are still returned.

    Raises ValueError if a common, non excluded, tree has different number of entries
    '''
    s_treename_1 = set(d_tree_1)
    s_treename_2 = set(d_tree_2)

    # Compared as sets, the same trees stored in a different order are not a difference
    if s_treename_1 != s_treename_2:
        log.warning('Files contain different trees')
        _print_trees_difference(list(d_tree_1), list(d_tree_2))

    # Sorted to make the output, and any slice taken from it, reproducible between runs
    l_treename = sorted(s_treename_1 & s_treename_2)

    for treename in l_treename:
        if treename in Data.l_exclude:
            continue

        entries_1 = d_tree_1[treename].GetEntries()
        entries_2 = d_tree_2[treename].GetEntries()

        if entries_1 != entries_2:
            raise ValueError(f'Tree {treename} differs in entries {entries_1}/{entries_2}')

    return l_treename
90
- #------------------
91
def _get_data(tree : TTree) -> dict[str, numpy.ndarray]:
    '''
    Builds a dataframe from the tree and returns what AsNumpy provides for it

    If Data.max_entries is positive, only that many entries are read
    '''
    rdf   = RDataFrame(tree)
    limit = Data.max_entries

    if limit > 0:
        log.warning(f'Limiting to {limit} entries')
        rdf = rdf.Range(limit)

    return rdf.AsNumpy(exclude=[])
100
- #------------------
101
- def _check_branches(tree_name : str, l_branch_1 : list[str], l_branch_2 : list[str]) -> None:
102
- '''
103
- Takes lists of branch names
104
- Checks if they are the same, if not print differences
105
-
106
- if raise_if_diff is True, will raise exception if branches are not the same
107
- '''
108
- if l_branch_1 == l_branch_2:
109
- return
110
-
111
- s_branch_1 = set(l_branch_1)
112
- s_branch_2 = set(l_branch_2)
113
-
114
- s_branch_1_m_2 = s_branch_1.difference(s_branch_2)
115
- log.info(f'Found len({s_branch_1_m_2}) branches in first tree but not second')
116
- for branch_name in s_branch_1_m_2:
117
- log.debug(f'{"":<4}{branch_name:<20}')
118
-
119
- s_branch_2_m_1 = s_branch_2.difference(s_branch_1)
120
- log.info(f'Found len({s_branch_2_m_1}) branches in second tree but not first')
121
- for branch_name in s_branch_2_m_1:
122
- log.debug(f'{"":<4}{branch_name:<20}')
123
-
124
- Data.d_summary[tree_name] = {
125
- f'Only {Data.file_name_1}' : list(s_branch_1_m_2),
126
- f'Only {Data.file_name_2}' : list(s_branch_2_m_1),
127
- }
128
-
129
- if Data.raise_if_diff:
130
- raise ValueError('Branches differ')
131
- #------------------
132
def _compare_branches(tree_name : str, d_data_1 : dict[str, list], d_data_2 : dict[str, list]) -> list[str]:
    '''
    Reports, through _check_branches, the branches that are not in both trees

    tree_name: Name of the tree the data belongs to
    d_data_1 : Dictionary with branch names as keys, for the first file
    d_data_2 : Same, for the second file

    Returns list with the names of the branches found in both dictionaries
    '''
    l_name_1 = sorted(d_data_1.keys())
    l_name_2 = sorted(d_data_2.keys())

    _check_branches(tree_name, l_name_1, l_name_2)

    s_common = set(l_name_1) & set(l_name_2)

    return list(s_common)
150
- #------------------
151
def _compare(tree_name : str, d_data_1, d_data_2) -> None:
    '''
    Compares the data read from the same tree in both files

    tree_name: Name of the tree, used in the messages and in the summary key
    d_data_1 : Dictionary mapping branch name to values, for the first file
    d_data_2 : Same, for the second file

    The names of the branches whose contents differ are stored in Data.d_summary
    '''
    log.info('')
    log.debug('Comparing branches')
    l_common = _compare_branches(tree_name, d_data_1, d_data_2)

    log.debug('Comparing contents of branches')
    l_differing = [
        name for name in l_common
        if _contents_differ(tree_name, name, d_data_1[name], d_data_2[name])
    ]

    Data.d_summary[f'Branches that differ for tree: {tree_name}'] = l_differing

    ndiff = len(l_differing)
    if ndiff == 0:
        log.debug(f'Trees {tree_name} have same contents')
        return

    ntot = len(l_common)
    log.warning(f'{ndiff:<10}{"differing branches out of":<20}{ntot:<10}{"in":<10}{tree_name:<50}')
    for name in l_differing:
        log.debug(f'{"":<4}{name:<20}')
177
- #------------------
178
- def _contents_differ(tree_name : str, branch_name : str, arr_val_1 : numpy.ndarray, arr_val_2 : numpy.ndarray) -> bool:
179
- is_different = False
180
- str_type = str(arr_val_1.dtype)
181
- if str_type == 'object':
182
- return is_different
183
-
184
- if str_type not in ['bool', 'int32', 'uint32', 'uint64', 'float64', 'float32']:
185
- log.info(f'Skipping {branch_name}, {str_type}')
186
- return is_different
187
-
188
- if not numpy.array_equal(arr_val_1, arr_val_2):
189
- is_different = True
190
-
191
- log.debug(20 * '-')
192
- log.debug(f'Branch {branch_name} in tree {tree_name} differ')
193
- log.debug(20 * '-')
194
- log.debug(arr_val_1)
195
- log.debug(arr_val_2)
196
- log.debug(20 * '-')
197
-
198
- return is_different
199
- #------------------
200
- def _update_keys(d_tree):
201
- d_out = {}
202
-
203
- for key, val in d_tree.items():
204
- #Remove everything before .root/ and use it as new key
205
- new_key = re.sub(r'^.*\.root/', '', key)
206
- d_out[new_key] = val
207
-
208
- return d_out
209
- #------------------
210
- def _check_file_existence(path : str) -> None:
211
- if not os.path.isfile(path):
212
- raise FileNotFoundError(f'Cannot find {path}')
213
- #------------------
214
def _validate(file_1 : str, file_2 : str) -> None:
    '''
    Compares the trees stored in two ROOT files

    file_1: Path to first file, logged as the reference
    file_2: Path to second file, logged as the new one

    Raises FileNotFoundError if any of the paths is not a file.
    Exceptions raised while comparing are propagated, after closing the files.
    '''
    _check_file_existence(file_1)
    _check_file_existence(file_2)

    ifile_1 = TFile(file_1)
    ifile_2 = TFile(file_2)

    # The comparison can raise, e.g. when the number of entries differ,
    # the files have to be closed also in that case
    try:
        d_tree_1 = rfut.get_trees_from_file(ifile_1)
        d_tree_1 = _update_keys(d_tree_1)

        d_tree_2 = rfut.get_trees_from_file(ifile_2)
        d_tree_2 = _update_keys(d_tree_2)

        l_tree_name = _check_trees(d_tree_1, d_tree_2)

        if Data.max_trees > -1:
            log.warning(f'Limiting to {Data.max_trees} trees')
            l_tree_name = l_tree_name[:Data.max_trees]

        ncommon = len(l_tree_name)
        log.debug(f'Found common {ncommon} trees')
        for name in l_tree_name:
            log.debug(f'{"":<4}{name}')

        log.info('Checking trees')
        for treename in l_tree_name:
            if treename in Data.l_exclude:
                log.debug(f'Skipping {treename}')
                continue

            log.debug(f'{"":<4}{treename}')

            tree_1 = d_tree_1[treename]
            tree_2 = d_tree_2[treename]

            log.debug('Getting data from reference')
            d_data_1 = _get_data(tree_1)

            log.debug('Getting data from new')
            d_data_2 = _get_data(tree_2)

            log.debug(f'Comparing {treename}')
            _compare(treename, d_data_1, d_data_2)
    finally:
        ifile_1.Close()
        ifile_2.Close()
260
- #------------------
261
def _save_summary() -> None:
    '''
    Writes the contents of Data.d_summary, as YAML, to summary.yaml
    in the directory from which the script is run
    '''
    summary_path = 'summary.yaml'

    with open(summary_path, mode='w', encoding='utf-8') as ofile:
        yaml.dump(Data.d_summary, stream=ofile, indent=2, default_flow_style=False)
268
- #------------------
269
def main():
    '''
    Script starts here

    Parses the command line arguments, stores them in Data,
    compares the two files and saves the summary of the differences.
    '''
    parser = argparse.ArgumentParser(description='Used to validate versions of code that produce potentially different files')
    # Required, otherwise args.files is None and the unpacking below fails with a TypeError
    parser.add_argument('-f', '--files'        , nargs= 2, help='List of files to compare', required=True)
    parser.add_argument('-n', '--max_entries'  , type=int , help='Limit running over this number of entries. By default will run over everything', default=-1)
    parser.add_argument('-t', '--max_trees'    , type=int , help='Limit running over this number of trees. By default will run over everything'  , default=-1)
    parser.add_argument('-l', '--log_level'    , type=int , help='Logging level'                                                                 , default=20, choices=[10, 20, 30, 40])
    parser.add_argument('-e', '--exclude'      , nargs='+', help='List of trees that should not be compared'                                     , default=[])
    parser.add_argument('-r', '--raise_if_diff', help='If used, will fail as soon as it finds trees with different branches.', action='store_true')

    args = parser.parse_args()

    LogStore.set_level('rx_scripts:compare_files', args.log_level)

    Data.max_entries   = args.max_entries
    Data.max_trees     = args.max_trees
    Data.l_exclude     = args.exclude
    Data.raise_if_diff = args.raise_if_diff

    file_1, file_2 = args.files

    Data.file_name_1 = file_1
    Data.file_name_2 = file_2

    _validate(file_1, file_2)
    _save_summary()
#------------------
if __name__ == '__main__':
    main()
@@ -1,35 +0,0 @@
1
- '''
2
- Script used to print contents of root files
3
- '''
4
-
5
- import argparse
6
-
7
- from dmu.rfile.rfprinter import RFPrinter
8
-
9
- # -----------------------------
10
class Data:
    '''
    Holder of the attributes shared by the functions of this script
    '''
    # Path to the ROOT file, from -p/--path
    path: str
    # True if -s/--screen was used
    screen: bool
16
- # -----------------------------
17
def _get_args():
    '''
    Parses the command line arguments and stores them in Data
    '''
    parser = argparse.ArgumentParser(description='Script used to print information about ROOT files and dump it to text')
    parser.add_argument('-p', '--path', type=str, help='Path to ROOT file')
    parser.add_argument('-s', '--screen', action='store_true', help='If used, will dump output to screen')
    parsed = parser.parse_args()

    Data.path, Data.screen = parsed.path, parsed.screen
25
- # -----------------------------
26
def main():
    '''
    Entry point: reads the arguments and hands the file over to RFPrinter
    '''
    _get_args()

    RFPrinter(path = Data.path).save(to_screen = Data.screen)
# -----------------------------
if __name__ == '__main__':
    main()
dmu_scripts/ssh/coned.py DELETED
@@ -1,168 +0,0 @@
1
- '''
2
- Script used to implement connection to servers
3
- '''
4
-
5
- import os
6
- import copy
7
- import argparse
8
-
9
- import yaml
10
- from dmu.logging.log_store import LogStore
11
-
12
- log = LogStore.add_logger('dmu:scripts:coned')
13
- #---------------------------------------
14
class Data:
    '''
    Holder of the data shared by the functions of this script
    '''
    # Logging level, from -l/--log_lvl
    logl: int
    # True if -d/--dry was used, the config is then printed instead of saved
    dry: bool
    # True if -p/--print was used
    prnt: bool
    # Contents of the servers config file
    cfg: dict
    # Values passed to -a/--add, unpacked as task, machine, server
    l_ad: list[str]
    # Values passed to -r/--rem, unpacked as task, machine, server
    l_rm: list[str]
24
- #----------------------------
25
def _print_configs():
    '''
    Dumps Data.cfg as block style YAML to the standard output
    '''
    print(yaml.dump(Data.cfg, default_flow_style=False))
32
- #----------------------------
33
def _initialize():
    '''
    Loads the config and sets the logging level of this script
    '''
    _load_config()

    level = Data.logl
    LogStore.set_level('dmu:scripts:coned', level)

    log.debug(f'Running at {level} logging level')
39
- #----------------------------
40
def _get_args():
    '''
    Will parse arguments and store them in Data
    '''
    # The path in the description is the one used to load and save the config
    parser = argparse.ArgumentParser(description='Used to edit and print server list specified by ~/.config/dmu/ssh/servers.yaml')
    parser.add_argument('-p', '--print'  , help ='Prints config settings and exits', action='store_true')
    parser.add_argument('-l', '--log_lvl', type =int, help='Logging level', default=20, choices=[10,20,30])
    # The metavars document the order in which the three values are unpacked
    parser.add_argument('-a', '--add'    , nargs=3  , help='Adds task to given server, e.g. task 123 server'     , default=[], metavar=('TASK', 'MACHINE', 'SERVER'))
    parser.add_argument('-r', '--rem'    , nargs=3  , help='Removes task from given server, e.g. task 123 server', default=[], metavar=('TASK', 'MACHINE', 'SERVER'))
    parser.add_argument('-d', '--dry'    , help='Run dry run, for adding and removing entries', action='store_true')
    args = parser.parse_args()

    Data.prnt = args.print
    Data.logl = args.log_lvl
    Data.l_ad = args.add
    Data.l_rm = args.rem
    Data.dry  = args.dry
57
- #---------------------------------------
58
def _load_config():
    '''
    Loads $HOME/.config/dmu/ssh/servers.yaml into Data.cfg

    Raises FileNotFoundError if the file does not exist
    Raises KeyError if HOME is not in the environment
    '''
    home_dir    = os.environ['HOME']
    config_path = f'{home_dir}/.config/dmu/ssh/servers.yaml'
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f'Config not found: {config_path}')

    with open(config_path, encoding='utf-8') as ifile:
        cfg = yaml.safe_load(ifile)

    # An empty file is loaded as None, which cannot be searched for servers.
    # Use an empty config, such that tasks can be added to it.
    Data.cfg = {} if cfg is None else cfg
66
- #---------------------------------------
67
def _dump_config(cfg : dict):
    '''
    Saves the config to $HOME/.config/dmu/ssh/servers.yaml

    cfg: Config to save, nothing is done if it is equal to Data.cfg

    If Data.dry is true, the config is printed instead of saved
    Raises FileNotFoundError if the config file does not exist already
    '''
    if cfg == Data.cfg:
        log.debug('Config was not modified, will not save it')
        return

    config_path = os.environ['HOME'] + '/.config/dmu/ssh/servers.yaml'
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f'Config not found: {config_path}')

    if Data.dry:
        print(yaml.dump(cfg, default_flow_style=False))
        return

    with open(config_path, mode='w', encoding='utf-8') as ofile:
        yaml.dump(cfg, stream=ofile, default_flow_style=False)
84
- #---------------------------------------
85
def _get_updated_config() -> dict:
    '''
    Returns a copy of Data.cfg, after adding and then removing the requested tasks
    '''
    log.debug('Getting updated config')

    cfg_copy = copy.deepcopy(Data.cfg)

    return _remove_task(_add_task(cfg_copy))
93
- #---------------------------------------
94
def _add_task(cfg : dict) -> dict:
    '''
    Appends the task in Data.l_ad to the list of tasks of its server and machine

    cfg: Config, it is modified in place and returned

    Entries for server and machine are created if they are missing
    '''
    if not Data.l_ad:
        log.debug('No task added')
        return cfg

    task, machine, server = Data.l_ad

    d_machine = cfg.setdefault(server, {})
    d_machine.setdefault(machine, []).append(task)

    log.info(f'{"Added":<10}{server:<20}{machine:<10}{task:<20}')

    return cfg
111
- #---------------------------------------
112
def _remove_task(cfg : dict) -> dict:
    '''
    Removes the first occurrence of the task in Data.l_rm from its server and machine

    cfg: Config, it is modified in place and returned

    A warning is logged, and the config returned unchanged, if the server,
    the machine or the task are not found. Entries left empty are trimmed.
    '''
    if not Data.l_rm:
        log.debug('No task removed')
        return cfg

    task, machine, server = Data.l_rm

    if server not in cfg:
        log.warning(f'Server {server} not found')
        return cfg

    d_machine = cfg[server]
    if machine not in d_machine:
        log.warning(f'Machine {machine} not found in server {server}')
        return cfg

    l_task = d_machine[machine]
    if task not in l_task:
        log.warning(f'Task {task} not found in {server}:{machine}')
        return cfg

    l_task.remove(task)

    log.info(f'{"Removed":<10}{server:<20}{machine:<10}{task:<20}')

    return _trim_config(cfg, machine, server)
140
- #---------------------------------------
141
- def _trim_config(cfg : dict, machine : str, server : str) -> dict:
142
- if cfg[server][machine] == []:
143
- log.debug(f'Trimming {server}:{machine}')
144
- del cfg[server][machine]
145
-
146
- if cfg[server] == {}:
147
- log.debug(f'Trimming {server}')
148
- del cfg[server]
149
-
150
- return cfg
151
- #---------------------------------------
152
def main():
    '''
    Entry point: prints the config if requested, otherwise updates and saves it
    '''
    _get_args()
    _initialize()

    if not Data.prnt:
        _dump_config(_get_updated_config())
        return

    log.debug('Printing and returning')
    _print_configs()
#---------------------------------------
if __name__ == '__main__':
    main()
@@ -1,46 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- from dmu.text.transformer import transformer as txt_trf
4
-
5
- import argparse
6
- import logging
7
-
8
- log = logging.getLogger('dmu_scripts:text:transformer')
9
- #---------------------------------
10
class data:
    '''
    Holder of the command line arguments, filled by get_args
    '''
    txt = None  # Path to input file
    out = None  # Path to output file
    cfg = None  # Path to config file
    lvl = None  # Log level
15
- #---------------------------------
16
def get_args():
    '''
    Parses the command line arguments and stores them in the data class
    '''
    parser = argparse.ArgumentParser(description='Will transform a text file following a set of rules')
    parser.add_argument('-i', '--input', type=str, required=True, help='Path to input file')
    parser.add_argument('-o', '--output', type=str, help='Path to output file, if not passed, it will be same as input, but with trf before extension')
    parser.add_argument('-c', '--config', type=str, required=True, help='Path to config file')
    parser.add_argument('-l', '--loglvl', type=int, default=20, choices=[10, 20, 30, 40], help='Log level')
    parsed = parser.parse_args()

    data.txt, data.out = parsed.input, parsed.output
    data.cfg, data.lvl = parsed.config, parsed.loglvl
28
- #---------------------------------
29
def set_logs():
    '''
    Configures logging and sets the level of the dmu:text:transformer
    logger and of the logger of this script to data.lvl
    '''
    logging.basicConfig()

    for logger in (logging.getLogger('dmu:text:transformer'), log):
        logger.setLevel(data.lvl)
36
- #---------------------------------
37
def main():
    '''
    Entry point: reads the arguments, sets up the logs and saves the transformed text
    '''
    get_args()
    set_logs()

    txt_trf(txt_path=data.txt, cfg_path=data.cfg).save_as(data.out)
#---------------------------------
if __name__ == '__main__':
    main()
46
-