data-manipulation-utilities 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: data_manipulation_utilities
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: logzero
6
6
  Requires-Dist: PyYAML
@@ -1,7 +1,7 @@
1
- data_manipulation_utilities-0.1.4.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
1
+ data_manipulation_utilities-0.1.6.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
2
2
  dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
3
3
  dmu/generic/utilities.py,sha256=0Xnq9t35wuebAqKxbyAiMk1ISB7IcXK4cFH25MT1fgw,1741
4
- dmu/logging/log_store.py,sha256=v0tiNz-6ktT_afD5DuvCZ8Nmr82JKQOPli8hgd28P1Q,3960
4
+ dmu/logging/log_store.py,sha256=umdvjNDuV3LdezbG26b0AiyTglbvkxST19CQu9QATbA,4184
5
5
  dmu/ml/cv_classifier.py,sha256=n81m7i2M6Zq96AEd9EZGwXSrbG5m9jkS5RdeXvbsAXU,3712
6
6
  dmu/ml/cv_predict.py,sha256=Bqxu-f6qquKJokFljhCzL_kiGcjLJLQFhVBD130fsyw,4893
7
7
  dmu/ml/train_mva.py,sha256=d_n-A07DFweikz5nXap4OE_Mqx8VprFT7zbxmnQAbac,9638
@@ -10,8 +10,8 @@ dmu/plotting/plotter.py,sha256=laa6Kl7P-ZOIhaOFBVjOH4XQ4kPCV7wBNvLIMBnyCwM,7181
10
10
  dmu/plotting/plotter_1d.py,sha256=G-i94uzm2TjNaog1A4agAKar_G0qNdkAqIPCmzhe85Y,3660
11
11
  dmu/plotting/plotter_2d.py,sha256=SWPKns-CfpUZHgBXvwm3gceH3k2eL_mKGXQ8sWpZJB0,2919
12
12
  dmu/rdataframe/atr_mgr.py,sha256=FdhaQWVpsm4OOe1IRbm7rfrq8VenTNdORyI-lZ2Bs1M,2386
13
- dmu/rdataframe/utilities.py,sha256=a31PdUz12sC2bx78LK6gvACh1M_eFaIVwuZEvOTcvcc,2084
14
- dmu/rfile/rfprinter.py,sha256=vGdqyHT_GwGBhrY7KG63EAUGWEOqobz_5yTL6goXbfk,2722
13
+ dmu/rdataframe/utilities.py,sha256=x8r379F2-vZPYzAdMFCn_V4Kx2Tx9t9pn_QHcZ1euew,2756
14
+ dmu/rfile/rfprinter.py,sha256=mp5jd-oCJAnuokbdmGyL9i6tK2lY72jEfROuBIZ_ums,3941
15
15
  dmu/rfile/utilities.py,sha256=XuYY7HuSBj46iSu3c60UYBHtI6KIPoJU_oofuhb-be0,945
16
16
  dmu/stats/fitter.py,sha256=LDvFNyhgO0OzXN7aH3kfHe6LzuPqdQfPcKR_IegDcaU,18204
17
17
  dmu/stats/function.py,sha256=yzi_Fvp_ASsFzbWFivIf-comquy21WoeY7is6dgY0Go,9491
@@ -39,8 +39,8 @@ dmu_scripts/rfile/compare_root_files.py,sha256=T8lDnQxsRNMr37x1Y7YvWD8ySHrJOWZki
39
39
  dmu_scripts/rfile/print_trees.py,sha256=Ze4Ccl_iUldl4eVEDVnYBoe4amqBT1fSBR1zN5WSztk,941
40
40
  dmu_scripts/ssh/coned.py,sha256=lhilYNHWRCGxC-jtyJ3LQ4oUgWW33B2l1tYCcyHHsR0,4858
41
41
  dmu_scripts/text/transform_text.py,sha256=9akj1LB0HAyopOvkLjNOJiptZw5XoOQLe17SlcrGMD0,1456
42
- data_manipulation_utilities-0.1.4.dist-info/METADATA,sha256=22y3wi2wh7fXUPYixxLJVDr0qyu0eeicGikPI4phUvg,19946
43
- data_manipulation_utilities-0.1.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
44
- data_manipulation_utilities-0.1.4.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
45
- data_manipulation_utilities-0.1.4.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
46
- data_manipulation_utilities-0.1.4.dist-info/RECORD,,
42
+ data_manipulation_utilities-0.1.6.dist-info/METADATA,sha256=1ttATABwWcdqqPJM72_4s_ZQjtbFp9MzkfsprkDJTv8,19946
43
+ data_manipulation_utilities-0.1.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
44
+ data_manipulation_utilities-0.1.6.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
45
+ data_manipulation_utilities-0.1.6.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
46
+ data_manipulation_utilities-0.1.6.dist-info/RECORD,,
dmu/logging/log_store.py CHANGED
@@ -3,6 +3,8 @@ Module holding LogStore
3
3
  '''
4
4
 
5
5
  import logging
6
+ from logging import Logger
7
+
6
8
  import logzero
7
9
 
8
10
  #------------------------------------------------------------
@@ -31,26 +33,28 @@ class LogStore:
31
33
  Class used to make loggers, set log levels, print loggers, e.g. interface to logging/logzero, etc.
32
34
  '''
33
35
  #pylint: disable = invalid-name
34
- d_logger = {}
35
- d_levels = {}
36
+ d_logger : dict[str,Logger] = {}
37
+ d_levels : dict[str, int] = {}
36
38
  log_level = logging.INFO
37
39
  is_configured = False
38
40
  backend = 'logging'
39
41
  #--------------------------
40
42
  @staticmethod
41
- def add_logger(name=None):
43
+ def add_logger(name : str, exists_ok : bool = False) -> Logger:
42
44
  '''
43
45
  Will use underlying logging library logzero/logging, etc to make logger
44
46
 
45
47
  name (str): Name of logger
46
48
  '''
47
49
 
48
- if name is None:
49
- raise ValueError('Logger name missing')
50
-
51
- if name in LogStore.d_logger:
50
+ if name in LogStore.d_logger and not exists_ok:
52
51
  raise ValueError(f'Logger name {name} already found')
53
52
 
53
+
54
+ if name in LogStore.d_logger and exists_ok:
55
+ print(f'Logger {name} already found, reusing it')
56
+ return LogStore.d_logger[name]
57
+
54
58
  level = LogStore.log_level if name not in LogStore.d_levels else LogStore.d_levels[name]
55
59
 
56
60
  if LogStore.backend == 'logging':
@@ -65,14 +69,14 @@ class LogStore:
65
69
  return logger
66
70
  #--------------------------
67
71
  @staticmethod
68
- def _get_logzero_logger(name : str, level : int):
72
+ def _get_logzero_logger(name : str, level : int) -> Logger:
69
73
  log = logzero.setup_logger(name=name)
70
74
  log.setLevel(level)
71
75
 
72
76
  return log
73
77
  #--------------------------
74
78
  @staticmethod
75
- def _get_logging_logger(name : str, level : int):
79
+ def _get_logging_logger(name : str, level : int) -> Logger:
76
80
  logger = logging.getLogger(name=name)
77
81
 
78
82
  logger.setLevel(level)
@@ -4,11 +4,13 @@ Module containing utility functions to be used with ROOT dataframes
4
4
 
5
5
  import re
6
6
  from dataclasses import dataclass
7
+ from typing import Union
7
8
 
9
+ import pandas as pnd
8
10
  import awkward as ak
9
11
  import numpy
10
12
 
11
- from ROOT import RDataFrame
13
+ from ROOT import RDataFrame, RDF
12
14
 
13
15
  from dmu.logging.log_store import LogStore
14
16
 
@@ -23,7 +25,7 @@ class Data:
23
25
  l_good_type = [int, numpy.bool_, numpy.int32, numpy.uint32, numpy.int64, numpy.uint64, numpy.float32, numpy.float64]
24
26
  d_cast_type = {'bool': numpy.int32}
25
27
  # ---------------------------------------------------------------------
26
- def add_column(rdf : RDataFrame, arr_val : numpy.ndarray | None, name : str, d_opt : dict | None = None):
28
+ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str, d_opt : Union[dict,None] = None):
27
29
  '''
28
30
  Will take a dataframe, an array of numbers and a string
29
31
  Will add the array as a column to the dataframe
@@ -70,3 +72,24 @@ def add_column(rdf : RDataFrame, arr_val : numpy.ndarray | None, name : str, d_o
70
72
 
71
73
  return rdf
72
74
  # ---------------------------------------------------------------------
75
+ def rdf_report_to_df(rep : RDF.RCutFlowReport) -> pnd.DataFrame:
76
+ '''
77
+ Takes the output of rdf.Report(), i.e. an RDataFrame cutflow report.
78
+
79
+ Produces a pandas dataframe with one row per cut and the columns: cut, All, Passed, Efficiency and Cummulative
80
+ '''
81
+ d_data = {'cut' : [], 'All' : [], 'Passed' : []}
82
+ for cut in rep:
83
+ name=cut.GetName()
84
+ pas =cut.GetPass()
85
+ tot =cut.GetAll()
86
+
87
+ d_data['cut' ].append(name)
88
+ d_data['All' ].append(tot)
89
+ d_data['Passed'].append(pas)
90
+
91
+ df = pnd.DataFrame(d_data)
92
+ df['Efficiency' ] = df['Passed'] / df['All']
93
+ df['Cummulative'] = df['Efficiency'].cumprod()
94
+
95
+ return df
dmu/rfile/rfprinter.py CHANGED
@@ -3,7 +3,8 @@ Module containing RFPrinter
3
3
  '''
4
4
  import os
5
5
 
6
- from ROOT import TFile
6
+ from typing import Union
7
+ from ROOT import TFile
7
8
 
8
9
  from dmu.logging.log_store import LogStore
9
10
 
@@ -22,7 +23,6 @@ class RFPrinter:
22
23
  raise FileNotFoundError(f'Cannot find {path}')
23
24
 
24
25
  self._root_path = path
25
- self._text_path = path.replace('.root', '.txt')
26
26
  #-----------------------------------------
27
27
  def _get_trees(self, ifile):
28
28
  '''
@@ -54,29 +54,38 @@ class RFPrinter:
54
54
  for branch in l_branch:
55
55
  bname = branch.GetName()
56
56
  leaf = branch.GetLeaf(bname)
57
- btype = leaf.GetTypeName()
57
+ try:
58
+ btype = leaf.GetTypeName()
59
+ except:
60
+ log.warning(f'Cannot read {bname}')
61
+ continue
58
62
 
59
63
  l_line.append(f'{"":4}{bname:<100}{btype:<40}')
60
64
 
61
65
  return l_line
62
66
  #-----------------------------------------
63
- def _save_info(self, l_info):
67
+ def _get_summary_path(self, file_name : Union[str,None]) -> str:
68
+ if file_name is None:
69
+ text_path = self._root_path.replace('.root', '.txt')
70
+ return text_path
71
+
72
+ root_dir = os.path.dirname(self._root_path)
73
+
74
+ return f'{root_dir}/{file_name}'
75
+ #-----------------------------------------
76
+ def _save_info(self, l_info : list[str], file_name : Union[str,None]) -> None:
64
77
  '''
65
78
  Takes list of strings, saves it to text file
66
79
  '''
67
80
 
68
- with open(self._text_path, 'w', encoding='utf-8') as ofile:
81
+ text_path = self._get_summary_path(file_name)
82
+ with open(text_path, 'w', encoding='utf-8') as ofile:
69
83
  for info in l_info:
70
84
  ofile.write(f'{info}\n')
71
85
 
72
- log.info(f'Saved to: {self._text_path}')
86
+ log.info(f'Saved to: {text_path}')
73
87
  #-----------------------------------------
74
- def save(self, to_screen=False):
75
- '''
76
- Will save a text file with the summary of the ROOT file contents
77
-
78
- to_screen (bool) : If true, will print to screen, default=False
79
- '''
88
+ def _get_info(self) -> list[str]:
80
89
  l_info = []
81
90
  log.info(f'Reading from : {self._root_path}')
82
91
  with TFile.Open(self._root_path) as ifile:
@@ -84,7 +93,27 @@ class RFPrinter:
84
93
  for tree in l_tree:
85
94
  l_info+= self._get_tree_info(tree)
86
95
 
87
- self._save_info(l_info)
96
+ return l_info
97
+ #-----------------------------------------
98
+ def save(self, file_name : Union[str,None] = None, to_screen : bool = False, raise_on_fail : bool = True) -> None:
99
+ '''
100
+ Will save a text file with the summary of the ROOT file contents
101
+
102
+ file_name : If used, name the file with the summary this way. Otherwise, use the ROOT file name with a .txt extension
103
+ to_screen : If true, will print to screen, default=False
104
+ raise_on_fail: If the ROOT file cannot be opened, will raise an exception (default), otherwise will only show a warning.
105
+ '''
106
+
107
+ try:
108
+ l_info = self._get_info()
109
+ except OSError as exc:
110
+ if raise_on_fail:
111
+ raise OSError(f'Cannot open: {self._root_path}') from exc
112
+
113
+ log.warning(f'Cannot open: {self._root_path}')
114
+ return
115
+
116
+ self._save_info(l_info, file_name=file_name)
88
117
  if to_screen:
89
118
  for info in l_info:
90
119
  log.info(info)