data-manipulation-utilities 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +179 -10
  2. {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +31 -19
  3. {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
  4. dmu/generic/hashing.py +44 -0
  5. dmu/generic/utilities.py +14 -1
  6. dmu/generic/version_management.py +3 -5
  7. dmu/ml/cv_diagnostics.py +221 -0
  8. dmu/ml/train_mva.py +143 -46
  9. dmu/pdataframe/utilities.py +36 -3
  10. dmu/plotting/fwhm.py +64 -0
  11. dmu/plotting/plotter.py +2 -0
  12. dmu/plotting/plotter_1d.py +87 -6
  13. dmu/stats/fitter.py +1 -1
  14. dmu/stats/minimizers.py +40 -11
  15. dmu/stats/model_factory.py +248 -44
  16. dmu/stats/zfit_models.py +68 -0
  17. dmu/stats/zfit_plotter.py +29 -21
  18. dmu/testing/utilities.py +31 -4
  19. dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
  20. dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
  21. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
  22. dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
  23. dmu_data/ml/tests/train_mva.yaml +19 -10
  24. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
  25. dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
  26. dmu_data/plotting/tests/plug_stats.yaml +19 -0
  27. dmu_data/plotting/tests/simple.yaml +4 -3
  28. dmu_data/plotting/tests/styling.yaml +11 -0
  29. {data_manipulation_utilities-0.2.5.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
  30. {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
  31. {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
dmu/ml/train_mva.py CHANGED
@@ -1,10 +1,12 @@
1
1
  '''
2
2
  Module with TrainMva class
3
3
  '''
4
- # pylint: disable = too-many-locals
4
+ # pylint: disable = too-many-locals, no-name-in-module
5
5
  # pylint: disable = too-many-arguments, too-many-positional-arguments
6
+ # pylint: disable = too-many-instance-attributes
6
7
 
7
8
  import os
9
+ import copy
8
10
 
9
11
  import joblib
10
12
  import pandas as pnd
@@ -14,18 +16,19 @@ import matplotlib.pyplot as plt
14
16
  from sklearn.metrics import roc_curve, auc
15
17
  from sklearn.model_selection import StratifiedKFold
16
18
 
17
- from ROOT import RDataFrame
19
+ from ROOT import RDataFrame, RDF
18
20
 
19
21
  import dmu.ml.utilities as ut
20
22
  import dmu.pdataframe.utilities as put
21
23
  import dmu.plotting.utilities as plu
22
24
 
25
+ from dmu.ml.cv_diagnostics import CVDiagnostics
23
26
  from dmu.ml.cv_classifier import CVClassifier as cls
24
27
  from dmu.plotting.plotter_1d import Plotter1D as Plotter
25
28
  from dmu.plotting.matrix import MatrixPlotter
26
29
  from dmu.logging.log_store import LogStore
27
30
 
28
- npa = numpy.ndarray
31
+ NPA = numpy.ndarray
29
32
  log = LogStore.add_logger('dmu:ml:train_mva')
30
33
  # ---------------------------------------------
31
34
  class TrainMva:
@@ -33,40 +36,73 @@ class TrainMva:
33
36
  Interface to scikit learn used to train classifier
34
37
  '''
35
38
  # ---------------------------------------------
36
- def __init__(self, bkg=None, sig=None, cfg=None):
39
+ def __init__(self, bkg : RDataFrame, sig : RDataFrame, cfg : dict):
37
40
  '''
38
41
  bkg (ROOT dataframe): Holds real data
39
42
  sig (ROOT dataframe): Holds simulation
40
43
  cfg (dict) : Dictionary storing configuration for training
41
44
  '''
42
- if bkg is None:
43
- raise ValueError('Background dataframe is not a ROOT dataframe')
45
+ self._cfg = cfg
46
+ self._l_ft_name = self._cfg['training']['features']
44
47
 
45
- if sig is None:
46
- raise ValueError('Signal dataframe is not a ROOT dataframe')
48
+ self._rdf_sig_org = sig
49
+ self._rdf_bkg_org = bkg
47
50
 
48
- if not isinstance(cfg, dict):
49
- raise ValueError('Config dictionary is not a dictionary')
51
+ rdf_bkg = self._preprocess_rdf(bkg)
52
+ rdf_sig = self._preprocess_rdf(sig)
50
53
 
51
- self._rdf_bkg = bkg
52
- self._rdf_sig = sig
53
- self._cfg = cfg
54
+ df_ft_sig, l_lab_sig = self._get_sample_inputs(rdf = rdf_sig, label = 1)
55
+ df_ft_bkg, l_lab_bkg = self._get_sample_inputs(rdf = rdf_bkg, label = 0)
54
56
 
55
- self._l_ft_name = self._cfg['training']['features']
57
+ self._df_ft = pnd.concat([df_ft_sig, df_ft_bkg], axis=0)
58
+ self._l_lab = numpy.array(l_lab_sig + l_lab_bkg)
56
59
 
57
- self._df_ft, self._l_lab = self._get_inputs()
60
+ self._rdf_bkg = self._get_rdf(rdf = rdf_bkg, df_feat=df_ft_bkg)
61
+ self._rdf_sig = self._get_rdf(rdf = rdf_sig, df_feat=df_ft_sig)
58
62
  # ---------------------------------------------
59
- def _get_inputs(self) -> tuple[pnd.DataFrame, npa]:
60
- log.info('Getting signal')
61
- df_sig, arr_lab_sig = self._get_sample_inputs(self._rdf_sig, label = 1)
63
+ def _get_extra_columns(self, rdf : RDataFrame, df : pnd.DataFrame) -> list[str]:
64
+ d_plot = self._cfg['plotting']['features']['plots']
65
+ l_expr = list(d_plot)
66
+ l_rdf = [ name.c_str() for name in rdf.GetColumnNames() ]
67
+
68
+ l_extr = []
69
+ for expr in l_expr:
70
+ if expr not in l_rdf:
71
+ continue
62
72
 
63
- log.info('Getting background')
64
- df_bkg, arr_lab_bkg = self._get_sample_inputs(self._rdf_bkg, label = 0)
73
+ if expr in df.columns:
74
+ continue
65
75
 
66
- df = pnd.concat([df_sig, df_bkg], axis=0)
67
- arr_lab = numpy.concatenate([arr_lab_sig, arr_lab_bkg])
76
+ l_extr.append(expr)
68
77
 
69
- return df, arr_lab
78
+ return l_extr
79
+ # ---------------------------------------------
80
+ def _get_rdf(self, rdf : RDataFrame, df_feat : pnd.DataFrame) -> RDataFrame:
81
+ '''
82
+ Takes original ROOT dataframe and pre-processed features dataframe
83
+ Adds missing branches to latter and returns expanded ROOT dataframe
84
+ Needed to make plots
85
+ '''
86
+
87
+ l_extr_col = self._get_extra_columns(rdf, df_feat)
88
+ if len(l_extr_col) > 20:
89
+ for name in l_extr_col:
90
+ log.debug(name)
91
+ raise ValueError('Found more than 20 extra columns')
92
+
93
+ d_data = rdf.AsNumpy(l_extr_col)
94
+ log.debug(f'Adding extra-nonfeature columns: {l_extr_col}')
95
+ df_extr = pnd.DataFrame(d_data)
96
+
97
+ nmain = len(df_feat.columns)
98
+ nextr = len(df_extr.columns)
99
+
100
+ log.debug(f'Main DF size: {nmain}')
101
+ log.debug(f'Extra DF size: {nextr}')
102
+
103
+ df_all = pnd.concat([df_feat, df_extr], axis=1)
104
+
105
+ return RDF.FromPandas(df_all)
70
106
  # ---------------------------------------------
71
107
  def _pre_process_nans(self, df : pnd.DataFrame) -> pnd.DataFrame:
72
108
  if 'dataset' not in self._cfg:
@@ -77,25 +113,41 @@ class TrainMva:
77
113
  return df
78
114
 
79
115
  d_name_val = self._cfg['dataset']['nan']
80
- log.info(60 * '-')
116
+ log.info(70 * '-')
81
117
  log.info('Doing NaN replacements')
82
- log.info(60 * '-')
118
+ log.info(70 * '-')
83
119
  for var, val in d_name_val.items():
84
- log.info(f'{var:<20}{"--->":20}{val:<20.3f}')
120
+ nna = df[var].isna().sum()
121
+
122
+ log.info(f'{var:<20}{"--->":20}{val:<20.3f}{nna}')
85
123
  df[var] = df[var].fillna(val)
124
+ log.info(70 * '-')
86
125
 
87
126
  return df
88
127
  # ---------------------------------------------
89
- def _get_sample_inputs(self, rdf : RDataFrame, label : int) -> tuple[pnd.DataFrame, npa]:
128
+ def _preprocess_rdf(self, rdf : RDataFrame) -> RDataFrame:
129
+ if 'define' not in self._cfg['dataset']:
130
+ log.debug('No definitions found')
131
+ return rdf
132
+
133
+ log.debug('Definitions found')
134
+ d_def = self._cfg['dataset']['define']
135
+ for name, expr in d_def.items():
136
+ log.debug(f'{name:<20}{expr}')
137
+ rdf = rdf.Define(name, expr)
138
+
139
+ return rdf
140
+ # ---------------------------------------------
141
+ def _get_sample_inputs(self, rdf : RDataFrame, label : int) -> tuple[pnd.DataFrame, list[int]]:
90
142
  d_ft = rdf.AsNumpy(self._l_ft_name)
91
143
  df = pnd.DataFrame(d_ft)
92
144
  df = self._pre_process_nans(df)
93
145
  df = ut.cleanup(df)
94
146
  l_lab= len(df) * [label]
95
147
 
96
- return df, numpy.array(l_lab)
148
+ return df, l_lab
97
149
  # ---------------------------------------------
98
- def _get_model(self, arr_index : npa) -> cls:
150
+ def _get_model(self, arr_index : NPA) -> cls:
99
151
  model = cls(cfg = self._cfg)
100
152
  df_ft = self._df_ft.iloc[arr_index]
101
153
  l_lab = self._l_lab[arr_index]
@@ -107,10 +159,14 @@ class TrainMva:
107
159
 
108
160
  return model
109
161
  # ---------------------------------------------
110
- def _get_models(self):
162
+ def _get_models(self, load_trained : bool):
111
163
  '''
112
164
  Will create models, train them and return them
113
165
  '''
166
+ if load_trained:
167
+ log.warning('Not retraining, but loading trained models')
168
+ return self._load_trained_models()
169
+
114
170
  nfold = self._cfg['training']['nfold']
115
171
  rdmst = self._cfg['training']['rdm_stat']
116
172
 
@@ -137,6 +193,22 @@ class TrainMva:
137
193
 
138
194
  return l_model
139
195
  # ---------------------------------------------
196
+ def _load_trained_models(self) -> list[cls]:
197
+ model_path = self._cfg['saving']['path']
198
+ nfold = self._cfg['training']['nfold']
199
+ l_model = []
200
+ for ifold in range(nfold):
201
+ fold_path = model_path.replace('.pkl', f'_{ifold:03}.pkl')
202
+
203
+ if not os.path.isfile(fold_path):
204
+ raise FileNotFoundError(f'Missing trained model: {fold_path}')
205
+
206
+ log.debug(f'Loading model from: {fold_path}')
207
+ model = joblib.load(fold_path)
208
+ l_model.append(model)
209
+
210
+ return l_model
211
+ # ---------------------------------------------
140
212
  def _labels_from_varnames(self, l_var_name : list[str]) -> list[str]:
141
213
  try:
142
214
  d_plot = self._cfg['plotting']['features']['plots']
@@ -176,7 +248,7 @@ class TrainMva:
176
248
  d_form = {'Variable' : '{}', 'Importance' : '{:.1f}'}
177
249
  put.df_to_tex(df, table_path, d_format = d_form)
178
250
  # ---------------------------------------------
179
- def _get_scores(self, model : cls, arr_index : npa, on_training_ok : bool) -> tuple[npa, npa, npa, npa]:
251
+ def _get_scores(self, model : cls, arr_index : NPA, on_training_ok : bool) -> tuple[NPA, NPA, NPA, NPA]:
180
252
  '''
181
253
  Returns a tuple of four arrays
182
254
 
@@ -199,7 +271,7 @@ class TrainMva:
199
271
 
200
272
  return arr_sig, arr_bkg, arr_all, arr_lab
201
273
  # ---------------------------------------------
202
- def _split_scores(self, arr_prob : npa, arr_label : npa) -> tuple[npa, npa]:
274
+ def _split_scores(self, arr_prob : NPA, arr_label : NPA) -> tuple[NPA, NPA]:
203
275
  '''
204
276
  Will split the testing scores (predictions) based on the training scores
205
277
 
@@ -255,7 +327,7 @@ class TrainMva:
255
327
 
256
328
  return cfg
257
329
  # ---------------------------------------------
258
- def _plot_correlation(self, arr_index : npa, ifold : int) -> None:
330
+ def _plot_correlation(self, arr_index : NPA, ifold : int) -> None:
259
331
  df_ft = self._df_ft.iloc[arr_index]
260
332
  cfg = self._get_correlation_cfg(df_ft, ifold)
261
333
  cov = df_ft.corr()
@@ -272,7 +344,7 @@ class TrainMva:
272
344
  plt.savefig(f'{val_dir}/covariance.png')
273
345
  plt.close()
274
346
  # ---------------------------------------------
275
- def _get_nentries(self, arr_val : npa) -> str:
347
+ def _get_nentries(self, arr_val : NPA) -> str:
276
348
  size = len(arr_val)
277
349
  size = size / 1000.
278
350
 
@@ -307,10 +379,10 @@ class TrainMva:
307
379
  plt.close()
308
380
  # ---------------------------------------------
309
381
  def _plot_roc(self,
310
- l_lab_ts : npa,
311
- l_prb_ts : npa,
312
- l_lab_tr : npa,
313
- l_prb_tr : npa,
382
+ l_lab_ts : NPA,
383
+ l_prb_ts : NPA,
384
+ l_lab_tr : NPA,
385
+ l_prb_tr : NPA,
314
386
  ifold : int):
315
387
  '''
316
388
  Takes the labels and the probabilities and plots ROC
@@ -355,10 +427,10 @@ class TrainMva:
355
427
  plt.close()
356
428
  # ---------------------------------------------
357
429
  def _plot_probabilities(self,
358
- arr_seff: npa,
359
- arr_brej: npa,
360
- arr_sprb: npa,
361
- arr_labl: npa) -> None:
430
+ arr_seff: NPA,
431
+ arr_brej: NPA,
432
+ arr_sprb: NPA,
433
+ arr_labl: NPA) -> None:
362
434
 
363
435
  roc_cfg = self._cfg['plotting']['roc']
364
436
  if 'annotate' not in roc_cfg:
@@ -443,11 +515,32 @@ class TrainMva:
443
515
  os.makedirs(val_dir, exist_ok=True)
444
516
  put.df_to_tex(df, f'{val_dir}/hyperparameters.tex')
445
517
  # ---------------------------------------------
446
- def run(self, skip_fit : bool = False) -> None:
518
+ def _run_diagnostics(self, models : list[cls], rdf : RDataFrame, name : str) -> None:
519
+ if 'diagnostics' not in self._cfg:
520
+ log.warning('Diagnostics section not found, not running diagnostics')
521
+ return
522
+
523
+ cfg_diag = self._cfg['diagnostics']
524
+ out_dir = cfg_diag['output']
525
+ plt_dir = None
526
+
527
+ if 'overlay' in cfg_diag['correlations']['target']:
528
+ plt_dir = cfg_diag['correlations']['target']['overlay']['saving']['plt_dir']
529
+
530
+ cfg_diag = copy.deepcopy(cfg_diag)
531
+ cfg_diag['output'] = f'{out_dir}/{name}'
532
+ if plt_dir is not None:
533
+ cfg_diag['correlations']['target']['overlay']['saving']['plt_dir'] = f'{plt_dir}/{name}'
534
+
535
+ cvd = CVDiagnostics(models=models, rdf=rdf, cfg=cfg_diag)
536
+ cvd.run()
537
+ # ---------------------------------------------
538
+ def run(self, skip_fit : bool = False, load_trained : bool = False) -> None:
447
539
  '''
448
540
  Will do the training
449
541
 
450
542
  skip_fit: By default false, if True, it will only do the plots of features and save tables
543
+ load_trained: If true, it will load the models instead of training, by default false
451
544
  '''
452
545
  self._save_settings_to_tex()
453
546
  self._plot_features()
@@ -455,7 +548,11 @@ class TrainMva:
455
548
  if skip_fit:
456
549
  return
457
550
 
458
- l_mod = self._get_models()
459
- for ifold, mod in enumerate(l_mod):
460
- self._save_model(mod, ifold)
551
+ l_mod = self._get_models(load_trained = load_trained)
552
+ if not load_trained:
553
+ for ifold, mod in enumerate(l_mod):
554
+ self._save_model(mod, ifold)
555
+
556
+ self._run_diagnostics(models = l_mod, rdf = self._rdf_sig_org, name='Signal' )
557
+ self._run_diagnostics(models = l_mod, rdf = self._rdf_bkg_org, name='Background')
461
558
  # ---------------------------------------------
@@ -2,20 +2,28 @@
2
2
  Module containing utilities for pandas dataframes
3
3
  '''
4
4
  import os
5
+ import yaml
5
6
  import pandas as pnd
6
7
 
7
8
  from dmu.logging.log_store import LogStore
8
9
 
9
10
  log=LogStore.add_logger('dmu:pdataframe:utilities')
10
-
11
11
  # -------------------------------------
12
- def df_to_tex(df : pnd.DataFrame, path : str, hide_index : bool = True, d_format : dict[str,str]=None, caption : str =None) -> None:
12
+ def df_to_tex(df : pnd.DataFrame,
13
+ path : str,
14
+ hide_index : bool = True,
15
+ d_format : dict[str,str]= None,
16
+ **kwargs : str ) -> None:
13
17
  '''
14
18
  Saves pandas dataframe to latex
15
19
 
16
20
  Parameters
17
21
  -------------
22
+ df : Dataframe with data
23
+ path (str) : Path to latex file
24
+ hide_index : If true (default), index of dataframe won't appear in table
18
25
  d_format (dict) : Dictionary specifying the formatting of the table, e.g. `{'col1': '{}', 'col2': '{:.3f}', 'col3' : '{:.3f}'}`
26
+ kwargs : Arguments needed in `to_latex`
19
27
  '''
20
28
 
21
29
  if path is not None:
@@ -30,7 +38,32 @@ def df_to_tex(df : pnd.DataFrame, path : str, hide_index : bool = True, d_format
30
38
  st=st.format(formatter=d_format)
31
39
 
32
40
  log.info(f'Saving to: {path}')
33
- buf = st.to_latex(buf=path, caption=caption, hrules=True)
41
+ buf = st.to_latex(buf=path, hrules=True, **kwargs)
34
42
 
35
43
  return buf
36
44
  # -------------------------------------
45
+ def to_yaml(df : pnd.DataFrame, path : str):
46
+ '''
47
+ Takes a dataframe and the path to a yaml file
48
+ Makes the directory path if not found and saves data in YAML file
49
+ '''
50
+ dir_path = os.path.dirname(path)
51
+ os.makedirs(dir_path, exist_ok=True)
52
+
53
+ data = df.to_dict()
54
+
55
+ with open(path, 'w', encoding='utf-8') as ofile:
56
+ yaml.safe_dump(data, ofile)
57
+ # -------------------------------------
58
+ def from_yaml(path : str) -> pnd.DataFrame:
59
+ '''
60
+ Takes path to a yaml file
61
+ Makes dataframe from it and returns it
62
+ '''
63
+ with open(path, encoding='utf-8') as ifile:
64
+ data = yaml.safe_load(ifile)
65
+
66
+ df = pnd.DataFrame(data)
67
+
68
+ return df
69
+ # -------------------------------------
dmu/plotting/fwhm.py ADDED
@@ -0,0 +1,64 @@
1
+ '''
2
+ Module with FWHM plugin class
3
+ '''
4
+ import zfit
5
+ import numpy
6
+ import matplotlib.pyplot as plt
7
+
8
+ from dmu.logging.log_store import LogStore
9
+
10
+ log = LogStore.add_logger('dmu:plotting:fwhm')
11
+ # --------------------------------------------
12
+ class FWHM:
13
+ '''
14
+ Class meant to be used to calculate Full Width at Half Maximum
15
+ as a Plotter1d plugin
16
+ '''
17
+ # -------------------------
18
+ def __init__(self, cfg : dict, val : numpy.ndarray, wgt : numpy.ndarray, maxy : float):
19
+ self._cfg = cfg
20
+ self._arr_val = val
21
+ self._arr_wgt = wgt
22
+ self._maxy = maxy
23
+ # -------------------------
24
+ def _normalize_yval(self, arr_pdf_val : numpy.ndarray) -> None:
25
+ max_pdf_val = numpy.max(arr_pdf_val)
26
+ arr_pdf_val*= self._maxy / max_pdf_val
27
+
28
+ return arr_pdf_val
29
+ # -------------------------
30
+ def _get_fwhm(self, arr_x : numpy.ndarray, arr_y : numpy.ndarray) -> float:
31
+ maxy = numpy.max(arr_y)
32
+ arry = numpy.where(arr_y > maxy/2.)[0]
33
+ imax = arry[ 0]
34
+ imin = arry[-1]
35
+
36
+ x1 = arr_x[imax]
37
+ x2 = arr_x[imin]
38
+
39
+ if self._cfg['plot']:
40
+ plt.plot([x1, x2], [maxy/2, maxy/2], linestyle=':', linewidth=1, color='k')
41
+
42
+ return x2 - x1
43
+ # -------------------------
44
+ def run(self) -> float:
45
+ '''
46
+ Runs plugin and returns FWHM
47
+ '''
48
+ [minx, maxx] = self._cfg['obs']
49
+
50
+ log.info('Running FWHM pluggin')
51
+ obs = zfit.Space('mass', limits=(minx, maxx))
52
+ pdf= zfit.pdf.KDE1DimExact(obs=obs, data=self._arr_val, weights=self._arr_wgt)
53
+
54
+ xval = numpy.linspace(minx, maxx, 200)
55
+ yval = pdf.pdf(xval)
56
+ yval = self._normalize_yval(yval)
57
+
58
+ if self._cfg['plot']:
59
+ plt.plot(xval, yval, linestyle='-', linewidth=2, color='gray')
60
+
61
+ fwhm = self._get_fwhm(xval, yval)
62
+
63
+ return fwhm
64
+ # --------------------------------------------
dmu/plotting/plotter.py CHANGED
@@ -29,6 +29,8 @@ class Plotter:
29
29
  self._d_cfg = cfg
30
30
  self._d_rdf : dict[str, RDataFrame] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
31
31
  self._d_wgt : Union[dict[str, Union[numpy.ndarray, None]], None]
32
+
33
+ self._title : str = ''
32
34
  #-------------------------------------
33
35
  def _check_quantile(self, qnt : float):
34
36
  '''
@@ -1,7 +1,7 @@
1
1
  '''
2
2
  Module containing plotter class
3
3
  '''
4
-
4
+ import copy
5
5
  from hist import Hist
6
6
 
7
7
  import numpy
@@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
9
9
 
10
10
  from dmu.logging.log_store import LogStore
11
11
  from dmu.plotting.plotter import Plotter
12
+ from dmu.plotting.fwhm import FWHM
12
13
 
13
14
  log = LogStore.add_logger('dmu:plotting:Plotter1D')
14
15
  # --------------------------------------------
@@ -55,6 +56,72 @@ class Plotter1D(Plotter):
55
56
 
56
57
  return minx, maxx, bins
57
58
  #-------------------------------------
59
+ def _run_plugins(self,
60
+ arr_val : numpy.ndarray,
61
+ arr_wgt : numpy.ndarray,
62
+ hst,
63
+ name : str,
64
+ varname : str) -> None:
65
+ if 'plugin' not in self._d_cfg:
66
+ log.debug('No plugins found')
67
+ return
68
+
69
+ if 'fwhm' in self._d_cfg['plugin']:
70
+ if varname not in self._d_cfg['plugin']['fwhm']:
71
+ log.debug(f'No FWHM plugin found for variable {varname}')
72
+ return
73
+
74
+ log.debug(f'FWHM plugin found for variable {varname}')
75
+ cfg = self._d_cfg['plugin']['fwhm'][varname]
76
+ self._run_fwhm(arr_val = arr_val, arr_wgt=arr_wgt, hst=hst, name=name, cfg = cfg)
77
+
78
+ if 'stats' in self._d_cfg['plugin']:
79
+ if varname not in self._d_cfg['plugin']['stats']:
80
+ log.debug(f'No stats plugin found for variable {varname}')
81
+ return
82
+
83
+ log.debug(f'stats plugin found for variable {varname}')
84
+ cfg = self._d_cfg['plugin']['stats'][varname]
85
+ self._run_stats(arr_val = arr_val, arr_wgt=arr_wgt, name=name, cfg = cfg)
86
+ #-------------------------------------
87
+ def _run_stats(self, arr_val : numpy.ndarray, arr_wgt : numpy.ndarray, name : str, cfg : dict[str:str]) -> None:
88
+ this_title = ''
89
+ if 'sum' in cfg:
90
+ form = cfg['sum']
91
+ sumv = numpy.sum(arr_wgt)
92
+ this_title += form.format(sumv) + '; '
93
+
94
+ if 'mean' in cfg:
95
+ form = cfg['mean']
96
+ mean = numpy.average(arr_val, weights=arr_wgt)
97
+ this_title += form.format(mean) + '; '
98
+
99
+ if 'rms' in cfg:
100
+ form = cfg['rms']
101
+ mean = numpy.average(arr_val, weights=arr_wgt)
102
+ rms = numpy.sqrt(numpy.average((arr_val - mean) ** 2, weights=arr_wgt))
103
+ this_title += form.format(rms ) + '; '
104
+
105
+ self._title+= f'\n{name}: {this_title}'
106
+ #-------------------------------------
107
+ def _run_fwhm(self, arr_val : numpy.ndarray, arr_wgt : numpy.ndarray, hst, name : str, cfg : dict) -> None:
108
+ arr_bin_cnt = hst.values()
109
+ maxy = numpy.max(arr_bin_cnt)
110
+ obj = FWHM(cfg=cfg, val=arr_val, wgt=arr_wgt, maxy=maxy)
111
+ fwhm = obj.run()
112
+
113
+ form = cfg['format']
114
+ this_title = form.format(fwhm)
115
+
116
+ if 'add_std' in cfg and cfg['add_std']:
117
+ mu = numpy.average(arr_val , weights=arr_wgt)
118
+ avg = numpy.average((arr_val - mu) ** 2, weights=arr_wgt)
119
+ std = numpy.sqrt(avg)
120
+ form = form.replace('FWHM', 'STD')
121
+ this_title+= '; ' + form.format(std)
122
+
123
+ self._title+= f'\n{name}: {this_title}'
124
+ #-------------------------------------
58
125
  def _plot_var(self, var : str) -> float:
59
126
  '''
60
127
  Will plot a variable from a dictionary of dataframes
@@ -70,6 +137,7 @@ class Plotter1D(Plotter):
70
137
 
71
138
  d_data = {}
72
139
  for name, rdf in self._d_rdf.items():
140
+ log.debug(f'Plotting: {var}/{name}')
73
141
  d_data[name] = rdf.AsNumpy([var])[var]
74
142
 
75
143
  minx, maxx, bins = self._get_binning(var, d_data)
@@ -82,7 +150,18 @@ class Plotter1D(Plotter):
82
150
  arr_wgt = self._normalize_weights(arr_wgt, var)
83
151
  hst = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x').Weight()
84
152
  hst.fill(x=arr_val, weight=arr_wgt)
85
- hst.plot(label=label)
153
+ self._run_plugins(arr_val, arr_wgt, hst, name, var)
154
+
155
+ if 'styling' in self._d_cfg['plots'][var]:
156
+ style = self._d_cfg['plots'][var]['styling']
157
+ style = copy.deepcopy(style)
158
+ else:
159
+ style = {'label' : label, 'histtype' : 'errorbar', 'marker' : '.', 'linestyle' : 'none'}
160
+
161
+ if 'label' not in style:
162
+ style['label'] = label
163
+
164
+ hst.plot(**style)
86
165
  l_bc_all += hst.values().tolist()
87
166
 
88
167
  max_y = max(l_bc_all)
@@ -131,9 +210,12 @@ class Plotter1D(Plotter):
131
210
  if yscale == 'linear':
132
211
  plt.ylim(bottom=0)
133
212
 
134
- title = ''
213
+ title = self._title
135
214
  if 'title' in d_cfg:
136
- title = d_cfg['title']
215
+ this_title = d_cfg['title']
216
+ title += f'\n {this_title}'
217
+
218
+ title = title.lstrip('\n')
137
219
 
138
220
  plt.ylim(top=1.2 * max_y)
139
221
  plt.legend()
@@ -160,8 +242,7 @@ class Plotter1D(Plotter):
160
242
 
161
243
  fig_size = self._get_fig_size()
162
244
  for var in self._d_cfg['plots']:
163
- log.debug(f'Plotting: {var}')
164
-
245
+ self._title = ''
165
246
  plt.figure(var, figsize=fig_size)
166
247
  max_y = self._plot_var(var)
167
248
  self._style_plot(var, max_y)
dmu/stats/fitter.py CHANGED
@@ -231,7 +231,7 @@ class Fitter:
231
231
  continue
232
232
 
233
233
  const = zfit.constraint.GaussianConstraint(params=par, observation=float(par_mu), uncertainty=float(par_sg))
234
- log.info(f'{"":<4}{par_name:<25}{par_mu:<15.3e}{par_sg:<15.3e}')
234
+ log.info(f'{"":<4}{par_name:<45}{par_mu:<15.3e}{par_sg:<15.3e}')
235
235
  l_const.append(const)
236
236
 
237
237
  return l_const