data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (30)
  1. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +177 -8
  2. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +30 -18
  3. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
  4. dmu/generic/hashing.py +44 -0
  5. dmu/generic/utilities.py +14 -1
  6. dmu/generic/version_management.py +3 -5
  7. dmu/ml/cv_diagnostics.py +221 -0
  8. dmu/ml/train_mva.py +124 -31
  9. dmu/pdataframe/utilities.py +36 -3
  10. dmu/plotting/fwhm.py +64 -0
  11. dmu/plotting/plotter.py +2 -0
  12. dmu/plotting/plotter_1d.py +87 -6
  13. dmu/stats/fitter.py +1 -1
  14. dmu/stats/model_factory.py +189 -25
  15. dmu/stats/zfit_models.py +68 -0
  16. dmu/stats/zfit_plotter.py +29 -21
  17. dmu/testing/utilities.py +31 -4
  18. dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
  19. dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
  20. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
  21. dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
  22. dmu_data/ml/tests/train_mva.yaml +15 -9
  23. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
  24. dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
  25. dmu_data/plotting/tests/plug_stats.yaml +19 -0
  26. dmu_data/plotting/tests/simple.yaml +4 -3
  27. dmu_data/plotting/tests/styling.yaml +11 -0
  28. {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
  29. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
  30. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
dmu/ml/train_mva.py CHANGED
@@ -3,8 +3,10 @@ Module with TrainMva class
  '''
  # pylint: disable = too-many-locals, no-name-in-module
  # pylint: disable = too-many-arguments, too-many-positional-arguments
+ # pylint: disable = too-many-instance-attributes

  import os
+ import copy

  import joblib
  import pandas as pnd
@@ -20,12 +22,13 @@ import dmu.ml.utilities as ut
  import dmu.pdataframe.utilities as put
  import dmu.plotting.utilities as plu

+ from dmu.ml.cv_diagnostics import CVDiagnostics
  from dmu.ml.cv_classifier import CVClassifier as cls
  from dmu.plotting.plotter_1d import Plotter1D as Plotter
  from dmu.plotting.matrix import MatrixPlotter
  from dmu.logging.log_store import LogStore

- npa = numpy.ndarray
+ NPA = numpy.ndarray
  log = LogStore.add_logger('dmu:ml:train_mva')
  # ---------------------------------------------
  class TrainMva:
@@ -42,30 +45,62 @@ class TrainMva:
          self._cfg = cfg
          self._l_ft_name = self._cfg['training']['features']

-         df_ft_sig, l_lab_sig = self._get_sample_inputs(rdf = sig, label = 1)
-         df_ft_bkg, l_lab_bkg = self._get_sample_inputs(rdf = bkg, label = 0)
+         self._rdf_sig_org = sig
+         self._rdf_bkg_org = bkg
+
+         rdf_bkg = self._preprocess_rdf(bkg)
+         rdf_sig = self._preprocess_rdf(sig)
+
+         df_ft_sig, l_lab_sig = self._get_sample_inputs(rdf = rdf_sig, label = 1)
+         df_ft_bkg, l_lab_bkg = self._get_sample_inputs(rdf = rdf_bkg, label = 0)

          self._df_ft = pnd.concat([df_ft_sig, df_ft_bkg], axis=0)
          self._l_lab = numpy.array(l_lab_sig + l_lab_bkg)

-         self._rdf_bkg = self._get_rdf(rdf = bkg, df=df_ft_bkg)
-         self._rdf_sig = self._get_rdf(rdf = sig, df=df_ft_sig)
+         self._rdf_bkg = self._get_rdf(rdf = rdf_bkg, df_feat=df_ft_bkg)
+         self._rdf_sig = self._get_rdf(rdf = rdf_sig, df_feat=df_ft_sig)
      # ---------------------------------------------
-     def _get_rdf(self, rdf : RDataFrame, df : pnd.DataFrame) -> RDataFrame:
+     def _get_extra_columns(self, rdf : RDataFrame, df : pnd.DataFrame) -> list[str]:
+         d_plot = self._cfg['plotting']['features']['plots']
+         l_expr = list(d_plot)
+         l_rdf = [ name.c_str() for name in rdf.GetColumnNames() ]
+
+         l_extr = []
+         for expr in l_expr:
+             if expr not in l_rdf:
+                 continue
+
+             if expr in df.columns:
+                 continue
+
+             l_extr.append(expr)
+
+         return l_extr
+     # ---------------------------------------------
+     def _get_rdf(self, rdf : RDataFrame, df_feat : pnd.DataFrame) -> RDataFrame:
          '''
          Takes original ROOT dataframe and pre-processed features dataframe
          Adds missing branches to latter and returns expanded ROOT dataframe
+         Need to make plots
          '''

-         l_pnd_col = df.columns.tolist()
-         l_rdf_col = [ name.c_str() for name in rdf.GetColumnNames() ]
-         l_mis_col = [ col for col in l_rdf_col if col not in l_pnd_col ]
+         l_extr_col = self._get_extra_columns(rdf, df_feat)
+         if len(l_extr_col) > 20:
+             for name in l_extr_col:
+                 log.debug(name)
+             raise ValueError('Found more than 20 extra columns')
+
+         d_data = rdf.AsNumpy(l_extr_col)
+         log.debug(f'Adding extra-nonfeature columns: {l_extr_col}')
+         df_extr = pnd.DataFrame(d_data)
+
+         nmain = len(df_feat.columns)
+         nextr = len(df_extr.columns)

-         log.debug(f'Adding extra-nonfeature columns: {l_mis_col}')
+         log.debug(f'Main DF size: {nmain}')
+         log.debug(f'Extra DF size: {nextr}')

-         d_data = rdf.AsNumpy(l_mis_col)
-         df_ext = pnd.DataFrame(d_data)
-         df_all = pnd.concat([df, df_ext], axis=1)
+         df_all = pnd.concat([df_feat, df_extr], axis=1)

          return RDF.FromPandas(df_all)
      # ---------------------------------------------
@@ -90,6 +125,19 @@ class TrainMva:

          return df
      # ---------------------------------------------
+     def _preprocess_rdf(self, rdf : RDataFrame) -> RDataFrame:
+         if 'define' not in self._cfg['dataset']:
+             log.debug('No definitions found')
+             return rdf
+
+         log.debug('Definitions found')
+         d_def = self._cfg['dataset']['define']
+         for name, expr in d_def.items():
+             log.debug(f'{name:<20}{expr}')
+             rdf = rdf.Define(name, expr)
+
+         return rdf
+     # ---------------------------------------------
      def _get_sample_inputs(self, rdf : RDataFrame, label : int) -> tuple[pnd.DataFrame, list[int]]:
          d_ft = rdf.AsNumpy(self._l_ft_name)
          df = pnd.DataFrame(d_ft)
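Note (not part of the diff): the new `_preprocess_rdf` step only looks for an optional `define` block under the `dataset` section of the training configuration and forwards each entry to `RDataFrame.Define`. A minimal sketch of such a block, written as the Python dictionary `TrainMva` receives; the column names and expressions below are illustrative, not taken from the shipped YAML files:

    # Hypothetical configuration fragment; only the key layout follows the code above.
    cfg = {
        'dataset': {
            'define': {
                # new column name : expression evaluated by ROOT's RDataFrame.Define
                'q2'    : 'Jpsi_M * Jpsi_M',
                'pt_gev': 'B_PT / 1000.',
            },
        },
        # ... the 'training', 'plotting', 'saving', ... sections go here ...
    }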
@@ -99,7 +147,7 @@

          return df, l_lab
      # ---------------------------------------------
-     def _get_model(self, arr_index : npa) -> cls:
+     def _get_model(self, arr_index : NPA) -> cls:
          model = cls(cfg = self._cfg)
          df_ft = self._df_ft.iloc[arr_index]
          l_lab = self._l_lab[arr_index]
@@ -111,10 +159,14 @@

          return model
      # ---------------------------------------------
-     def _get_models(self):
+     def _get_models(self, load_trained : bool):
          '''
          Will create models, train them and return them
          '''
+         if load_trained:
+             log.warning('Not retraining, but loading trained models')
+             return self._load_trained_models()
+
          nfold = self._cfg['training']['nfold']
          rdmst = self._cfg['training']['rdm_stat']

@@ -141,6 +193,22 @@

          return l_model
      # ---------------------------------------------
+     def _load_trained_models(self) -> list[cls]:
+         model_path = self._cfg['saving']['path']
+         nfold = self._cfg['training']['nfold']
+         l_model = []
+         for ifold in range(nfold):
+             fold_path = model_path.replace('.pkl', f'_{ifold:03}.pkl')
+
+             if not os.path.isfile(fold_path):
+                 raise FileNotFoundError(f'Missing trained model: {fold_path}')
+
+             log.debug(f'Loading model from: {fold_path}')
+             model = joblib.load(fold_path)
+             l_model.append(model)
+
+         return l_model
+     # ---------------------------------------------
      def _labels_from_varnames(self, l_var_name : list[str]) -> list[str]:
          try:
              d_plot = self._cfg['plotting']['features']['plots']
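Note (not part of the diff): the per-fold file names are derived from the configured `saving.path` by inserting a zero-padded fold index before the extension. A tiny sketch, with an illustrative path:

    # Illustrative only: mirrors the path construction used in _load_trained_models.
    model_path = 'output/mva/model.pkl'   # hypothetical value of cfg['saving']['path']
    fold_paths = [model_path.replace('.pkl', f'_{ifold:03}.pkl') for ifold in range(3)]
    # -> ['output/mva/model_000.pkl', 'output/mva/model_001.pkl', 'output/mva/model_002.pkl']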
@@ -180,7 +248,7 @@
          d_form = {'Variable' : '{}', 'Importance' : '{:.1f}'}
          put.df_to_tex(df, table_path, d_format = d_form)
      # ---------------------------------------------
-     def _get_scores(self, model : cls, arr_index : npa, on_training_ok : bool) -> tuple[npa, npa, npa, npa]:
+     def _get_scores(self, model : cls, arr_index : NPA, on_training_ok : bool) -> tuple[NPA, NPA, NPA, NPA]:
          '''
          Returns a tuple of four arrays

@@ -203,7 +271,7 @@

          return arr_sig, arr_bkg, arr_all, arr_lab
      # ---------------------------------------------
-     def _split_scores(self, arr_prob : npa, arr_label : npa) -> tuple[npa, npa]:
+     def _split_scores(self, arr_prob : NPA, arr_label : NPA) -> tuple[NPA, NPA]:
          '''
          Will split the testing scores (predictions) based on the training scores

@@ -259,7 +327,7 @@

          return cfg
      # ---------------------------------------------
-     def _plot_correlation(self, arr_index : npa, ifold : int) -> None:
+     def _plot_correlation(self, arr_index : NPA, ifold : int) -> None:
          df_ft = self._df_ft.iloc[arr_index]
          cfg = self._get_correlation_cfg(df_ft, ifold)
          cov = df_ft.corr()
@@ -276,7 +344,7 @@
          plt.savefig(f'{val_dir}/covariance.png')
          plt.close()
      # ---------------------------------------------
-     def _get_nentries(self, arr_val : npa) -> str:
+     def _get_nentries(self, arr_val : NPA) -> str:
          size = len(arr_val)
          size = size / 1000.

@@ -311,10 +379,10 @@
          plt.close()
      # ---------------------------------------------
      def _plot_roc(self,
-                   l_lab_ts : npa,
-                   l_prb_ts : npa,
-                   l_lab_tr : npa,
-                   l_prb_tr : npa,
+                   l_lab_ts : NPA,
+                   l_prb_ts : NPA,
+                   l_lab_tr : NPA,
+                   l_prb_tr : NPA,
                    ifold : int):
          '''
          Takes the labels and the probabilities and plots ROC
@@ -359,10 +427,10 @@
          plt.close()
      # ---------------------------------------------
      def _plot_probabilities(self,
-                             arr_seff: npa,
-                             arr_brej: npa,
-                             arr_sprb: npa,
-                             arr_labl: npa) -> None:
+                             arr_seff: NPA,
+                             arr_brej: NPA,
+                             arr_sprb: NPA,
+                             arr_labl: NPA) -> None:

          roc_cfg = self._cfg['plotting']['roc']
          if 'annotate' not in roc_cfg:
@@ -447,11 +515,32 @@
          os.makedirs(val_dir, exist_ok=True)
          put.df_to_tex(df, f'{val_dir}/hyperparameters.tex')
      # ---------------------------------------------
-     def run(self, skip_fit : bool = False) -> None:
+     def _run_diagnostics(self, models : list[cls], rdf : RDataFrame, name : str) -> None:
+         if 'diagnostics' not in self._cfg:
+             log.warning('Diagnostics section not found, not running diagnostics')
+             return
+
+         cfg_diag = self._cfg['diagnostics']
+         out_dir = cfg_diag['output']
+         plt_dir = None
+
+         if 'overlay' in cfg_diag['correlations']['target']:
+             plt_dir = cfg_diag['correlations']['target']['overlay']['saving']['plt_dir']
+
+         cfg_diag = copy.deepcopy(cfg_diag)
+         cfg_diag['output'] = f'{out_dir}/{name}'
+         if plt_dir is not None:
+             cfg_diag['correlations']['target']['overlay']['saving']['plt_dir'] = f'{plt_dir}/{name}'
+
+         cvd = CVDiagnostics(models=models, rdf=rdf, cfg=cfg_diag)
+         cvd.run()
+     # ---------------------------------------------
+     def run(self, skip_fit : bool = False, load_trained : bool = False) -> None:
          '''
          Will do the training

          skip_fit: By default false, if True, it will only do the plots of features and save tables
+         load_trained: If true, it will load the models instead of training, by default false
          '''
          self._save_settings_to_tex()
          self._plot_features()
@@ -459,7 +548,11 @@
          if skip_fit:
              return

-         l_mod = self._get_models()
-         for ifold, mod in enumerate(l_mod):
-             self._save_model(mod, ifold)
+         l_mod = self._get_models(load_trained = load_trained)
+         if not load_trained:
+             for ifold, mod in enumerate(l_mod):
+                 self._save_model(mod, ifold)
+
+         self._run_diagnostics(models = l_mod, rdf = self._rdf_sig_org, name='Signal' )
+         self._run_diagnostics(models = l_mod, rdf = self._rdf_bkg_org, name='Background')
      # ---------------------------------------------
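Note (not part of the diff): taken together, these changes extend the public entry point to `run(skip_fit=False, load_trained=False)` and, when a `diagnostics` section is present in the configuration, run `CVDiagnostics` on the original signal and background dataframes after training or loading. A hedged usage sketch; the dataframes and configuration dictionary are placeholders, not package data:

    # Sketch only: rdf_sig, rdf_bkg and cfg stand for user-provided ROOT dataframes
    # and a configuration dictionary containing the sections used above.
    from dmu.ml.train_mva import TrainMva

    trainer = TrainMva(sig=rdf_sig, bkg=rdf_bkg, cfg=cfg)
    trainer.run(skip_fit=False, load_trained=False)   # train, save per-fold models, run diagnostics
    trainer.run(load_trained=True)                    # reuse previously saved *_000.pkl ... models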
dmu/pdataframe/utilities.py CHANGED
@@ -2,20 +2,28 @@
  Module containing utilities for pandas dataframes
  '''
  import os
+ import yaml
  import pandas as pnd

  from dmu.logging.log_store import LogStore

  log=LogStore.add_logger('dmu:pdataframe:utilities')
-
  # -------------------------------------
- def df_to_tex(df : pnd.DataFrame, path : str, hide_index : bool = True, d_format : dict[str,str]=None, caption : str =None) -> None:
+ def df_to_tex(df : pnd.DataFrame,
+               path : str,
+               hide_index : bool = True,
+               d_format : dict[str,str]= None,
+               **kwargs : str ) -> None:
      '''
      Saves pandas dataframe to latex

      Parameters
      -------------
+     df : Dataframe with data
+     path (str) : Path to latex file
+     hide_index : If true (default), index of dataframe won't appear in table
      d_format (dict) : Dictionary specifying the formattinng of the table, e.g. `{'col1': '{}', 'col2': '{:.3f}', 'col3' : '{:.3f}'}`
+     kwargs : Arguments needed in `to_latex`
      '''

      if path is not None:
@@ -30,7 +38,32 @@ def df_to_tex(df : pnd.DataFrame, path : str, hide_index : bool = True, d_format
          st=st.format(formatter=d_format)

      log.info(f'Saving to: {path}')
-     buf = st.to_latex(buf=path, caption=caption, hrules=True)
+     buf = st.to_latex(buf=path, hrules=True, **kwargs)

      return buf
  # -------------------------------------
+ def to_yaml(df : pnd.DataFrame, path : str):
+     '''
+     Takes a dataframe and the path to a yaml file
+     Makes the directory path if not found and saves data in YAML file
+     '''
+     dir_path = os.path.dirname(path)
+     os.makedirs(dir_path, exist_ok=True)
+
+     data = df.to_dict()
+
+     with open(path, 'w', encoding='utf-8') as ofile:
+         yaml.safe_dump(data, ofile)
+ # -------------------------------------
+ def from_yaml(path : str) -> pnd.DataFrame:
+     '''
+     Takes path to a yaml file
+     Makes dataframe from it and returns it
+     '''
+     with open(path, encoding='utf-8') as ifile:
+         data = yaml.safe_load(ifile)
+
+     df = pnd.DataFrame(data)
+
+     return df
+ # -------------------------------------
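Note (not part of the diff): the new `to_yaml`/`from_yaml` helpers round-trip a dataframe through a YAML file, and `df_to_tex` now forwards extra keyword arguments (such as `caption`, previously a named parameter) to the styler's `to_latex`. A short usage sketch with illustrative paths:

    # Paths are illustrative; to_yaml creates the parent directory if it is missing.
    import pandas as pnd
    import dmu.pdataframe.utilities as put

    df = pnd.DataFrame({'name': ['a', 'b'], 'value': [1.0, 2.0]})
    put.to_yaml(df, '/tmp/dmu_example/table.yaml')
    df_back = put.from_yaml('/tmp/dmu_example/table.yaml')

    # caption is now passed through **kwargs to Styler.to_latex
    put.df_to_tex(df, '/tmp/dmu_example/table.tex', d_format={'name': '{}', 'value': '{:.3f}'}, caption='Example table')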
dmu/plotting/fwhm.py ADDED
@@ -0,0 +1,64 @@
+ '''
+ Module with FWHM plugin class
+ '''
+ import zfit
+ import numpy
+ import matplotlib.pyplot as plt
+
+ from dmu.logging.log_store import LogStore
+
+ log = LogStore.add_logger('dmu:plotting:fwhm')
+ # --------------------------------------------
+ class FWHM:
+     '''
+     Class meant to be used to calculate Full Width at Half Maximum
+     as a Plotter1d plugin
+     '''
+     # -------------------------
+     def __init__(self, cfg : dict, val : numpy.ndarray, wgt : numpy.ndarray, maxy : float):
+         self._cfg = cfg
+         self._arr_val = val
+         self._arr_wgt = wgt
+         self._maxy = maxy
+     # -------------------------
+     def _normalize_yval(self, arr_pdf_val : numpy.ndarray) -> None:
+         max_pdf_val = numpy.max(arr_pdf_val)
+         arr_pdf_val*= self._maxy / max_pdf_val
+
+         return arr_pdf_val
+     # -------------------------
+     def _get_fwhm(self, arr_x : numpy.ndarray, arr_y : numpy.ndarray) -> float:
+         maxy = numpy.max(arr_y)
+         arry = numpy.where(arr_y > maxy/2.)[0]
+         imax = arry[ 0]
+         imin = arry[-1]
+
+         x1 = arr_x[imax]
+         x2 = arr_x[imin]
+
+         if self._cfg['plot']:
+             plt.plot([x1, x2], [maxy/2, maxy/2], linestyle=':', linewidth=1, color='k')
+
+         return x2 - x1
+     # -------------------------
+     def run(self) -> float:
+         '''
+         Runs plugin and return FWHM
+         '''
+         [minx, maxx] = self._cfg['obs']
+
+         log.info('Running FWHM pluggin')
+         obs = zfit.Space('mass', limits=(minx, maxx))
+         pdf= zfit.pdf.KDE1DimExact(obs=obs, data=self._arr_val, weights=self._arr_wgt)
+
+         xval = numpy.linspace(minx, maxx, 200)
+         yval = pdf.pdf(xval)
+         yval = self._normalize_yval(yval)
+
+         if self._cfg['plot']:
+             plt.plot(xval, yval, linestyle='-', linewidth=2, color='gray')
+
+         fwhm = self._get_fwhm(xval, yval)
+
+         return fwhm
+ # --------------------------------------------
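Note (not part of the diff): the class is meant to be driven by Plotter1D through the `fwhm` plugin configuration, but it can also be exercised on its own. A minimal standalone sketch; the configuration keys `obs` and `plot` are the ones read by the code above, while the data values are made up:

    # Standalone sketch: data and numbers are illustrative, not package test data.
    import numpy
    from dmu.plotting.fwhm import FWHM

    arr_val = numpy.random.normal(loc=5280.0, scale=20.0, size=10_000)
    arr_wgt = numpy.ones_like(arr_val)

    cfg = {'obs': [5180.0, 5380.0], 'plot': False}   # keys used by FWHM.run and _get_fwhm
    obj = FWHM(cfg=cfg, val=arr_val, wgt=arr_wgt, maxy=1.0)
    print(f'FWHM ~ {obj.run():.1f}')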
dmu/plotting/plotter.py CHANGED
@@ -29,6 +29,8 @@ class Plotter:
          self._d_cfg = cfg
          self._d_rdf : dict[str, RDataFrame] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
          self._d_wgt : Union[dict[str, Union[numpy.ndarray, None]], None]
+
+         self._title : str = ''
      #-------------------------------------
      def _check_quantile(self, qnt : float):
          '''
dmu/plotting/plotter_1d.py CHANGED
@@ -1,7 +1,7 @@
  '''
  Module containing plotter class
  '''
-
+ import copy
  from hist import Hist

  import numpy
@@ -9,6 +9,7 @@ import matplotlib.pyplot as plt

  from dmu.logging.log_store import LogStore
  from dmu.plotting.plotter import Plotter
+ from dmu.plotting.fwhm import FWHM

  log = LogStore.add_logger('dmu:plotting:Plotter1D')
  # --------------------------------------------
@@ -55,6 +56,72 @@ class Plotter1D(Plotter):

          return minx, maxx, bins
      #-------------------------------------
+     def _run_plugins(self,
+                      arr_val : numpy.ndarray,
+                      arr_wgt : numpy.ndarray,
+                      hst,
+                      name : str,
+                      varname : str) -> None:
+         if 'plugin' not in self._d_cfg:
+             log.debug('No plugins found')
+             return
+
+         if 'fwhm' in self._d_cfg['plugin']:
+             if varname not in self._d_cfg['plugin']['fwhm']:
+                 log.debug(f'No FWHM plugin found for variable {varname}')
+                 return
+
+             log.debug(f'FWHM plugin found for variable {varname}')
+             cfg = self._d_cfg['plugin']['fwhm'][varname]
+             self._run_fwhm(arr_val = arr_val, arr_wgt=arr_wgt, hst=hst, name=name, cfg = cfg)
+
+         if 'stats' in self._d_cfg['plugin']:
+             if varname not in self._d_cfg['plugin']['stats']:
+                 log.debug(f'No stats plugin found for variable {varname}')
+                 return
+
+             log.debug(f'stats plugin found for variable {varname}')
+             cfg = self._d_cfg['plugin']['stats'][varname]
+             self._run_stats(arr_val = arr_val, arr_wgt=arr_wgt, name=name, cfg = cfg)
+     #-------------------------------------
+     def _run_stats(self, arr_val : numpy.ndarray, arr_wgt : numpy.ndarray, name : str, cfg : dict[str:str]) -> None:
+         this_title = ''
+         if 'sum' in cfg:
+             form = cfg['sum']
+             sumv = numpy.sum(arr_wgt)
+             this_title += form.format(sumv) + '; '
+
+         if 'mean' in cfg:
+             form = cfg['mean']
+             mean = numpy.average(arr_val, weights=arr_wgt)
+             this_title += form.format(mean) + '; '
+
+         if 'rms' in cfg:
+             form = cfg['rms']
+             mean = numpy.average(arr_val, weights=arr_wgt)
+             rms = numpy.sqrt(numpy.average((arr_val - mean) ** 2, weights=arr_wgt))
+             this_title += form.format(rms ) + '; '
+
+         self._title+= f'\n{name}: {this_title}'
+     #-------------------------------------
+     def _run_fwhm(self, arr_val : numpy.ndarray, arr_wgt : numpy.ndarray, hst, name : str, cfg : dict) -> None:
+         arr_bin_cnt = hst.values()
+         maxy = numpy.max(arr_bin_cnt)
+         obj = FWHM(cfg=cfg, val=arr_val, wgt=arr_wgt, maxy=maxy)
+         fwhm = obj.run()
+
+         form = cfg['format']
+         this_title = form.format(fwhm)
+
+         if 'add_std' in cfg and cfg['add_std']:
+             mu = numpy.average(arr_val , weights=arr_wgt)
+             avg = numpy.average((arr_val - mu) ** 2, weights=arr_wgt)
+             std = numpy.sqrt(avg)
+             form = form.replace('FWHM', 'STD')
+             this_title+= '; ' + form.format(std)
+
+         self._title+= f'\n{name}: {this_title}'
+     #-------------------------------------
      def _plot_var(self, var : str) -> float:
          '''
          Will plot a variable from a dictionary of dataframes
@@ -70,6 +137,7 @@ class Plotter1D(Plotter):

          d_data = {}
          for name, rdf in self._d_rdf.items():
+             log.debug(f'Plotting: {var}/{name}')
              d_data[name] = rdf.AsNumpy([var])[var]

          minx, maxx, bins = self._get_binning(var, d_data)
@@ -82,7 +150,18 @@ class Plotter1D(Plotter):
              arr_wgt = self._normalize_weights(arr_wgt, var)
              hst = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x').Weight()
              hst.fill(x=arr_val, weight=arr_wgt)
-             hst.plot(label=label)
+             self._run_plugins(arr_val, arr_wgt, hst, name, var)
+
+             if 'styling' in self._d_cfg['plots'][var]:
+                 style = self._d_cfg['plots'][var]['styling']
+                 style = copy.deepcopy(style)
+             else:
+                 style = {'label' : label, 'histtype' : 'errorbar', 'marker' : '.', 'linestyle' : 'none'}
+
+             if 'label' not in style:
+                 style['label'] = label
+
+             hst.plot(**style)
              l_bc_all += hst.values().tolist()

          max_y = max(l_bc_all)
@@ -131,9 +210,12 @@ class Plotter1D(Plotter):
          if yscale == 'linear':
              plt.ylim(bottom=0)

-         title = ''
+         title = self._title
          if 'title' in d_cfg:
-             title = d_cfg['title']
+             this_title = d_cfg['title']
+             title += f'\n {this_title}'
+
+         title = title.lstrip('\n')

          plt.ylim(top=1.2 * max_y)
          plt.legend()
@@ -160,8 +242,7 @@

          fig_size = self._get_fig_size()
          for var in self._d_cfg['plots']:
-             log.debug(f'Plotting: {var}')
-
+             self._title = ''
              plt.figure(var, figsize=fig_size)
              max_y = self._plot_var(var)
              self._style_plot(var, max_y)
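Note (not part of the diff): Plotter1D now accepts an optional `plugin` block (with `fwhm` and `stats` sub-sections keyed by variable name) and a per-variable `styling` block that is forwarded to `hist`'s `plot`. The shipped test configurations (`plug_fwhm.yaml`, `plug_stats.yaml`, `styling.yaml`) presumably exercise this; a hedged sketch of the configuration layout, written as a Python dictionary with illustrative variable names, ranges and format strings:

    # Key names follow the code above; everything else is an example.
    cfg_plot = {
        'plots': {
            'mass': {
                # ... existing per-variable options go here ...
                'styling': {'histtype': 'step', 'linestyle': '-'},   # forwarded to hst.plot(**style)
            },
        },
        'plugin': {
            'fwhm' : {'mass': {'obs': [5180, 5380], 'plot': True, 'format': 'FWHM={:.1f}', 'add_std': True}},
            'stats': {'mass': {'sum': 'S={:.0f}', 'mean': 'mean={:.1f}', 'rms': 'rms={:.1f}'}},
        },
    }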
dmu/stats/fitter.py CHANGED
@@ -231,7 +231,7 @@ class Fitter:
                  continue

              const = zfit.constraint.GaussianConstraint(params=par, observation=float(par_mu), uncertainty=float(par_sg))
-             log.info(f'{"":<4}{par_name:<25}{par_mu:<15.3e}{par_sg:<15.3e}')
+             log.info(f'{"":<4}{par_name:<45}{par_mu:<15.3e}{par_sg:<15.3e}')
              l_const.append(const)

          return l_const