data-manipulation-utilities 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +179 -10
- {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +31 -19
- {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
- dmu/generic/hashing.py +44 -0
- dmu/generic/utilities.py +14 -1
- dmu/generic/version_management.py +3 -5
- dmu/ml/cv_diagnostics.py +221 -0
- dmu/ml/train_mva.py +143 -46
- dmu/pdataframe/utilities.py +36 -3
- dmu/plotting/fwhm.py +64 -0
- dmu/plotting/plotter.py +2 -0
- dmu/plotting/plotter_1d.py +87 -6
- dmu/stats/fitter.py +1 -1
- dmu/stats/minimizers.py +40 -11
- dmu/stats/model_factory.py +248 -44
- dmu/stats/zfit_models.py +68 -0
- dmu/stats/zfit_plotter.py +29 -21
- dmu/testing/utilities.py +31 -4
- dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
- dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
- dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
- dmu_data/ml/tests/train_mva.yaml +19 -10
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
- dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
- dmu_data/plotting/tests/plug_stats.yaml +19 -0
- dmu_data/plotting/tests/simple.yaml +4 -3
- dmu_data/plotting/tests/styling.yaml +11 -0
- {data_manipulation_utilities-0.2.5.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
- {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
- {data_manipulation_utilities-0.2.5.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
dmu/ml/train_mva.py
CHANGED
@@ -1,10 +1,12 @@
 '''
 Module with TrainMva class
 '''
-# pylint: disable = too-many-locals
+# pylint: disable = too-many-locals, no-name-in-module
 # pylint: disable = too-many-arguments, too-many-positional-arguments
+# pylint: disable = too-many-instance-attributes
 
 import os
+import copy
 
 import joblib
 import pandas as pnd
@@ -14,18 +16,19 @@ import matplotlib.pyplot as plt
 from sklearn.metrics import roc_curve, auc
 from sklearn.model_selection import StratifiedKFold
 
-from ROOT import RDataFrame
+from ROOT import RDataFrame, RDF
 
 import dmu.ml.utilities as ut
 import dmu.pdataframe.utilities as put
 import dmu.plotting.utilities as plu
 
+from dmu.ml.cv_diagnostics import CVDiagnostics
 from dmu.ml.cv_classifier import CVClassifier as cls
 from dmu.plotting.plotter_1d import Plotter1D as Plotter
 from dmu.plotting.matrix import MatrixPlotter
 from dmu.logging.log_store import LogStore
 
-
+NPA = numpy.ndarray
 log = LogStore.add_logger('dmu:ml:train_mva')
 # ---------------------------------------------
 class TrainMva:
@@ -33,40 +36,73 @@ class TrainMva:
     Interface to scikit learn used to train classifier
     '''
     # ---------------------------------------------
-    def __init__(self, bkg
+    def __init__(self, bkg : RDataFrame, sig : RDataFrame, cfg : dict):
         '''
         bkg (ROOT dataframe): Holds real data
         sig (ROOT dataframe): Holds simulation
         cfg (dict) : Dictionary storing configuration for training
         '''
-
-
+        self._cfg = cfg
+        self._l_ft_name = self._cfg['training']['features']
 
-
-
+        self._rdf_sig_org = sig
+        self._rdf_bkg_org = bkg
 
-
-
+        rdf_bkg = self._preprocess_rdf(bkg)
+        rdf_sig = self._preprocess_rdf(sig)
 
-        self.
-        self.
-        self._cfg = cfg
+        df_ft_sig, l_lab_sig = self._get_sample_inputs(rdf = rdf_sig, label = 1)
+        df_ft_bkg, l_lab_bkg = self._get_sample_inputs(rdf = rdf_bkg, label = 0)
 
-        self.
+        self._df_ft = pnd.concat([df_ft_sig, df_ft_bkg], axis=0)
+        self._l_lab = numpy.array(l_lab_sig + l_lab_bkg)
 
-        self.
+        self._rdf_bkg = self._get_rdf(rdf = rdf_bkg, df_feat=df_ft_bkg)
+        self._rdf_sig = self._get_rdf(rdf = rdf_sig, df_feat=df_ft_sig)
     # ---------------------------------------------
-    def
-
-
+    def _get_extra_columns(self, rdf : RDataFrame, df : pnd.DataFrame) -> list[str]:
+        d_plot = self._cfg['plotting']['features']['plots']
+        l_expr = list(d_plot)
+        l_rdf = [ name.c_str() for name in rdf.GetColumnNames() ]
+
+        l_extr = []
+        for expr in l_expr:
+            if expr not in l_rdf:
+                continue
 
-
-
+            if expr in df.columns:
+                continue
 
-
-        arr_lab = numpy.concatenate([arr_lab_sig, arr_lab_bkg])
+            l_extr.append(expr)
 
-        return
+        return l_extr
+    # ---------------------------------------------
+    def _get_rdf(self, rdf : RDataFrame, df_feat : pnd.DataFrame) -> RDataFrame:
+        '''
+        Takes original ROOT dataframe and pre-processed features dataframe
+        Adds missing branches to latter and returns expanded ROOT dataframe
+        Need to make plots
+        '''
+
+        l_extr_col = self._get_extra_columns(rdf, df_feat)
+        if len(l_extr_col) > 20:
+            for name in l_extr_col:
+                log.debug(name)
+            raise ValueError('Found more than 20 extra columns')
+
+        d_data = rdf.AsNumpy(l_extr_col)
+        log.debug(f'Adding extra-nonfeature columns: {l_extr_col}')
+        df_extr = pnd.DataFrame(d_data)
+
+        nmain = len(df_feat.columns)
+        nextr = len(df_extr.columns)
+
+        log.debug(f'Main DF size: {nmain}')
+        log.debug(f'Extra DF size: {nextr}')
+
+        df_all = pnd.concat([df_feat, df_extr], axis=1)
+
+        return RDF.FromPandas(df_all)
     # ---------------------------------------------
     def _pre_process_nans(self, df : pnd.DataFrame) -> pnd.DataFrame:
         if 'dataset' not in self._cfg:
@@ -77,25 +113,41 @@ class TrainMva:
             return df
 
        d_name_val = self._cfg['dataset']['nan']
-        log.info(
+        log.info(70 * '-')
         log.info('Doing NaN replacements')
-        log.info(
+        log.info(70 * '-')
         for var, val in d_name_val.items():
-
+            nna = df[var].isna().sum()
+
+            log.info(f'{var:<20}{"--->":20}{val:<20.3f}{nna}')
             df[var] = df[var].fillna(val)
+        log.info(70 * '-')
 
         return df
     # ---------------------------------------------
-    def
+    def _preprocess_rdf(self, rdf : RDataFrame) -> RDataFrame:
+        if 'define' not in self._cfg['dataset']:
+            log.debug('No definitions found')
+            return rdf
+
+        log.debug('Definitions found')
+        d_def = self._cfg['dataset']['define']
+        for name, expr in d_def.items():
+            log.debug(f'{name:<20}{expr}')
+            rdf = rdf.Define(name, expr)
+
+        return rdf
+    # ---------------------------------------------
+    def _get_sample_inputs(self, rdf : RDataFrame, label : int) -> tuple[pnd.DataFrame, list[int]]:
         d_ft = rdf.AsNumpy(self._l_ft_name)
         df = pnd.DataFrame(d_ft)
         df = self._pre_process_nans(df)
         df = ut.cleanup(df)
         l_lab= len(df) * [label]
 
-        return df,
+        return df, l_lab
    # ---------------------------------------------
-    def _get_model(self, arr_index :
+    def _get_model(self, arr_index : NPA) -> cls:
        model = cls(cfg = self._cfg)
        df_ft = self._df_ft.iloc[arr_index]
        l_lab = self._l_lab[arr_index]
@@ -107,10 +159,14 @@ class TrainMva:
 
         return model
     # ---------------------------------------------
-    def _get_models(self):
+    def _get_models(self, load_trained : bool):
         '''
         Will create models, train them and return them
         '''
+        if load_trained:
+            log.warning('Not retraining, but loading trained models')
+            return self._load_trained_models()
+
         nfold = self._cfg['training']['nfold']
         rdmst = self._cfg['training']['rdm_stat']
 
@@ -137,6 +193,22 @@ class TrainMva:
 
         return l_model
     # ---------------------------------------------
+    def _load_trained_models(self) -> list[cls]:
+        model_path = self._cfg['saving']['path']
+        nfold = self._cfg['training']['nfold']
+        l_model = []
+        for ifold in range(nfold):
+            fold_path = model_path.replace('.pkl', f'_{ifold:03}.pkl')
+
+            if not os.path.isfile(fold_path):
+                raise FileNotFoundError(f'Missing trained model: {fold_path}')
+
+            log.debug(f'Loading model from: {fold_path}')
+            model = joblib.load(fold_path)
+            l_model.append(model)
+
+        return l_model
+    # ---------------------------------------------
     def _labels_from_varnames(self, l_var_name : list[str]) -> list[str]:
         try:
             d_plot = self._cfg['plotting']['features']['plots']
@@ -176,7 +248,7 @@ class TrainMva:
         d_form = {'Variable' : '{}', 'Importance' : '{:.1f}'}
         put.df_to_tex(df, table_path, d_format = d_form)
     # ---------------------------------------------
-    def _get_scores(self, model : cls, arr_index :
+    def _get_scores(self, model : cls, arr_index : NPA, on_training_ok : bool) -> tuple[NPA, NPA, NPA, NPA]:
         '''
         Returns a tuple of four arrays
 
@@ -199,7 +271,7 @@ class TrainMva:
 
         return arr_sig, arr_bkg, arr_all, arr_lab
     # ---------------------------------------------
-    def _split_scores(self, arr_prob :
+    def _split_scores(self, arr_prob : NPA, arr_label : NPA) -> tuple[NPA, NPA]:
         '''
         Will split the testing scores (predictions) based on the training scores
 
@@ -255,7 +327,7 @@ class TrainMva:
 
         return cfg
     # ---------------------------------------------
-    def _plot_correlation(self, arr_index :
+    def _plot_correlation(self, arr_index : NPA, ifold : int) -> None:
         df_ft = self._df_ft.iloc[arr_index]
         cfg = self._get_correlation_cfg(df_ft, ifold)
         cov = df_ft.corr()
@@ -272,7 +344,7 @@ class TrainMva:
         plt.savefig(f'{val_dir}/covariance.png')
         plt.close()
     # ---------------------------------------------
-    def _get_nentries(self, arr_val :
+    def _get_nentries(self, arr_val : NPA) -> str:
         size = len(arr_val)
         size = size / 1000.
 
@@ -307,10 +379,10 @@ class TrainMva:
         plt.close()
     # ---------------------------------------------
     def _plot_roc(self,
-                  l_lab_ts :
-                  l_prb_ts :
-                  l_lab_tr :
-                  l_prb_tr :
+                  l_lab_ts : NPA,
+                  l_prb_ts : NPA,
+                  l_lab_tr : NPA,
+                  l_prb_tr : NPA,
                   ifold : int):
         '''
         Takes the labels and the probabilities and plots ROC
@@ -355,10 +427,10 @@ class TrainMva:
         plt.close()
     # ---------------------------------------------
     def _plot_probabilities(self,
-                            arr_seff:
-                            arr_brej:
-                            arr_sprb:
-                            arr_labl:
+                            arr_seff: NPA,
+                            arr_brej: NPA,
+                            arr_sprb: NPA,
+                            arr_labl: NPA) -> None:
 
         roc_cfg = self._cfg['plotting']['roc']
         if 'annotate' not in roc_cfg:
@@ -443,11 +515,32 @@ class TrainMva:
         os.makedirs(val_dir, exist_ok=True)
         put.df_to_tex(df, f'{val_dir}/hyperparameters.tex')
     # ---------------------------------------------
-    def
+    def _run_diagnostics(self, models : list[cls], rdf : RDataFrame, name : str) -> None:
+        if 'diagnostics' not in self._cfg:
+            log.warning('Diagnostics section not found, not running diagnostics')
+            return
+
+        cfg_diag = self._cfg['diagnostics']
+        out_dir = cfg_diag['output']
+        plt_dir = None
+
+        if 'overlay' in cfg_diag['correlations']['target']:
+            plt_dir = cfg_diag['correlations']['target']['overlay']['saving']['plt_dir']
+
+        cfg_diag = copy.deepcopy(cfg_diag)
+        cfg_diag['output'] = f'{out_dir}/{name}'
+        if plt_dir is not None:
+            cfg_diag['correlations']['target']['overlay']['saving']['plt_dir'] = f'{plt_dir}/{name}'
+
+        cvd = CVDiagnostics(models=models, rdf=rdf, cfg=cfg_diag)
+        cvd.run()
+    # ---------------------------------------------
+    def run(self, skip_fit : bool = False, load_trained : bool = False) -> None:
         '''
         Will do the training
 
         skip_fit: By default false, if True, it will only do the plots of features and save tables
+        load_trained: If true, it will load the models instead of training, by default false
         '''
         self._save_settings_to_tex()
         self._plot_features()
@@ -455,7 +548,11 @@ class TrainMva:
         if skip_fit:
             return
 
-        l_mod = self._get_models()
-
-
+        l_mod = self._get_models(load_trained = load_trained)
+        if not load_trained:
+            for ifold, mod in enumerate(l_mod):
+                self._save_model(mod, ifold)
+
+        self._run_diagnostics(models = l_mod, rdf = self._rdf_sig_org, name='Signal' )
+        self._run_diagnostics(models = l_mod, rdf = self._rdf_bkg_org, name='Background')
     # ---------------------------------------------
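
A minimal usage sketch for the training flow touched by this diff. This is not taken from the package documentation: it only exercises the TrainMva entry points visible above. The input files and tree name are placeholders, and the configuration is assumed to follow the schema of dmu_data/ml/tests/train_mva_with_diagnostics.yaml shipped with the wheel.

    # Hedged sketch; rdf_sig/rdf_bkg are ROOT RDataFrame objects and cfg is assumed
    # to follow dmu_data/ml/tests/train_mva_with_diagnostics.yaml
    import yaml
    from ROOT import RDataFrame
    from dmu.ml.train_mva import TrainMva

    rdf_sig = RDataFrame('tree', 'signal.root')      # hypothetical input files
    rdf_bkg = RDataFrame('tree', 'background.root')

    with open('train_mva_with_diagnostics.yaml', encoding='utf-8') as ifile:
        cfg = yaml.safe_load(ifile)

    trainer = TrainMva(bkg=rdf_bkg, sig=rdf_sig, cfg=cfg)

    # First pass: train and save one model per fold; saving.path gets a
    # _000.pkl, _001.pkl, ... suffix per fold
    trainer.run(skip_fit=False, load_trained=False)

    # Later pass: reload the saved per-fold models instead of retraining and
    # rerun the CVDiagnostics step on the original signal and background samples
    trainer.run(load_trained=True)
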
dmu/pdataframe/utilities.py
CHANGED
@@ -2,20 +2,28 @@
 Module containing utilities for pandas dataframes
 '''
 import os
+import yaml
 import pandas as pnd
 
 from dmu.logging.log_store import LogStore
 
 log=LogStore.add_logger('dmu:pdataframe:utilities')
-
 # -------------------------------------
-def df_to_tex(df
+def df_to_tex(df : pnd.DataFrame,
+              path : str,
+              hide_index : bool = True,
+              d_format : dict[str,str]= None,
+              **kwargs : str ) -> None:
     '''
     Saves pandas dataframe to latex
 
     Parameters
     -------------
+    df : Dataframe with data
+    path (str) : Path to latex file
+    hide_index : If true (default), index of dataframe won't appear in table
     d_format (dict) : Dictionary specifying the formattinng of the table, e.g. `{'col1': '{}', 'col2': '{:.3f}', 'col3' : '{:.3f}'}`
+    kwargs : Arguments needed in `to_latex`
     '''
 
     if path is not None:
@@ -30,7 +38,32 @@ def df_to_tex(df : pnd.DataFrame, path : str, hide_index : bool = True, d_format
     st=st.format(formatter=d_format)
 
     log.info(f'Saving to: {path}')
-    buf = st.to_latex(buf=path,
+    buf = st.to_latex(buf=path, hrules=True, **kwargs)
 
     return buf
 # -------------------------------------
+def to_yaml(df : pnd.DataFrame, path : str):
+    '''
+    Takes a dataframe and the path to a yaml file
+    Makes the directory path if not found and saves data in YAML file
+    '''
+    dir_path = os.path.dirname(path)
+    os.makedirs(dir_path, exist_ok=True)
+
+    data = df.to_dict()
+
+    with open(path, 'w', encoding='utf-8') as ofile:
+        yaml.safe_dump(data, ofile)
+# -------------------------------------
+def from_yaml(path : str) -> pnd.DataFrame:
+    '''
+    Takes path to a yaml file
+    Makes dataframe from it and returns it
+    '''
+    with open(path, encoding='utf-8') as ifile:
+        data = yaml.safe_load(ifile)
+
+    df = pnd.DataFrame(data)
+
+    return df
+# -------------------------------------
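
A short round-trip sketch for the to_yaml/from_yaml helpers added above; the output path is a placeholder.

    # Minimal round-trip sketch for the new YAML helpers
    import pandas as pnd
    import dmu.pdataframe.utilities as put

    df_out = pnd.DataFrame({'name' : ['a', 'b'], 'value' : [1.0, 2.0]})
    put.to_yaml(df_out, '/tmp/dmu_example/table.yaml')   # creates /tmp/dmu_example if needed
    df_in = put.from_yaml('/tmp/dmu_example/table.yaml')

    print(df_in)
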
dmu/plotting/fwhm.py
ADDED
@@ -0,0 +1,64 @@
+'''
+Module with FWHM plugin class
+'''
+import zfit
+import numpy
+import matplotlib.pyplot as plt
+
+from dmu.logging.log_store import LogStore
+
+log = LogStore.add_logger('dmu:plotting:fwhm')
+# --------------------------------------------
+class FWHM:
+    '''
+    Class meant to be used to calculate Full Width at Half Maximum
+    as a Plotter1d plugin
+    '''
+    # -------------------------
+    def __init__(self, cfg : dict, val : numpy.ndarray, wgt : numpy.ndarray, maxy : float):
+        self._cfg = cfg
+        self._arr_val = val
+        self._arr_wgt = wgt
+        self._maxy = maxy
+    # -------------------------
+    def _normalize_yval(self, arr_pdf_val : numpy.ndarray) -> None:
+        max_pdf_val = numpy.max(arr_pdf_val)
+        arr_pdf_val*= self._maxy / max_pdf_val
+
+        return arr_pdf_val
+    # -------------------------
+    def _get_fwhm(self, arr_x : numpy.ndarray, arr_y : numpy.ndarray) -> float:
+        maxy = numpy.max(arr_y)
+        arry = numpy.where(arr_y > maxy/2.)[0]
+        imax = arry[ 0]
+        imin = arry[-1]
+
+        x1 = arr_x[imax]
+        x2 = arr_x[imin]
+
+        if self._cfg['plot']:
+            plt.plot([x1, x2], [maxy/2, maxy/2], linestyle=':', linewidth=1, color='k')
+
+        return x2 - x1
+    # -------------------------
+    def run(self) -> float:
+        '''
+        Runs plugin and return FWHM
+        '''
+        [minx, maxx] = self._cfg['obs']
+
+        log.info('Running FWHM pluggin')
+        obs = zfit.Space('mass', limits=(minx, maxx))
+        pdf= zfit.pdf.KDE1DimExact(obs=obs, data=self._arr_val, weights=self._arr_wgt)
+
+        xval = numpy.linspace(minx, maxx, 200)
+        yval = pdf.pdf(xval)
+        yval = self._normalize_yval(yval)
+
+        if self._cfg['plot']:
+            plt.plot(xval, yval, linestyle='-', linewidth=2, color='gray')
+
+        fwhm = self._get_fwhm(xval, yval)
+
+        return fwhm
+# --------------------------------------------
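
A sketch of calling the new plugin directly; inside the package it is normally driven by Plotter1D through the `plugin -> fwhm` section of the plotting config (see dmu_data/plotting/tests/plug_fwhm.yaml in the file list). The data and config values below are illustrative only.

    # Hedged direct-call sketch of the FWHM plugin added in this diff
    import numpy
    from dmu.plotting.fwhm import FWHM

    arr_val = numpy.random.normal(loc=5.0, scale=0.5, size=5_000)
    arr_wgt = numpy.ones_like(arr_val)

    cfg = {'obs' : [3.0, 7.0], 'plot' : False}   # keys read by FWHM.run above
    obj = FWHM(cfg=cfg, val=arr_val, wgt=arr_wgt, maxy=1.0)
    fwhm = obj.run()   # roughly 2.355 * sigma ~ 1.18 for this Gaussian toy sample
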
dmu/plotting/plotter.py
CHANGED
@@ -29,6 +29,8 @@ class Plotter:
         self._d_cfg = cfg
         self._d_rdf : dict[str, RDataFrame] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
         self._d_wgt : Union[dict[str, Union[numpy.ndarray, None]], None]
+
+        self._title : str = ''
     #-------------------------------------
     def _check_quantile(self, qnt : float):
         '''
dmu/plotting/plotter_1d.py
CHANGED
@@ -1,7 +1,7 @@
 '''
 Module containing plotter class
 '''
-
+import copy
 from hist import Hist
 
 import numpy
@@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
 
 from dmu.logging.log_store import LogStore
 from dmu.plotting.plotter import Plotter
+from dmu.plotting.fwhm import FWHM
 
 log = LogStore.add_logger('dmu:plotting:Plotter1D')
 # --------------------------------------------
@@ -55,6 +56,72 @@ class Plotter1D(Plotter):
 
         return minx, maxx, bins
     #-------------------------------------
+    def _run_plugins(self,
+                     arr_val : numpy.ndarray,
+                     arr_wgt : numpy.ndarray,
+                     hst,
+                     name : str,
+                     varname : str) -> None:
+        if 'plugin' not in self._d_cfg:
+            log.debug('No plugins found')
+            return
+
+        if 'fwhm' in self._d_cfg['plugin']:
+            if varname not in self._d_cfg['plugin']['fwhm']:
+                log.debug(f'No FWHM plugin found for variable {varname}')
+                return
+
+            log.debug(f'FWHM plugin found for variable {varname}')
+            cfg = self._d_cfg['plugin']['fwhm'][varname]
+            self._run_fwhm(arr_val = arr_val, arr_wgt=arr_wgt, hst=hst, name=name, cfg = cfg)
+
+        if 'stats' in self._d_cfg['plugin']:
+            if varname not in self._d_cfg['plugin']['stats']:
+                log.debug(f'No stats plugin found for variable {varname}')
+                return
+
+            log.debug(f'stats plugin found for variable {varname}')
+            cfg = self._d_cfg['plugin']['stats'][varname]
+            self._run_stats(arr_val = arr_val, arr_wgt=arr_wgt, name=name, cfg = cfg)
+    #-------------------------------------
+    def _run_stats(self, arr_val : numpy.ndarray, arr_wgt : numpy.ndarray, name : str, cfg : dict[str:str]) -> None:
+        this_title = ''
+        if 'sum' in cfg:
+            form = cfg['sum']
+            sumv = numpy.sum(arr_wgt)
+            this_title += form.format(sumv) + '; '
+
+        if 'mean' in cfg:
+            form = cfg['mean']
+            mean = numpy.average(arr_val, weights=arr_wgt)
+            this_title += form.format(mean) + '; '
+
+        if 'rms' in cfg:
+            form = cfg['rms']
+            mean = numpy.average(arr_val, weights=arr_wgt)
+            rms = numpy.sqrt(numpy.average((arr_val - mean) ** 2, weights=arr_wgt))
+            this_title += form.format(rms ) + '; '
+
+        self._title+= f'\n{name}: {this_title}'
+    #-------------------------------------
+    def _run_fwhm(self, arr_val : numpy.ndarray, arr_wgt : numpy.ndarray, hst, name : str, cfg : dict) -> None:
+        arr_bin_cnt = hst.values()
+        maxy = numpy.max(arr_bin_cnt)
+        obj = FWHM(cfg=cfg, val=arr_val, wgt=arr_wgt, maxy=maxy)
+        fwhm = obj.run()
+
+        form = cfg['format']
+        this_title = form.format(fwhm)
+
+        if 'add_std' in cfg and cfg['add_std']:
+            mu = numpy.average(arr_val , weights=arr_wgt)
+            avg = numpy.average((arr_val - mu) ** 2, weights=arr_wgt)
+            std = numpy.sqrt(avg)
+            form = form.replace('FWHM', 'STD')
+            this_title+= '; ' + form.format(std)
+
+        self._title+= f'\n{name}: {this_title}'
+    #-------------------------------------
     def _plot_var(self, var : str) -> float:
         '''
         Will plot a variable from a dictionary of dataframes
@@ -70,6 +137,7 @@ class Plotter1D(Plotter):
 
         d_data = {}
         for name, rdf in self._d_rdf.items():
+            log.debug(f'Plotting: {var}/{name}')
             d_data[name] = rdf.AsNumpy([var])[var]
 
         minx, maxx, bins = self._get_binning(var, d_data)
@@ -82,7 +150,18 @@ class Plotter1D(Plotter):
             arr_wgt = self._normalize_weights(arr_wgt, var)
             hst = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x').Weight()
             hst.fill(x=arr_val, weight=arr_wgt)
-
+            self._run_plugins(arr_val, arr_wgt, hst, name, var)
+
+            if 'styling' in self._d_cfg['plots'][var]:
+                style = self._d_cfg['plots'][var]['styling']
+                style = copy.deepcopy(style)
+            else:
+                style = {'label' : label, 'histtype' : 'errorbar', 'marker' : '.', 'linestyle' : 'none'}
+
+            if 'label' not in style:
+                style['label'] = label
+
+            hst.plot(**style)
             l_bc_all += hst.values().tolist()
 
         max_y = max(l_bc_all)
@@ -131,9 +210,12 @@ class Plotter1D(Plotter):
         if yscale == 'linear':
             plt.ylim(bottom=0)
 
-        title =
+        title = self._title
         if 'title' in d_cfg:
-
+            this_title = d_cfg['title']
+            title += f'\n {this_title}'
+
+        title = title.lstrip('\n')
 
         plt.ylim(top=1.2 * max_y)
         plt.legend()
@@ -160,8 +242,7 @@ class Plotter1D(Plotter):
 
         fig_size = self._get_fig_size()
         for var in self._d_cfg['plots']:
-
-
+            self._title = ''
             plt.figure(var, figsize=fig_size)
             max_y = self._plot_var(var)
             self._style_plot(var, max_y)
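
A schematic, in Python dict form, of the configuration blocks consumed by the new Plotter1D code above. The variable name 'mass' and all values are placeholders; the key names come from this diff and from the test configs listed at the top (plug_fwhm.yaml, plug_stats.yaml, styling.yaml). Other per-variable plot settings are omitted.

    # Hedged schematic of the 'styling' and 'plugin' sections read by _plot_var and _run_plugins
    cfg_plugin_section = {
        'plots' : {
            'mass' : {
                # forwarded to hst.plot(**style); 'label' is filled in if missing
                'styling' : {'histtype' : 'step', 'color' : 'red'},
            },
        },
        'plugin' : {
            'fwhm' : {
                'mass' : {
                    'plot'    : True,            # draw the KDE and the half-maximum line
                    'obs'     : [4500, 6000],    # [minx, maxx] for the zfit observable
                    'format'  : 'FWHM={:.0f}',   # formatted into the plot title
                    'add_std' : True,            # also quote the weighted standard deviation
                },
            },
            'stats' : {
                'mass' : {
                    'sum'  : 'Entries={:.0f}',
                    'mean' : 'Mean={:.3f}',
                    'rms'  : 'RMS={:.3f}',
                },
            },
        },
    }
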
dmu/stats/fitter.py
CHANGED
@@ -231,7 +231,7 @@ class Fitter:
                 continue
 
             const = zfit.constraint.GaussianConstraint(params=par, observation=float(par_mu), uncertainty=float(par_sg))
-            log.info(f'{"":<4}{par_name:<
+            log.info(f'{"":<4}{par_name:<45}{par_mu:<15.3e}{par_sg:<15.3e}')
             l_const.append(const)
 
         return l_const