data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +177 -8
  2. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +30 -18
  3. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
  4. dmu/generic/hashing.py +44 -0
  5. dmu/generic/utilities.py +14 -1
  6. dmu/generic/version_management.py +3 -5
  7. dmu/ml/cv_diagnostics.py +221 -0
  8. dmu/ml/train_mva.py +124 -31
  9. dmu/pdataframe/utilities.py +36 -3
  10. dmu/plotting/fwhm.py +64 -0
  11. dmu/plotting/plotter.py +2 -0
  12. dmu/plotting/plotter_1d.py +87 -6
  13. dmu/stats/fitter.py +1 -1
  14. dmu/stats/model_factory.py +189 -25
  15. dmu/stats/zfit_models.py +68 -0
  16. dmu/stats/zfit_plotter.py +29 -21
  17. dmu/testing/utilities.py +31 -4
  18. dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
  19. dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
  20. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
  21. dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
  22. dmu_data/ml/tests/train_mva.yaml +15 -9
  23. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
  24. dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
  25. dmu_data/plotting/tests/plug_stats.yaml +19 -0
  26. dmu_data/plotting/tests/simple.yaml +4 -3
  27. dmu_data/plotting/tests/styling.yaml +11 -0
  28. {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
  29. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
  30. {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
@@ -6,9 +6,12 @@ Module storing ZModel class
6
6
  from typing import Callable, Union
7
7
 
8
8
  import zfit
9
+
9
10
  from zfit.core.interfaces import ZfitSpace as zobs
10
11
  from zfit.core.basepdf import BasePDF as zpdf
11
12
  from zfit.core.parameter import Parameter as zpar
13
+ from dmu.stats.zfit_models import HypExp
14
+ from dmu.stats.zfit_models import ModExp
12
15
  from dmu.logging.log_store import LogStore
13
16
 
14
17
  log=LogStore.add_logger('dmu:stats:model_factory')
@@ -47,6 +50,13 @@ class MethodRegistry:
47
50
  log.info(f' {value}')
48
51
 
49
52
  return method
53
+
54
+ @classmethod
55
+ def get_pdf_names(cls) -> list[str]:
56
+ '''
57
+ Returns list of PDFs that are registered/supported
58
+ '''
59
+ return list(cls._d_method)
50
60
  #-----------------------------------------
51
61
  class ModelFactory:
52
62
  '''
@@ -57,11 +67,17 @@ class ModelFactory:
57
67
 
58
68
  l_pdf = ['dscb', 'gauss']
59
69
  l_shr = ['mu']
60
- mod = ModelFactory(preffix = 'signal', obs = obs, l_pdf = l_pdf, l_shared=l_shr)
70
+ l_flt = ['mu', 'sg']
71
+ d_rep = {'mu' : 'scale', 'sg' : 'reso'}
72
+ mod = ModelFactory(preffix = 'signal', obs = obs, l_pdf = l_pdf, l_shared = l_shr, d_rep = d_rep)
61
73
  pdf = mod.get_pdf()
62
74
  ```
63
75
 
64
- where one can specify which parameters can be shared among the PDFs
76
+ where one can specify which parameters
77
+
78
+ - Can be shared among the PDFs
79
+ - Are meant to float if this fit is done to MC, in order to fix parameters in data.
80
+ - Are scales or resolutions that need reparametrizations
65
81
  '''
66
82
  #-----------------------------------------
67
83
  def __init__(self,
@@ -69,22 +85,45 @@ class ModelFactory:
69
85
  obs : zobs,
70
86
  l_pdf : list[str],
71
87
  l_shared : list[str],
72
- l_float : list[str]):
88
+ l_float : list[str],
89
+ d_fix : dict[str:float] = None,
90
+ d_rep : dict[str:str] = None):
73
91
  '''
74
92
  preffix: used to identify PDF, will be used to name every parameter
75
93
  obs: zfit obserbable
76
94
  l_pdf: List of PDF nicknames which are registered below
77
95
  l_shared: List of parameter names that are shared
78
96
  l_float: List of parameter names to allow to float
97
+ d_fix: Dictionary with keys as the beginning of the name of a parameter and value as the number
98
+ to which it has to be fixed. If not one and only one parameter is found, ValueError is raised
99
+ d_rep: Dictionary with keys as variables that will be reparametrized
79
100
  '''
80
101
 
81
102
  self._preffix = preffix
82
103
  self._l_pdf = l_pdf
83
104
  self._l_shr = l_shared
84
105
  self._l_flt = l_float
106
+ self._d_fix = d_fix
107
+ self._d_rep = d_rep
85
108
  self._obs = obs
86
109
 
87
110
  self._d_par : dict[str,zpar] = {}
111
+
112
+ self._check_reparametrization()
113
+ #-----------------------------------------
114
+ def _check_reparametrization(self) -> None:
115
+ if self._d_rep is None:
116
+ return
117
+
118
+ s_par_1 = set(self._d_rep)
119
+ s_par_2 = set(self._l_flt)
120
+
121
+ if not s_par_1.isdisjoint(s_par_2):
122
+ raise ValueError('Non empty intersection between floating and reparametrization parameters')
123
+
124
+ s_kind = set(self._d_rep.values())
125
+ if not s_kind.issubset({'scale', 'reso'}):
126
+ raise ValueError(f'Only scales and resolution reparametrizations allowed, found: {s_kind}')
88
127
  #-----------------------------------------
89
128
  def _split_name(self, name : str) -> tuple[str,str]:
90
129
  l_part = name.split('_')
@@ -108,30 +147,86 @@ class ModelFactory:
108
147
 
109
148
  return name
110
149
  #-----------------------------------------
111
- def _get_parameter(self,
112
- name : str,
113
- suffix : str,
114
- val : float,
115
- low : float,
116
- high : float) -> zpar:
150
+ def _get_parameter(
151
+ self,
152
+ name : str,
153
+ suffix : str,
154
+ val : float,
155
+ low : float,
156
+ high : float) -> zpar:
157
+
158
+ par_name = self._get_parameter_name(name, suffix)
159
+ log.debug(f'Assigning name: {par_name}')
160
+
161
+ if par_name in self._d_par:
162
+ return self._d_par[par_name]
163
+
164
+ is_reparametrized = self._is_reparametrized(name)
165
+
166
+ if is_reparametrized:
167
+ init_name, _ = self._split_name(par_name)
168
+ par = self._get_reparametrization(par_name, init_name, val, low, high)
169
+ else:
170
+ par = zfit.param.Parameter(par_name, val, low, high)
171
+
172
+ self._d_par[par_name] = par
173
+
174
+ return par
175
+ #-----------------------------------------
176
+ def _is_reparametrized(self, name : str) -> bool:
177
+ if self._d_rep is None:
178
+ return False
117
179
 
118
- name = self._get_parameter_name(name, suffix)
119
- log.debug(f'Assigning name: {name}')
180
+ root_name, _ = self._split_name(name)
120
181
 
121
- if name in self._d_par:
122
- return self._d_par[name]
182
+ is_rep = root_name in self._d_rep
123
183
 
124
- par = zfit.param.Parameter(name, val, low, high)
184
+ log.debug(f'Reparametrizing {name}: {is_rep}')
125
185
 
126
- self._d_par[name] = par
186
+ return is_rep
187
+ #-----------------------------------------
188
+ def _get_reparametrization(self, par_name : str, init_name : str, value : float, low : float, high : float) -> zpar:
189
+ log.debug(f'Reparametrizing {par_name}')
190
+ par_const = zfit.Parameter(par_name, value, low, high)
191
+ par_const.floating = False
192
+
193
+ kind = self._d_rep[init_name]
194
+ if kind == 'reso':
195
+ par_reso = zfit.Parameter(f'{par_name}_reso_flt' , 1.0, 0.20, 5.0)
196
+ par = zfit.ComposedParameter(f'{par_name}_cmp', lambda d_par : d_par['par_const'] * d_par['reso' ], params={'par_const' : par_const, 'reso' : par_reso } )
197
+ elif kind == 'scale':
198
+ par_scale = zfit.Parameter(f'{par_name}_scale_flt', 0.0, -100, 100)
199
+ par = zfit.ComposedParameter(f'{par_name}_cmp', lambda d_par : d_par['par_const'] + d_par['scale'], params={'par_const' : par_const, 'scale' : par_scale} )
200
+ else:
201
+ raise ValueError(f'Invalid kind: {kind}')
127
202
 
128
203
  return par
129
204
  #-----------------------------------------
130
205
  @MethodRegistry.register('exp')
131
206
  def _get_exponential(self, suffix : str = '') -> zpdf:
132
- c = self._get_parameter('c_exp', suffix, -0.005, -0.20, 0.00)
207
+ c = self._get_parameter('c_exp', suffix, -0.010, -0.020, -0.0001)
133
208
  pdf = zfit.pdf.Exponential(c, self._obs, name=f'exp{suffix}')
134
209
 
210
+ return pdf
211
+ # ---------------------------------------------
212
+ @MethodRegistry.register('hypexp')
213
+ def _get_hypexp(self, suffix : str = '') -> zpdf:
214
+ mu = zfit.Parameter('mu_hypexp', 5000, 4000, 6000)
215
+ ap = zfit.Parameter('ap_hypexp', 0.020, 0, 0.10)
216
+ bt = zfit.Parameter('bt_hypexp', 0.002, 0.0001, 0.003)
217
+
218
+ pdf= HypExp(obs=self._obs, mu=mu, alpha=ap, beta=bt, name=f'hypexp{suffix}')
219
+
220
+ return pdf
221
+ # ---------------------------------------------
222
+ @MethodRegistry.register('modexp')
223
+ def _get_modexp(self, suffix : str = '') -> zpdf:
224
+ mu = zfit.Parameter('mu_modexp', 4250, 4250, 4500)
225
+ ap = zfit.Parameter('ap_modexp', 0.002, 0.002, 0.026)
226
+ bt = zfit.Parameter('bt_modexp', 0.002, 0.002, 0.020)
227
+
228
+ pdf= ModExp(obs=self._obs, mu=mu, alpha=ap, beta=bt, name=f'modexp{suffix}')
229
+
135
230
  return pdf
136
231
  #-----------------------------------------
137
232
  @MethodRegistry.register('pol1')
@@ -145,13 +240,22 @@ class ModelFactory:
145
240
  def _get_pol2(self, suffix : str = '') -> zpdf:
146
241
  a = self._get_parameter('a_pol2', suffix, -0.005, -0.95, 0.00)
147
242
  b = self._get_parameter('b_pol2', suffix, 0.000, -0.95, 0.95)
148
- pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b], name=f'pol2{suffix}')
243
+ pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b ], name=f'pol2{suffix}')
244
+
245
+ return pdf
246
+ # ---------------------------------------------
247
+ @MethodRegistry.register('pol3')
248
+ def _get_pol3(self, suffix : str = '') -> zpdf:
249
+ a = zfit.Parameter('a_pol3', -0.005, -0.95, 0.00)
250
+ b = zfit.Parameter('b_pol3', 0.000, -0.95, 0.95)
251
+ c = zfit.Parameter('c_pol3', 0.000, -0.95, 0.95)
252
+ pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b, c], name=f'pol3{suffix}')
149
253
 
150
254
  return pdf
151
255
  #-----------------------------------------
152
256
  @MethodRegistry.register('cbr')
153
257
  def _get_cbr(self, suffix : str = '') -> zpdf:
154
- mu = self._get_parameter('mu_cbr', suffix, 5300, 5100, 5350)
258
+ mu = self._get_parameter('mu_cbr', suffix, 5300, 5100, 5500)
155
259
  sg = self._get_parameter('sg_cbr', suffix, 10, 2, 300)
156
260
  ar = self._get_parameter('ac_cbr', suffix, -2, -14., -0.1)
157
261
  nr = self._get_parameter('nc_cbr', suffix, 1, 0.5, 150)
@@ -162,10 +266,10 @@ class ModelFactory:
162
266
  #-----------------------------------------
163
267
  @MethodRegistry.register('suj')
164
268
  def _get_suj(self, suffix : str = '') -> zpdf:
165
- mu = self._get_parameter('mu_suj', suffix, 5300, 4000, 6000)
269
+ mu = self._get_parameter('mu_suj', suffix, 5300, 5000, 6000)
166
270
  sg = self._get_parameter('sg_suj', suffix, 10, 2, 5000)
167
271
  gm = self._get_parameter('gm_suj', suffix, 1, -10, 10)
168
- dl = self._get_parameter('dl_suj', suffix, 1, 0.1, 10)
272
+ dl = self._get_parameter('dl_suj', suffix, 1, 0.1, 40)
169
273
 
170
274
  pdf = zfit.pdf.JohnsonSU(mu, sg, gm, dl, self._obs, name=f'suj{suffix}')
171
275
 
@@ -173,9 +277,9 @@ class ModelFactory:
173
277
  #-----------------------------------------
174
278
  @MethodRegistry.register('cbl')
175
279
  def _get_cbl(self, suffix : str = '') -> zpdf:
176
- mu = self._get_parameter('mu_cbl', suffix, 5300, 5100, 5350)
280
+ mu = self._get_parameter('mu_cbl', suffix, 5300, 5100, 5500)
177
281
  sg = self._get_parameter('sg_cbl', suffix, 10, 2, 300)
178
- al = self._get_parameter('ac_cbl', suffix, 2, 0.1, 14.)
282
+ al = self._get_parameter('ac_cbl', suffix, 2, 0.0, 14.)
179
283
  nl = self._get_parameter('nc_cbl', suffix, 1, 0.5, 150)
180
284
 
181
285
  pdf = zfit.pdf.CrystalBall(mu, sg, al, nl, self._obs, name=f'cbl{suffix}')
@@ -184,7 +288,7 @@ class ModelFactory:
184
288
  #-----------------------------------------
185
289
  @MethodRegistry.register('gauss')
186
290
  def _get_gauss(self, suffix : str = '') -> zpdf:
187
- mu = self._get_parameter('mu_gauss', suffix, 5300, 5100, 5350)
291
+ mu = self._get_parameter('mu_gauss', suffix, 5300, 5100, 5500)
188
292
  sg = self._get_parameter('sg_gauss', suffix, 10, 2, 300)
189
293
 
190
294
  pdf = zfit.pdf.Gauss(mu, sg, self._obs, name=f'gauss{suffix}')
@@ -193,7 +297,7 @@ class ModelFactory:
193
297
  #-----------------------------------------
194
298
  @MethodRegistry.register('dscb')
195
299
  def _get_dscb(self, suffix : str = '') -> zpdf:
196
- mu = self._get_parameter('mu_dscb', suffix, 4000, 4000, 5400)
300
+ mu = self._get_parameter('mu_dscb', suffix, 5300, 5000, 5400)
197
301
  sg = self._get_parameter('sg_dscb', suffix, 10, 2, 500)
198
302
  ar = self._get_parameter('ar_dscb', suffix, 1, 0, 5)
199
303
  al = self._get_parameter('al_dscb', suffix, 1, 0, 5)
@@ -204,6 +308,35 @@ class ModelFactory:
204
308
 
205
309
  return pdf
206
310
  #-----------------------------------------
311
+ @MethodRegistry.register('voigt')
312
+ def _get_voigt(self, suffix : str = '') -> zpdf:
313
+ mu = zfit.Parameter('mu_voigt', 5280, 5040, 5500)
314
+ sg = zfit.Parameter('sg_voigt', 20, 10, 400)
315
+ gm = zfit.Parameter('gm_voigt', 4, 0.1, 100)
316
+
317
+ pdf = zfit.pdf.Voigt(m=mu, sigma=sg, gamma=gm, obs=self._obs, name=f'voigt{suffix}')
318
+
319
+ return pdf
320
+ #-----------------------------------------
321
+ @MethodRegistry.register('qgauss')
322
+ def _get_qgauss(self, suffix : str = '') -> zpdf:
323
+ mu = zfit.Parameter('mu_qgauss', 5280, 5040, 5500)
324
+ sg = zfit.Parameter('sg_qgauss', 20, 10, 400)
325
+ q = zfit.Parameter( 'q_qgauss', 1, 1, 3)
326
+
327
+ pdf = zfit.pdf.QGauss(q=q, mu=mu, sigma=sg, obs=self._obs, name =f'qgauss{suffix}')
328
+
329
+ return pdf
330
+ #-----------------------------------------
331
+ @MethodRegistry.register('cauchy')
332
+ def _get_cauchy(self, suffix : str = '') -> zpdf:
333
+ mu = zfit.Parameter('mu', 5280, 5040, 5500)
334
+ gm = zfit.Parameter('gm', 150, 50, 500)
335
+
336
+ pdf = zfit.pdf.Cauchy(obs=self._obs, m=mu, gamma=gm, name=f'cauchy{suffix}')
337
+
338
+ return pdf
339
+ #-----------------------------------------
207
340
  def _get_pdf_types(self) -> list[tuple[str,str]]:
208
341
  d_name_freq = {}
209
342
 
@@ -234,12 +367,42 @@ class ModelFactory:
234
367
  log.debug('Requested only one PDF, skipping sum')
235
368
  return l_pdf[0]
236
369
 
237
- l_frc= [ zfit.param.Parameter(f'frc_{ifrc + 1}', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]
370
+ l_frc= [ zfit.param.Parameter(f'frc_{self._preffix}_{ifrc + 1}', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]
238
371
 
239
372
  pdf = zfit.pdf.SumPDF(l_pdf, name=self._preffix, fracs=l_frc)
240
373
 
241
374
  return pdf
242
375
  #-----------------------------------------
376
+ def _find_par(self, s_par : set[zpar], name_start : str) -> zpar:
377
+ l_par_match = [ par for par in s_par if par.name.startswith(name_start) ]
378
+
379
+ if len(l_par_match) != 1:
380
+ for par in s_par:
381
+ log.info(par.name)
382
+
383
+ raise ValueError(f'Not found one and only one parameter starting with: {name_start}')
384
+
385
+ return l_par_match[0]
386
+ #-----------------------------------------
387
+ def _fix_parameters(self, pdf : zpdf) -> zpdf:
388
+ if self._d_fix is None:
389
+ log.debug('Not fixing any parameter')
390
+ return pdf
391
+
392
+ s_par = pdf.get_params()
393
+
394
+ log.info('-' * 30)
395
+ log.info('Fixing parameters')
396
+ log.info('-' * 30)
397
+ for name_start, value in self._d_fix.items():
398
+ par = self._find_par(s_par, name_start)
399
+ par.set_value(value)
400
+
401
+ log.info(f'{name_start:<20}{value:<20.3f}')
402
+ par.floating = False
403
+
404
+ return pdf
405
+ #-----------------------------------------
243
406
  def get_pdf(self) -> zpdf:
244
407
  '''
245
408
  Given a list of strings representing PDFs returns the a zfit PDF which is
@@ -248,6 +411,7 @@ class ModelFactory:
248
411
  l_type= self._get_pdf_types()
249
412
  l_pdf = [ self._get_pdf(kind, preffix) for kind, preffix in l_type ]
250
413
  pdf = self._add_pdf(l_pdf)
414
+ pdf = self._fix_parameters(pdf)
251
415
 
252
416
  return pdf
253
417
  #-----------------------------------------
@@ -0,0 +1,68 @@
1
+ '''
2
+ Module meant to hold classes defining PDFs that can be used by ZFIT
3
+ '''
4
+
5
+ import zfit
6
+ from zfit import z
7
+
8
+ #-------------------------------------------------------------------
9
+ class HypExp(zfit.pdf.ZPDF):
10
+ _N_OBS = 1
11
+ _PARAMS = ['mu', 'alpha', 'beta']
12
+
13
+ def _unnormalized_pdf(self, x):
14
+ x = z.unstack_x(x)
15
+ mu = self.params['mu']
16
+ ap = self.params['alpha']
17
+ bt = self.params['beta']
18
+
19
+ u = (x - mu)
20
+ val = z.exp(-bt * x) / (1 + z.exp(-ap * u))
21
+
22
+ return val
23
+ #-------------------------------------------------------------------
24
+ class ModExp(zfit.pdf.ZPDF):
25
+ _N_OBS = 1
26
+ _PARAMS = ['mu', 'alpha', 'beta']
27
+
28
+ def _unnormalized_pdf(self, x):
29
+ x = z.unstack_x(x)
30
+ mu = self.params['mu']
31
+ ap = self.params['alpha']
32
+ bt = self.params['beta']
33
+
34
+ u = x - mu
35
+ val = (1 - z.exp(-ap * u)) * z.exp(-bt * u)
36
+
37
+ return val
38
+ #-------------------------------------------------------------------
39
+ class GenExp(zfit.pdf.ZPDF):
40
+ _N_OBS = 1
41
+ _PARAMS = ['mu', 'sg', 'alpha', 'beta']
42
+
43
+ def _unnormalized_pdf(self, x):
44
+ x = z.unstack_x(x)
45
+ mu = self.params['mu']
46
+ sg = self.params['sg']
47
+ ap = self.params['alpha']
48
+ bt = self.params['beta']
49
+
50
+ u = (x - mu) / sg
51
+ val = (1 - z.exp(-ap * u)) * z.exp(-bt * u)
52
+
53
+ return val
54
+ #-------------------------------------------------------------------
55
+ class FermiDirac(zfit.pdf.ZPDF):
56
+ _N_OBS = 1
57
+ _PARAMS = ['mu', 'ap']
58
+
59
+ def _unnormalized_pdf(self, x):
60
+ x = z.unstack_x(x)
61
+ mu = self.params['mu']
62
+ ap = self.params['ap']
63
+
64
+ exp = (x - mu) / ap
65
+ den = 1 + z.exp(exp)
66
+
67
+ return 1. / den
68
+ #-------------------------------------------------------------------
dmu/stats/zfit_plotter.py CHANGED
@@ -1,7 +1,7 @@
1
1
  '''
2
2
  Module containing plot class, used to plot fits
3
3
  '''
4
- # pylint: disable=too-many-instance-attributes
4
+ # pylint: disable=too-many-instance-attributes, too-many-arguments
5
5
 
6
6
  import warnings
7
7
  import pprint
@@ -51,6 +51,8 @@ class ZFitPlotter:
51
51
  self._figsize = None
52
52
  self._leg_loc = None
53
53
 
54
+ self.dat_xerr : bool
55
+
54
56
  # zfit.settings.advanced_warnings['extend_wrapped_extended'] = False
55
57
  warnings.filterwarnings("ignore")
56
58
  #----------------------------------------
@@ -60,17 +62,17 @@ class ZFitPlotter:
60
62
  self._l_def_col = list(mcolors.TABLEAU_COLORS.keys())
61
63
  #----------------------------------------
62
64
  def _data_to_zdata(self, obs, data, weights):
65
+ if isinstance(data, zfit.data.Data):
66
+ return data
67
+
63
68
  if isinstance(data, np.ndarray):
64
69
  data = zfit.Data.from_numpy (obs=obs, array=data , weights=weights)
65
70
  elif isinstance(data, pd.Series):
66
71
  data = zfit.Data.from_pandas(obs=obs, df=pd.DataFrame(data), weights=weights)
67
72
  elif isinstance(data, pd.DataFrame):
68
73
  data = zfit.Data.from_pandas(obs=obs, df=data , weights=weights)
69
- elif isinstance(data, zfit.data.Data):
70
- data = data
71
74
  else:
72
- log.error(f'Passed data is of usupported type {type(data)}')
73
- raise
75
+ raise ValueError(f'Passed data is of usupported type {type(data)}')
74
76
 
75
77
  return data
76
78
  #----------------------------------------
@@ -200,7 +202,7 @@ class ZFitPlotter:
200
202
  #----------------------------------------
201
203
  def _get_zfit_gof(self):
202
204
  if not hasattr(self._result, 'gof'):
203
- return
205
+ return None
204
206
 
205
207
  chi2, ndof, pval = self._result.gof
206
208
 
@@ -211,14 +213,16 @@ class ZFitPlotter:
211
213
  def _get_text(self, ext_text):
212
214
  gof_text = self._get_zfit_gof()
213
215
 
214
- if ext_text is None and gof_text is None:
215
- return
216
- elif ext_text is not None and gof_text is None:
216
+ if ext_text is None and gof_text is None:
217
+ return None
218
+
219
+ if ext_text is not None and gof_text is None:
217
220
  return ext_text
218
- elif ext_text is None and gof_text is not None:
221
+
222
+ if ext_text is None and gof_text is not None:
219
223
  return gof_text
220
- else:
221
- return f'{ext_text}\n{gof_text}'
224
+
225
+ return f'{ext_text}\n{gof_text}'
222
226
  #----------------------------------------
223
227
  def _get_pars(self):
224
228
  '''
@@ -238,7 +242,7 @@ class ZFitPlotter:
238
242
  name= par if isinstance(par, str) else par.name
239
243
  try:
240
244
  err = d_val['hesse']['error']
241
- except:
245
+ except KeyError:
242
246
  log.warning(f'Cannot extract {name} Hesse errors, using zeros')
243
247
  pprint.pprint(d_val)
244
248
  err = 0
@@ -260,7 +264,7 @@ class ZFitPlotter:
260
264
  '''
261
265
  d_par = self._get_pars()
262
266
 
263
- line = f''
267
+ line = ''
264
268
  for name, [val, err] in d_par.items():
265
269
  if add_pars != 'all' and name not in add_pars:
266
270
  continue
@@ -328,7 +332,7 @@ class ZFitPlotter:
328
332
  nevt = self._get_component_yield(model, par)
329
333
 
330
334
  if model.name in self._l_plot_components and hasattr(model, 'pdfs'):
331
- l_model = [ (frc, pdf) for pdf, frc in zip(model.pdfs, model.params.values()) ]
335
+ l_model = [ (frc, pdf) for pdf, frc in zip(model.pdfs, model.params.values()) ]
332
336
  elif model.name in self._l_plot_components and not hasattr(model, 'pdfs'):
333
337
  log.warning(f'Cannot plot {model.name} as separate components, despite it was requested')
334
338
  l_model = [ (1, model)]
@@ -347,17 +351,17 @@ class ZFitPlotter:
347
351
  ax.plot(self.x, y, '-', label=self._leg.get(name, name), color=self._col.get(name))
348
352
 
349
353
  if (blind_name is not None) and (was_blinded is False):
350
- log.error(f'Blinding was requested, but PDF {blind_name} was not found among:')
351
354
  for model in self.total_model.pdfs:
352
355
  log.info(model.name)
353
- raise
356
+
357
+ raise ValueError(f'Blinding was requested, but PDF {blind_name} was not found among:')
354
358
  #----------------------------------------
355
359
  def _get_col(self, name):
356
360
  if name in self._col:
357
361
  return self._col[name]
358
362
 
359
363
  col = self._l_def_col[0]
360
- del(self._l_def_col[0])
364
+ del self._l_def_col[0]
361
365
 
362
366
  return col
363
367
  #----------------------------------------
@@ -400,9 +404,8 @@ class ZFitPlotter:
400
404
  if plot_range is not None:
401
405
  try:
402
406
  self.lower, self.upper = plot_range
403
- except TypeError:
404
- log.error(f'plot_range argument is expected to be a tuple with two numeric values')
405
- raise TypeError
407
+ except TypeError as exc:
408
+ raise TypeError('plot_range argument is expected to be a tuple with two numeric values') from exc
406
409
 
407
410
  return np.linspace(self.lower, self.upper, 2000)
408
411
  #----------------------------------------
@@ -439,6 +442,7 @@ class ZFitPlotter:
439
442
  add_pars = None,
440
443
  ymax = None,
441
444
  skip_pulls = False,
445
+ yscale : str = None,
442
446
  axs = None,
443
447
  figsize:tuple = (13, 7),
444
448
  leg_loc:str = 'best',
@@ -464,6 +468,7 @@ class ZFitPlotter:
464
468
  figsize (tuple) : Tuple with figure size, default (13, 7)
465
469
  leg_loc (str) : Location of legend, default 'best'
466
470
  xerr (bool or float) : Used to pass xerr to mplhep histplot. True will use error with bin size, False, no error, otherwise it's the size of the xerror bar
471
+ yscale (str) : Scale for y axis of main plot, either log or linear
467
472
  '''
468
473
  # pylint: disable=too-many-locals, too-many-positional-arguments, too-many-arguments
469
474
  d_leg = {} if d_leg is None else d_leg
@@ -512,6 +517,9 @@ class ZFitPlotter:
512
517
  self.axs[0].set(xlabel=xlabel, ylabel=ylabel)
513
518
  self.axs[0].set_xlim([self.lower, self.upper])
514
519
 
520
+ if yscale is not None:
521
+ self.axs[0].set_yscale(yscale)
522
+
515
523
  if title is not None:
516
524
  self.axs[0].set_title(title)
517
525
 
dmu/testing/utilities.py CHANGED
@@ -3,16 +3,20 @@ Module containing utility functions needed by unit tests
3
3
  '''
4
4
  import os
5
5
  import math
6
+ import glob
6
7
  from typing import Union
7
8
  from dataclasses import dataclass
8
9
  from importlib.resources import files
9
10
 
10
11
  from ROOT import RDF, TFile, RDataFrame
11
12
 
13
+ import joblib
12
14
  import pandas as pnd
13
15
  import numpy
14
16
  import yaml
15
17
 
18
+ from dmu.ml.train_mva import TrainMva
19
+ from dmu.ml.cv_classifier import CVClassifier
16
20
  from dmu.logging.log_store import LogStore
17
21
 
18
22
  log = LogStore.add_logger('dmu:testing:utilities')
@@ -22,6 +26,7 @@ class Data:
22
26
  '''
23
27
  Class storing shared data
24
28
  '''
29
+ out_dir = '/tmp/tests/dmu/ml/cv_predict'
25
30
  # -------------------------------
26
31
  def _double_data(df_1 : pnd.DataFrame) -> pnd.DataFrame:
27
32
  df_2 = df_1.copy()
@@ -39,7 +44,7 @@ def _add_nans(df : pnd.DataFrame, columns : list[str]) -> pnd.DataFrame:
39
44
  else:
40
45
  l_col_index = [ l_col.index(column) for column in columns ]
41
46
 
42
- log.debug('Replacing randomly with {size} NaNs')
47
+ log.debug(f'Replacing randomly with {size} NaNs')
43
48
  for _ in range(size):
44
49
  irow = numpy.random.randint(0, df.shape[0]) # Random row index
45
50
  icol = numpy.random.choice(l_col_index) # Random column index
@@ -51,7 +56,7 @@ def _add_nans(df : pnd.DataFrame, columns : list[str]) -> pnd.DataFrame:
51
56
  def get_rdf(kind : Union[str,None] = None,
52
57
  repeated : bool = False,
53
58
  nentries : int = 3_000,
54
- add_nans : list[str] = None):
59
+ columns_with_nans : list[str] = None):
55
60
  '''
56
61
  Return ROOT dataframe with toy data
57
62
  '''
@@ -76,8 +81,8 @@ def get_rdf(kind : Union[str,None] = None,
76
81
  if repeated:
77
82
  df = _double_data(df)
78
83
 
79
- if add_nans:
80
- df = _add_nans(df, columns=add_nans)
84
+ if columns_with_nans is not None:
85
+ df = _add_nans(df, columns=columns_with_nans)
81
86
 
82
87
  rdf = RDF.FromPandas(df)
83
88
 
@@ -126,3 +131,25 @@ def get_file_with_trees(path : str) -> TFile:
126
131
  snap.fMode = 'update'
127
132
 
128
133
  return TFile(path)
134
+ # -------------------------------
135
+ def get_models(rdf_sig : RDataFrame, rdf_bkg : RDataFrame) -> list[CVClassifier]:
136
+ '''
137
+ Will train and return models
138
+ '''
139
+
140
+ cfg = get_config('ml/tests/train_mva.yaml')
141
+ pkl_path = f'{Data.out_dir}/model.pkl'
142
+ plt_dir = f'{Data.out_dir}/cv_predict'
143
+ cfg['saving']['path'] = pkl_path
144
+ cfg['plotting']['val_dir'] = plt_dir
145
+ cfg['plotting']['features']['saving']['plt_dir'] = plt_dir
146
+
147
+ obj= TrainMva(sig=rdf_sig, bkg=rdf_bkg, cfg=cfg)
148
+ obj.run()
149
+
150
+ pkl_wc = pkl_path.replace('.pkl', '_*.pkl')
151
+ l_pkl_path = glob.glob(pkl_wc)
152
+ l_model = [ joblib.load(pkl_path) for pkl_path in l_pkl_path ]
153
+
154
+ return l_model
155
+ # -------------------------------
@@ -0,0 +1,13 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/from_rdf
2
+ # Will assume that the target is already in the input dataframe
3
+ # and will use it, instead of evaluating models
4
+ score_from_rdf : w
5
+ correlations:
6
+ # Variables with respect to which the correlations with the features will be measured
7
+ target :
8
+ name : z
9
+ methods:
10
+ - Pearson
11
+ figure:
12
+ title: Scores from file
13
+ size : [10, 8]
@@ -0,0 +1,10 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/from_model
2
+ correlations:
3
+ # Variables with respect to which the correlations with the features will be measured
4
+ target :
5
+ name : z
6
+ methods:
7
+ - Pearson
8
+ figure:
9
+ size : [10, 8]
10
+ rotate: 90