data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +177 -8
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +30 -18
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
- dmu/generic/hashing.py +44 -0
- dmu/generic/utilities.py +14 -1
- dmu/generic/version_management.py +3 -5
- dmu/ml/cv_diagnostics.py +221 -0
- dmu/ml/train_mva.py +124 -31
- dmu/pdataframe/utilities.py +36 -3
- dmu/plotting/fwhm.py +64 -0
- dmu/plotting/plotter.py +2 -0
- dmu/plotting/plotter_1d.py +87 -6
- dmu/stats/fitter.py +1 -1
- dmu/stats/model_factory.py +189 -25
- dmu/stats/zfit_models.py +68 -0
- dmu/stats/zfit_plotter.py +29 -21
- dmu/testing/utilities.py +31 -4
- dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
- dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
- dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
- dmu_data/ml/tests/train_mva.yaml +15 -9
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
- dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
- dmu_data/plotting/tests/plug_stats.yaml +19 -0
- dmu_data/plotting/tests/simple.yaml +4 -3
- dmu_data/plotting/tests/styling.yaml +11 -0
- {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0
dmu/stats/model_factory.py
CHANGED
@@ -6,9 +6,12 @@ Module storing ZModel class
|
|
6
6
|
from typing import Callable, Union
|
7
7
|
|
8
8
|
import zfit
|
9
|
+
|
9
10
|
from zfit.core.interfaces import ZfitSpace as zobs
|
10
11
|
from zfit.core.basepdf import BasePDF as zpdf
|
11
12
|
from zfit.core.parameter import Parameter as zpar
|
13
|
+
from dmu.stats.zfit_models import HypExp
|
14
|
+
from dmu.stats.zfit_models import ModExp
|
12
15
|
from dmu.logging.log_store import LogStore
|
13
16
|
|
14
17
|
log=LogStore.add_logger('dmu:stats:model_factory')
|
@@ -47,6 +50,13 @@ class MethodRegistry:
|
|
47
50
|
log.info(f' {value}')
|
48
51
|
|
49
52
|
return method
|
53
|
+
|
54
|
+
@classmethod
def get_pdf_names(cls) -> list[str]:
    '''
    Lists the nicknames of the PDFs that are registered/supported.

    Returns:
        New list built by iterating over the `_d_method` registry
    '''
    return [*cls._d_method]
|
50
60
|
#-----------------------------------------
|
51
61
|
class ModelFactory:
|
52
62
|
'''
|
@@ -57,11 +67,17 @@ class ModelFactory:
|
|
57
67
|
|
58
68
|
l_pdf = ['dscb', 'gauss']
|
59
69
|
l_shr = ['mu']
|
60
|
-
|
70
|
+
l_flt = ['mu', 'sg']
|
71
|
+
d_rep = {'mu' : 'scale', 'sg' : 'reso'}
|
72
|
+
mod = ModelFactory(preffix = 'signal', obs = obs, l_pdf = l_pdf, l_shared = l_shr, d_rep = d_rep)
|
61
73
|
pdf = mod.get_pdf()
|
62
74
|
```
|
63
75
|
|
64
|
-
where one can specify which parameters
|
76
|
+
where one can specify which parameters
|
77
|
+
|
78
|
+
- Can be shared among the PDFs
|
79
|
+
- Are meant to float if this fit is done to MC, in order to fix parameters in data.
|
80
|
+
- Are scales or resolutions that need reparametrizations
|
65
81
|
'''
|
66
82
|
#-----------------------------------------
|
67
83
|
def __init__(self,
|
@@ -69,22 +85,45 @@ class ModelFactory:
|
|
69
85
|
obs : zobs,
|
70
86
|
l_pdf : list[str],
|
71
87
|
l_shared : list[str],
|
72
|
-
l_float : list[str]
|
88
|
+
l_float : list[str],
|
89
|
+
d_fix : dict[str:float] = None,
|
90
|
+
d_rep : dict[str:str] = None):
|
73
91
|
'''
|
74
92
|
preffix: used to identify PDF, will be used to name every parameter
|
75
93
|
obs: zfit obserbable
|
76
94
|
l_pdf: List of PDF nicknames which are registered below
|
77
95
|
l_shared: List of parameter names that are shared
|
78
96
|
l_float: List of parameter names to allow to float
|
97
|
+
d_fix: Dictionary with keys as the beginning of the name of a parameter and value as the number
|
98
|
+
to which it has to be fixed. If not one and only one parameter is found, ValueError is raised
|
99
|
+
d_rep: Dictionary with keys as variables that will be reparametrized
|
79
100
|
'''
|
80
101
|
|
81
102
|
self._preffix = preffix
|
82
103
|
self._l_pdf = l_pdf
|
83
104
|
self._l_shr = l_shared
|
84
105
|
self._l_flt = l_float
|
106
|
+
self._d_fix = d_fix
|
107
|
+
self._d_rep = d_rep
|
85
108
|
self._obs = obs
|
86
109
|
|
87
110
|
self._d_par : dict[str,zpar] = {}
|
111
|
+
|
112
|
+
self._check_reparametrization()
|
113
|
+
#-----------------------------------------
|
114
|
+
def _check_reparametrization(self) -> None:
    '''
    Validates the reparametrization dictionary stored in `self._d_rep`.

    Does nothing when no dictionary was provided.

    Raises:
        ValueError: if a name is both in the floating list and in the
        reparametrization dictionary, or if any requested kind is
        neither `scale` nor `reso`
    '''
    d_rep = self._d_rep
    if d_rep is None:
        return

    # The same name cannot be floating and reparametrized at the same time
    if set(d_rep) & set(self._l_flt):
        raise ValueError('Non empty intersection between floating and reparametrization parameters')

    s_kind = set(d_rep.values())
    if not s_kind <= {'scale', 'reso'}:
        raise ValueError(f'Only scales and resolution reparametrizations allowed, found: {s_kind}')
|
88
127
|
#-----------------------------------------
|
89
128
|
def _split_name(self, name : str) -> tuple[str,str]:
|
90
129
|
l_part = name.split('_')
|
@@ -108,30 +147,86 @@ class ModelFactory:
|
|
108
147
|
|
109
148
|
return name
|
110
149
|
#-----------------------------------------
|
111
|
-
def _get_parameter(
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
150
|
+
def _get_parameter(
        self,
        name   : str,
        suffix : str,
        val    : float,
        low    : float,
        high   : float) -> zpar:
    '''
    Returns the parameter associated to `name` and `suffix`, building it only once.

    name  : Nickname of the parameter, e.g. mu_gauss
    suffix: Suffix passed to `_get_parameter_name` to build the full name
    val   : Initial value
    low   : Lower bound
    high  : Upper bound

    The full name is the key of the `self._d_par` cache. If it is already there,
    the cached object is returned and val/low/high are ignored.
    '''
    par_name = self._get_parameter_name(name, suffix)
    log.debug(f'Assigning name: {par_name}')

    if par_name not in self._d_par:
        if self._is_reparametrized(name):
            # The kind of reparametrization is looked up with the first part of the full name
            init_name, _ = self._split_name(par_name)
            par = self._get_reparametrization(par_name, init_name, val, low, high)
        else:
            par = zfit.param.Parameter(par_name, val, low, high)

        self._d_par[par_name] = par

    return self._d_par[par_name]
|
175
|
+
#-----------------------------------------
|
176
|
+
def _is_reparametrized(self, name : str) -> bool:
    '''
    Tells whether the parameter called `name` has to be reparametrized.

    True only if a reparametrization dictionary was given and the first
    element returned by `_split_name(name)` is one of its keys.
    '''
    if self._d_rep is None:
        return False

    key, _ = self._split_name(name)
    found  = key in self._d_rep

    log.debug(f'Reparametrizing {name}: {found}')

    return found
|
187
|
+
#-----------------------------------------
|
188
|
+
def _get_reparametrization(self, par_name : str, init_name : str, value : float, low : float, high : float) -> zpar:
    '''
    Builds a composed parameter from a fixed parameter and a floating scale or resolution.

    par_name : Full name of the parameter, given to the fixed parameter
    init_name: Key used to look up the kind of reparametrization in `self._d_rep`
    value    : Value of the fixed parameter
    low      : Lower bound of the fixed parameter
    high     : Upper bound of the fixed parameter

    Kinds:
        reso : composed = fixed * reso,  with reso  starting at 1.0 in [0.20, 5.0]
        scale: composed = fixed + scale, with scale starting at 0.0 in [-100, 100]

    Raises:
        ValueError: if the kind is neither `reso` nor `scale`
    '''
    log.debug(f'Reparametrizing {par_name}')

    # The original parameter is created first and frozen, only the extra one floats
    par_const = zfit.Parameter(par_name, value, low, high)
    par_const.floating = False

    kind     = self._d_rep[init_name]
    cmp_name = f'{par_name}_cmp'

    if kind == 'reso':
        par_reso = zfit.Parameter(f'{par_name}_reso_flt' , 1.0, 0.20, 5.0)
        d_input  = {'par_const' : par_const, 'reso' : par_reso}

        return zfit.ComposedParameter(cmp_name, lambda d_par : d_par['par_const'] * d_par['reso'], params=d_input)

    if kind == 'scale':
        par_scale = zfit.Parameter(f'{par_name}_scale_flt', 0.0, -100, 100)
        d_input   = {'par_const' : par_const, 'scale' : par_scale}

        return zfit.ComposedParameter(cmp_name, lambda d_par : d_par['par_const'] + d_par['scale'], params=d_input)

    raise ValueError(f'Invalid kind: {kind}')
|
129
204
|
#-----------------------------------------
|
130
205
|
@MethodRegistry.register('exp')
def _get_exponential(self, suffix : str = '') -> zpdf:
    '''
    Builds the exponential PDF registered as `exp`.

    The parameter c_exp starts at -0.010 and is bounded to [-0.020, -0.0001].
    '''
    slope = self._get_parameter('c_exp', suffix, -0.010, -0.020, -0.0001)

    return zfit.pdf.Exponential(slope, self._obs, name=f'exp{suffix}')
|
211
|
+
# ---------------------------------------------
|
212
|
+
@MethodRegistry.register('hypexp')
def _get_hypexp(self, suffix : str = '') -> zpdf:
    '''
    Builds the HypExp PDF registered as `hypexp`.

    Parameters are requested through `_get_parameter`, like the other builders that
    take a suffix, so that their names depend on the suffix and they are stored in
    the parameter cache. Building them directly with fixed names would give the
    same name to the parameters of two `hypexp` components.
    '''
    mu = self._get_parameter('mu_hypexp', suffix,  5000,   4000,  6000)
    ap = self._get_parameter('ap_hypexp', suffix, 0.020,      0,  0.10)
    bt = self._get_parameter('bt_hypexp', suffix, 0.002, 0.0001, 0.003)

    pdf= HypExp(obs=self._obs, mu=mu, alpha=ap, beta=bt, name=f'hypexp{suffix}')

    return pdf
|
221
|
+
# ---------------------------------------------
|
222
|
+
@MethodRegistry.register('modexp')
def _get_modexp(self, suffix : str = '') -> zpdf:
    '''
    Builds the ModExp PDF registered as `modexp`.

    Parameters are requested through `_get_parameter`, like the other builders that
    take a suffix, so that their names depend on the suffix and they are stored in
    the parameter cache. Building them directly with fixed names would give the
    same name to the parameters of two `modexp` components.
    '''
    # NOTE(review): initial values sit on the lower bounds, kept as in the original
    mu = self._get_parameter('mu_modexp', suffix,  4250,  4250,  4500)
    ap = self._get_parameter('ap_modexp', suffix, 0.002, 0.002, 0.026)
    bt = self._get_parameter('bt_modexp', suffix, 0.002, 0.002, 0.020)

    pdf= ModExp(obs=self._obs, mu=mu, alpha=ap, beta=bt, name=f'modexp{suffix}')

    return pdf
|
136
231
|
#-----------------------------------------
|
137
232
|
@MethodRegistry.register('pol1')
|
@@ -145,13 +240,22 @@ class ModelFactory:
|
|
145
240
|
def _get_pol2(self, suffix : str = '') -> zpdf:
|
146
241
|
a = self._get_parameter('a_pol2', suffix, -0.005, -0.95, 0.00)
|
147
242
|
b = self._get_parameter('b_pol2', suffix, 0.000, -0.95, 0.95)
|
148
|
-
pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b], name=f'pol2{suffix}')
|
243
|
+
pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b ], name=f'pol2{suffix}')
|
244
|
+
|
245
|
+
return pdf
|
246
|
+
# ---------------------------------------------
|
247
|
+
@MethodRegistry.register('pol3')
def _get_pol3(self, suffix : str = '') -> zpdf:
    '''
    Builds the Chebyshev PDF with three coefficients registered as `pol3`.

    Coefficients are requested through `_get_parameter`, as done for `pol2`, so that
    their names depend on the suffix and they are stored in the parameter cache.
    Building them directly with fixed names would give the same name to the
    coefficients of two `pol3` components.
    '''
    a   = self._get_parameter('a_pol3', suffix, -0.005, -0.95, 0.00)
    b   = self._get_parameter('b_pol3', suffix,  0.000, -0.95, 0.95)
    c   = self._get_parameter('c_pol3', suffix,  0.000, -0.95, 0.95)
    pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b, c], name=f'pol3{suffix}')

    return pdf
|
151
255
|
#-----------------------------------------
|
152
256
|
@MethodRegistry.register('cbr')
|
153
257
|
def _get_cbr(self, suffix : str = '') -> zpdf:
|
154
|
-
mu = self._get_parameter('mu_cbr', suffix, 5300, 5100,
|
258
|
+
mu = self._get_parameter('mu_cbr', suffix, 5300, 5100, 5500)
|
155
259
|
sg = self._get_parameter('sg_cbr', suffix, 10, 2, 300)
|
156
260
|
ar = self._get_parameter('ac_cbr', suffix, -2, -14., -0.1)
|
157
261
|
nr = self._get_parameter('nc_cbr', suffix, 1, 0.5, 150)
|
@@ -162,10 +266,10 @@ class ModelFactory:
|
|
162
266
|
#-----------------------------------------
|
163
267
|
@MethodRegistry.register('suj')
|
164
268
|
def _get_suj(self, suffix : str = '') -> zpdf:
|
165
|
-
mu = self._get_parameter('mu_suj', suffix, 5300,
|
269
|
+
mu = self._get_parameter('mu_suj', suffix, 5300, 5000, 6000)
|
166
270
|
sg = self._get_parameter('sg_suj', suffix, 10, 2, 5000)
|
167
271
|
gm = self._get_parameter('gm_suj', suffix, 1, -10, 10)
|
168
|
-
dl = self._get_parameter('dl_suj', suffix, 1, 0.1,
|
272
|
+
dl = self._get_parameter('dl_suj', suffix, 1, 0.1, 40)
|
169
273
|
|
170
274
|
pdf = zfit.pdf.JohnsonSU(mu, sg, gm, dl, self._obs, name=f'suj{suffix}')
|
171
275
|
|
@@ -173,9 +277,9 @@ class ModelFactory:
|
|
173
277
|
#-----------------------------------------
|
174
278
|
@MethodRegistry.register('cbl')
|
175
279
|
def _get_cbl(self, suffix : str = '') -> zpdf:
|
176
|
-
mu = self._get_parameter('mu_cbl', suffix, 5300, 5100,
|
280
|
+
mu = self._get_parameter('mu_cbl', suffix, 5300, 5100, 5500)
|
177
281
|
sg = self._get_parameter('sg_cbl', suffix, 10, 2, 300)
|
178
|
-
al = self._get_parameter('ac_cbl', suffix, 2, 0.
|
282
|
+
al = self._get_parameter('ac_cbl', suffix, 2, 0.0, 14.)
|
179
283
|
nl = self._get_parameter('nc_cbl', suffix, 1, 0.5, 150)
|
180
284
|
|
181
285
|
pdf = zfit.pdf.CrystalBall(mu, sg, al, nl, self._obs, name=f'cbl{suffix}')
|
@@ -184,7 +288,7 @@ class ModelFactory:
|
|
184
288
|
#-----------------------------------------
|
185
289
|
@MethodRegistry.register('gauss')
|
186
290
|
def _get_gauss(self, suffix : str = '') -> zpdf:
|
187
|
-
mu = self._get_parameter('mu_gauss', suffix, 5300, 5100,
|
291
|
+
mu = self._get_parameter('mu_gauss', suffix, 5300, 5100, 5500)
|
188
292
|
sg = self._get_parameter('sg_gauss', suffix, 10, 2, 300)
|
189
293
|
|
190
294
|
pdf = zfit.pdf.Gauss(mu, sg, self._obs, name=f'gauss{suffix}')
|
@@ -193,7 +297,7 @@ class ModelFactory:
|
|
193
297
|
#-----------------------------------------
|
194
298
|
@MethodRegistry.register('dscb')
|
195
299
|
def _get_dscb(self, suffix : str = '') -> zpdf:
|
196
|
-
mu = self._get_parameter('mu_dscb', suffix,
|
300
|
+
mu = self._get_parameter('mu_dscb', suffix, 5300, 5000, 5400)
|
197
301
|
sg = self._get_parameter('sg_dscb', suffix, 10, 2, 500)
|
198
302
|
ar = self._get_parameter('ar_dscb', suffix, 1, 0, 5)
|
199
303
|
al = self._get_parameter('al_dscb', suffix, 1, 0, 5)
|
@@ -204,6 +308,35 @@ class ModelFactory:
|
|
204
308
|
|
205
309
|
return pdf
|
206
310
|
#-----------------------------------------
|
311
|
+
@MethodRegistry.register('voigt')
def _get_voigt(self, suffix : str = '') -> zpdf:
    '''
    Builds the Voigt PDF registered as `voigt`.

    Parameters are requested through `_get_parameter`, like the other signal builders,
    so that their names depend on the suffix and they are stored in the parameter
    cache. Building them directly with fixed names would give the same name to the
    parameters of two `voigt` components.
    '''
    mu  = self._get_parameter('mu_voigt', suffix, 5280, 5040, 5500)
    sg  = self._get_parameter('sg_voigt', suffix,   20,   10,  400)
    gm  = self._get_parameter('gm_voigt', suffix,    4,  0.1,  100)

    pdf = zfit.pdf.Voigt(m=mu, sigma=sg, gamma=gm, obs=self._obs, name=f'voigt{suffix}')

    return pdf
|
320
|
+
#-----------------------------------------
|
321
|
+
@MethodRegistry.register('qgauss')
def _get_qgauss(self, suffix : str = '') -> zpdf:
    '''
    Builds the q-Gaussian PDF registered as `qgauss`.

    Parameters are requested through `_get_parameter`, like the other signal builders,
    so that their names depend on the suffix and they are stored in the parameter
    cache. Building them directly with fixed names would give the same name to the
    parameters of two `qgauss` components.
    '''
    mu  = self._get_parameter('mu_qgauss', suffix, 5280, 5040, 5500)
    sg  = self._get_parameter('sg_qgauss', suffix,   20,   10,  400)
    # NOTE(review): q starts on its lower bound, kept as in the original
    q   = self._get_parameter( 'q_qgauss', suffix,    1,    1,    3)

    pdf = zfit.pdf.QGauss(q=q, mu=mu, sigma=sg, obs=self._obs, name =f'qgauss{suffix}')

    return pdf
|
330
|
+
#-----------------------------------------
|
331
|
+
@MethodRegistry.register('cauchy')
def _get_cauchy(self, suffix : str = '') -> zpdf:
    '''
    Builds the Cauchy PDF registered as `cauchy`.

    Parameters are requested through `_get_parameter`, like the other signal builders,
    so that their names depend on the suffix and they are stored in the parameter
    cache. The nicknames carry the `_cauchy` tag, following the <parameter>_<pdf>
    convention of the other builders, instead of the bare `mu` and `gm`, which
    would clash with any other parameter using those generic names.
    '''
    mu  = self._get_parameter('mu_cauchy', suffix, 5280, 5040, 5500)
    gm  = self._get_parameter('gm_cauchy', suffix,  150,   50,  500)

    pdf = zfit.pdf.Cauchy(obs=self._obs, m=mu, gamma=gm, name=f'cauchy{suffix}')

    return pdf
|
339
|
+
#-----------------------------------------
|
207
340
|
def _get_pdf_types(self) -> list[tuple[str,str]]:
|
208
341
|
d_name_freq = {}
|
209
342
|
|
@@ -234,12 +367,42 @@ class ModelFactory:
|
|
234
367
|
log.debug('Requested only one PDF, skipping sum')
|
235
368
|
return l_pdf[0]
|
236
369
|
|
237
|
-
l_frc= [ zfit.param.Parameter(f'frc_{ifrc + 1}', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]
|
370
|
+
l_frc= [ zfit.param.Parameter(f'frc_{self._preffix}_{ifrc + 1}', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]
|
238
371
|
|
239
372
|
pdf = zfit.pdf.SumPDF(l_pdf, name=self._preffix, fracs=l_frc)
|
240
373
|
|
241
374
|
return pdf
|
242
375
|
#-----------------------------------------
|
376
|
+
def _find_par(self, s_par : set[zpar], name_start : str) -> zpar:
    '''
    Picks from `s_par` the only parameter whose name starts with `name_start`.

    Raises:
        ValueError: if zero or more than one parameter match. The names of all
        the parameters in `s_par` are logged before raising.
    '''
    l_match = [ par for par in s_par if par.name.startswith(name_start) ]

    if len(l_match) == 1:
        return l_match[0]

    # Show what was available, to make the failure easier to understand
    for par in s_par:
        log.info(par.name)

    raise ValueError(f'Not found one and only one parameter starting with: {name_start}')
|
386
|
+
#-----------------------------------------
|
387
|
+
def _fix_parameters(self, pdf : zpdf) -> zpdf:
    '''
    Sets and freezes the parameters requested through `self._d_fix`.

    pdf: PDF whose parameters, as returned by `pdf.get_params()`, will be searched

    Each key of the dictionary is the beginning of the name of a parameter, which
    has to match one and only one of them, see `_find_par`. The matched parameter
    is set to the corresponding value and its `floating` flag is turned off.

    Returns the same PDF that was passed, untouched if there is nothing to fix.
    '''
    if self._d_fix is None:
        log.debug('Not fixing any parameter')
        return pdf

    s_par     = pdf.get_params()
    separator = '-' * 30

    log.info(separator)
    log.info('Fixing parameters')
    log.info(separator)

    for name_start, value in self._d_fix.items():
        par = self._find_par(s_par, name_start)
        par.set_value(value)

        log.info(f'{name_start:<20}{value:<20.3f}')
        par.floating = False

    return pdf
|
405
|
+
#-----------------------------------------
|
243
406
|
def get_pdf(self) -> zpdf:
|
244
407
|
'''
|
245
408
|
Given a list of strings representing PDFs returns the a zfit PDF which is
|
@@ -248,6 +411,7 @@ class ModelFactory:
|
|
248
411
|
l_type= self._get_pdf_types()
|
249
412
|
l_pdf = [ self._get_pdf(kind, preffix) for kind, preffix in l_type ]
|
250
413
|
pdf = self._add_pdf(l_pdf)
|
414
|
+
pdf = self._fix_parameters(pdf)
|
251
415
|
|
252
416
|
return pdf
|
253
417
|
#-----------------------------------------
|
dmu/stats/zfit_models.py
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
'''
|
2
|
+
Module meant to hold classes defining PDFs that can be used by ZFIT
|
3
|
+
'''
|
4
|
+
|
5
|
+
import zfit
|
6
|
+
from zfit import z
|
7
|
+
|
8
|
+
#-------------------------------------------------------------------
|
9
|
+
class HypExp(zfit.pdf.ZPDF):
    '''
    One dimensional PDF with parameters mu, alpha and beta, whose unnormalized shape is:

        exp(-beta * x) / (1 + exp(-alpha * (x - mu)))
    '''
    _N_OBS  = 1
    _PARAMS = ['mu', 'alpha', 'beta']

    def _unnormalized_pdf(self, x):
        '''
        Evaluates the unnormalized shape at the points in x
        '''
        x     = z.unstack_x(x)
        mu    = self.params['mu']
        alpha = self.params['alpha']
        beta  = self.params['beta']

        shift       = (x - mu)
        numerator   = z.exp(-beta * x)
        denominator = (1 + z.exp(-alpha * shift))

        return numerator / denominator
|
23
|
+
#-------------------------------------------------------------------
|
24
|
+
class ModExp(zfit.pdf.ZPDF):
    '''
    One dimensional PDF with parameters mu, alpha and beta, whose unnormalized shape is:

        (1 - exp(-alpha * u)) * exp(-beta * u),    u = x - mu
    '''
    _N_OBS  = 1
    _PARAMS = ['mu', 'alpha', 'beta']

    def _unnormalized_pdf(self, x):
        '''
        Evaluates the unnormalized shape at the points in x
        '''
        x     = z.unstack_x(x)
        mu    = self.params['mu']
        alpha = self.params['alpha']
        beta  = self.params['beta']

        shift   = x - mu
        turn_on = (1 - z.exp(-alpha * shift))
        decay   = z.exp(-beta * shift)

        return turn_on * decay
|
38
|
+
#-------------------------------------------------------------------
|
39
|
+
class GenExp(zfit.pdf.ZPDF):
    '''
    One dimensional PDF with parameters mu, sg, alpha and beta, whose unnormalized shape is:

        (1 - exp(-alpha * u)) * exp(-beta * u),    u = (x - mu) / sg
    '''
    _N_OBS  = 1
    _PARAMS = ['mu', 'sg', 'alpha', 'beta']

    def _unnormalized_pdf(self, x):
        '''
        Evaluates the unnormalized shape at the points in x
        '''
        x     = z.unstack_x(x)
        mu    = self.params['mu']
        sigma = self.params['sg']
        alpha = self.params['alpha']
        beta  = self.params['beta']

        scaled  = (x - mu) / sigma
        turn_on = (1 - z.exp(-alpha * scaled))
        decay   = z.exp(-beta * scaled)

        return turn_on * decay
|
54
|
+
#-------------------------------------------------------------------
|
55
|
+
class FermiDirac(zfit.pdf.ZPDF):
    '''
    One dimensional PDF with parameters mu and ap, whose unnormalized shape is:

        1 / (1 + exp((x - mu) / ap))
    '''
    _N_OBS  = 1
    _PARAMS = ['mu', 'ap']

    def _unnormalized_pdf(self, x):
        '''
        Evaluates the unnormalized shape at the points in x
        '''
        x  = z.unstack_x(x)
        mu = self.params['mu']
        ap = self.params['ap']

        argument    = (x - mu) / ap
        denominator = 1 + z.exp(argument)

        return 1. / denominator
|
68
|
+
#-------------------------------------------------------------------
|
dmu/stats/zfit_plotter.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
'''
|
2
2
|
Module containing plot class, used to plot fits
|
3
3
|
'''
|
4
|
-
# pylint: disable=too-many-instance-attributes
|
4
|
+
# pylint: disable=too-many-instance-attributes, too-many-arguments
|
5
5
|
|
6
6
|
import warnings
|
7
7
|
import pprint
|
@@ -51,6 +51,8 @@ class ZFitPlotter:
|
|
51
51
|
self._figsize = None
|
52
52
|
self._leg_loc = None
|
53
53
|
|
54
|
+
self.dat_xerr : bool
|
55
|
+
|
54
56
|
# zfit.settings.advanced_warnings['extend_wrapped_extended'] = False
|
55
57
|
warnings.filterwarnings("ignore")
|
56
58
|
#----------------------------------------
|
@@ -60,17 +62,17 @@ class ZFitPlotter:
|
|
60
62
|
self._l_def_col = list(mcolors.TABLEAU_COLORS.keys())
|
61
63
|
#----------------------------------------
|
62
64
|
def _data_to_zdata(self, obs, data, weights):
    '''
    Converts the input data into a zfit dataset.

    obs    : Observable used to build the dataset
    data   : Either a zfit dataset, numpy array, pandas series or pandas dataframe
    weights: Weights used to build the dataset

    A zfit dataset is returned as it is, in that case `obs` and `weights` are not used.

    Raises:
        ValueError: if data is of none of the supported types
    '''
    if isinstance(data, zfit.data.Data):
        return data

    if isinstance(data, np.ndarray):
        return zfit.Data.from_numpy (obs=obs, array=data, weights=weights)

    if isinstance(data, pd.Series):
        return zfit.Data.from_pandas(obs=obs, df=pd.DataFrame(data), weights=weights)

    if isinstance(data, pd.DataFrame):
        return zfit.Data.from_pandas(obs=obs, df=data, weights=weights)

    raise ValueError(f'Passed data is of unsupported type {type(data)}')
|
76
78
|
#----------------------------------------
|
@@ -200,7 +202,7 @@ class ZFitPlotter:
|
|
200
202
|
#----------------------------------------
|
201
203
|
def _get_zfit_gof(self):
|
202
204
|
if not hasattr(self._result, 'gof'):
|
203
|
-
return
|
205
|
+
return None
|
204
206
|
|
205
207
|
chi2, ndof, pval = self._result.gof
|
206
208
|
|
@@ -211,14 +213,16 @@ class ZFitPlotter:
|
|
211
213
|
def _get_text(self, ext_text):
    '''
    Merges the text passed by the user with the goodness of fit text.

    ext_text: Text passed by the user, can be None

    Returns None if neither text is available, the available one if only
    one is, and otherwise both, with the user text first and a line break
    in between.
    '''
    gof_text = self._get_zfit_gof()

    # With no goodness of fit, whatever the user passed (even None) is the answer
    if gof_text is None:
        return ext_text

    if ext_text is None:
        return gof_text

    return f'{ext_text}\n{gof_text}'
|
222
226
|
#----------------------------------------
|
223
227
|
def _get_pars(self):
|
224
228
|
'''
|
@@ -238,7 +242,7 @@ class ZFitPlotter:
|
|
238
242
|
name= par if isinstance(par, str) else par.name
|
239
243
|
try:
|
240
244
|
err = d_val['hesse']['error']
|
241
|
-
except:
|
245
|
+
except KeyError:
|
242
246
|
log.warning(f'Cannot extract {name} Hesse errors, using zeros')
|
243
247
|
pprint.pprint(d_val)
|
244
248
|
err = 0
|
@@ -260,7 +264,7 @@ class ZFitPlotter:
|
|
260
264
|
'''
|
261
265
|
d_par = self._get_pars()
|
262
266
|
|
263
|
-
line =
|
267
|
+
line = ''
|
264
268
|
for name, [val, err] in d_par.items():
|
265
269
|
if add_pars != 'all' and name not in add_pars:
|
266
270
|
continue
|
@@ -328,7 +332,7 @@ class ZFitPlotter:
|
|
328
332
|
nevt = self._get_component_yield(model, par)
|
329
333
|
|
330
334
|
if model.name in self._l_plot_components and hasattr(model, 'pdfs'):
|
331
|
-
l_model = [ (frc, pdf) for pdf, frc in zip(model.pdfs, model.params.values()) ]
|
335
|
+
l_model = [ (frc, pdf) for pdf, frc in zip(model.pdfs, model.params.values()) ]
|
332
336
|
elif model.name in self._l_plot_components and not hasattr(model, 'pdfs'):
|
333
337
|
log.warning(f'Cannot plot {model.name} as separate components, despite it was requested')
|
334
338
|
l_model = [ (1, model)]
|
@@ -347,17 +351,17 @@ class ZFitPlotter:
|
|
347
351
|
ax.plot(self.x, y, '-', label=self._leg.get(name, name), color=self._col.get(name))
|
348
352
|
|
349
353
|
if (blind_name is not None) and (was_blinded is False):
|
350
|
-
log.error(f'Blinding was requested, but PDF {blind_name} was not found among:')
|
351
354
|
for model in self.total_model.pdfs:
|
352
355
|
log.info(model.name)
|
353
|
-
|
356
|
+
|
357
|
+
raise ValueError(f'Blinding was requested, but PDF {blind_name} was not found among:')
|
354
358
|
#----------------------------------------
|
355
359
|
def _get_col(self, name):
    '''
    Returns the color associated to `name`.

    If `name` is in `self._col`, that color is used. Otherwise the first color in
    the list of defaults is taken and dropped from the list, so that it is not
    picked again.
    '''
    if name not in self._col:
        return self._l_def_col.pop(0)

    return self._col[name]
|
363
367
|
#----------------------------------------
|
@@ -400,9 +404,8 @@ class ZFitPlotter:
|
|
400
404
|
if plot_range is not None:
|
401
405
|
try:
|
402
406
|
self.lower, self.upper = plot_range
|
403
|
-
except TypeError:
|
404
|
-
|
405
|
-
raise TypeError
|
407
|
+
except TypeError as exc:
|
408
|
+
raise TypeError('plot_range argument is expected to be a tuple with two numeric values') from exc
|
406
409
|
|
407
410
|
return np.linspace(self.lower, self.upper, 2000)
|
408
411
|
#----------------------------------------
|
@@ -439,6 +442,7 @@ class ZFitPlotter:
|
|
439
442
|
add_pars = None,
|
440
443
|
ymax = None,
|
441
444
|
skip_pulls = False,
|
445
|
+
yscale : str = None,
|
442
446
|
axs = None,
|
443
447
|
figsize:tuple = (13, 7),
|
444
448
|
leg_loc:str = 'best',
|
@@ -464,6 +468,7 @@ class ZFitPlotter:
|
|
464
468
|
figsize (tuple) : Tuple with figure size, default (13, 7)
|
465
469
|
leg_loc (str) : Location of legend, default 'best'
|
466
470
|
xerr (bool or float) : Used to pass xerr to mplhep histplot. True will use error with bin size, False, no error, otherwise it's the size of the xerror bar
|
471
|
+
yscale (str) : Scale for y axis of main plot, either log or linear
|
467
472
|
'''
|
468
473
|
# pylint: disable=too-many-locals, too-many-positional-arguments, too-many-arguments
|
469
474
|
d_leg = {} if d_leg is None else d_leg
|
@@ -512,6 +517,9 @@ class ZFitPlotter:
|
|
512
517
|
self.axs[0].set(xlabel=xlabel, ylabel=ylabel)
|
513
518
|
self.axs[0].set_xlim([self.lower, self.upper])
|
514
519
|
|
520
|
+
if yscale is not None:
|
521
|
+
self.axs[0].set_yscale(yscale)
|
522
|
+
|
515
523
|
if title is not None:
|
516
524
|
self.axs[0].set_title(title)
|
517
525
|
|
dmu/testing/utilities.py
CHANGED
@@ -3,16 +3,20 @@ Module containing utility functions needed by unit tests
|
|
3
3
|
'''
|
4
4
|
import os
|
5
5
|
import math
|
6
|
+
import glob
|
6
7
|
from typing import Union
|
7
8
|
from dataclasses import dataclass
|
8
9
|
from importlib.resources import files
|
9
10
|
|
10
11
|
from ROOT import RDF, TFile, RDataFrame
|
11
12
|
|
13
|
+
import joblib
|
12
14
|
import pandas as pnd
|
13
15
|
import numpy
|
14
16
|
import yaml
|
15
17
|
|
18
|
+
from dmu.ml.train_mva import TrainMva
|
19
|
+
from dmu.ml.cv_classifier import CVClassifier
|
16
20
|
from dmu.logging.log_store import LogStore
|
17
21
|
|
18
22
|
log = LogStore.add_logger('dmu:testing:utilities')
|
@@ -22,6 +26,7 @@ class Data:
|
|
22
26
|
'''
|
23
27
|
Class storing shared data
|
24
28
|
'''
|
29
|
+
out_dir = '/tmp/tests/dmu/ml/cv_predict'
|
25
30
|
# -------------------------------
|
26
31
|
def _double_data(df_1 : pnd.DataFrame) -> pnd.DataFrame:
|
27
32
|
df_2 = df_1.copy()
|
@@ -39,7 +44,7 @@ def _add_nans(df : pnd.DataFrame, columns : list[str]) -> pnd.DataFrame:
|
|
39
44
|
else:
|
40
45
|
l_col_index = [ l_col.index(column) for column in columns ]
|
41
46
|
|
42
|
-
log.debug('Replacing randomly with {size} NaNs')
|
47
|
+
log.debug(f'Replacing randomly with {size} NaNs')
|
43
48
|
for _ in range(size):
|
44
49
|
irow = numpy.random.randint(0, df.shape[0]) # Random row index
|
45
50
|
icol = numpy.random.choice(l_col_index) # Random column index
|
@@ -51,7 +56,7 @@ def _add_nans(df : pnd.DataFrame, columns : list[str]) -> pnd.DataFrame:
|
|
51
56
|
def get_rdf(kind : Union[str,None] = None,
|
52
57
|
repeated : bool = False,
|
53
58
|
nentries : int = 3_000,
|
54
|
-
|
59
|
+
columns_with_nans : list[str] = None):
|
55
60
|
'''
|
56
61
|
Return ROOT dataframe with toy data
|
57
62
|
'''
|
@@ -76,8 +81,8 @@ def get_rdf(kind : Union[str,None] = None,
|
|
76
81
|
if repeated:
|
77
82
|
df = _double_data(df)
|
78
83
|
|
79
|
-
if
|
80
|
-
df = _add_nans(df, columns=
|
84
|
+
if columns_with_nans is not None:
|
85
|
+
df = _add_nans(df, columns=columns_with_nans)
|
81
86
|
|
82
87
|
rdf = RDF.FromPandas(df)
|
83
88
|
|
@@ -126,3 +131,25 @@ def get_file_with_trees(path : str) -> TFile:
|
|
126
131
|
snap.fMode = 'update'
|
127
132
|
|
128
133
|
return TFile(path)
|
134
|
+
# -------------------------------
|
135
|
+
def get_models(rdf_sig : RDataFrame, rdf_bkg : RDataFrame) -> list[CVClassifier]:
    '''
    Will train and return models

    rdf_sig: ROOT dataframe passed to the training as signal
    rdf_bkg: ROOT dataframe passed to the training as background

    The training uses the ml/tests/train_mva.yaml config, with the output
    paths redirected to `Data.out_dir`. Models are then loaded from the files
    matching model_*.pkl in that directory and returned sorted by file path.
    '''

    cfg                = get_config('ml/tests/train_mva.yaml')
    pkl_path           = f'{Data.out_dir}/model.pkl'
    plt_dir            = f'{Data.out_dir}/cv_predict'
    cfg['saving']['path']                           = pkl_path
    cfg['plotting']['val_dir']                      = plt_dir
    cfg['plotting']['features']['saving']['plt_dir'] = plt_dir

    obj= TrainMva(sig=rdf_sig, bkg=rdf_bkg, cfg=cfg)
    obj.run()

    pkl_wc     = pkl_path.replace('.pkl', '_*.pkl')
    # glob returns paths in arbitrary order, sorting makes the order of the models reproducible
    l_pkl_path = sorted(glob.glob(pkl_wc))
    # NOTE: joblib.load unpickles, this should only be used on files from the training above
    l_model    = [ joblib.load(model_path) for model_path in l_pkl_path ]

    return l_model
|
155
|
+
# -------------------------------
|
@@ -0,0 +1,13 @@
|
|
1
|
+
output : /tmp/tests/dmu/ml/cv_diagnostics/from_rdf
|
2
|
+
# Will assume that the target is already in the input dataframe
|
3
|
+
# and will use it, instead of evaluating models
|
4
|
+
score_from_rdf : w
|
5
|
+
correlations:
|
6
|
+
# Variables with respect to which the correlations with the features will be measured
|
7
|
+
target :
|
8
|
+
name : z
|
9
|
+
methods:
|
10
|
+
- Pearson
|
11
|
+
figure:
|
12
|
+
title: Scores from file
|
13
|
+
size : [10, 8]
|