data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.8.dev714__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/METADATA +800 -34
- data_manipulation_utilities-0.2.8.dev714.dist-info/RECORD +93 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/WHEEL +1 -1
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/entry_points.txt +1 -0
- dmu/__init__.py +0 -0
- dmu/generic/hashing.py +70 -0
- dmu/generic/utilities.py +175 -9
- dmu/generic/version_management.py +3 -5
- dmu/logging/log_store.py +34 -2
- dmu/logging/messages.py +96 -0
- dmu/ml/cv_classifier.py +3 -3
- dmu/ml/cv_diagnostics.py +224 -0
- dmu/ml/cv_performance.py +58 -0
- dmu/ml/cv_predict.py +149 -46
- dmu/ml/train_mva.py +587 -112
- dmu/ml/utilities.py +29 -10
- dmu/pdataframe/utilities.py +61 -3
- dmu/plotting/fwhm.py +64 -0
- dmu/plotting/matrix.py +1 -1
- dmu/plotting/plotter.py +25 -3
- dmu/plotting/plotter_1d.py +159 -14
- dmu/plotting/plotter_2d.py +5 -0
- dmu/rdataframe/utilities.py +54 -3
- dmu/rfile/ddfgetter.py +102 -0
- dmu/stats/fit_stats.py +129 -0
- dmu/stats/fitter.py +56 -23
- dmu/stats/gof_calculator.py +7 -0
- dmu/stats/model_factory.py +305 -50
- dmu/stats/parameters.py +100 -0
- dmu/stats/utilities.py +443 -12
- dmu/stats/wdata.py +187 -0
- dmu/stats/zfit.py +17 -0
- dmu/stats/zfit_models.py +68 -0
- dmu/stats/zfit_plotter.py +175 -56
- dmu/testing/utilities.py +120 -15
- dmu/workflow/__init__.py +0 -0
- dmu/workflow/cache.py +266 -0
- dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
- dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
- dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
- dmu_data/ml/tests/train_mva.yaml +20 -12
- dmu_data/ml/tests/train_mva_def.yaml +75 -0
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +87 -0
- dmu_data/ml/tests/train_mva_with_preffix.yaml +58 -0
- dmu_data/plotting/tests/2d.yaml +5 -5
- dmu_data/plotting/tests/line.yaml +15 -0
- dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
- dmu_data/plotting/tests/plug_stats.yaml +19 -0
- dmu_data/plotting/tests/simple.yaml +4 -3
- dmu_data/plotting/tests/styling.yaml +18 -0
- dmu_data/rfile/friends.yaml +13 -0
- dmu_data/stats/fitter/test_simple.yaml +28 -0
- dmu_data/stats/kde_optimizer/control.json +1 -0
- dmu_data/stats/kde_optimizer/signal.json +1 -0
- dmu_data/stats/parameters/data.yaml +178 -0
- dmu_data/tests/config.json +6 -0
- dmu_data/tests/config.yaml +4 -0
- dmu_data/tests/pdf_to_tex.txt +34 -0
- dmu_scripts/kerberos/check_expiration +21 -0
- dmu_scripts/kerberos/convert_certificate +22 -0
- dmu_scripts/ml/compare_classifiers.py +85 -0
- data_manipulation_utilities-0.2.6.dist-info/RECORD +0 -57
- {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.8.dev714.data}/scripts/publish +0 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/top_level.txt +0 -0
dmu/stats/model_factory.py
CHANGED
@@ -2,13 +2,18 @@
|
|
2
2
|
Module storing ZModel class
|
3
3
|
'''
|
4
4
|
# pylint: disable=too-many-lines, import-error, too-many-positional-arguments, too-many-arguments
|
5
|
+
# pylint: disable=too-many-instance-attributes
|
5
6
|
|
6
7
|
from typing import Callable, Union
|
7
8
|
|
8
9
|
import zfit
|
10
|
+
|
9
11
|
from zfit.core.interfaces import ZfitSpace as zobs
|
10
12
|
from zfit.core.basepdf import BasePDF as zpdf
|
11
13
|
from zfit.core.parameter import Parameter as zpar
|
14
|
+
from dmu.stats.parameters import ParameterLibrary as PL
|
15
|
+
from dmu.stats.zfit_models import HypExp
|
16
|
+
from dmu.stats.zfit_models import ModExp
|
12
17
|
from dmu.logging.log_store import LogStore
|
13
18
|
|
14
19
|
log=LogStore.add_logger('dmu:stats:model_factory')
|
@@ -47,6 +52,13 @@ class MethodRegistry:
|
|
47
52
|
log.info(f' {value}')
|
48
53
|
|
49
54
|
return method
|
55
|
+
|
56
|
+
@classmethod
|
57
|
+
def get_pdf_names(cls) -> list[str]:
|
58
|
+
'''
|
59
|
+
Returns list of PDFs that are registered/supported
|
60
|
+
'''
|
61
|
+
return list(cls._d_method)
|
50
62
|
#-----------------------------------------
|
51
63
|
class ModelFactory:
|
52
64
|
'''
|
@@ -57,34 +69,82 @@ class ModelFactory:
|
|
57
69
|
|
58
70
|
l_pdf = ['dscb', 'gauss']
|
59
71
|
l_shr = ['mu']
|
60
|
-
|
72
|
+
l_flt = ['mu', 'sg']
|
73
|
+
d_rep = {'mu' : 'scale', 'sg' : 'reso'}
|
74
|
+
mod = ModelFactory(
|
75
|
+
preffix = 'signal',
|
76
|
+
obs = obs,
|
77
|
+
l_pdf = l_pdf,
|
78
|
+
l_shared= l_shr, l_float = l_flt,
|
79
|
+
d_rep = d_rep)
|
80
|
+
|
61
81
|
pdf = mod.get_pdf()
|
62
82
|
```
|
63
83
|
|
64
|
-
where one can specify which parameters
|
84
|
+
where one can specify which parameters
|
85
|
+
|
86
|
+
- Can be shared among the PDFs
|
87
|
+
- Are meant to float if this fit is done to MC, in order to fix parameters in data.
|
88
|
+
- Are scales or resolutions that need reparametrizations
|
65
89
|
'''
|
66
90
|
#-----------------------------------------
|
67
91
|
def __init__(self,
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
92
|
+
preffix : str,
|
93
|
+
obs : zobs,
|
94
|
+
l_pdf : list[str],
|
95
|
+
l_shared : list[str],
|
96
|
+
l_float : list[str],
|
97
|
+
l_reuse : None | list[zpar] = None,
|
98
|
+
d_fix : None | dict[str,float] = None,
|
99
|
+
d_rep : None | dict[str,str] = None):
|
73
100
|
'''
|
74
101
|
preffix: used to identify PDF, will be used to name every parameter
|
75
102
|
obs: zfit observable
|
76
103
|
l_pdf: List of PDF nicknames which are registered below
|
77
104
|
l_shared: List of parameter names that are shared
|
78
105
|
l_float: List of parameter names to allow to float
|
106
|
+
l_reuse: Optional. List of parameters that if given will be used instead of built by factory
|
107
|
+
d_fix: Dictionary with keys as the beginning of the name of a parameter and value as the number
|
108
|
+
to which it has to be fixed. A ValueError is raised unless exactly one matching parameter is found
|
109
|
+
d_rep: Dictionary with keys as variables that will be reparametrized
|
79
110
|
'''
|
111
|
+
l_reuse = [] if l_reuse is None else l_reuse
|
80
112
|
|
81
113
|
self._preffix = preffix
|
82
114
|
self._l_pdf = l_pdf
|
83
115
|
self._l_shr = l_shared
|
84
116
|
self._l_flt = l_float
|
117
|
+
self._d_fix = d_fix
|
118
|
+
self._d_rep = d_rep
|
119
|
+
self._d_reuse = { par.name : par for par in l_reuse }
|
85
120
|
self._obs = obs
|
86
121
|
|
87
122
|
self._d_par : dict[str,zpar] = {}
|
123
|
+
|
124
|
+
self._check_reparametrization()
|
125
|
+
#-----------------------------------------
|
126
|
+
def _check_reparametrization(self) -> None:
|
127
|
+
'''
|
128
|
+
This method:
|
129
|
+
|
130
|
+
- Returns if no reparametrization has been requested
|
131
|
+
- Raises if reparametrization is requested for any floating parameter
|
132
|
+
- Raises if trying to reparametrize anything that is not scales and resolutions
|
133
|
+
'''
|
134
|
+
if self._d_rep is None:
|
135
|
+
return
|
136
|
+
|
137
|
+
s_par_1 = set(self._d_rep)
|
138
|
+
s_par_2 = set(self._l_flt)
|
139
|
+
|
140
|
+
if not s_par_1.isdisjoint(s_par_2):
|
141
|
+
log.info(f'Found : {s_par_1}')
|
142
|
+
log.info(f'Allowed: {s_par_2}')
|
143
|
+
raise ValueError('Non empty intersection between floating and reparametrization parameters')
|
144
|
+
|
145
|
+
s_kind = set(self._d_rep.values())
|
146
|
+
if not s_kind.issubset({'scale', 'reso'}):
|
147
|
+
raise ValueError(f'Only scales and resolution reparametrizations allowed, found: {s_kind}')
|
88
148
|
#-----------------------------------------
|
89
149
|
def _split_name(self, name : str) -> tuple[str,str]:
|
90
150
|
l_part = name.split('_')
|
@@ -94,67 +154,201 @@ class ModelFactory:
|
|
94
154
|
return pname, xname
|
95
155
|
#-----------------------------------------
|
96
156
|
def _get_parameter_name(self, name : str, suffix : str) -> str:
|
157
|
+
'''
|
158
|
+
Parameters
|
159
|
+
---------------
|
160
|
+
name : Physical name followed by name of PDF, e.g. mu_gauss
|
161
|
+
suffix: Identifies this PDF, e.g. index of 3rd gaussian
|
162
|
+
|
163
|
+
Returns
|
164
|
+
---------------
|
165
|
+
Name of parameter which:
|
166
|
+
|
167
|
+
- mu if parameter is meant to be reused, e.g. same mu for all PDFs
|
168
|
+
- mu_preffix, if parameter is shared
|
169
|
+
- mu_preffix3 if not shared but not floating
|
170
|
+
- mu_preffix3_flt if not shared and floating
|
171
|
+
'''
|
172
|
+
# pname = physical name, is something like mu or sg
|
97
173
|
pname, xname = self._split_name(name)
|
98
|
-
|
99
174
|
log.debug(f'Using physical name: {pname}')
|
100
175
|
|
176
|
+
if pname in self._l_flt:
|
177
|
+
# If reused parameter is floating
|
178
|
+
# find it with flt
|
179
|
+
reuse_name = f'{pname}_flt'
|
180
|
+
else:
|
181
|
+
reuse_name = pname
|
182
|
+
|
183
|
+
if reuse_name in self._d_reuse:
|
184
|
+
log.debug(f'Picking name {reuse_name} for reused parameter')
|
185
|
+
return self._add_float(pname=pname, name=pname)
|
186
|
+
|
101
187
|
if pname in self._l_shr:
|
102
188
|
name = f'{pname}_{self._preffix}'
|
189
|
+
log.debug(f'Using model specific parameter name {name}')
|
103
190
|
else:
|
104
191
|
name = f'{pname}_{xname}_{self._preffix}{suffix}'
|
192
|
+
log.debug(f'Using component specific parameter name {name}')
|
105
193
|
|
106
|
-
|
107
|
-
|
194
|
+
return self._add_float(pname=pname, name=name)
|
195
|
+
#-----------------------------------------
|
196
|
+
def _add_float(self, pname : str, name : str) -> str:
|
197
|
+
'''
|
198
|
+
Parameters
|
199
|
+
-------------
|
200
|
+
pname : Physical name, e.g. mu
|
201
|
+
name : Actual parameter name, e.g. mu_cbl_3
|
202
|
+
|
203
|
+
Returns
|
204
|
+
-------------
|
205
|
+
Actual parameter name with _flt appended if the physical version is meant to float
|
206
|
+
'''
|
207
|
+
if pname not in self._l_flt:
|
208
|
+
return name
|
108
209
|
|
109
|
-
return name
|
210
|
+
return f'{name}_flt'
|
110
211
|
#-----------------------------------------
|
111
|
-
def _get_parameter(
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
212
|
+
def _get_parameter(
|
213
|
+
self,
|
214
|
+
kind : str,
|
215
|
+
name : str,
|
216
|
+
suffix : str) -> zpar:
|
217
|
+
'''
|
218
|
+
Parameters
|
219
|
+
----------------
|
220
|
+
kind : Identifies PDF, e.g. gaus
|
221
|
+
name : Physical name of parameter, e.g. mu
|
222
|
+
suffix: If multiple PDFs of this kind, it will be some sort of index, e.g. gaus(1), gaus(2)
|
223
|
+
|
224
|
+
Returns
|
225
|
+
----------------
|
226
|
+
Parameter, if it was :
|
227
|
+
|
228
|
+
- Provided as part of l_reuse (e.g. mu), it will pick it up instead of building it
|
229
|
+
- Specified as shared, it will build it once and then reuse that one.
|
230
|
+
- Otherwise, it will make a new one, with a suffix to differentiate it from whatever was already created
|
231
|
+
'''
|
117
232
|
|
118
|
-
|
119
|
-
log.debug(f'Assigning name: {
|
233
|
+
par_name = self._get_parameter_name(f'{name}_{kind}', suffix)
|
234
|
+
log.debug(f'Assigning name: {par_name}')
|
120
235
|
|
121
|
-
if
|
122
|
-
|
236
|
+
if par_name in self._d_reuse:
|
237
|
+
log.info(f'Reusing {par_name}')
|
238
|
+
return self._d_reuse[par_name]
|
123
239
|
|
124
|
-
|
240
|
+
if par_name in self._d_par:
|
241
|
+
log.info(f'Picking already made parameter {par_name}')
|
242
|
+
return self._d_par[par_name]
|
125
243
|
|
126
|
-
self.
|
244
|
+
is_reparametrized = self._is_reparametrized(name)
|
245
|
+
|
246
|
+
val, low, high = PL.get_values(kind=kind, parameter=name)
|
247
|
+
|
248
|
+
if is_reparametrized:
|
249
|
+
init_name, _ = self._split_name(par_name)
|
250
|
+
log.info(f'Reparametrizing {par_name}')
|
251
|
+
par = self._get_reparametrization(par_name, init_name, val, low, high)
|
252
|
+
else:
|
253
|
+
if val == low == high:
|
254
|
+
log.warning(f'Upper and lower edges agree, fixing parameter to: {low}')
|
255
|
+
par = zfit.param.Parameter(par_name, val, low - 1 , high + 1)
|
256
|
+
par.floating = False
|
257
|
+
else:
|
258
|
+
log.debug(f'Creating new parameter {par_name}')
|
259
|
+
par = zfit.param.Parameter(par_name, val, low, high)
|
260
|
+
|
261
|
+
self._d_par[par_name] = par
|
262
|
+
|
263
|
+
return par
|
264
|
+
#-----------------------------------------
|
265
|
+
def _is_reparametrized(self, name : str) -> bool:
|
266
|
+
if self._d_rep is None:
|
267
|
+
return False
|
268
|
+
|
269
|
+
root_name, _ = self._split_name(name)
|
270
|
+
|
271
|
+
is_rep = root_name in self._d_rep
|
272
|
+
|
273
|
+
log.debug(f'Reparametrizing {name}: {is_rep}')
|
274
|
+
|
275
|
+
return is_rep
|
276
|
+
#-----------------------------------------
|
277
|
+
def _get_reparametrization(self, par_name : str, init_name : str, value : float, low : float, high : float) -> zpar:
|
278
|
+
log.debug(f'Reparametrizing {par_name}')
|
279
|
+
par_const = zfit.Parameter(par_name, value, low, high)
|
280
|
+
par_const.floating = False
|
281
|
+
|
282
|
+
kind = self._d_rep[init_name]
|
283
|
+
if kind == 'reso':
|
284
|
+
par_reso = zfit.Parameter(f'{par_name}_reso_flt' , 1.0, 0.20, 5.0)
|
285
|
+
par = zfit.ComposedParameter(f'{par_name}_cmp', lambda d_par : d_par['par_const'] * d_par['reso' ], params={'par_const' : par_const, 'reso' : par_reso } )
|
286
|
+
elif kind == 'scale':
|
287
|
+
par_scale = zfit.Parameter(f'{par_name}_scale_flt', 0.0, -100, 100)
|
288
|
+
par = zfit.ComposedParameter(f'{par_name}_cmp', lambda d_par : d_par['par_const'] + d_par['scale'], params={'par_const' : par_const, 'scale' : par_scale} )
|
289
|
+
else:
|
290
|
+
raise ValueError(f'Invalid kind: {kind}')
|
127
291
|
|
128
292
|
return par
|
129
293
|
#-----------------------------------------
|
130
294
|
@MethodRegistry.register('exp')
|
131
295
|
def _get_exponential(self, suffix : str = '') -> zpdf:
|
132
|
-
c = self._get_parameter('
|
296
|
+
c = self._get_parameter('exp', 'c', suffix)
|
133
297
|
pdf = zfit.pdf.Exponential(c, self._obs, name=f'exp{suffix}')
|
134
298
|
|
299
|
+
return pdf
|
300
|
+
# ---------------------------------------------
|
301
|
+
@MethodRegistry.register('hypexp')
|
302
|
+
def _get_hypexp(self, suffix : str = '') -> zpdf:
|
303
|
+
mu = self._get_parameter('hypexp', 'mu', suffix)
|
304
|
+
ap = self._get_parameter('hypexp', 'ap', suffix)
|
305
|
+
bt = self._get_parameter('hypexp', 'bt', suffix)
|
306
|
+
|
307
|
+
pdf= HypExp(obs=self._obs, mu=mu, alpha=ap, beta=bt, name=f'hypexp{suffix}')
|
308
|
+
|
309
|
+
return pdf
|
310
|
+
# ---------------------------------------------
|
311
|
+
@MethodRegistry.register('modexp')
|
312
|
+
def _get_modexp(self, suffix : str = '') -> zpdf:
|
313
|
+
mu = self._get_parameter('modexp', 'mu', suffix)
|
314
|
+
ap = self._get_parameter('modexp', 'ap', suffix)
|
315
|
+
bt = self._get_parameter('modexp', 'bt', suffix)
|
316
|
+
|
317
|
+
pdf= ModExp(obs=self._obs, mu=mu, alpha=ap, beta=bt, name=f'modexp{suffix}')
|
318
|
+
|
135
319
|
return pdf
|
136
320
|
#-----------------------------------------
|
137
321
|
@MethodRegistry.register('pol1')
|
138
322
|
def _get_pol1(self, suffix : str = '') -> zpdf:
|
139
|
-
a = self._get_parameter('
|
323
|
+
a = self._get_parameter('pol1', 'a', suffix)
|
140
324
|
pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a], name=f'pol1{suffix}')
|
141
325
|
|
142
326
|
return pdf
|
143
327
|
#-----------------------------------------
|
144
328
|
@MethodRegistry.register('pol2')
|
145
329
|
def _get_pol2(self, suffix : str = '') -> zpdf:
|
146
|
-
a = self._get_parameter('
|
147
|
-
b = self._get_parameter('
|
148
|
-
pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b], name=f'pol2{suffix}')
|
330
|
+
a = self._get_parameter('pol2', 'a', suffix)
|
331
|
+
b = self._get_parameter('pol2', 'b', suffix)
|
332
|
+
pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b ], name=f'pol2{suffix}')
|
333
|
+
|
334
|
+
return pdf
|
335
|
+
# ---------------------------------------------
|
336
|
+
@MethodRegistry.register('pol3')
|
337
|
+
def _get_pol3(self, suffix : str = '') -> zpdf:
|
338
|
+
a = self._get_parameter('pol3', 'a', suffix)
|
339
|
+
b = self._get_parameter('pol3', 'b', suffix)
|
340
|
+
c = self._get_parameter('pol3', 'c', suffix)
|
341
|
+
|
342
|
+
pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b, c], name=f'pol3{suffix}')
|
149
343
|
|
150
344
|
return pdf
|
151
345
|
#-----------------------------------------
|
152
346
|
@MethodRegistry.register('cbr')
|
153
347
|
def _get_cbr(self, suffix : str = '') -> zpdf:
|
154
|
-
mu = self._get_parameter('
|
155
|
-
sg = self._get_parameter('
|
156
|
-
ar = self._get_parameter('
|
157
|
-
nr = self._get_parameter('
|
348
|
+
mu = self._get_parameter('cbr', 'mu', suffix)
|
349
|
+
sg = self._get_parameter('cbr', 'sg', suffix)
|
350
|
+
ar = self._get_parameter('cbr', 'ac', suffix)
|
351
|
+
nr = self._get_parameter('cbr', 'nc', suffix)
|
158
352
|
|
159
353
|
pdf = zfit.pdf.CrystalBall(mu, sg, ar, nr, self._obs, name=f'cbr{suffix}')
|
160
354
|
|
@@ -162,10 +356,10 @@ class ModelFactory:
|
|
162
356
|
#-----------------------------------------
|
163
357
|
@MethodRegistry.register('suj')
|
164
358
|
def _get_suj(self, suffix : str = '') -> zpdf:
|
165
|
-
mu = self._get_parameter('
|
166
|
-
sg = self._get_parameter('
|
167
|
-
gm = self._get_parameter('
|
168
|
-
dl = self._get_parameter('
|
359
|
+
mu = self._get_parameter('suj', 'mu', suffix)
|
360
|
+
sg = self._get_parameter('suj', 'sg', suffix)
|
361
|
+
gm = self._get_parameter('suj', 'gm', suffix)
|
362
|
+
dl = self._get_parameter('suj', 'dl', suffix)
|
169
363
|
|
170
364
|
pdf = zfit.pdf.JohnsonSU(mu, sg, gm, dl, self._obs, name=f'suj{suffix}')
|
171
365
|
|
@@ -173,10 +367,10 @@ class ModelFactory:
|
|
173
367
|
#-----------------------------------------
|
174
368
|
@MethodRegistry.register('cbl')
|
175
369
|
def _get_cbl(self, suffix : str = '') -> zpdf:
|
176
|
-
mu = self._get_parameter('
|
177
|
-
sg = self._get_parameter('
|
178
|
-
al = self._get_parameter('
|
179
|
-
nl = self._get_parameter('
|
370
|
+
mu = self._get_parameter('cbl', 'mu', suffix)
|
371
|
+
sg = self._get_parameter('cbl', 'sg', suffix)
|
372
|
+
al = self._get_parameter('cbl', 'ac', suffix)
|
373
|
+
nl = self._get_parameter('cbl', 'nc', suffix)
|
180
374
|
|
181
375
|
pdf = zfit.pdf.CrystalBall(mu, sg, al, nl, self._obs, name=f'cbl{suffix}')
|
182
376
|
|
@@ -184,8 +378,8 @@ class ModelFactory:
|
|
184
378
|
#-----------------------------------------
|
185
379
|
@MethodRegistry.register('gauss')
|
186
380
|
def _get_gauss(self, suffix : str = '') -> zpdf:
|
187
|
-
mu = self._get_parameter('
|
188
|
-
sg = self._get_parameter('
|
381
|
+
mu = self._get_parameter('gauss', 'mu', suffix)
|
382
|
+
sg = self._get_parameter('gauss', 'sg', suffix)
|
189
383
|
|
190
384
|
pdf = zfit.pdf.Gauss(mu, sg, self._obs, name=f'gauss{suffix}')
|
191
385
|
|
@@ -193,17 +387,46 @@ class ModelFactory:
|
|
193
387
|
#-----------------------------------------
|
194
388
|
@MethodRegistry.register('dscb')
|
195
389
|
def _get_dscb(self, suffix : str = '') -> zpdf:
|
196
|
-
mu = self._get_parameter('
|
197
|
-
sg = self._get_parameter('
|
198
|
-
ar = self._get_parameter('
|
199
|
-
al = self._get_parameter('
|
200
|
-
nr = self._get_parameter('
|
201
|
-
nl = self._get_parameter('
|
390
|
+
mu = self._get_parameter('dscb', 'mu', suffix)
|
391
|
+
sg = self._get_parameter('dscb', 'sg', suffix)
|
392
|
+
ar = self._get_parameter('dscb', 'ar', suffix)
|
393
|
+
al = self._get_parameter('dscb', 'al', suffix)
|
394
|
+
nr = self._get_parameter('dscb', 'nr', suffix)
|
395
|
+
nl = self._get_parameter('dscb', 'nl', suffix)
|
202
396
|
|
203
397
|
pdf = zfit.pdf.DoubleCB(mu, sg, al, nl, ar, nr, self._obs, name=f'dscb{suffix}')
|
204
398
|
|
205
399
|
return pdf
|
206
400
|
#-----------------------------------------
|
401
|
+
@MethodRegistry.register('voigt')
|
402
|
+
def _get_voigt(self, suffix : str = '') -> zpdf:
|
403
|
+
mu = self._get_parameter('voigt', 'mu', suffix)
|
404
|
+
sg = self._get_parameter('voigt', 'sg', suffix)
|
405
|
+
gm = self._get_parameter('voigt', 'gm', suffix)
|
406
|
+
|
407
|
+
pdf = zfit.pdf.Voigt(m=mu, sigma=sg, gamma=gm, obs=self._obs, name=f'voigt{suffix}')
|
408
|
+
|
409
|
+
return pdf
|
410
|
+
#-----------------------------------------
|
411
|
+
@MethodRegistry.register('qgauss')
|
412
|
+
def _get_qgauss(self, suffix : str = '') -> zpdf:
|
413
|
+
mu = self._get_parameter('qgauss', 'mu', suffix)
|
414
|
+
sg = self._get_parameter('qgauss', 'sg', suffix)
|
415
|
+
q = self._get_parameter('qgauss', 'q', suffix)
|
416
|
+
|
417
|
+
pdf = zfit.pdf.QGauss(q=q, mu=mu, sigma=sg, obs=self._obs, name =f'qgauss{suffix}')
|
418
|
+
|
419
|
+
return pdf
|
420
|
+
#-----------------------------------------
|
421
|
+
@MethodRegistry.register('cauchy')
|
422
|
+
def _get_cauchy(self, suffix : str = '') -> zpdf:
|
423
|
+
mu = self._get_parameter('cauchy', 'mu', suffix)
|
424
|
+
gm = self._get_parameter('cauchy', 'gm', suffix)
|
425
|
+
|
426
|
+
pdf = zfit.pdf.Cauchy(obs=self._obs, m=mu, gamma=gm, name=f'cauchy{suffix}')
|
427
|
+
|
428
|
+
return pdf
|
429
|
+
#-----------------------------------------
|
207
430
|
def _get_pdf_types(self) -> list[tuple[str,str]]:
|
208
431
|
d_name_freq = {}
|
209
432
|
|
@@ -224,7 +447,7 @@ class ModelFactory:
|
|
224
447
|
def _get_pdf(self, kind : str, preffix : str) -> zpdf:
|
225
448
|
fun = MethodRegistry.get_method(kind)
|
226
449
|
if fun is None:
|
227
|
-
raise NotImplementedError(f'PDF of type {kind} is not implemented')
|
450
|
+
raise NotImplementedError(f'PDF of type \"{kind}\" with preffix \"{preffix}\" is not implemented')
|
228
451
|
|
229
452
|
return fun(self, preffix)
|
230
453
|
#-----------------------------------------
|
@@ -234,12 +457,43 @@ class ModelFactory:
|
|
234
457
|
log.debug('Requested only one PDF, skipping sum')
|
235
458
|
return l_pdf[0]
|
236
459
|
|
237
|
-
l_frc= [ zfit.param.Parameter(f'frc_{ifrc + 1}', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]
|
460
|
+
l_frc= [ zfit.param.Parameter(f'frc_{self._preffix}_{ifrc + 1}', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]
|
238
461
|
|
239
462
|
pdf = zfit.pdf.SumPDF(l_pdf, name=self._preffix, fracs=l_frc)
|
240
463
|
|
241
464
|
return pdf
|
242
465
|
#-----------------------------------------
|
466
|
+
def _find_par(self, s_par : set[zpar], name_start : str) -> zpar:
|
467
|
+
l_par_match = [ par for par in s_par if par.name.startswith(name_start) ]
|
468
|
+
npar = len(l_par_match)
|
469
|
+
|
470
|
+
if npar!= 1:
|
471
|
+
for par in s_par:
|
472
|
+
log.info(par.name)
|
473
|
+
|
474
|
+
raise ValueError(f'Found {npar} parameters starting with: {name_start}')
|
475
|
+
|
476
|
+
return l_par_match[0]
|
477
|
+
#-----------------------------------------
|
478
|
+
def _fix_parameters(self, pdf : zpdf) -> zpdf:
|
479
|
+
if self._d_fix is None:
|
480
|
+
log.debug('Not fixing any parameter')
|
481
|
+
return pdf
|
482
|
+
|
483
|
+
s_par = pdf.get_params()
|
484
|
+
|
485
|
+
log.info('-' * 30)
|
486
|
+
log.info('Fixing parameters')
|
487
|
+
log.info('-' * 30)
|
488
|
+
for name_start, value in self._d_fix.items():
|
489
|
+
par = self._find_par(s_par, name_start)
|
490
|
+
par.set_value(value)
|
491
|
+
|
492
|
+
log.info(f'{name_start:<20}{value:<20.3f}')
|
493
|
+
par.floating = False
|
494
|
+
|
495
|
+
return pdf
|
496
|
+
#-----------------------------------------
|
243
497
|
def get_pdf(self) -> zpdf:
|
244
498
|
'''
|
245
499
|
Given a list of strings representing PDFs returns a zfit PDF which is
|
@@ -248,6 +502,7 @@ class ModelFactory:
|
|
248
502
|
l_type= self._get_pdf_types()
|
249
503
|
l_pdf = [ self._get_pdf(kind, preffix) for kind, preffix in l_type ]
|
250
504
|
pdf = self._add_pdf(l_pdf)
|
505
|
+
pdf = self._fix_parameters(pdf)
|
251
506
|
|
252
507
|
return pdf
|
253
508
|
#-----------------------------------------
|
dmu/stats/parameters.py
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
'''
|
2
|
+
Module with ParameterLibrary class
|
3
|
+
'''
|
4
|
+
from importlib.resources import files
|
5
|
+
|
6
|
+
import yaml
|
7
|
+
import pandas as pnd
|
8
|
+
|
9
|
+
from dmu.logging.log_store import LogStore
|
10
|
+
|
11
|
+
log=LogStore.add_logger('dmu:parameters')
|
12
|
+
# --------------------------------
|
13
|
+
class ParameterLibrary:
|
14
|
+
'''
|
15
|
+
Class meant to:
|
16
|
+
|
17
|
+
- Connect to database (YAML file) with parameter values and make them available
|
18
|
+
- Allow parameter values to be overridden
|
19
|
+
'''
|
20
|
+
df_parameters : pnd.DataFrame
|
21
|
+
# --------------------------------
|
22
|
+
@staticmethod
|
23
|
+
def _load_data() -> None:
|
24
|
+
if hasattr(ParameterLibrary, 'df_parameters'):
|
25
|
+
return
|
26
|
+
|
27
|
+
data_path = files('dmu_data').joinpath('stats/parameters/data.yaml')
|
28
|
+
data_path = str(data_path)
|
29
|
+
|
30
|
+
d_data = {'parameter' : [], 'kind' : [], 'val' : [], 'low' : [], 'high' : []}
|
31
|
+
with open(data_path, encoding='utf-8') as ifile:
|
32
|
+
data = yaml.safe_load(ifile)
|
33
|
+
for kind, d_par in data.items():
|
34
|
+
for parameter, d_kind in d_par.items():
|
35
|
+
val = d_kind['val' ]
|
36
|
+
low = d_kind['low' ]
|
37
|
+
high= d_kind['high']
|
38
|
+
|
39
|
+
d_data['parameter'].append(parameter)
|
40
|
+
d_data['kind' ].append(kind )
|
41
|
+
d_data['val' ].append(val )
|
42
|
+
d_data['low' ].append(low )
|
43
|
+
d_data['high' ].append(high )
|
44
|
+
|
45
|
+
df = pnd.DataFrame(d_data)
|
46
|
+
|
47
|
+
ParameterLibrary.df_parameters = df
|
48
|
+
# --------------------------------
|
49
|
+
@staticmethod
|
50
|
+
def print_parameters(kind : str) -> None:
|
51
|
+
'''
|
52
|
+
Method taking the kind of PDF to which the parameters are associated
|
53
|
+
and printing the values.
|
54
|
+
'''
|
55
|
+
df = ParameterLibrary.df_parameters
|
56
|
+
df = df[ df['kind'] == kind ]
|
57
|
+
|
58
|
+
print(df)
|
59
|
+
# --------------------------------
|
60
|
+
@staticmethod
|
61
|
+
def get_values(kind : str, parameter : str) -> tuple[float,float,float]:
|
62
|
+
'''
|
63
|
+
Takes PDF and parameter names and returns default value, low value and high value
|
64
|
+
'''
|
65
|
+
df = ParameterLibrary.df_parameters
|
66
|
+
|
67
|
+
df = df[df['kind'] == kind]
|
68
|
+
df = df[df['parameter']==parameter]
|
69
|
+
|
70
|
+
if len(df) != 1:
|
71
|
+
log.info(df)
|
72
|
+
raise ValueError(f'Could not find one and only one row for: {kind}/{parameter}')
|
73
|
+
|
74
|
+
val = df['val'].iloc[0]
|
75
|
+
low = df['low'].iloc[0]
|
76
|
+
high= df['high'].iloc[0]
|
77
|
+
|
78
|
+
return val, low, high
|
79
|
+
# --------------------------------
|
80
|
+
@staticmethod
|
81
|
+
def set_values(
|
82
|
+
parameter : str,
|
83
|
+
kind : str,
|
84
|
+
val : float,
|
85
|
+
low : float,
|
86
|
+
high : float) -> None:
|
87
|
+
'''
|
88
|
+
This function will override the value and range for the given parameter
|
89
|
+
It should be typically used before using the ModelFactory class
|
90
|
+
'''
|
91
|
+
|
92
|
+
df = ParameterLibrary.df_parameters
|
93
|
+
|
94
|
+
location = (df['parameter'] == parameter) & (df['kind'] == kind)
|
95
|
+
|
96
|
+
df.loc[location, 'val' ] = val
|
97
|
+
df.loc[location, 'low' ] = low
|
98
|
+
df.loc[location, 'high'] = high
|
99
|
+
# --------------------------------
|
100
|
+
ParameterLibrary._load_data()
|