data-manipulation-utilities 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.1.6.dist-info → data_manipulation_utilities-0.1.9.dist-info}/METADATA +102 -3
- {data_manipulation_utilities-0.1.6.dist-info → data_manipulation_utilities-0.1.9.dist-info}/RECORD +15 -11
- {data_manipulation_utilities-0.1.6.dist-info → data_manipulation_utilities-0.1.9.dist-info}/WHEEL +1 -1
- dmu/plotting/plotter.py +1 -1
- dmu/plotting/plotter_1d.py +54 -32
- dmu/plotting/plotter_2d.py +1 -1
- dmu/stats/fitter.py +16 -9
- dmu/stats/gof_calculator.py +145 -0
- dmu/stats/minimizers.py +183 -0
- dmu/stats/model_factory.py +207 -0
- dmu/stats/zfit_plotter.py +527 -0
- dmu_data/plotting/tests/2d.yaml +7 -1
- {data_manipulation_utilities-0.1.6.data → data_manipulation_utilities-0.1.9.data}/scripts/publish +0 -0
- {data_manipulation_utilities-0.1.6.dist-info → data_manipulation_utilities-0.1.9.dist-info}/entry_points.txt +0 -0
- {data_manipulation_utilities-0.1.6.dist-info → data_manipulation_utilities-0.1.9.dist-info}/top_level.txt +0 -0
dmu/stats/minimizers.py
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
'''
|
2
|
+
Module containing derived classes from ZFit minimizer
|
3
|
+
'''
|
4
|
+
import numpy
|
5
|
+
|
6
|
+
import zfit
|
7
|
+
from zfit.result import FitResult
|
8
|
+
from zfit.core.basepdf import BasePDF as zpdf
|
9
|
+
from zfit.minimizers.baseminimizer import FailMinimizeNaN
|
10
|
+
from dmu.stats.gof_calculator import GofCalculator
|
11
|
+
from dmu.logging.log_store import LogStore
|
12
|
+
|
13
|
+
# Module-level logger used by the minimizer class defined in this file.
# NOTE(review): registered under 'dmu:ml:...' although this module is dmu/stats/minimizers.py — confirm the name is intended
log = LogStore.add_logger('dmu:ml:minimizers')
|
14
|
+
# ------------------------
|
15
|
+
class AnealingMinimizer(zfit.minimize.Minuit):
    '''
    Minuit based minimizer for zfit likelihoods that retries the fit several times.

    The floating parameters are randomized after an attempt that raised an exception
    and after a usable attempt whose goodness of fit did not pass the threshold.
    The loop stops as soon as the threshold is passed, otherwise the attempt with
    the lowest chi2/ndof is returned.
    '''
    # ------------------------
    def __init__(self, ntries : int, pvalue : float = -1, chi2ndof : float = -1):
        '''
        ntries  : Maximum number of fit attempts
        pvalue  : Stop retrying when the fit p-value is above this value, used only if it is in [0, 1)
        chi2ndof: Stop retrying when the fit chi2/ndof is below this value, used only if it is positive

        Raises ValueError unless exactly one of the two thresholds is usable
        '''
        self._ntries   = ntries
        self._pvalue   = pvalue
        self._chi2ndof = chi2ndof

        self._check_thresholds()

        super().__init__()
    # ------------------------
    def _has_pvalue_threshold(self) -> bool:
        '''
        True if the p-value threshold is in the usable range [0, 1)
        '''
        return 0 <= self._pvalue < 1
    # ------------------------
    def _has_chi2_threshold(self) -> bool:
        '''
        True if the chi2/ndof threshold is usable, i.e. positive
        '''
        return self._chi2ndof > 0
    # ------------------------
    def _check_thresholds(self) -> None:
        '''
        Makes sure that one and only one stopping threshold was requested

        Raises ValueError if both or none of the thresholds are usable
        '''
        good_pvalue  = self._has_pvalue_threshold()
        good_chi2dof = self._has_chi2_threshold()

        if good_pvalue and good_chi2dof:
            raise ValueError('Threshold for both chi2 and pvalue were specified')

        if good_pvalue:
            log.debug(f'Will use threshold on pvalue with value: {self._pvalue}')
            return

        if good_chi2dof:
            log.debug(f'Will use threshold on chi2ndof with value: {self._chi2ndof}')
            return

        raise ValueError('Neither pvalue nor chi2 thresholds are valid')
    # ------------------------
    def _is_good_gof(self, ch2 : float, pvl : float) -> bool:
        '''
        ch2: chi2/ndof of the current fit
        pvl: p-value of the current fit

        Returns True if the active threshold is passed, i.e. the retries can stop
        '''
        # Same validity conditions as in _check_thresholds, so that a threshold
        # accepted at construction (e.g. pvalue=0) is actually applied here
        is_good_pval = self._has_pvalue_threshold() and pvl > self._pvalue
        is_good_chi2 = self._has_chi2_threshold()   and ch2 < self._chi2ndof
        is_good      = is_good_pval or is_good_chi2

        if is_good_pval:
            log.info(f'Stopping fit, found p-value: {pvl:.3f} > {self._pvalue:.3f}')

        if is_good_chi2:
            log.info(f'Stopping fit, found chi2/ndof: {ch2:.3f} < {self._chi2ndof:.3f}')

        if not is_good:
            log.debug(f'Could not reach threshold, pvalue/chi2: {pvl:.3f}/{ch2:.3f}')

        return is_good
    # ------------------------
    def _is_good_fit(self, res : FitResult) -> bool:
        '''
        Returns True only if the result is valid, has status 0 and converged
        '''
        if not res.valid:
            log.warning('Skipping invalid fit')
            return False

        if res.status != 0:
            log.warning('Skipping fit with bad status')
            return False

        if not res.converged:
            log.warning('Skipping non-converging fit')
            return False

        return True
    # ------------------------
    def _get_gof(self, nll) -> tuple[float, float]:
        '''
        Returns tuple with chi2/ndof and p-value (in that order) as
        calculated by GofCalculator for the likelihood passed
        '''
        log.debug('Checking GOF')

        gcl = GofCalculator(nll)
        pvl = gcl.get_gof(kind='pvalue')
        ch2 = gcl.get_gof(kind='chi2/ndof')

        return ch2, pvl
    # ------------------------
    def _randomize_parameters(self, nll) -> None:
        '''
        Will move floating parameters of PDF according
        to uniform PDF between each parameter's lower and upper attributes

        Raises ValueError if the likelihood does not hold exactly one model
        '''
        log.debug('Randomizing parameters')

        # Single place where the model is extracted and validated
        model = self._pdf_from_nll(nll)
        s_par = model.get_params(floating=True)
        for par in s_par:
            ival = par.value()
            fval = numpy.random.uniform(par.lower, par.upper)
            par.set_value(fval)
            log.debug(f'{par.name:<20}{ival:<15.3f}{"->":<10}{fval:<15.3f}{"in":<5}{par.lower:<15.3e}{par.upper:<15.3e}')
    # ------------------------
    def _pick_best_fit(self, d_chi2_res : dict) -> FitResult:
        '''
        d_chi2_res: Dictionary mapping chi2/ndof to the corresponding fit result

        Returns result with the lowest key, raises ValueError if the dictionary is empty
        '''
        if not d_chi2_res:
            raise ValueError('No fits found')

        nres = len(d_chi2_res)
        chi2 = min(d_chi2_res)
        res  = d_chi2_res[chi2]

        log.warning(f'Picking out best fit from {nres} fits with chi2: {chi2:.3f}')

        return res
    #------------------------------
    def _set_pdf_pars(self, res : FitResult, pdf : zpdf) -> None:
        '''
        Will set the PDF parameters to the values stored in the result instance,
        parameters missing from the result are left untouched
        '''
        l_par_flt = list(pdf.get_params(floating= True))
        l_par_fix = list(pdf.get_params(floating=False))
        l_par     = l_par_flt + l_par_fix

        d_val     = { par.name : dc['value'] for par, dc in res.params.items()}

        log.debug('Setting PDF parameters to best result')
        for par in l_par:
            if par.name not in d_val:
                par_val = par.value().numpy()
                log.debug(f'Skipping {par.name} = {par_val:.3e}')
                continue

            val = d_val[par.name]
            log.debug(f'{"":<4}{par.name:<20}{"->":<10}{val:<20.3e}')
            par.set_value(val)
    # ------------------------
    def _pdf_from_nll(self, nll) -> zpdf:
        '''
        Returns the only model attached to the likelihood

        Raises ValueError if the likelihood holds zero or more than one model
        '''
        l_model = nll.model
        if len(l_model) != 1:
            raise ValueError('Cannot extract one and only one PDF from NLL')

        return l_model[0]
    # ------------------------
    def minimize(self, nll, **kwargs) -> FitResult:
        '''
        Will run minimization and return FitResult object

        nll   : Likelihood to minimize, it has to hold exactly one model
        kwargs: Forwarded untouched to the minimize method of the parent class

        Raises ValueError if none of the attempts produced an usable fit
        '''
        d_chi2_res : dict[float,FitResult] = {}
        for i_try in range(self._ntries):
            log.info(f'try {i_try:02}/{self._ntries:02}')
            try:
                res = super().minimize(nll, **kwargs)
            except (FailMinimizeNaN, ValueError, RuntimeError) as exc:
                log.warning(exc)
                self._randomize_parameters(nll)
                continue

            # NOTE(review): a rejected fit is retried from wherever the minimizer left
            # the parameters, no randomization is done here — confirm this is intended
            if not self._is_good_fit(res):
                continue

            chi2, pvl = self._get_gof(nll)
            d_chi2_res[chi2] = res

            if self._is_good_gof(chi2, pvl):
                return res

            self._randomize_parameters(nll)

        # The parameters were randomized after the last attempt,
        # put them back at the values of the fit that will be returned
        res = self._pick_best_fit(d_chi2_res)
        pdf = self._pdf_from_nll(nll)
        self._set_pdf_pars(res, pdf)

        return res
|
183
|
+
# ------------------------
|
@@ -0,0 +1,207 @@
|
|
1
|
+
'''
|
2
|
+
Module storing the ModelFactory class and the MethodRegistry helper it relies on
|
3
|
+
'''
|
4
|
+
# pylint: disable=too-many-lines, import-error
|
5
|
+
|
6
|
+
from typing import Callable, Union
|
7
|
+
|
8
|
+
import zfit
|
9
|
+
from zfit.core.interfaces import ZfitSpace as zobs
|
10
|
+
from zfit.core.basepdf import BasePDF as zpdf
|
11
|
+
from zfit.core.parameter import Parameter as zpar
|
12
|
+
from dmu.logging.log_store import LogStore
|
13
|
+
|
14
|
+
# Module-level logger used by the classes defined in this file
log=LogStore.add_logger('dmu:stats:model_factory')
|
15
|
+
#-----------------------------------------
|
16
|
+
class MethodRegistry:
    '''
    Lookup table associating nicknames with the protected methods of the
    ModelFactory class, which lives in this same module
    '''
    # Single dictionary shared through the class: nickname -> registered method
    _d_method = {}

    @classmethod
    def get_method(cls, nickname : str) -> Union[Callable,None]:
        '''
        Returns the method registered under the nickname, None if nothing was registered
        '''
        if nickname in cls._d_method:
            return cls._d_method[nickname]

        return None

    @classmethod
    def register(cls, nickname : str):
        '''
        Returns decorator that stores the decorated method under the nickname
        and hands the method back unmodified
        '''
        def _store(method):
            cls._d_method.update({nickname : method})

            return method

        return _store
|
41
|
+
#-----------------------------------------
|
42
|
+
class ModelFactory:
    '''
    Class used to create Zfit PDFs by passing only the nicknames, e.g.:

    ```python
    from dmu.stats.model_factory import ModelFactory

    l_pdf = ['dscb', 'gauss']
    l_shr = ['mu']
    mod   = ModelFactory(obs = obs, l_pdf = l_pdf, l_shared=l_shr)
    pdf   = mod.get_pdf()
    ```

    where one can specify which parameters can be shared among the PDFs.
    The nicknames available are the ones registered through MethodRegistry below.
    '''
    #-----------------------------------------
    def __init__(self, obs : zobs, l_pdf : list[str], l_shared : list[str]):
        '''
        obs: zfit observable
        l_pdf: List of PDF nicknames which are registered below
        l_shared: List of parameter names that are shared, only 'mu' and 'sg' can be shared
        '''

        self._l_pdf           = l_pdf
        self._l_shr           = l_shared
        self._l_can_be_shared = ['mu', 'sg']
        self._obs             = obs

        # Cache of parameters already built, keyed by the final parameter name
        self._d_par : dict[str,zpar] = {}

        # Names outside the sharable list never match in _get_name, tell the user instead of ignoring them silently
        l_unknown = [ name for name in l_shared if name not in self._l_can_be_shared ]
        if l_unknown:
            log.warning(f'Following parameters cannot be shared and will be ignored: {l_unknown}')
    #-----------------------------------------
    def _get_name(self, name : str, suffix : str) -> str:
        '''
        Returns the bare shared name (e.g. mu) if the parameter was requested as shared,
        otherwise returns the name with the suffix appended
        '''
        for can_be_shared in self._l_can_be_shared:
            if name.startswith(f'{can_be_shared}_') and can_be_shared in self._l_shr:
                return can_be_shared

        return f'{name}{suffix}'
    #-----------------------------------------
    def _get_parameter(self,
                       name   : str,
                       suffix : str,
                       val    : float,
                       low    : float,
                       high   : float) -> zpar:
        '''
        Returns parameter with the final name derived from name and suffix.
        If a parameter with that name was already made, it is reused and val, low, high are ignored
        '''
        name = self._get_name(name, suffix)
        if name in self._d_par:
            return self._d_par[name]

        par = zfit.param.Parameter(name, val, low, high)

        self._d_par[name] = par

        return par
    #-----------------------------------------
    @MethodRegistry.register('exp')
    def _get_exponential(self, suffix : str = '') -> zpdf:
        '''
        Builds exponential PDF
        '''
        c   = self._get_parameter('c_exp', suffix, -0.005, -0.05, 0.00)
        pdf = zfit.pdf.Exponential(c, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('pol1')
    def _get_pol1(self, suffix : str = '') -> zpdf:
        '''
        Builds Chebyshev PDF with one coefficient
        '''
        a   = self._get_parameter('a_pol1', suffix, -0.005, -0.95, 0.00)
        pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a])

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('pol2')
    def _get_pol2(self, suffix : str = '') -> zpdf:
        '''
        Builds Chebyshev PDF with two coefficients
        '''
        a   = self._get_parameter('a_pol2', suffix, -0.005, -0.95, 0.00)
        b   = self._get_parameter('b_pol2', suffix,  0.000, -0.95, 0.95)
        pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b])

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('cbr')
    def _get_cbr(self, suffix : str = '') -> zpdf:
        '''
        Builds CrystalBall PDF whose alpha is constrained to negative values
        '''
        mu  = self._get_parameter('mu_cbr', suffix, 5300, 5250, 5350)
        sg  = self._get_parameter('sg_cbr', suffix,   10,    2,  300)
        ar  = self._get_parameter('ac_cbr', suffix,   -2,  -4.,  -1.)
        nr  = self._get_parameter('nc_cbr', suffix,    1,  0.5,  5.0)

        pdf = zfit.pdf.CrystalBall(mu, sg, ar, nr, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('cbl')
    def _get_cbl(self, suffix : str = '') -> zpdf:
        '''
        Builds CrystalBall PDF whose alpha is constrained to positive values
        '''
        mu  = self._get_parameter('mu_cbl', suffix, 5300, 5250, 5350)
        sg  = self._get_parameter('sg_cbl', suffix,   10,    2,  300)
        al  = self._get_parameter('ac_cbl', suffix,    2,   1.,   4.)
        nl  = self._get_parameter('nc_cbl', suffix,    1,  0.5,  5.0)

        pdf = zfit.pdf.CrystalBall(mu, sg, al, nl, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('gauss')
    def _get_gauss(self, suffix : str = '') -> zpdf:
        '''
        Builds Gaussian PDF
        '''
        mu  = self._get_parameter('mu_gauss', suffix, 5300, 5250, 5350)
        sg  = self._get_parameter('sg_gauss', suffix,   10,    2,  300)

        pdf = zfit.pdf.Gauss(mu, sg, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('dscb')
    def _get_dscb(self, suffix : str = '') -> zpdf:
        '''
        Builds DoubleCB PDF
        '''
        mu  = self._get_parameter('mu_dscb', suffix, 5300, 5250, 5400)
        sg  = self._get_parameter('sg_dscb', suffix,   10,    2,   30)
        ar  = self._get_parameter('ar_dscb', suffix,    1,    0,    5)
        al  = self._get_parameter('al_dscb', suffix,    1,    0,    5)
        nr  = self._get_parameter('nr_dscb', suffix,    2,    1,    5)
        nl  = self._get_parameter('nl_dscb', suffix,    2,    0,    5)

        pdf = zfit.pdf.DoubleCB(mu, sg, al, nl, ar, nr, self._obs)

        return pdf
    #-----------------------------------------
    def _get_pdf_types(self) -> list[tuple[str,str]]:
        '''
        Returns list of (nickname, suffix) pairs, one per requested PDF.
        The suffix is _N where N counts how many times the nickname was seen so far, starting at 1
        '''
        d_name_freq : dict[str,int] = {}

        l_type = []
        for name in self._l_pdf:
            d_name_freq[name] = d_name_freq.get(name, 0) + 1
            frq               = f'_{d_name_freq[name]}'

            l_type.append((name, frq))

        return l_type
    #-----------------------------------------
    def _get_pdf(self, kind : str, preffix : str) -> zpdf:
        '''
        kind   : Nickname of PDF, has to be registered in MethodRegistry
        preffix: String forwarded as the suffix argument of the builder method

        Raises NotImplementedError if no builder was registered for the nickname
        '''
        fun = MethodRegistry.get_method(kind)
        if fun is None:
            raise NotImplementedError(f'PDF of type {kind} is not implemented')

        return fun(self, preffix)
    #-----------------------------------------
    def _add_pdf(self, l_pdf : list[zpdf]) -> zpdf:
        '''
        Returns the sum of the PDFs passed, or the PDF itself if there is only one

        Raises ValueError if the list is empty
        '''
        nfrc = len(l_pdf)
        if nfrc == 0:
            raise ValueError('No PDFs were requested, cannot build model')

        if nfrc == 1:
            log.debug('Requested only one PDF, skipping sum')
            return l_pdf[0]

        # Start from equal fractions, the nfrc - 1 free fractions then add up to less than one
        # for any number of components. For two components this is the 0.5 used so far
        ini  = 1.0 / nfrc
        l_frc= [ zfit.param.Parameter(f'frc_{ifrc + 1}', ini, 0, 1) for ifrc in range(nfrc - 1) ]

        pdf  = zfit.pdf.SumPDF(l_pdf, fracs=l_frc)

        return pdf
    #-----------------------------------------
    def get_pdf(self) -> zpdf:
        '''
        Given the list of nicknames passed to the initializer, returns a zfit PDF which is
        the sum of the corresponding PDFs
        '''
        l_type= self._get_pdf_types()
        l_pdf = [ self._get_pdf(kind, preffix) for kind, preffix in l_type ]
        pdf   = self._add_pdf(l_pdf)

        return pdf
|
207
|
+
#-----------------------------------------
|