data-manipulation-utilities 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ '''
2
+ Module containing derived classes from ZFit minimizer
3
+ '''
4
+ import numpy
5
+
6
+ import zfit
7
+ from zfit.result import FitResult
8
+ from zfit.core.basepdf import BasePDF as zpdf
9
+ from zfit.minimizers.baseminimizer import FailMinimizeNaN
10
+ from dmu.stats.gof_calculator import GofCalculator
11
+ from dmu.logging.log_store import LogStore
12
+
13
+ log = LogStore.add_logger('dmu:ml:minimizers')
14
+ # ------------------------
15
class AnealingMinimizer(zfit.minimize.Minuit):
    '''
    Minuit based minimizer that retries the fit up to a given number of times,
    each retry is preceded by the randomization of the floating parameters.

    The retries stop as soon as a usable fit satisfies the goodness of fit
    threshold (p-value or chi2/ndof, exactly one of them has to be specified).
    If no fit reaches the threshold, the usable fit with the lowest chi2/ndof
    is returned and the PDF parameters are set to that result.
    '''
    # ------------------------
    def __init__(self, ntries : int, pvalue : float = -1, chi2ndof : float = -1):
        '''
        ntries  : Maximum number of minimization attempts
        pvalue  : Stop retrying once a fit has a p-value above this, valid if in [0, 1)
        chi2ndof: Stop retrying once a fit has a chi2/ndof below this, valid if > 0

        Raises ValueError unless exactly one of the thresholds is valid
        '''
        self._ntries   = ntries
        self._pvalue   = pvalue
        self._chi2ndof = chi2ndof

        self._check_thresholds()

        super().__init__()
    # ------------------------
    def _check_thresholds(self) -> None:
        '''
        Checks that one and only one of the thresholds is usable,
        raises ValueError otherwise
        '''
        good_pvalue  = 0 <= self._pvalue < 1
        good_chi2dof = self._chi2ndof > 0

        if good_pvalue and good_chi2dof:
            raise ValueError('Threshold for both chi2 and pvalue were specified')

        if good_pvalue:
            log.debug(f'Will use threshold on pvalue with value: {self._pvalue}')
            return

        if good_chi2dof:
            log.debug(f'Will use threshold on chi2ndof with value: {self._chi2ndof}')
            return

        raise ValueError('Neither pvalue nor chi2 thresholds are valid')
    # ------------------------
    def _is_good_gof(self, ch2 : float, pvl : float) -> bool:
        '''
        ch2: chi2/ndof of the current fit
        pvl: p-value of the current fit

        Returns True if the enabled threshold is satisfied
        '''
        # A negative p-value threshold (the default) means that it is disabled.
        # Zero is accepted by _check_thresholds, so it has to be usable here too
        is_good_pval = self._pvalue   >= 0 and pvl > self._pvalue
        is_good_chi2 = self._chi2ndof >  0 and ch2 < self._chi2ndof
        is_good      = is_good_pval or is_good_chi2

        if is_good_pval:
            log.info(f'Stopping fit, found p-value: {pvl:.3f} > {self._pvalue:.3f}')

        if is_good_chi2:
            log.info(f'Stopping fit, found chi2/ndof: {ch2:.3f} < {self._chi2ndof:.3f}')

        if not is_good:
            log.debug(f'Could not reach threshold, pvalue/chi2: {pvl:.3f}/{ch2:.3f}')

        return is_good
    # ------------------------
    def _is_good_fit(self, res : FitResult) -> bool:
        '''
        Returns True only if the result is valid, has status zero and converged
        '''
        if not res.valid:
            log.warning('Skipping invalid fit')
            return False

        if res.status != 0:
            log.warning('Skipping fit with bad status')
            return False

        if not res.converged:
            log.warning('Skipping non-converging fit')
            return False

        return True
    # ------------------------
    def _get_gof(self, nll) -> tuple[float, float]:
        '''
        Returns tuple with chi2/ndof and p-value, in that order,
        as provided by GofCalculator for the likelihood passed
        '''
        log.debug('Checking GOF')

        gcl = GofCalculator(nll)
        pvl = gcl.get_gof(kind='pvalue')
        ch2 = gcl.get_gof(kind='chi2/ndof')

        return ch2, pvl
    # ------------------------
    def _randomize_parameters(self, nll) -> None:
        '''
        Will move floating parameters of PDF according
        to uniform PDF, between the lower and upper limits of each parameter

        Raises ValueError if the likelihood does not hold exactly one model
        '''
        log.debug('Randomizing parameters')

        model = self._pdf_from_nll(nll)
        # NOTE(review): assumes every floating parameter has finite lower and upper limits -- confirm
        for par in model.get_params(floating=True):
            ival = par.value()
            fval = numpy.random.uniform(par.lower, par.upper)
            par.set_value(fval)
            log.debug(f'{par.name:<20}{ival:<15.3f}{"->":<10}{fval:<15.3f}{"in":<5}{par.lower:<15.3e}{par.upper:<15.3e}')
    # ------------------------
    def _pick_best_fit(self, d_chi2_res : dict) -> FitResult:
        '''
        d_chi2_res: Dictionary mapping chi2/ndof to the corresponding fit result

        Returns result with the lowest chi2/ndof, raises ValueError if dictionary is empty
        '''
        nres = len(d_chi2_res)
        if nres == 0:
            raise ValueError('No fits found')

        # Only the keys are compared, the results themselves are never ordered
        chi2 = min(d_chi2_res)
        res  = d_chi2_res[chi2]

        log.warning(f'Picking out best fit from {nres} fits with chi2: {chi2:.3f}')

        return res
    #------------------------------
    def _set_pdf_pars(self, res : FitResult, pdf : zpdf) -> None:
        '''
        Will set the PDF parameter values to the ones stored in the result instance.
        Parameters that are not part of the result are left untouched
        '''
        l_par_flt = list(pdf.get_params(floating= True))
        l_par_fix = list(pdf.get_params(floating=False))
        l_par     = l_par_flt + l_par_fix

        d_val = { par.name : dc['value'] for par, dc in res.params.items()}

        log.debug('Setting PDF parameters to best result')
        for par in l_par:
            if par.name not in d_val:
                par_val = par.value().numpy()
                log.debug(f'Skipping {par.name} = {par_val:.3e}')
                continue

            val = d_val[par.name]
            log.debug(f'{"":<4}{par.name:<20}{"->":<10}{val:<20.3e}')
            par.set_value(val)
    # ------------------------
    def _pdf_from_nll(self, nll) -> zpdf:
        '''
        Returns the only model attached to the likelihood,
        raises ValueError if there is not exactly one
        '''
        l_model = nll.model
        if len(l_model) != 1:
            raise ValueError('Cannot extract one and only one PDF from NLL')

        return l_model[0]
    # ------------------------
    def minimize(self, nll, **kwargs) -> FitResult:
        '''
        Will run minimization and return FitResult object

        nll   : Likelihood to minimize, must hold one and only one model
        kwargs: Forwarded to the minimize method of the base class

        Returns the first usable fit reaching the threshold, or the usable fit
        with the lowest chi2/ndof if none does. Raises ValueError if no try
        produced a usable fit
        '''

        d_chi2_res : dict[float,FitResult] = {}
        for i_try in range(self._ntries):
            log.info(f'try {i_try:02}/{self._ntries:02}')
            try:
                res = super().minimize(nll, **kwargs)
            except (FailMinimizeNaN, ValueError, RuntimeError) as exc:
                log.warning(exc)
                self._randomize_parameters(nll)
                continue

            if self._is_good_fit(res):
                chi2, pvl = self._get_gof(nll)
                d_chi2_res[chi2] = res

                if self._is_good_gof(chi2, pvl):
                    return res

            # Reached for rejected fits as well, otherwise the next try would
            # start from the same point that has just failed
            self._randomize_parameters(nll)

        res = self._pick_best_fit(d_chi2_res)
        pdf = self._pdf_from_nll(nll)
        self._set_pdf_pars(res, pdf)

        return res
183
+ # ------------------------
@@ -0,0 +1,207 @@
1
+ '''
2
+ Module storing ZModel class
3
+ '''
4
+ # pylint: disable=too-many-lines, import-error
5
+
6
+ from typing import Callable, Union
7
+
8
+ import zfit
9
+ from zfit.core.interfaces import ZfitSpace as zobs
10
+ from zfit.core.basepdf import BasePDF as zpdf
11
+ from zfit.core.parameter import Parameter as zpar
12
+ from dmu.logging.log_store import LogStore
13
+
14
+ log=LogStore.add_logger('dmu:stats:model_factory')
15
+ #-----------------------------------------
16
class MethodRegistry:
    '''
    Lookup table from nickname to method, filled through the `register`
    decorator and read back through `get_method`. Meant to hold the protected
    builder methods of the ModelFactory class, defined in this same module
    '''
    # Single dictionary owned by the class, nickname -> registered method
    _d_method = {}

    @classmethod
    def register(cls, nickname : str):
        '''
        Returns decorator that files the decorated method under the nickname
        passed and gives the method back untouched
        '''
        def store(method):
            cls._d_method.update({nickname : method})

            return method

        return store

    @classmethod
    def get_method(cls, nickname : str) -> Union[Callable,None]:
        '''
        Returns method registered under the input nickname,
        or None if nothing was registered with it
        '''
        if nickname not in cls._d_method:
            return None

        return cls._d_method[nickname]
41
+ #-----------------------------------------
42
class ModelFactory:
    '''
    Class used to create Zfit PDFs by passing only the nicknames, e.g.:

    ```python
    from dmu.stats.model_factory import ModelFactory

    l_pdf = ['dscb', 'gauss']
    l_shr = ['mu']
    mod   = ModelFactory(obs = obs, l_pdf = l_pdf, l_shared=l_shr)
    pdf   = mod.get_pdf()
    ```

    where one can specify which parameters can be shared among the PDFs.
    Every parameter built is cached by name, so that repeated requests for
    the same name return the same parameter instance
    '''
    #-----------------------------------------
    def __init__(self, obs : zobs, l_pdf : list[str], l_shared : list[str]):
        '''
        obs:      zfit observable
        l_pdf:    List of PDF nicknames which are registered below
        l_shared: List of parameter names that are shared, only `mu` and `sg` can be shared
        '''

        self._l_pdf           = l_pdf
        self._l_shr           = l_shared
        self._l_can_be_shared = ['mu', 'sg']
        self._obs             = obs

        # Cache of parameters already built, keyed by parameter name
        self._d_par : dict[str,zpar] = {}

        # These names never match in _get_name, tell the user instead of dropping them silently
        l_ignored = [ name for name in l_shared if name not in self._l_can_be_shared ]
        if l_ignored:
            log.warning(f'Following parameters cannot be shared and will be ignored: {l_ignored}')
    #-----------------------------------------
    def _get_name(self, name : str, suffix : str) -> str:
        '''
        Returns name of parameter: the bare shared name (e.g. `mu`) if the
        parameter was requested as shared, otherwise the name followed by the suffix
        '''
        for can_be_shared in self._l_can_be_shared:
            if name.startswith(f'{can_be_shared}_') and can_be_shared in self._l_shr:
                return can_be_shared

        return f'{name}{suffix}'
    #-----------------------------------------
    def _get_parameter(self,
                       name   : str,
                       suffix : str,
                       val    : float,
                       low    : float,
                       high   : float) -> zpar:
        '''
        Returns parameter with the name built from `name` and `suffix`.
        The parameter is created with the value and limits passed only
        the first time, later requests return the cached instance
        '''
        name = self._get_name(name, suffix)
        if name in self._d_par:
            return self._d_par[name]

        par = zfit.param.Parameter(name, val, low, high)

        self._d_par[name] = par

        return par
    #-----------------------------------------
    @MethodRegistry.register('exp')
    def _get_exponential(self, suffix : str = '') -> zpdf:
        '''
        Builds exponential PDF, registered as `exp`
        '''
        c   = self._get_parameter('c_exp', suffix, -0.005, -0.05, 0.00)
        pdf = zfit.pdf.Exponential(c, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('pol1')
    def _get_pol1(self, suffix : str = '') -> zpdf:
        '''
        Builds Chebyshev PDF with one coefficient, registered as `pol1`
        '''
        a   = self._get_parameter('a_pol1', suffix, -0.005, -0.95, 0.00)
        pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a])

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('pol2')
    def _get_pol2(self, suffix : str = '') -> zpdf:
        '''
        Builds Chebyshev PDF with two coefficients, registered as `pol2`
        '''
        a   = self._get_parameter('a_pol2', suffix, -0.005, -0.95, 0.00)
        b   = self._get_parameter('b_pol2', suffix,  0.000, -0.95, 0.95)
        pdf = zfit.pdf.Chebyshev(obs=self._obs, coeffs=[a, b])

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('cbr')
    def _get_cbr(self, suffix : str = '') -> zpdf:
        '''
        Builds CrystalBall PDF with negative alpha, registered as `cbr`
        '''
        mu  = self._get_parameter('mu_cbr', suffix, 5300, 5250, 5350)
        sg  = self._get_parameter('sg_cbr', suffix,   10,    2,  300)
        ar  = self._get_parameter('ac_cbr', suffix,   -2,  -4.,  -1.)
        nr  = self._get_parameter('nc_cbr', suffix,    1,  0.5,  5.0)

        pdf = zfit.pdf.CrystalBall(mu, sg, ar, nr, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('cbl')
    def _get_cbl(self, suffix : str = '') -> zpdf:
        '''
        Builds CrystalBall PDF with positive alpha, registered as `cbl`
        '''
        mu  = self._get_parameter('mu_cbl', suffix, 5300, 5250, 5350)
        sg  = self._get_parameter('sg_cbl', suffix,   10,    2,  300)
        al  = self._get_parameter('ac_cbl', suffix,    2,   1.,   4.)
        nl  = self._get_parameter('nc_cbl', suffix,    1,  0.5,  5.0)

        pdf = zfit.pdf.CrystalBall(mu, sg, al, nl, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('gauss')
    def _get_gauss(self, suffix : str = '') -> zpdf:
        '''
        Builds Gaussian PDF, registered as `gauss`
        '''
        mu  = self._get_parameter('mu_gauss', suffix, 5300, 5250, 5350)
        sg  = self._get_parameter('sg_gauss', suffix,   10,    2,  300)

        pdf = zfit.pdf.Gauss(mu, sg, self._obs)

        return pdf
    #-----------------------------------------
    @MethodRegistry.register('dscb')
    def _get_dscb(self, suffix : str = '') -> zpdf:
        '''
        Builds DoubleCB PDF, registered as `dscb`
        '''
        mu  = self._get_parameter('mu_dscb', suffix, 5300, 5250, 5400)
        sg  = self._get_parameter('sg_dscb', suffix,   10,    2,   30)
        ar  = self._get_parameter('ar_dscb', suffix,    1,    0,    5)
        al  = self._get_parameter('al_dscb', suffix,    1,    0,    5)
        nr  = self._get_parameter('nr_dscb', suffix,    2,    1,    5)
        nl  = self._get_parameter('nl_dscb', suffix,    2,    0,    5)

        pdf = zfit.pdf.DoubleCB(mu, sg, al, nl, ar, nr, self._obs)

        return pdf
    #-----------------------------------------
    def _get_pdf_types(self) -> list[tuple[str,str]]:
        '''
        Returns list of tuples with the PDF nickname and a suffix `_N`, where N counts
        how many times the nickname has appeared so far, e.g. two `gauss` entries
        get the suffixes `_1` and `_2`
        '''
        d_name_freq : dict[str,int] = {}

        l_type = []
        for name in self._l_pdf:
            d_name_freq[name] = d_name_freq.get(name, 0) + 1
            frq               = f'_{d_name_freq[name]}'

            l_type.append((name, frq))

        return l_type
    #-----------------------------------------
    def _get_pdf(self, kind : str, preffix : str) -> zpdf:
        '''
        kind   : Nickname of the PDF, as used when registering the builder
        preffix: String passed to the builder as the suffix of the parameter names

        Raises NotImplementedError if no builder was registered for `kind`
        '''
        fun = MethodRegistry.get_method(kind)
        if fun is None:
            raise NotImplementedError(f'PDF of type {kind} is not implemented')

        # The registry holds plain functions, the instance has to be passed explicitly
        return fun(self, preffix)
    #-----------------------------------------
    def _add_pdf(self, l_pdf : list[zpdf]) -> zpdf:
        '''
        Returns the only PDF if a single one is passed, otherwise a SumPDF
        with one fraction parameter fewer than the number of PDFs

        Raises ValueError if the list is empty
        '''
        nfrc = len(l_pdf)
        if nfrc == 0:
            raise ValueError('No PDFs were requested, cannot build model')

        if nfrc == 1:
            log.debug('Requested only one PDF, skipping sum')
            return l_pdf[0]

        # Fractions go through the same cache as every other parameter, so that
        # building the PDF again reuses them instead of creating parameters
        # with names that are already taken
        l_frc = [ self._get_parameter(f'frc_{ifrc + 1}', '', 0.5, 0, 1) for ifrc in range(nfrc - 1) ]

        pdf = zfit.pdf.SumPDF(l_pdf, fracs=l_frc)

        return pdf
    #-----------------------------------------
    def get_pdf(self) -> zpdf:
        '''
        Given a list of strings representing PDFs, returns a zfit PDF which is
        the sum of them, or the PDF itself if only one was requested
        '''
        l_type= self._get_pdf_types()
        l_pdf = [ self._get_pdf(kind, preffix) for kind, preffix in l_type ]
        pdf   = self._add_pdf(l_pdf)

        return pdf
207
+ #-----------------------------------------