data-manipulation-utilities 0.1.6__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/PKG-INFO +102 -3
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/README.md +100 -1
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/pyproject.toml +1 -1
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/PKG-INFO +102 -3
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/SOURCES.txt +4 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/plotting/plotter.py +1 -1
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/plotting/plotter_1d.py +54 -32
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/plotting/plotter_2d.py +1 -1
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/fitter.py +16 -9
- data_manipulation_utilities-0.1.9/src/dmu/stats/gof_calculator.py +145 -0
- data_manipulation_utilities-0.1.9/src/dmu/stats/minimizers.py +183 -0
- data_manipulation_utilities-0.1.9/src/dmu/stats/model_factory.py +207 -0
- data_manipulation_utilities-0.1.9/src/dmu/stats/zfit_plotter.py +527 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/2d.yaml +7 -1
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/setup.cfg +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/entry_points.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/requires.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/arrays/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/generic/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/logging/log_store.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/cv_classifier.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/cv_predict.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/train_mva.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rdataframe/atr_mgr.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rdataframe/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rfile/rfprinter.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rfile/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/function.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/testing/utilities.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/text/transformer.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/__init__.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/name.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/simple.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/title.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/weights.yaml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform.toml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform_set.toml +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform_set.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform_trf.txt +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/git/publish +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/physics/check_truth.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/rfile/print_trees.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/ssh/coned.py +0 -0
- {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: data_manipulation_utilities
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.9
|
4
4
|
Description-Content-Type: text/markdown
|
5
5
|
Requires-Dist: logzero
|
6
6
|
Requires-Dist: PyYAML
|
@@ -41,7 +41,7 @@ such that:
|
|
41
41
|
|
42
42
|
Then, for each remote it pushes the tags and the commits.
|
43
43
|
|
44
|
-
*Why?*
|
44
|
+
*Why?*
|
45
45
|
|
46
46
|
1. Tags should be named as the project's version
|
47
47
|
1. As soon as a new version is created, that version needs to be tagged.
|
@@ -121,6 +121,24 @@ samples:
|
|
121
121
|
|
122
122
|
## PDFs
|
123
123
|
|
124
|
+
### Model building
|
125
|
+
|
126
|
+
In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
|
127
|
+
In these PDFs certain parameters (e.g. $\mu$ or $\sigma$) need to be shared. This project provides
|
128
|
+
`ModelFactory`, which can do this as shown below:
|
129
|
+
|
130
|
+
```python
|
131
|
+
from dmu.stats.model_factory import ModelFactory
|
132
|
+
|
133
|
+
l_pdf = ['cbr'] + 2 * ['cbl']
|
134
|
+
l_shr = ['mu', 'sg']
|
135
|
+
mod = ModelFactory(obs = Data.obs, l_pdf = l_pdf, l_shared=l_shr)
|
136
|
+
pdf = mod.get_pdf()
|
137
|
+
```
|
138
|
+
|
139
|
+
where the model is a sum of three `CrystalBall` PDFs, one with a right tail and two with a left tail.
|
140
|
+
The `mu` and `sg` parameters are shared.
|
141
|
+
|
124
142
|
### Printing PDFs
|
125
143
|
|
126
144
|
One can print a zfit PDF by doing:
|
@@ -231,6 +249,87 @@ likelihood :
|
|
231
249
|
nbins : 100 #If specified, will do binned likelihood fit instead of unbinned
|
232
250
|
```
|
233
251
|
|
252
|
+
## Minimizers
|
253
|
+
|
254
|
+
These are alternative implementations of the minimizers in zfit meant to be used for special types of fits.
|
255
|
+
|
256
|
+
### Annealing minimizer
|
257
|
+
|
258
|
+
This minimizer is meant to be used for fits to models with many parameters, where multiple minima are expected in the
|
259
|
+
likelihood. The minimizer use is illustrated in:
|
260
|
+
|
261
|
+
```python
|
262
|
+
from dmu.stats.minimizers import AnealingMinimizer
|
263
|
+
|
264
|
+
nll = _get_nll()
|
265
|
+
minimizer = AnealingMinimizer(ntries=10, pvalue=0.05)
|
266
|
+
res = minimizer.minimize(nll)
|
267
|
+
```
|
268
|
+
|
269
|
+
this will:
|
270
|
+
|
271
|
+
- Take the `NLL` object.
|
272
|
+
- Try fitting at most 10 times
|
273
|
+
- After each fit, calculate the goodness of fit (in this case the p-value)
|
274
|
+
- Stop when the number of tries has been exhausted or the p-value reached is higher than `0.05`
|
275
|
+
- If the fit has not succeeded because of convergence, validity or goodness of fit issues,
|
276
|
+
randomize the parameters and try again.
|
277
|
+
- If the desired goodness of fit has not been achieved, pick the best result.
|
278
|
+
- Return the `FitResult` object and set the PDF to the final fit result.
|
279
|
+
|
280
|
+
The $\chi^2/Ndof$ can also be used as in:
|
281
|
+
|
282
|
+
```python
|
283
|
+
from dmu.stats.minimizers import AnealingMinimizer
|
284
|
+
|
285
|
+
nll = _get_nll()
|
286
|
+
minimizer = AnealingMinimizer(ntries=10, chi2ndof=1.00)
|
287
|
+
res = minimizer.minimize(nll)
|
288
|
+
```
|
289
|
+
|
290
|
+
## Fit plotting
|
291
|
+
|
292
|
+
The class `ZFitPlotter` can be used to plot fits done with zfit. For a complete set of examples of how to use
|
293
|
+
this class check the [tests](tests/stats/test_fit_plotter.py). A simple example of its usage is below:
|
294
|
+
|
295
|
+
```python
|
296
|
+
from dmu.stats.zfit_plotter import ZFitPlotter
|
297
|
+
|
298
|
+
obs = zfit.Space('m', limits=(0, 10))
|
299
|
+
|
300
|
+
# Create signal PDF
|
301
|
+
mu = zfit.Parameter("mu", 5.0, 0, 10)
|
302
|
+
sg = zfit.Parameter("sg", 0.5, 0, 5)
|
303
|
+
sig = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sg)
|
304
|
+
nsg = zfit.Parameter('nsg', 1000, 0, 10000)
|
305
|
+
esig= sig.create_extended(nsg, name='gauss')
|
306
|
+
|
307
|
+
# Create background PDF
|
308
|
+
lm = zfit.Parameter('lm', -0.1, -1, 0)
|
309
|
+
bkg = zfit.pdf.Exponential(obs=obs, lam=lm)
|
310
|
+
nbk = zfit.Parameter('nbk', 1000, 0, 10000)
|
311
|
+
ebkg= bkg.create_extended(nbk, name='expo')
|
312
|
+
|
313
|
+
# Add them
|
314
|
+
pdf = zfit.pdf.SumPDF([ebkg, esig])
|
315
|
+
sam = pdf.create_sampler()
|
316
|
+
|
317
|
+
# Plot them
|
318
|
+
obj = ZFitPlotter(data=sam, model=pdf)
|
319
|
+
d_leg = {'gauss': 'New Gauss'}
|
320
|
+
obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
|
321
|
+
|
322
|
+
# add a line to pull hist
|
323
|
+
obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
|
324
|
+
```
|
325
|
+
|
326
|
+
this class supports:
|
327
|
+
|
328
|
+
- Handling of the title, legend and plot size.
|
329
|
+
- Adding pulls.
|
330
|
+
- Stacking and overlaying of PDFs.
|
331
|
+
- Blinding.
|
332
|
+
|
234
333
|
## Arrays
|
235
334
|
|
236
335
|
### Scaling by non-integer
|
@@ -21,7 +21,7 @@ such that:
|
|
21
21
|
|
22
22
|
Then, for each remote it pushes the tags and the commits.
|
23
23
|
|
24
|
-
*Why?*
|
24
|
+
*Why?*
|
25
25
|
|
26
26
|
1. Tags should be named as the project's version
|
27
27
|
1. As soon as a new version is created, that version needs to be tagged.
|
@@ -101,6 +101,24 @@ samples:
|
|
101
101
|
|
102
102
|
## PDFs
|
103
103
|
|
104
|
+
### Model building
|
105
|
+
|
106
|
+
In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
|
107
|
+
In these PDFs certain parameters (e.g. $\mu$ or $\sigma$) need to be shared. This project provides
|
108
|
+
`ModelFactory`, which can do this as shown below:
|
109
|
+
|
110
|
+
```python
|
111
|
+
from dmu.stats.model_factory import ModelFactory
|
112
|
+
|
113
|
+
l_pdf = ['cbr'] + 2 * ['cbl']
|
114
|
+
l_shr = ['mu', 'sg']
|
115
|
+
mod = ModelFactory(obs = Data.obs, l_pdf = l_pdf, l_shared=l_shr)
|
116
|
+
pdf = mod.get_pdf()
|
117
|
+
```
|
118
|
+
|
119
|
+
where the model is a sum of three `CrystalBall` PDFs, one with a right tail and two with a left tail.
|
120
|
+
The `mu` and `sg` parameters are shared.
|
121
|
+
|
104
122
|
### Printing PDFs
|
105
123
|
|
106
124
|
One can print a zfit PDF by doing:
|
@@ -211,6 +229,87 @@ likelihood :
|
|
211
229
|
nbins : 100 #If specified, will do binned likelihood fit instead of unbinned
|
212
230
|
```
|
213
231
|
|
232
|
+
## Minimizers
|
233
|
+
|
234
|
+
These are alternative implementations of the minimizers in zfit meant to be used for special types of fits.
|
235
|
+
|
236
|
+
### Annealing minimizer
|
237
|
+
|
238
|
+
This minimizer is meant to be used for fits to models with many parameters, where multiple minima are expected in the
|
239
|
+
likelihood. The minimizer use is illustrated in:
|
240
|
+
|
241
|
+
```python
|
242
|
+
from dmu.stats.minimizers import AnealingMinimizer
|
243
|
+
|
244
|
+
nll = _get_nll()
|
245
|
+
minimizer = AnealingMinimizer(ntries=10, pvalue=0.05)
|
246
|
+
res = minimizer.minimize(nll)
|
247
|
+
```
|
248
|
+
|
249
|
+
this will:
|
250
|
+
|
251
|
+
- Take the `NLL` object.
|
252
|
+
- Try fitting at most 10 times
|
253
|
+
- After each fit, calculate the goodness of fit (in this case the p-value)
|
254
|
+
- Stop when the number of tries has been exhausted or the p-value reached is higher than `0.05`
|
255
|
+
- If the fit has not succeeded because of convergence, validity or goodness of fit issues,
|
256
|
+
randomize the parameters and try again.
|
257
|
+
- If the desired goodness of fit has not been achieved, pick the best result.
|
258
|
+
- Return the `FitResult` object and set the PDF to the final fit result.
|
259
|
+
|
260
|
+
The $\chi^2/Ndof$ can also be used as in:
|
261
|
+
|
262
|
+
```python
|
263
|
+
from dmu.stats.minimizers import AnealingMinimizer
|
264
|
+
|
265
|
+
nll = _get_nll()
|
266
|
+
minimizer = AnealingMinimizer(ntries=10, chi2ndof=1.00)
|
267
|
+
res = minimizer.minimize(nll)
|
268
|
+
```
|
269
|
+
|
270
|
+
## Fit plotting
|
271
|
+
|
272
|
+
The class `ZFitPlotter` can be used to plot fits done with zfit. For a complete set of examples of how to use
|
273
|
+
this class check the [tests](tests/stats/test_fit_plotter.py). A simple example of its usage is below:
|
274
|
+
|
275
|
+
```python
|
276
|
+
from dmu.stats.zfit_plotter import ZFitPlotter
|
277
|
+
|
278
|
+
obs = zfit.Space('m', limits=(0, 10))
|
279
|
+
|
280
|
+
# Create signal PDF
|
281
|
+
mu = zfit.Parameter("mu", 5.0, 0, 10)
|
282
|
+
sg = zfit.Parameter("sg", 0.5, 0, 5)
|
283
|
+
sig = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sg)
|
284
|
+
nsg = zfit.Parameter('nsg', 1000, 0, 10000)
|
285
|
+
esig= sig.create_extended(nsg, name='gauss')
|
286
|
+
|
287
|
+
# Create background PDF
|
288
|
+
lm = zfit.Parameter('lm', -0.1, -1, 0)
|
289
|
+
bkg = zfit.pdf.Exponential(obs=obs, lam=lm)
|
290
|
+
nbk = zfit.Parameter('nbk', 1000, 0, 10000)
|
291
|
+
ebkg= bkg.create_extended(nbk, name='expo')
|
292
|
+
|
293
|
+
# Add them
|
294
|
+
pdf = zfit.pdf.SumPDF([ebkg, esig])
|
295
|
+
sam = pdf.create_sampler()
|
296
|
+
|
297
|
+
# Plot them
|
298
|
+
obj = ZFitPlotter(data=sam, model=pdf)
|
299
|
+
d_leg = {'gauss': 'New Gauss'}
|
300
|
+
obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
|
301
|
+
|
302
|
+
# add a line to pull hist
|
303
|
+
obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
|
304
|
+
```
|
305
|
+
|
306
|
+
this class supports:
|
307
|
+
|
308
|
+
- Handling of the title, legend and plot size.
|
309
|
+
- Adding pulls.
|
310
|
+
- Stacking and overlaying of PDFs.
|
311
|
+
- Blinding.
|
312
|
+
|
214
313
|
## Arrays
|
215
314
|
|
216
315
|
### Scaling by non-integer
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: data_manipulation_utilities
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.9
|
4
4
|
Description-Content-Type: text/markdown
|
5
5
|
Requires-Dist: logzero
|
6
6
|
Requires-Dist: PyYAML
|
@@ -41,7 +41,7 @@ such that:
|
|
41
41
|
|
42
42
|
Then, for each remote it pushes the tags and the commits.
|
43
43
|
|
44
|
-
*Why?*
|
44
|
+
*Why?*
|
45
45
|
|
46
46
|
1. Tags should be named as the project's version
|
47
47
|
1. As soon as a new version is created, that version needs to be tagged.
|
@@ -121,6 +121,24 @@ samples:
|
|
121
121
|
|
122
122
|
## PDFs
|
123
123
|
|
124
|
+
### Model building
|
125
|
+
|
126
|
+
In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
|
127
|
+
In these PDFs certain parameters (e.g. $\mu$ or $\sigma$) need to be shared. This project provides
|
128
|
+
`ModelFactory`, which can do this as shown below:
|
129
|
+
|
130
|
+
```python
|
131
|
+
from dmu.stats.model_factory import ModelFactory
|
132
|
+
|
133
|
+
l_pdf = ['cbr'] + 2 * ['cbl']
|
134
|
+
l_shr = ['mu', 'sg']
|
135
|
+
mod = ModelFactory(obs = Data.obs, l_pdf = l_pdf, l_shared=l_shr)
|
136
|
+
pdf = mod.get_pdf()
|
137
|
+
```
|
138
|
+
|
139
|
+
where the model is a sum of three `CrystalBall` PDFs, one with a right tail and two with a left tail.
|
140
|
+
The `mu` and `sg` parameters are shared.
|
141
|
+
|
124
142
|
### Printing PDFs
|
125
143
|
|
126
144
|
One can print a zfit PDF by doing:
|
@@ -231,6 +249,87 @@ likelihood :
|
|
231
249
|
nbins : 100 #If specified, will do binned likelihood fit instead of unbinned
|
232
250
|
```
|
233
251
|
|
252
|
+
## Minimizers
|
253
|
+
|
254
|
+
These are alternative implementations of the minimizers in zfit meant to be used for special types of fits.
|
255
|
+
|
256
|
+
### Annealing minimizer
|
257
|
+
|
258
|
+
This minimizer is meant to be used for fits to models with many parameters, where multiple minima are expected in the
|
259
|
+
likelihood. The minimizer use is illustrated in:
|
260
|
+
|
261
|
+
```python
|
262
|
+
from dmu.stats.minimizers import AnealingMinimizer
|
263
|
+
|
264
|
+
nll = _get_nll()
|
265
|
+
minimizer = AnealingMinimizer(ntries=10, pvalue=0.05)
|
266
|
+
res = minimizer.minimize(nll)
|
267
|
+
```
|
268
|
+
|
269
|
+
this will:
|
270
|
+
|
271
|
+
- Take the `NLL` object.
|
272
|
+
- Try fitting at most 10 times
|
273
|
+
- After each fit, calculate the goodness of fit (in this case the p-value)
|
274
|
+
- Stop when the number of tries has been exhausted or the p-value reached is higher than `0.05`
|
275
|
+
- If the fit has not succeeded because of convergence, validity or goodness of fit issues,
|
276
|
+
randomize the parameters and try again.
|
277
|
+
- If the desired goodness of fit has not been achieved, pick the best result.
|
278
|
+
- Return the `FitResult` object and set the PDF to the final fit result.
|
279
|
+
|
280
|
+
The $\chi^2/Ndof$ can also be used as in:
|
281
|
+
|
282
|
+
```python
|
283
|
+
from dmu.stats.minimizers import AnealingMinimizer
|
284
|
+
|
285
|
+
nll = _get_nll()
|
286
|
+
minimizer = AnealingMinimizer(ntries=10, chi2ndof=1.00)
|
287
|
+
res = minimizer.minimize(nll)
|
288
|
+
```
|
289
|
+
|
290
|
+
## Fit plotting
|
291
|
+
|
292
|
+
The class `ZFitPlotter` can be used to plot fits done with zfit. For a complete set of examples of how to use
|
293
|
+
this class check the [tests](tests/stats/test_fit_plotter.py). A simple example of its usage is below:
|
294
|
+
|
295
|
+
```python
|
296
|
+
from dmu.stats.zfit_plotter import ZFitPlotter
|
297
|
+
|
298
|
+
obs = zfit.Space('m', limits=(0, 10))
|
299
|
+
|
300
|
+
# Create signal PDF
|
301
|
+
mu = zfit.Parameter("mu", 5.0, 0, 10)
|
302
|
+
sg = zfit.Parameter("sg", 0.5, 0, 5)
|
303
|
+
sig = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sg)
|
304
|
+
nsg = zfit.Parameter('nsg', 1000, 0, 10000)
|
305
|
+
esig= sig.create_extended(nsg, name='gauss')
|
306
|
+
|
307
|
+
# Create background PDF
|
308
|
+
lm = zfit.Parameter('lm', -0.1, -1, 0)
|
309
|
+
bkg = zfit.pdf.Exponential(obs=obs, lam=lm)
|
310
|
+
nbk = zfit.Parameter('nbk', 1000, 0, 10000)
|
311
|
+
ebkg= bkg.create_extended(nbk, name='expo')
|
312
|
+
|
313
|
+
# Add them
|
314
|
+
pdf = zfit.pdf.SumPDF([ebkg, esig])
|
315
|
+
sam = pdf.create_sampler()
|
316
|
+
|
317
|
+
# Plot them
|
318
|
+
obj = ZFitPlotter(data=sam, model=pdf)
|
319
|
+
d_leg = {'gauss': 'New Gauss'}
|
320
|
+
obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
|
321
|
+
|
322
|
+
# add a line to pull hist
|
323
|
+
obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
|
324
|
+
```
|
325
|
+
|
326
|
+
this class supports:
|
327
|
+
|
328
|
+
- Handling of the title, legend and plot size.
|
329
|
+
- Adding pulls.
|
330
|
+
- Stacking and overlaying of PDFs.
|
331
|
+
- Blinding.
|
332
|
+
|
234
333
|
## Arrays
|
235
334
|
|
236
335
|
### Scaling by non-integer
|
@@ -22,7 +22,11 @@ src/dmu/rfile/rfprinter.py
|
|
22
22
|
src/dmu/rfile/utilities.py
|
23
23
|
src/dmu/stats/fitter.py
|
24
24
|
src/dmu/stats/function.py
|
25
|
+
src/dmu/stats/gof_calculator.py
|
26
|
+
src/dmu/stats/minimizers.py
|
27
|
+
src/dmu/stats/model_factory.py
|
25
28
|
src/dmu/stats/utilities.py
|
29
|
+
src/dmu/stats/zfit_plotter.py
|
26
30
|
src/dmu/testing/utilities.py
|
27
31
|
src/dmu/text/transformer.py
|
28
32
|
src/dmu_data/__init__.py
|
@@ -2,6 +2,9 @@
|
|
2
2
|
Module containing plotter class
|
3
3
|
'''
|
4
4
|
|
5
|
+
import hist
|
6
|
+
from hist import Hist
|
7
|
+
|
5
8
|
import numpy
|
6
9
|
import matplotlib.pyplot as plt
|
7
10
|
|
@@ -33,58 +36,75 @@ class Plotter1D(Plotter):
|
|
33
36
|
|
34
37
|
return xname, yname
|
35
38
|
#-------------------------------------
|
36
|
-
def
|
39
|
+
def _is_normalized(self, var : str) -> bool:
|
40
|
+
d_cfg = self._d_cfg['plots'][var]
|
41
|
+
normalized=False
|
42
|
+
if 'normalized' in d_cfg:
|
43
|
+
normalized = d_cfg['normalized']
|
44
|
+
|
45
|
+
return normalized
|
46
|
+
#-------------------------------------
|
47
|
+
def _get_binning(self, var : str, d_data : dict[str, numpy.ndarray]) -> tuple[float, float, int]:
|
48
|
+
d_cfg = self._d_cfg['plots'][var]
|
49
|
+
minx, maxx, bins = d_cfg['binning']
|
50
|
+
if maxx <= minx + 1e-5:
|
51
|
+
log.info(f'Bounds not set for {var}, will calculated them')
|
52
|
+
minx, maxx = self._find_bounds(d_data = d_data, qnt=minx)
|
53
|
+
log.info(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
|
54
|
+
else:
|
55
|
+
log.debug(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
|
56
|
+
|
57
|
+
return minx, maxx, bins
|
58
|
+
#-------------------------------------
|
59
|
+
def _plot_var(self, var : str) -> float:
|
37
60
|
'''
|
38
61
|
Will plot a variable from a dictionary of dataframes
|
39
62
|
Parameters
|
40
63
|
--------------------
|
41
64
|
var (str) : name of column
|
65
|
+
|
66
|
+
Return
|
67
|
+
--------------------
|
68
|
+
Largest bin content among all bins and among all histograms plotted
|
42
69
|
'''
|
43
70
|
# pylint: disable=too-many-locals
|
44
71
|
|
45
|
-
d_cfg = self._d_cfg['plots'][var]
|
46
|
-
|
47
|
-
minx, maxx, bins = d_cfg['binning']
|
48
|
-
yscale = d_cfg['yscale' ] if 'yscale' in d_cfg else 'linear'
|
49
|
-
xname, yname = self._get_labels(var)
|
50
|
-
|
51
|
-
normalized=False
|
52
|
-
if 'normalized' in d_cfg:
|
53
|
-
normalized = d_cfg['normalized']
|
54
|
-
|
55
|
-
title = ''
|
56
|
-
if 'title' in d_cfg:
|
57
|
-
title = d_cfg['title']
|
58
|
-
|
59
72
|
d_data = {}
|
60
73
|
for name, rdf in self._d_rdf.items():
|
61
74
|
d_data[name] = rdf.AsNumpy([var])[var]
|
62
75
|
|
63
|
-
|
64
|
-
|
65
|
-
minx, maxx = self._find_bounds(d_data = d_data, qnt=minx)
|
66
|
-
log.info(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
|
67
|
-
else:
|
68
|
-
log.debug(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
|
76
|
+
minx, maxx, bins = self._get_binning(var, d_data)
|
77
|
+
d_wgt = self._get_weights(var)
|
69
78
|
|
70
79
|
l_bc_all = []
|
71
|
-
d_wgt = self._get_weights(var)
|
72
80
|
for name, arr_val in d_data.items():
|
73
|
-
arr_wgt
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
l_bc_all
|
81
|
+
arr_wgt = d_wgt[name] if d_wgt is not None else numpy.ones_like(arr_val)
|
82
|
+
hst = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x', label=name).Weight()
|
83
|
+
hst.fill(x=arr_val, weight=arr_wgt)
|
84
|
+
hst.plot(label=name)
|
85
|
+
l_bc_all += hst.values().tolist()
|
78
86
|
|
79
|
-
|
80
|
-
plt.xlabel(xname)
|
81
|
-
plt.ylabel(yname)
|
87
|
+
max_y = max(l_bc_all)
|
82
88
|
|
89
|
+
return max_y
|
90
|
+
# --------------------------------------------
|
91
|
+
def _style_plot(self, var : str, max_y : float) -> None:
|
92
|
+
d_cfg = self._d_cfg['plots'][var]
|
93
|
+
yscale = d_cfg['yscale' ] if 'yscale' in d_cfg else 'linear'
|
94
|
+
|
95
|
+
xname, yname = self._get_labels(var)
|
96
|
+
plt.xlabel(xname)
|
97
|
+
plt.ylabel(yname)
|
98
|
+
plt.yscale(yscale)
|
83
99
|
if yscale == 'linear':
|
84
100
|
plt.ylim(bottom=0)
|
85
101
|
|
86
|
-
|
102
|
+
title = ''
|
103
|
+
if 'title' in d_cfg:
|
104
|
+
title = d_cfg['title']
|
105
|
+
|
87
106
|
plt.ylim(top=1.2 * max_y)
|
107
|
+
plt.legend()
|
88
108
|
plt.title(title)
|
89
109
|
# --------------------------------------------
|
90
110
|
def _plot_lines(self, var : str):
|
@@ -106,8 +126,10 @@ class Plotter1D(Plotter):
|
|
106
126
|
fig_size = self._get_fig_size()
|
107
127
|
for var in self._d_cfg['plots']:
|
108
128
|
log.debug(f'Plotting: {var}')
|
129
|
+
|
109
130
|
plt.figure(var, figsize=fig_size)
|
110
|
-
self._plot_var(var)
|
131
|
+
max_y = self._plot_var(var)
|
132
|
+
self._style_plot(var, max_y)
|
111
133
|
self._plot_lines(var)
|
112
134
|
self._save_plot(var)
|
113
135
|
# --------------------------------------------
|
@@ -31,8 +31,8 @@ class Plotter2D(Plotter):
|
|
31
31
|
if not isinstance(cfg, dict):
|
32
32
|
raise ValueError('Config dictionary not passed')
|
33
33
|
|
34
|
-
self._rdf : RDataFrame = rdf
|
35
34
|
self._d_cfg : dict = cfg
|
35
|
+
self._rdf : RDataFrame = super()._preprocess_rdf(rdf)
|
36
36
|
|
37
37
|
self._wgt : numpy.ndarray
|
38
38
|
# --------------------------------------------
|
{data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/fitter.py
RENAMED
@@ -4,6 +4,7 @@ Module holding zfitter class
|
|
4
4
|
|
5
5
|
import pprint
|
6
6
|
from typing import Union
|
7
|
+
from functools import lru_cache
|
7
8
|
|
8
9
|
import numpy
|
9
10
|
import zfit
|
@@ -100,8 +101,8 @@ class Fitter:
|
|
100
101
|
|
101
102
|
return data
|
102
103
|
#------------------------------
|
103
|
-
def _bin_pdf(self
|
104
|
-
|
104
|
+
def _bin_pdf(self):
|
105
|
+
nbins, min_x, max_x = self._get_binning()
|
105
106
|
_, arr_edg = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
|
106
107
|
|
107
108
|
size = arr_edg.size
|
@@ -117,23 +118,29 @@ class Fitter:
|
|
117
118
|
|
118
119
|
return numpy.array(l_bc)
|
119
120
|
#------------------------------
|
121
|
+
def _bin_data(self):
|
122
|
+
nbins, min_x, max_x = self._get_binning()
|
123
|
+
arr_data, _ = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
|
124
|
+
arr_data = arr_data.astype(float)
|
125
|
+
|
126
|
+
return arr_data
|
127
|
+
#------------------------------
|
128
|
+
@lru_cache(maxsize=10)
|
120
129
|
def _get_binning(self):
|
121
130
|
min_x = numpy.min(self._data_np)
|
122
131
|
max_x = numpy.max(self._data_np)
|
123
132
|
nbins = self._ndof + self._get_float_pars()
|
124
133
|
|
134
|
+
log.debug(f'Nbins: {nbins}')
|
135
|
+
log.debug(f'Range: [{min_x:.3f}, {max_x:.3f}]')
|
136
|
+
|
125
137
|
return nbins, min_x, max_x
|
126
138
|
#------------------------------
|
127
139
|
def _calc_gof(self):
|
128
140
|
log.debug('Calculating GOF')
|
129
|
-
nbins, min_x, max_x = self._get_binning()
|
130
141
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
arr_data, _ = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
|
135
|
-
arr_data = arr_data.astype(float)
|
136
|
-
arr_modl = self._bin_pdf(nbins)
|
142
|
+
arr_data = self._bin_data()
|
143
|
+
arr_modl = self._bin_pdf()
|
137
144
|
norm = numpy.sum(arr_data) / numpy.sum(arr_modl)
|
138
145
|
arr_modl = norm * arr_modl
|
139
146
|
arr_res = arr_modl - arr_data
|