data-manipulation-utilities 0.1.6__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/PKG-INFO +102 -3
  2. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/README.md +100 -1
  3. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/pyproject.toml +1 -1
  4. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/PKG-INFO +102 -3
  5. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/SOURCES.txt +4 -0
  6. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/plotting/plotter.py +1 -1
  7. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/plotting/plotter_1d.py +54 -32
  8. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/plotting/plotter_2d.py +1 -1
  9. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/fitter.py +16 -9
  10. data_manipulation_utilities-0.1.9/src/dmu/stats/gof_calculator.py +145 -0
  11. data_manipulation_utilities-0.1.9/src/dmu/stats/minimizers.py +183 -0
  12. data_manipulation_utilities-0.1.9/src/dmu/stats/model_factory.py +207 -0
  13. data_manipulation_utilities-0.1.9/src/dmu/stats/zfit_plotter.py +527 -0
  14. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/2d.yaml +7 -1
  15. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/setup.cfg +0 -0
  16. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
  17. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/entry_points.txt +0 -0
  18. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/requires.txt +0 -0
  19. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
  20. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/arrays/utilities.py +0 -0
  21. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/generic/utilities.py +0 -0
  22. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/logging/log_store.py +0 -0
  23. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/cv_classifier.py +0 -0
  24. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/cv_predict.py +0 -0
  25. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/train_mva.py +0 -0
  26. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/ml/utilities.py +0 -0
  27. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rdataframe/atr_mgr.py +0 -0
  28. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rdataframe/utilities.py +0 -0
  29. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rfile/rfprinter.py +0 -0
  30. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/rfile/utilities.py +0 -0
  31. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/function.py +0 -0
  32. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/stats/utilities.py +0 -0
  33. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/testing/utilities.py +0 -0
  34. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu/text/transformer.py +0 -0
  35. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/__init__.py +0 -0
  36. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
  37. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
  38. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
  39. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/name.yaml +0 -0
  40. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
  41. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/simple.yaml +0 -0
  42. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/title.yaml +0 -0
  43. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/plotting/tests/weights.yaml +0 -0
  44. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform.toml +0 -0
  45. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform.txt +0 -0
  46. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform_set.toml +0 -0
  47. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform_set.txt +0 -0
  48. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_data/text/transform_trf.txt +0 -0
  49. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/git/publish +0 -0
  50. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/physics/check_truth.py +0 -0
  51. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
  52. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/rfile/print_trees.py +0 -0
  53. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/ssh/coned.py +0 -0
  54. {data_manipulation_utilities-0.1.6 → data_manipulation_utilities-0.1.9}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: data_manipulation_utilities
3
- Version: 0.1.6
3
+ Version: 0.1.9
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: logzero
6
6
  Requires-Dist: PyYAML
@@ -41,7 +41,7 @@ such that:
41
41
 
42
42
  Then, for each remote it pushes the tags and the commits.
43
43
 
44
- *Why?*
44
+ *Why?*
45
45
 
46
46
  1. Tags should be named as the project's version
47
47
  1. As soon as a new version is created, that version needs to be tagged.
@@ -121,6 +121,24 @@ samples:
121
121
 
122
122
  ## PDFs
123
123
 
124
+ ### Model building
125
+
126
+ In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
127
+ In these PDFs certain parameters (e.g. $\mu$ or $\sigma$) need to be shared. This project provides
128
+ `ModelFactory`, which can do this as shown below:
129
+
130
+ ```python
131
+ from dmu.stats.model_factory import ModelFactory
132
+
133
+ l_pdf = ['cbr'] + 2 * ['cbl']
134
+ l_shr = ['mu', 'sg']
135
+ mod = ModelFactory(obs = Data.obs, l_pdf = l_pdf, l_shared=l_shr)
136
+ pdf = mod.get_pdf()
137
+ ```
138
+
139
+ where the model is a sum of three `CrystalBall` PDFs, one with a right tail and two with a left tail.
140
+ The `mu` and `sg` parameters are shared.
141
+
124
142
  ### Printing PDFs
125
143
 
126
144
  One can print a zfit PDF by doing:
@@ -231,6 +249,87 @@ likelihood :
231
249
  nbins : 100 #If specified, will do binned likelihood fit instead of unbinned
232
250
  ```
233
251
 
252
+ ## Minimizers
253
+
254
+ These are alternative implementations of the minimizers in zfit meant to be used for special types of fits.
255
+
256
+ ### Annealing minimizer
257
+
258
+ This minimizer is meant to be used for fits to models with many parameters, where multiple minima are expected in the
259
+ likelihood. The minimizer use is illustrated in:
260
+
261
+ ```python
262
+ from dmu.stats.minimizers import AnealingMinimizer
263
+
264
+ nll = _get_nll()
265
+ minimizer = AnealingMinimizer(ntries=10, pvalue=0.05)
266
+ res = minimizer.minimize(nll)
267
+ ```
268
+
269
+ this will:
270
+
271
+ - Take the `NLL` object.
272
+ - Try fitting at most 10 times
273
+ - After each fit, calculate the goodness of fit (in this case the p-value)
274
+ - Stop when the number of tries has been exhausted or the p-value reached is higher than `0.05`
275
+ - If the fit has not succeeded because of convergence, validity or goodness of fit issues,
276
+ randomize the parameters and try again.
277
+ - If the desired goodness of fit has not been achieved, pick the best result.
278
+ - Return the `FitResult` object and set the PDF to the final fit result.
279
+
280
+ The $\chi^2/Ndof$ can also be used as in:
281
+
282
+ ```python
283
+ from dmu.stats.minimizers import AnealingMinimizer
284
+
285
+ nll = _get_nll()
286
+ minimizer = AnealingMinimizer(ntries=10, chi2ndof=1.00)
287
+ res = minimizer.minimize(nll)
288
+ ```
289
+
290
+ ## Fit plotting
291
+
292
+ The class `ZFitPlotter` can be used to plot fits done with zfit. For a complete set of examples of how to use
293
+ this class check the [tests](tests/stats/test_fit_plotter.py). A simple example of its usage is below:
294
+
295
+ ```python
296
+ from dmu.stats.zfit_plotter import ZFitPlotter
297
+
298
+ obs = zfit.Space('m', limits=(0, 10))
299
+
300
+ # Create signal PDF
301
+ mu = zfit.Parameter("mu", 5.0, 0, 10)
302
+ sg = zfit.Parameter("sg", 0.5, 0, 5)
303
+ sig = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sg)
304
+ nsg = zfit.Parameter('nsg', 1000, 0, 10000)
305
+ esig= sig.create_extended(nsg, name='gauss')
306
+
307
+ # Create background PDF
308
+ lm = zfit.Parameter('lm', -0.1, -1, 0)
309
+ bkg = zfit.pdf.Exponential(obs=obs, lam=lm)
310
+ nbk = zfit.Parameter('nbk', 1000, 0, 10000)
311
+ ebkg= bkg.create_extended(nbk, name='expo')
312
+
313
+ # Add them
314
+ pdf = zfit.pdf.SumPDF([ebkg, esig])
315
+ sam = pdf.create_sampler()
316
+
317
+ # Plot them
318
+ obj = ZFitPlotter(data=sam, model=pdf)
319
+ d_leg = {'gauss': 'New Gauss'}
320
+ obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
321
+
322
+ # add a line to pull hist
323
+ obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
324
+ ```
325
+
326
+ this class supports:
327
+
328
+ - Handling title, legend, plot size.
329
+ - Adding pulls.
330
+ - Stacking and overlaying of PDFs.
331
+ - Blinding.
332
+
234
333
  ## Arrays
235
334
 
236
335
  ### Scaling by non-integer
@@ -21,7 +21,7 @@ such that:
21
21
 
22
22
  Then, for each remote it pushes the tags and the commits.
23
23
 
24
- *Why?*
24
+ *Why?*
25
25
 
26
26
  1. Tags should be named as the project's version
27
27
  1. As soon as a new version is created, that version needs to be tagged.
@@ -101,6 +101,24 @@ samples:
101
101
 
102
102
  ## PDFs
103
103
 
104
+ ### Model building
105
+
106
+ In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
107
+ In these PDFs certain parameters (e.g. $\mu$ or $\sigma$) need to be shared. This project provides
108
+ `ModelFactory`, which can do this as shown below:
109
+
110
+ ```python
111
+ from dmu.stats.model_factory import ModelFactory
112
+
113
+ l_pdf = ['cbr'] + 2 * ['cbl']
114
+ l_shr = ['mu', 'sg']
115
+ mod = ModelFactory(obs = Data.obs, l_pdf = l_pdf, l_shared=l_shr)
116
+ pdf = mod.get_pdf()
117
+ ```
118
+
119
+ where the model is a sum of three `CrystalBall` PDFs, one with a right tail and two with a left tail.
120
+ The `mu` and `sg` parameters are shared.
121
+
104
122
  ### Printing PDFs
105
123
 
106
124
  One can print a zfit PDF by doing:
@@ -211,6 +229,87 @@ likelihood :
211
229
  nbins : 100 #If specified, will do binned likelihood fit instead of unbinned
212
230
  ```
213
231
 
232
+ ## Minimizers
233
+
234
+ These are alternative implementations of the minimizers in zfit meant to be used for special types of fits.
235
+
236
+ ### Annealing minimizer
237
+
238
+ This minimizer is meant to be used for fits to models with many parameters, where multiple minima are expected in the
239
+ likelihood. The minimizer use is illustrated in:
240
+
241
+ ```python
242
+ from dmu.stats.minimizers import AnealingMinimizer
243
+
244
+ nll = _get_nll()
245
+ minimizer = AnealingMinimizer(ntries=10, pvalue=0.05)
246
+ res = minimizer.minimize(nll)
247
+ ```
248
+
249
+ this will:
250
+
251
+ - Take the `NLL` object.
252
+ - Try fitting at most 10 times
253
+ - After each fit, calculate the goodness of fit (in this case the p-value)
254
+ - Stop when the number of tries has been exhausted or the p-value reached is higher than `0.05`
255
+ - If the fit has not succeeded because of convergence, validity or goodness of fit issues,
256
+ randomize the parameters and try again.
257
+ - If the desired goodness of fit has not been achieved, pick the best result.
258
+ - Return the `FitResult` object and set the PDF to the final fit result.
259
+
260
+ The $\chi^2/Ndof$ can also be used as in:
261
+
262
+ ```python
263
+ from dmu.stats.minimizers import AnealingMinimizer
264
+
265
+ nll = _get_nll()
266
+ minimizer = AnealingMinimizer(ntries=10, chi2ndof=1.00)
267
+ res = minimizer.minimize(nll)
268
+ ```
269
+
270
+ ## Fit plotting
271
+
272
+ The class `ZFitPlotter` can be used to plot fits done with zfit. For a complete set of examples of how to use
273
+ this class check the [tests](tests/stats/test_fit_plotter.py). A simple example of its usage is below:
274
+
275
+ ```python
276
+ from dmu.stats.zfit_plotter import ZFitPlotter
277
+
278
+ obs = zfit.Space('m', limits=(0, 10))
279
+
280
+ # Create signal PDF
281
+ mu = zfit.Parameter("mu", 5.0, 0, 10)
282
+ sg = zfit.Parameter("sg", 0.5, 0, 5)
283
+ sig = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sg)
284
+ nsg = zfit.Parameter('nsg', 1000, 0, 10000)
285
+ esig= sig.create_extended(nsg, name='gauss')
286
+
287
+ # Create background PDF
288
+ lm = zfit.Parameter('lm', -0.1, -1, 0)
289
+ bkg = zfit.pdf.Exponential(obs=obs, lam=lm)
290
+ nbk = zfit.Parameter('nbk', 1000, 0, 10000)
291
+ ebkg= bkg.create_extended(nbk, name='expo')
292
+
293
+ # Add them
294
+ pdf = zfit.pdf.SumPDF([ebkg, esig])
295
+ sam = pdf.create_sampler()
296
+
297
+ # Plot them
298
+ obj = ZFitPlotter(data=sam, model=pdf)
299
+ d_leg = {'gauss': 'New Gauss'}
300
+ obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
301
+
302
+ # add a line to pull hist
303
+ obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
304
+ ```
305
+
306
+ this class supports:
307
+
308
+ - Handling title, legend, plot size.
309
+ - Adding pulls.
310
+ - Stacking and overlaying of PDFs.
311
+ - Blinding.
312
+
214
313
  ## Arrays
215
314
 
216
315
  ### Scaling by non-integer
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = 'data_manipulation_utilities'
3
- version = '0.1.6'
3
+ version = '0.1.9'
4
4
  readme = 'README.md'
5
5
  dependencies= [
6
6
  'logzero',
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: data_manipulation_utilities
3
- Version: 0.1.6
3
+ Version: 0.1.9
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: logzero
6
6
  Requires-Dist: PyYAML
@@ -41,7 +41,7 @@ such that:
41
41
 
42
42
  Then, for each remote it pushes the tags and the commits.
43
43
 
44
- *Why?*
44
+ *Why?*
45
45
 
46
46
  1. Tags should be named as the project's version
47
47
  1. As soon as a new version is created, that version needs to be tagged.
@@ -121,6 +121,24 @@ samples:
121
121
 
122
122
  ## PDFs
123
123
 
124
+ ### Model building
125
+
126
+ In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
127
+ In these PDFs certain parameters (e.g. $\mu$ or $\sigma$) need to be shared. This project provides
128
+ `ModelFactory`, which can do this as shown below:
129
+
130
+ ```python
131
+ from dmu.stats.model_factory import ModelFactory
132
+
133
+ l_pdf = ['cbr'] + 2 * ['cbl']
134
+ l_shr = ['mu', 'sg']
135
+ mod = ModelFactory(obs = Data.obs, l_pdf = l_pdf, l_shared=l_shr)
136
+ pdf = mod.get_pdf()
137
+ ```
138
+
139
+ where the model is a sum of three `CrystalBall` PDFs, one with a right tail and two with a left tail.
140
+ The `mu` and `sg` parameters are shared.
141
+
124
142
  ### Printing PDFs
125
143
 
126
144
  One can print a zfit PDF by doing:
@@ -231,6 +249,87 @@ likelihood :
231
249
  nbins : 100 #If specified, will do binned likelihood fit instead of unbinned
232
250
  ```
233
251
 
252
+ ## Minimizers
253
+
254
+ These are alternative implementations of the minimizers in zfit meant to be used for special types of fits.
255
+
256
+ ### Annealing minimizer
257
+
258
+ This minimizer is meant to be used for fits to models with many parameters, where multiple minima are expected in the
259
+ likelihood. The minimizer use is illustrated in:
260
+
261
+ ```python
262
+ from dmu.stats.minimizers import AnealingMinimizer
263
+
264
+ nll = _get_nll()
265
+ minimizer = AnealingMinimizer(ntries=10, pvalue=0.05)
266
+ res = minimizer.minimize(nll)
267
+ ```
268
+
269
+ this will:
270
+
271
+ - Take the `NLL` object.
272
+ - Try fitting at most 10 times
273
+ - After each fit, calculate the goodness of fit (in this case the p-value)
274
+ - Stop when the number of tries has been exhausted or the p-value reached is higher than `0.05`
275
+ - If the fit has not succeeded because of convergence, validity or goodness of fit issues,
276
+ randomize the parameters and try again.
277
+ - If the desired goodness of fit has not been achieved, pick the best result.
278
+ - Return the `FitResult` object and set the PDF to the final fit result.
279
+
280
+ The $\chi^2/Ndof$ can also be used as in:
281
+
282
+ ```python
283
+ from dmu.stats.minimizers import AnealingMinimizer
284
+
285
+ nll = _get_nll()
286
+ minimizer = AnealingMinimizer(ntries=10, chi2ndof=1.00)
287
+ res = minimizer.minimize(nll)
288
+ ```
289
+
290
+ ## Fit plotting
291
+
292
+ The class `ZFitPlotter` can be used to plot fits done with zfit. For a complete set of examples of how to use
293
+ this class check the [tests](tests/stats/test_fit_plotter.py). A simple example of its usage is below:
294
+
295
+ ```python
296
+ from dmu.stats.zfit_plotter import ZFitPlotter
297
+
298
+ obs = zfit.Space('m', limits=(0, 10))
299
+
300
+ # Create signal PDF
301
+ mu = zfit.Parameter("mu", 5.0, 0, 10)
302
+ sg = zfit.Parameter("sg", 0.5, 0, 5)
303
+ sig = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sg)
304
+ nsg = zfit.Parameter('nsg', 1000, 0, 10000)
305
+ esig= sig.create_extended(nsg, name='gauss')
306
+
307
+ # Create background PDF
308
+ lm = zfit.Parameter('lm', -0.1, -1, 0)
309
+ bkg = zfit.pdf.Exponential(obs=obs, lam=lm)
310
+ nbk = zfit.Parameter('nbk', 1000, 0, 10000)
311
+ ebkg= bkg.create_extended(nbk, name='expo')
312
+
313
+ # Add them
314
+ pdf = zfit.pdf.SumPDF([ebkg, esig])
315
+ sam = pdf.create_sampler()
316
+
317
+ # Plot them
318
+ obj = ZFitPlotter(data=sam, model=pdf)
319
+ d_leg = {'gauss': 'New Gauss'}
320
+ obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
321
+
322
+ # add a line to pull hist
323
+ obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
324
+ ```
325
+
326
+ this class supports:
327
+
328
+ - Handling title, legend, plot size.
329
+ - Adding pulls.
330
+ - Stacking and overlaying of PDFs.
331
+ - Blinding.
332
+
234
333
  ## Arrays
235
334
 
236
335
  ### Scaling by non-integer
@@ -22,7 +22,11 @@ src/dmu/rfile/rfprinter.py
22
22
  src/dmu/rfile/utilities.py
23
23
  src/dmu/stats/fitter.py
24
24
  src/dmu/stats/function.py
25
+ src/dmu/stats/gof_calculator.py
26
+ src/dmu/stats/minimizers.py
27
+ src/dmu/stats/model_factory.py
25
28
  src/dmu/stats/utilities.py
29
+ src/dmu/stats/zfit_plotter.py
26
30
  src/dmu/testing/utilities.py
27
31
  src/dmu/text/transformer.py
28
32
  src/dmu_data/__init__.py
@@ -65,7 +65,7 @@ class Plotter:
65
65
 
66
66
  return minx, maxx
67
67
  #-------------------------------------
68
- def _preprocess_rdf(self, rdf):
68
+ def _preprocess_rdf(self, rdf : RDataFrame) -> RDataFrame:
69
69
  '''
70
70
  rdf (RDataFrame): ROOT dataframe
71
71
 
@@ -2,6 +2,9 @@
2
2
  Module containing plotter class
3
3
  '''
4
4
 
5
+ import hist
6
+ from hist import Hist
7
+
5
8
  import numpy
6
9
  import matplotlib.pyplot as plt
7
10
 
@@ -33,58 +36,75 @@ class Plotter1D(Plotter):
33
36
 
34
37
  return xname, yname
35
38
  #-------------------------------------
36
- def _plot_var(self, var):
39
+ def _is_normalized(self, var : str) -> bool:
40
+ d_cfg = self._d_cfg['plots'][var]
41
+ normalized=False
42
+ if 'normalized' in d_cfg:
43
+ normalized = d_cfg['normalized']
44
+
45
+ return normalized
46
+ #-------------------------------------
47
+ def _get_binning(self, var : str, d_data : dict[str, numpy.ndarray]) -> tuple[float, float, int]:
48
+ d_cfg = self._d_cfg['plots'][var]
49
+ minx, maxx, bins = d_cfg['binning']
50
+ if maxx <= minx + 1e-5:
51
+ log.info(f'Bounds not set for {var}, will calculated them')
52
+ minx, maxx = self._find_bounds(d_data = d_data, qnt=minx)
53
+ log.info(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
54
+ else:
55
+ log.debug(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
56
+
57
+ return minx, maxx, bins
58
+ #-------------------------------------
59
+ def _plot_var(self, var : str) -> float:
37
60
  '''
38
61
  Will plot a variable from a dictionary of dataframes
39
62
  Parameters
40
63
  --------------------
41
64
  var (str) : name of column
65
+
66
+ Return
67
+ --------------------
68
+ Largest bin content among all bins and among all histograms plotted
42
69
  '''
43
70
  # pylint: disable=too-many-locals
44
71
 
45
- d_cfg = self._d_cfg['plots'][var]
46
-
47
- minx, maxx, bins = d_cfg['binning']
48
- yscale = d_cfg['yscale' ] if 'yscale' in d_cfg else 'linear'
49
- xname, yname = self._get_labels(var)
50
-
51
- normalized=False
52
- if 'normalized' in d_cfg:
53
- normalized = d_cfg['normalized']
54
-
55
- title = ''
56
- if 'title' in d_cfg:
57
- title = d_cfg['title']
58
-
59
72
  d_data = {}
60
73
  for name, rdf in self._d_rdf.items():
61
74
  d_data[name] = rdf.AsNumpy([var])[var]
62
75
 
63
- if maxx <= minx + 1e-5:
64
- log.info(f'Bounds not set for {var}, will calculated them')
65
- minx, maxx = self._find_bounds(d_data = d_data, qnt=minx)
66
- log.info(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
67
- else:
68
- log.debug(f'Using bounds [{minx:.3e}, {maxx:.3e}]')
76
+ minx, maxx, bins = self._get_binning(var, d_data)
77
+ d_wgt = self._get_weights(var)
69
78
 
70
79
  l_bc_all = []
71
- d_wgt = self._get_weights(var)
72
80
  for name, arr_val in d_data.items():
73
- arr_wgt = d_wgt[name] if d_wgt is not None else None
74
-
75
- self._print_weights(arr_wgt, var, name)
76
- l_bc, _, _ = plt.hist(arr_val, weights=arr_wgt, bins=bins, range=(minx, maxx), density=normalized, histtype='step', label=name)
77
- l_bc_all += numpy.array(l_bc).tolist()
81
+ arr_wgt = d_wgt[name] if d_wgt is not None else numpy.ones_like(arr_val)
82
+ hst = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x', label=name).Weight()
83
+ hst.fill(x=arr_val, weight=arr_wgt)
84
+ hst.plot(label=name)
85
+ l_bc_all += hst.values().tolist()
78
86
 
79
- plt.yscale(yscale)
80
- plt.xlabel(xname)
81
- plt.ylabel(yname)
87
+ max_y = max(l_bc_all)
82
88
 
89
+ return max_y
90
+ # --------------------------------------------
91
+ def _style_plot(self, var : str, max_y : float) -> None:
92
+ d_cfg = self._d_cfg['plots'][var]
93
+ yscale = d_cfg['yscale' ] if 'yscale' in d_cfg else 'linear'
94
+
95
+ xname, yname = self._get_labels(var)
96
+ plt.xlabel(xname)
97
+ plt.ylabel(yname)
98
+ plt.yscale(yscale)
83
99
  if yscale == 'linear':
84
100
  plt.ylim(bottom=0)
85
101
 
86
- max_y = max(l_bc_all)
102
+ title = ''
103
+ if 'title' in d_cfg:
104
+ title = d_cfg['title']
105
+
87
106
  plt.ylim(top=1.2 * max_y)
107
+ plt.legend()
88
108
  plt.title(title)
89
109
  # --------------------------------------------
90
110
  def _plot_lines(self, var : str):
@@ -106,8 +126,10 @@ class Plotter1D(Plotter):
106
126
  fig_size = self._get_fig_size()
107
127
  for var in self._d_cfg['plots']:
108
128
  log.debug(f'Plotting: {var}')
129
+
109
130
  plt.figure(var, figsize=fig_size)
110
- self._plot_var(var)
131
+ max_y = self._plot_var(var)
132
+ self._style_plot(var, max_y)
111
133
  self._plot_lines(var)
112
134
  self._save_plot(var)
113
135
  # --------------------------------------------
@@ -31,8 +31,8 @@ class Plotter2D(Plotter):
31
31
  if not isinstance(cfg, dict):
32
32
  raise ValueError('Config dictionary not passed')
33
33
 
34
- self._rdf : RDataFrame = rdf
35
34
  self._d_cfg : dict = cfg
35
+ self._rdf : RDataFrame = super()._preprocess_rdf(rdf)
36
36
 
37
37
  self._wgt : numpy.ndarray
38
38
  # --------------------------------------------
@@ -4,6 +4,7 @@ Module holding zfitter class
4
4
 
5
5
  import pprint
6
6
  from typing import Union
7
+ from functools import lru_cache
7
8
 
8
9
  import numpy
9
10
  import zfit
@@ -100,8 +101,8 @@ class Fitter:
100
101
 
101
102
  return data
102
103
  #------------------------------
103
- def _bin_pdf(self, nbins):
104
- [[min_x]], [[max_x]] = self._pdf.space.limits
104
+ def _bin_pdf(self):
105
+ nbins, min_x, max_x = self._get_binning()
105
106
  _, arr_edg = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
106
107
 
107
108
  size = arr_edg.size
@@ -117,23 +118,29 @@ class Fitter:
117
118
 
118
119
  return numpy.array(l_bc)
119
120
  #------------------------------
121
+ def _bin_data(self):
122
+ nbins, min_x, max_x = self._get_binning()
123
+ arr_data, _ = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
124
+ arr_data = arr_data.astype(float)
125
+
126
+ return arr_data
127
+ #------------------------------
128
+ @lru_cache(maxsize=10)
120
129
  def _get_binning(self):
121
130
  min_x = numpy.min(self._data_np)
122
131
  max_x = numpy.max(self._data_np)
123
132
  nbins = self._ndof + self._get_float_pars()
124
133
 
134
+ log.debug(f'Nbins: {nbins}')
135
+ log.debug(f'Range: [{min_x:.3f}, {max_x:.3f}]')
136
+
125
137
  return nbins, min_x, max_x
126
138
  #------------------------------
127
139
  def _calc_gof(self):
128
140
  log.debug('Calculating GOF')
129
- nbins, min_x, max_x = self._get_binning()
130
141
 
131
- log.debug(f'Nbins: {nbins}')
132
- log.debug(f'Range: [{min_x:.3f}, {max_x:.3f}]')
133
-
134
- arr_data, _ = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
135
- arr_data = arr_data.astype(float)
136
- arr_modl = self._bin_pdf(nbins)
142
+ arr_data = self._bin_data()
143
+ arr_modl = self._bin_pdf()
137
144
  norm = numpy.sum(arr_data) / numpy.sum(arr_modl)
138
145
  arr_modl = norm * arr_modl
139
146
  arr_res = arr_modl - arr_data