data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.8.dev714__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/METADATA +800 -34
- data_manipulation_utilities-0.2.8.dev714.dist-info/RECORD +93 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/WHEEL +1 -1
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/entry_points.txt +1 -0
- dmu/__init__.py +0 -0
- dmu/generic/hashing.py +70 -0
- dmu/generic/utilities.py +175 -9
- dmu/generic/version_management.py +3 -5
- dmu/logging/log_store.py +34 -2
- dmu/logging/messages.py +96 -0
- dmu/ml/cv_classifier.py +3 -3
- dmu/ml/cv_diagnostics.py +224 -0
- dmu/ml/cv_performance.py +58 -0
- dmu/ml/cv_predict.py +149 -46
- dmu/ml/train_mva.py +587 -112
- dmu/ml/utilities.py +29 -10
- dmu/pdataframe/utilities.py +61 -3
- dmu/plotting/fwhm.py +64 -0
- dmu/plotting/matrix.py +1 -1
- dmu/plotting/plotter.py +25 -3
- dmu/plotting/plotter_1d.py +159 -14
- dmu/plotting/plotter_2d.py +5 -0
- dmu/rdataframe/utilities.py +54 -3
- dmu/rfile/ddfgetter.py +102 -0
- dmu/stats/fit_stats.py +129 -0
- dmu/stats/fitter.py +56 -23
- dmu/stats/gof_calculator.py +7 -0
- dmu/stats/model_factory.py +305 -50
- dmu/stats/parameters.py +100 -0
- dmu/stats/utilities.py +443 -12
- dmu/stats/wdata.py +187 -0
- dmu/stats/zfit.py +17 -0
- dmu/stats/zfit_models.py +68 -0
- dmu/stats/zfit_plotter.py +175 -56
- dmu/testing/utilities.py +120 -15
- dmu/workflow/__init__.py +0 -0
- dmu/workflow/cache.py +266 -0
- dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
- dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
- dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
- dmu_data/ml/tests/train_mva.yaml +20 -12
- dmu_data/ml/tests/train_mva_def.yaml +75 -0
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +87 -0
- dmu_data/ml/tests/train_mva_with_preffix.yaml +58 -0
- dmu_data/plotting/tests/2d.yaml +5 -5
- dmu_data/plotting/tests/line.yaml +15 -0
- dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
- dmu_data/plotting/tests/plug_stats.yaml +19 -0
- dmu_data/plotting/tests/simple.yaml +4 -3
- dmu_data/plotting/tests/styling.yaml +18 -0
- dmu_data/rfile/friends.yaml +13 -0
- dmu_data/stats/fitter/test_simple.yaml +28 -0
- dmu_data/stats/kde_optimizer/control.json +1 -0
- dmu_data/stats/kde_optimizer/signal.json +1 -0
- dmu_data/stats/parameters/data.yaml +178 -0
- dmu_data/tests/config.json +6 -0
- dmu_data/tests/config.yaml +4 -0
- dmu_data/tests/pdf_to_tex.txt +34 -0
- dmu_scripts/kerberos/check_expiration +21 -0
- dmu_scripts/kerberos/convert_certificate +22 -0
- dmu_scripts/ml/compare_classifiers.py +85 -0
- data_manipulation_utilities-0.2.6.dist-info/RECORD +0 -57
- {data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.8.dev714.data}/scripts/publish +0 -0
- {data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.8.dev714.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,10 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: data_manipulation_utilities
-Version: 0.2.6
+Version: 0.2.8.dev714
+Summary: Project storing utilities needed to reduce boilerplate code when analyzing data
 Description-Content-Type: text/markdown
-
-
-Requires-Dist: scipy
-Requires-Dist: awkward
-Requires-Dist: tqdm
-Requires-Dist: joblib
-Requires-Dist: scikit-learn
-Requires-Dist: toml
-Requires-Dist: numpy
-Requires-Dist: matplotlib
-Requires-Dist: mplhep
-Requires-Dist: hist[plot]
-Requires-Dist: pandas
-Provides-Extra: dev
-Requires-Dist: pytest; extra == "dev"
+
+[TOC]
 
 # D(ata) M(anipulation) U(tilities)
 
@@ -51,6 +39,190 @@ Then, for each remote it pushes the tags and the commits.
 
 This section describes generic tools that could not be put in a specific category, but tend to be useful.
 
+## Caching data
+
+In order to reuse data that is expensive to calculate one needs:
+
+- Serializable data, i.e. strings, floats, lists, etc.
+- A way to get a unique identifier for that data, e.g. a hashable object
+
+If both are available, one can do:
+
+```python
+import dmu.generic.utilities as gut
+
+def _get_something(arg1, arg2) -> float:
+    hashable = arg1, arg2
+
+    # This loads the data, if found
+    ret = gut.load_cached(hash_obj=hashable, on_fail=-999)
+    if ret != -999:
+        return ret
+
+    obj = very_expensive_function(arg1, arg2)
+
+    # This saves the data
+    ret = gut.cache_data(obj, hash_obj=hashable)
+
+    return ret
+```
+
+The cached data will go to JSON files in `/tmp/dmu/cache`.
+
+## Caching with a base class
+
+Caching functionality can be added to a class through a base class, as in:
+
+```python
+import dmu.generic.utilities as gut
+
+from dmu.workflow.cache    import Cache as Wcache
+from dmu.logging.log_store import LogStore
+
+log = LogStore.add_logger('tester')
+
+class Tester(Wcache):
+    '''
+    Testing class, produces outputs from simple inputs
+    '''
+    # -----------------------------------
+    def __init__(self, nval : int):
+        '''
+        nval, some integer used to produce output data
+        '''
+        super().__init__(
+            out_path='Tester',
+            nval    =nval)
+
+        self._nval = nval
+    # -----------------------------------
+    def run(self) -> list:
+        '''
+        Returns a list of 1's
+        '''
+        # _out_path belongs to the base class
+        obj_path = f'{self._out_path}/values.json'
+
+        if self._copy_from_cache():
+            log.warning('Output cached, not running')
+            return gut.load_json(obj_path)
+
+        log.info('Data not cached, running')
+        res = [1] * self._nval
+
+        gut.dump_json(res, obj_path)
+        self._cache()
+
+        return res
+
+# This will set the root directory where cached data goes
+# The data will go to `/some/directory/Tester`
+# This has to be done ONCE and only ONCE.
+Wcache.set_cache_root(root='/some/directory')
+
+obj = Tester(nval=3)
+...
+```
+
+where the `Tester` class has access to extra functionality to:
+
+- Cache outputs to a hashed directory
+- On the next run, check if that directory exists; if so, pick up
+  the outputs and put them in the output directory
+- If not, rerun the process
+
+Several hashed directories might exist, like in the diagram:
+
+
+
+**Important**: This class will also use the hash of the module where the `Tester`
+class is defined. Thus, changes in the code or in the input data will invalidate the hash.
+
+### Turning caching off
+
+This can be done temporarily for any list of classes that inherit from `Cache`,
+by passing the list of class names:
+
+```python
+with Wcache.turn_off_cache(val=['Tester']):
+    obj = Tester(nval=4)
+    out = obj.run()
+```
+
+If `val=None` is passed, caching is turned off for ALL such classes.
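
A minimal sketch of the `val=None` variant (an assumption that `None` is accepted literally, as described above):

```python
# Disable caching for every class that inherits from Cache
with Wcache.turn_off_cache(val=None):
    obj = Tester(nval=4)
    out = obj.run()  # reruns even if a cached output exists
```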
+
+## Silencing import messages
+
+To silence messages emitted by modules outside the user's control do:
+
+```python
+import dmu.generic.utilities as gut
+
+with gut.silent_import():
+    import tensorflow
+```
+
+## Silencing messages sent to `__stderr__` from deep inside C++ code
+
+This is an issue with frameworks like `Tensorflow`. Some messages are impossible
+to kill, which interferes with the debugging process. In order to hide those
+messages selectively, do:
+
+```python
+import os
+
+from dmu.logging import messages as mes
+
+l_msg = ['ONE', 'TWO']
+with mes.filter_stderr(banned_substrings=l_msg):
+    os.write(2, b'MSG ONE\n')
+    os.write(2, b'MSG TWO\n')
+    os.write(2, b'MSG THREE\n')
+```
+
+The context manager above will only let `MSG THREE` through to the error stream.
+
+## YAML
+
+When dumping data to YAML files do it like:
+
+```python
+import yaml
+
+import dmu.generic.utilities as gut
+
+yaml.dump(data, Dumper=gut.BlockStyleDumper)
+```
+
+to make sure the indentation is correct.
+
+## Hashing
+
+### Hashing python objects
+
+The snippet below:
+
+```python
+from dmu.generic import hashing
+
+obj = [1, 'name', [1, 'sub', 'list'], {'x' : 1}]
+val = hashing.hash_object(obj)
+```
+
+will:
+
+- Turn the input object into a JSON string
+- Encode it to utf-8
+- Build a 64-character hash out of it
+
+in two lines, thus keeping the user's code clean.
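
A minimal sketch of what those three steps amount to (an illustration of the description above, not the package's actual implementation):

```python
import json
import hashlib

obj = [1, 'name', [1, 'sub', 'list'], {'x' : 1}]

string  = json.dumps(obj)                      # object -> JSON string
encoded = string.encode('utf-8')               # string -> utf-8 bytes
val     = hashlib.sha256(encoded).hexdigest()  # bytes  -> 64-character hex digest
```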
+
+### Hashing files
+
+The following snippet:
+
+```python
+from dmu.generic import hashing
+
+path = '/some/file/path.txt'
+val  = hashing.hash_file(path=path)
+```
+
+should provide a hash of a file, given its path.
+
 ## Timer
 
 In order to benchmark functions do:
@@ -67,9 +239,9 @@ def fun():
 fun()
 ```
 
-## JSON dumper
+## JSON/YAML dumper and loader
 
-The following lines will dump data (dictionaries, lists, etc) to a JSON file:
+The following lines will dump data (dictionaries, lists, etc) to a JSON/YAML file and load it back:
 
 ```python
 import dmu.generic.utilities as gut
@@ -77,8 +249,48 @@ import dmu.generic.utilities as gut
 
 data = [1,2,3,4]
 
 gut.dump_json(data, '/tmp/list.json')
+data = gut.load_json('/tmp/list.json')
+```
+
+This will dump to either JSON or YAML, depending on the file extension; the allowed extensions are:
+
+```
+.json
+.yaml
+.yml
+```
+
+and it's meant to allow the user to bypass all the boilerplate and keep their code brief.
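
As a sketch of the extension-based dispatch just described (assuming, per the section title, that the same `dump_json`/`load_json` entry points serve both formats):

```python
import dmu.generic.utilities as gut

data = [1, 2, 3, 4]

# Same calls, but the .yaml extension should make them write/read YAML instead of JSON
gut.dump_json(data, '/tmp/list.yaml')
data = gut.load_json('/tmp/list.yaml')
```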
+
+## PKL dumper and loader
+
+In the same way one can do:
+
+```python
+import dmu.generic.utilities as gut
+
+data = [1,2,3,4]
+
+gut.dump_pickle(data, '/tmp/list.pkl')
+data = gut.load_pickle('/tmp/list.pkl')
+```
+
+## Loader of files and configurations from data packages
+
+YAML and JSON files can be loaded from data packages with:
+
+```python
+import dmu.generic.utilities as gut
+
+data = gut.load_data(package='dmu_data', fpath='tests/data.json')
+conf = gut.load_conf(package='dmu_data', fpath='tests/config.json')
 ```
 
+The former will return a python dictionary, list, etc.
+The latter will return a `DictConfig` object from the `omegaconf` project.
+Check [this](https://omegaconf.readthedocs.io/en/2.3_branch/index.html)
+for more information.
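
A small sketch of why the `DictConfig` return type is convenient (attribute-style access to nested data; the keys used here are hypothetical):

```python
conf = gut.load_conf(package='dmu_data', fpath='tests/config.json')

# For a config like {"plot": {"nbins": 50}} one can write:
nbins = conf.plot.nbins
```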
+
 # Physics
 
 ## Truth matching
@@ -119,8 +331,72 @@ samples:
 
 # Math
 
+## Weighted data
+
+`Wdata` is a small class representing weighted data, with some extra functionality. It can
+be used as:
+
+```python
+import numpy
+import zfit
+
+from dmu.stats.wdata import Wdata
+
+nentries = 1_000
+
+arr_mass = numpy.random.normal(loc=0, scale=1.0, size=nentries)
+arr_wgt  = numpy.random.normal(loc=1, scale=0.1, size=nentries)
+
+# Make an instance
+wdata = Wdata(data=arr_mass, weights=arr_wgt)
+
+# Create a zfit dataset, if needed
+obs   = zfit.Space('obs', limits=(-3, +3))
+zdata = wdata.to_zfit(obs=obs)
+
+# Add datasets
+wdata_1 = Wdata(data=arr_mass, weights=arr_wgt)
+wdata_2 = Wdata(data=arr_mass, weights=arr_wgt)
+wdata_3 = wdata_1 + wdata_2
+
+# Extract information from a dataset
+wdata.sumw() # Sum of weights
+wdata.size() # Number of entries
+
+# Update the weights, creating a new Wdata instance
+arr_wgt_new = numpy.random.normal(loc=1, scale=0.2, size=nentries)
+
+# New weights
+wdata_2 = wdata.update_weights(weights=arr_wgt_new, replace=True)
+
+# Multiply old weights by new ones and update
+wdata_3 = wdata.update_weights(weights=arr_wgt_new, replace=False)
+```
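
A quick sanity check of the addition semantics (assuming, as the snippet above suggests, that `+` concatenates the two datasets):

```python
wsum = wdata_1 + wdata_2

assert wsum.size() == wdata_1.size() + wdata_2.size()
assert abs(wsum.sumw() - (wdata_1.sumw() + wdata_2.sumw())) < 1e-6
```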
+
 ## PDFs
 
+### Suppressing tensorflow messages from the zfit import
+
+If you work with zfit, you will see messages from tensorflow. By importing zfit through:
+
+```python
+from dmu.stats.zfit import zfit
+```
+
+these messages should be hidden. If `ROOT` is installed, the wrapper will import it before
+importing tensorflow. That will prevent the crashes which usually happen when `tensorflow`
+is imported before `ROOT`.
+
+### Toy models
+
+For quick tests, one can retrieve simple models with:
+
+```python
+from dmu.stats import utilities as sut
+
+# For a Gaussian plus Exponential, extended
+pdf = sut.get_model(kind='s+b')
+
+# For a Gaussian signal, non extended
+pdf = sut.get_model(kind='signal')
+```
+
 ### Model building
 
 In order to do complex fits, one often needs PDFs with many parameters, which need to be added.
@@ -132,7 +408,27 @@ from dmu.stats.model_factory import ModelFactory
 
 l_pdf = ['cbr'] + 2 * ['cbl']
 l_shr = ['mu', 'sg']
-
+l_flt = ['mu', 'sg']                    # Will mark these parameters as floating in the fit done afterwards
+d_rep = {'mu' : 'scale', 'sg' : 'reso'} # Optional, will reparametrize for scale and resolution
+d_fix = {'al_cbl' : 3, 'nr_cbr' : 1}    # Optional, will fix the two parameters whose names start with these keys
+
+# If mu and sg are meant to be shared among all the models
+# the parameters can be passed here.
+# In this case, they are also meant to be floating
+mu = zfit.param.Parameter('mu_flt', 5280, 5000, 5500)
+sg = zfit.param.Parameter('sg_flt',   80,   20,  100)
+l_reuse = [mu, sg]
+
+mod = ModelFactory(
+    preffix = 'pref',    # Preffix for parameter naming
+    obs     = Data.obs,  # A zfit.Space defined elsewhere
+    l_pdf   = l_pdf,
+    l_shared= l_shr,
+    l_float = l_flt,
+    l_reuse = l_reuse,   # Optional
+    d_rep   = d_rep,     # Optional
+    d_fix   = d_fix)     # Optional
+
 pdf = mod.get_pdf()
 ```
@@ -145,10 +441,63 @@ pol1: Polynomial of degree 1
 pol2: Polynomial of degree 2
 cbr : CrystalBall with right tail
 cbl : CrystalBall with left tail
-gauss : Gaussian
+gauss : Gaussian
 dscb : Double sided CrystalBall
 ```
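
For instance, a minimal single-component build using one of the PDF names above (a sketch showing only the required arguments; `obs` is any `zfit.Space`):

```python
import zfit

from dmu.stats.model_factory import ModelFactory

obs = zfit.Space('mass', limits=(5080, 5680))

# One Gaussian; mu and sg are trivially shared, there is only one component
mod = ModelFactory(
    preffix = 'demo',
    obs     = obs,
    l_pdf   = ['gauss'],
    l_shared= ['mu', 'sg'],
    l_float = ['mu', 'sg'])
pdf = mod.get_pdf()
```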
+
+### Model building with reparametrizations
+
+In order to introduce reparametrizations for the means and the resolutions, such that:
+
+$\mu\to\mu+\Delta\mu$
+$\sigma\to\sigma\cdot s_{\sigma}$
+
+where the reparametrized $\mu$ and $\sigma$ are constant, while the scale and resolution are floating, do:
+
+```python
+import zfit
+from dmu.stats.model_factory import ModelFactory
+
+l_pdf = ['dscb']  # PDFs to build, named as in the section above
+l_shr = ['mu', 'sg']
+l_flt = []
+d_rep = {'mu' : 'scale', 'sg' : 'reso'}
+obs   = zfit.Space('mass', limits=(5080, 5680))
+
+mod = ModelFactory(
+    preffix = 'signal',
+    obs     = obs,
+    l_pdf   = l_pdf,
+    d_rep   = d_rep,
+    l_shared= l_shr,
+    l_float = l_flt)
+pdf = mod.get_pdf()
+```
+
+Here, the floating parameters **should not** be the same as the reparametrized ones.
+
+### Overriding parameters
+
+The models above have their parameter ranges chosen for fits to B meson distributions,
+e.g. the mean of the distributions is around 5 GeV. To make these models usable for other
+resonances do:
+
+```python
+from dmu.stats.parameters import ParameterLibrary as PL
+
+# This will override the ranges and the starting value
+PL.set_values(kind='cbr', parameter='mu', val=3000, low=2500, high=3500)
+
+# This will fix a parameter; the three arguments need to be equal
+PL.set_values(kind='cbr', parameter='sg', val= 30, low= 30, high= 30)
+```
+
+before using the `ModelFactory` class.
+For a summary of all the parameters and the values available do:
+
+```python
+PL.print_parameters(kind='cbr')
+```
+
 ### Printing PDFs
 
 One can print a zfit PDF by doing:
@@ -210,6 +559,25 @@ print_pdf(pdf,
           txt_path = 'tests/stats/utilities/print_pdf/pdf_const.txt')
 ```
 
+
+### Storing PDF as latex
+
+The file above can be transformed into a `tex` file by running:
+
+```python
+from dmu.stats.utilities import pdf_to_tex
+
+d_par = {
+    'ar_dscb_Signal_002_1_reso_flt' : r'$\alpha_{DSCB}^{1}$',
+    'ar_dscb_Signal_002_2_reso_flt' : r'$\alpha_{DSCB}^{2}$',
+}
+
+# It will skip fixed parameters by default
+pdf_to_tex(path='/path/to/pdf.txt', d_par=d_par, skip_fixed=True)
+```
+
+where `d_par` will rename the entries of the `Parameters` column, such that they are in latex.
+
 ## Fits
 
 The `Fitter` class is a wrapper to zfit, used to make fitting easier.
@@ -273,8 +641,8 @@ strategy :
 # The lines below will split the range of the data [0-10] into two subranges, such that the NLL is built
 # only in those ranges. The ranges need to be tuples
 ranges :
-    -
-    -
+    - [0, 3]
+    - [6, 9]
 # The lines below will allow using constraints for each parameter, where the first element is the mean and the second
 # the width of a Gaussian constraint. No correlations are implemented, yet.
 constraints :
@@ -356,6 +724,10 @@ obj = ZFitPlotter(data=sam, model=pdf)
 d_leg = {'gauss': 'New Gauss'}
 obj.plot(nbins=50, d_leg=d_leg, stacked=True, plot_range=(0, 10), ext_text='Extra text here')
 
+# Alternatively, to plot only the sidebands, useful if one has a blinded fit:
+obj.plot(nbins=50, d_leg=d_leg, stacked=True, ranges=[[0,3], [3,10]])
+
 # add a line to pull hist
 obj.axs[1].plot([0, 10], [0, 0], linestyle='--', color='black')
 ```
@@ -367,6 +739,71 @@ this class supports:
 - Stacking and overlaying of PDFs.
 - Blinding.
 
+## Fit saving
+
+To save everything regarding your fit in one go, do:
+
+```python
+from dmu.stats import utilities as sut
+from dmu.stats.zfit_plotter import ZFitPlotter
+
+ptr = ZFitPlotter(data=dat, model=pdf)
+ptr.plot()
+
+sut.save_fit(data=data, model=pdf, res=fit_result, fit_dir='/some/directory', d_const=constraints)
+```
+
+and the function will save everything that you would normally need from a fit.
+If the lines with `ZFitPlotter` were run before `save_fit`, the fit plot will also be saved.
+
+### Transforming fit results to DictConfig
+
+The `OmegaConf` library offers `DictConfig` objects, which are easier to handle
+when reading nested data. To transform a zfit result object into one of these
+objects do:
+
+```python
+from dmu.stats import utilities as sut
+
+cres = sut.zres_to_cres(res=res)
+```
+
+and then one would access the information like:
+
+```python
+error = cres.mu.error
+value = cres.mu.value
+```
+
+These objects can be saved to YAML with:
+
+```python
+from omegaconf import OmegaConf
+
+OmegaConf.save(config=cres, f='results.yaml')
+```
+
+## Placeholder fits
+
+In order to create a _fake_ fit on top of which one could develop other tools, do:
+
+```python
+from dmu.stats import utilities
+
+utilities.placeholder_fit(kind='s+b', fit_dir='/some/directory')
+```
+
+## Retrieving information on fits
+
+Once the fit has been done and the results are saved to a given directory, one can do:
+
+```python
+from dmu.stats.fit_stats import FitStats
+
+obj = FitStats(fit_dir='/directory/with/fit')
+val = obj.get_value(name='var_name', kind='value or error')
+```
+
+and the tool will retrieve the value. This is useful when the values are needed elsewhere
+in the code, i.e. it connects the fitting part with other parts.
 ## Arrays
 
 ### Scaling by non-integer
@@ -413,6 +850,24 @@ xval = numpy.linspace(0, 5, num=100)
 yval = fun(xval)
 ```
 
+## Other utilities
+
+These are here to decrease boilerplate code:
+
+```python
+from dmu.stats import utilities as sut
+
+# Retrieves the name of the observable from the observable
+name = sut.name_from_obs(obs=obs)
+
+# Retrieves the range of the observable from the observable
+minx, maxx = sut.range_from_obs(obs=obs)
+
+# This is needed because a KDE built with too little data cannot be evaluated;
+# when one tries, tensorflow raises an exception.
+sut.is_pdf_usable(pdf)
+```
+
 # Machine learning
 
 ## Classification
@@ -427,16 +882,31 @@ rdf_bkg = _get_rdf(kind='bkg')
 cfg = _get_config()
 
 obj = TrainMva(sig=rdf_sig, bkg=rdf_bkg, cfg=cfg)
-obj.run(
+obj.run(
+    skip_fit    =False, # By default false; if true, it will only make plots of the features
+    opt_ntrial  =20,    # By default zero; if a larger number is chosen, a hyperparameter optimization with optuna will run with this number of trials
+    load_trained=False, # If true, it will not train the models but just load them; only makes sense if the models already exist. Useful for adding postprocessing code, like the diagnostics section.
+)
 ```
 
 where the settings for the training go in a config dictionary, which when written to YAML looks like:
 
 ```yaml
 dataset:
+    # This section is optional. It can be used to redefine
+    # columns in different ways for different samples
+    #
+    # When evaluating the model, the same definitions will be used,
+    # but they will be taken from the `sig` section.
+    samples:
+        sig:
+            definitions:
+                x : v + w
+        bkg:
+            definitions:
+                x : v - w
     # Before training, new features can be defined as below
     define :
-        x : v + w
         y : v - w
     # If the key is found to be NaN, replace its value with the number provided
    # This will be used in the training.
@@ -455,8 +925,8 @@ training :
     learning_rate : 0.1
     min_samples_split : 2
 saving:
-    # The
-    path : 'tests/ml/train_mva
+    # The model names are model_001.pkl, model_002.pkl, etc., one for each fold
+    path : 'tests/ml/train_mva'
 plotting:
     roc :
         min : [0.0, 0.0] # Optional, controls where the ROC curve starts and ends
@@ -474,10 +944,7 @@ plotting:
     title : 'Correlation matrix'
     size : [10, 10]
     mask_value : 0 # Where correlation is zero, the bin will appear white
-    val_dir : 'tests/ml/train_mva'
 features:
-    saving:
-        plt_dir : 'tests/ml/train_mva/features'
     plots:
         w :
             binning : [-4, 4, 100]
@@ -499,6 +966,20 @@ plotting:
 
 the `TrainMva` is just a wrapper to `scikit-learn` that enables cross-validation (and that explains the `nfolds` setting).
 
+#### Outputs
+
+The trainer will produce the following outputs:
+
+- Models in the form of `pkl` files
+- Plots of the features
+- For each fold:
+    1. Covariance plot
+    1. ROC curve plot
+    1. Feature importance table in latex
+    1. JSON file with the data needed to build the ROC curve
+- For the full dataset: the ROC curve, the distribution of the scores, and a JSON file with the `x`, `y` coordinates of the ROC curve.
+- A latex table with the hyperparameters and the NaN replacements.
+
 ### Caveats
 
 When training on real data, several things might go wrong and the code will try to deal with them in the following ways:
@@ -538,6 +1019,18 @@ If a sample exists, that was used in the training of _every_ model, no model can
 During training, the configuration will be stored in the model. Therefore, variable definitions can be picked up for evaluation
 from that configuration and the user does not need to define extra columns.
 
+### Further optimization
+
+If not all the entries of the ROOT dataframe are needed for the prediction (e.g. some entries won't be used anyway), define
+a column as:
+
+```python
+rdf = rdf.Define('skip_mva_prediction', 'mass < 3000')
+```
+
+and the predictor will assign scores of `-1` to all the entries with `mass < 3000`.
+This should speed up the prediction and reduce resource consumption.
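
A sketch of how this would be used from the prediction side (the `CVPredict` interface shown here is an assumption; check `dmu/ml/cv_predict.py` for the actual one):

```python
from dmu.ml.cv_predict import CVPredict

rdf = rdf.Define('skip_mva_prediction', 'mass < 3000')

cvp       = CVPredict(models=l_model, rdf=rdf)
arr_score = cvp.predict()  # entries flagged above should come back with score -1
```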
+
 ### Caveats
 
 When evaluating the model with real data, problems might occur; we deal with them as follows:
@@ -552,12 +1045,158 @@ When evaluating the model with real data, problems might occur, we deal with the
 - For whatever features that are still NaN, they will be _patched_ with zeros when evaluated. However, the returned probabilities will be
 saved as -1. I.e. entries with NaNs will have probabilities of -1.
 
+## Diagnostics
+
+To run diagnostics on the trained model do:
+
+```python
+from dmu.ml.cv_diagnostics import CVDiagnostics
+
+# Where l_model is the list of models and cfg is a dictionary with the config
+cvd = CVDiagnostics(models=l_model, rdf=rdf, cfg=cfg)
+cvd.run()
+```
+
+The configuration can be loaded from a YAML file and would look like:
+
+```yaml
+# Directory where plots will go
+output : /tmp/tests/dmu/ml/cv_diagnostics/overlay
+# Optional, will assume that the target is already in the input dataframe
+# and will use it, instead of evaluating the models
+score_from_rdf : mva
+correlations:
+    # Variables with respect to which the correlations of the features will be measured
+    target :
+        name    : mass
+        overlay :
+            # These are the working points at which the "mass" variable will be plotted
+            # If there is a correlation, the shape should change
+            wp :
+                - 0.2
+                - 0.5
+                - 0.7
+                - 0.9
+            general:
+                size : [20, 10]
+            saving:
+                plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/from_rdf
+            plots:
+                z :
+                    binning    : [1000, 4000, 30]
+                    yscale     : 'linear'
+                    labels     : ['mass', 'Entries']
+                    normalized : true
+    methods:
+        - Pearson
+        - Kendall-$\tau$
+    figure:
+        title: Scores from file
+        size : [10, 8]
+        xlabelsize: 18 # Controls the size of the x axis labels. By default 30
+        rotate    : 60 # Will rotate the x labels by 60 degrees
+```
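
Since the configuration can come from YAML, a minimal loading sketch (the file name here is hypothetical):

```python
import yaml

from dmu.ml.cv_diagnostics import CVDiagnostics

with open('diagnostics.yaml', encoding='utf-8') as ifile:
    cfg = yaml.safe_load(ifile)

cvd = CVDiagnostics(models=l_model, rdf=rdf, cfg=cfg)
cvd.run()
```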
+
+## Comparing classifiers
+
+### Simple approach
+
+For a quick comparison, run:
+
+```bash
+compare_classifiers -c /path/to/config.yaml
+```
+
+where the config looks like:
+
+```yaml
+out_dir : /path/to/plots
+classifiers:
+    label for model 1 : /path/to/directory/with/model1
+    label for model 2 : /path/to/directory/with/model2
+```
+
+However, this will only compare the classifiers' ROC curves with respect to the
+samples that were used to train them.
+
+### With custom samples
+
+The models' performances can also be compared by _plugging in_ any
+signal and background proxy for any model, like:
+
+```python
+import matplotlib.pyplot as plt
+
+from dmu.ml.cv_performance import CVPerformance
+
+cvp = CVPerformance()
+cvp.plot_roc(
+    sig  =rdf_sig_1, bkg=rdf_bkg_1,
+    model=l_model_1, name='def', color='red')
+cvp.plot_roc(
+    sig  =rdf_sig_1, bkg=rdf_bkg_2,
+    model=l_model_2, name='alt', color='blue')

+plt.legend()
+plt.grid()
+plt.show()
+```
+
+This should show an overlay of the different ROC curves made for a specific combination
+of signal and background proxies with a given model.
+
+# Dask dataframes
+
+In order to process large amounts of data, a `Dask` dataframe is more suitable.
+A set of `ROOT` files can be loaded into one of these with:
+
+```python
+from dmu.rfile.ddfgetter import DDFGetter
+
+# One can also pass the configuration dictionary directly, with the `cfg` argument
+# If no `columns` argument is passed, it will take all the columns
+ddfg = DDFGetter(config_path='config.yaml', columns=['a', 'b'])
+ddf  = ddfg.get_dataframe()
+
+# This will provide the pandas dataframe
+df = ddf.compute()
+...
+```
+
+where `config.yaml` would look like:
+
+```yaml
+tree : tree_name
+primary_keys:
+    - index
+files :
+    - file_001.root
+    - file_002.root
+    - file_003.root
+samples:
+    - /tmp/tests/dmu/rfile/main
+    - /tmp/tests/dmu/rfile/frnd
+```
+
 # Pandas dataframes
 
 ## Utilities
 
 These are thin layers of code that take pandas dataframes and carry out specific tasks
 
+### NaN filter
+
+The following snippet will remove NaNs from the dataframe,
+as long as at most 2% of the rows have NaNs. Beyond that, an exception will be raised.
+
+```python
+import dmu.pdataframe.utilities as put
+
+# Default is 0.02
+df = put.dropna(df, nan_frac=0.02)
+```
+
+The use case is automatically cleaning up data that is not expected to be perfect.
+
 ### Dataframe to latex
 
 One can save a dataframe to latex with:
@@ -582,10 +1221,41 @@ put.df_to_tex(df,
     caption = 'some caption')
 ```
 
+### Dataframe to and from YAML
+
+This extends the existing JSON functionality:
+
+```python
+import dmu.pdataframe.utilities as put
+
+df_1 = _get_df()
+put.to_yaml(df_1, yml_path)
+df_2 = put.from_yaml(yml_path)
+```
+
+and is meant to be less verbose than doing it through the YAML module.
 # Rdataframes
 
 These are utility functions meant to be used with ROOT dataframes.
 
+## Cutflows from RDataFrames
+
+When using the `Filter` method on a ROOT dataframe, one can do:
+
+```python
+rep = rdf.Report()
+rep.Print()
+```
+
+However, this `rep` object is not python friendly, even though it is basically a table that can be
+put in a pandas dataframe. Precisely that can be done with:
+
+```python
+from dmu.rdataframe import utilities as ut
+
+df = ut.rdf_report_to_df(rep)
+```
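
A self-contained sketch tying it together (the toy dataframe and cut names are made up for illustration):

```python
import ROOT

from dmu.rdataframe import utilities as ut

rdf = ROOT.RDataFrame(100).Define('x', 'rdfentry_')
rdf = rdf.Filter('x > 10', 'low cut').Filter('x < 90', 'high cut')

rep = rdf.Report()
df  = ut.rdf_report_to_df(rep)  # expected: one row per named cut
```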
+
 ## Adding a column from a numpy array
 
 ### With numba
@@ -649,6 +1319,18 @@ obj = AtrMgr(rdf)
 obj.to_json('/path/to/file.json')
 ```
 
+## Filtering for a random number of entries
+
+The built-in method `Range` can only be used to select contiguous ranges of entries. Use
+
+```python
+import dmu.rdataframe.utilities as ut
+
+rdf = ut.random_filter(rdf, entries=val)
+```
+
+to select **approximately** `entries` randomly chosen entries from the dataframe.
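
The *approximately* presumably comes from a per-entry random acceptance; a sketch of that idea (an assumption about the implementation, not taken from the package):

```python
# Keep each entry with probability entries / total; the kept count then
# fluctuates around `entries` instead of matching it exactly
total = rdf.Count().GetValue()
prob  = val / total

rdf = rdf.Define('keep_entry', f'gRandom->Rndm() < {prob}').Filter('keep_entry')
```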
+
 # Logging
 
 The `LogStore` class is an interface to the `logging` module. It is aimed at making it easier to include
@@ -668,6 +1350,25 @@ log.error('error')
 log.critical('critical')
 ```
 
+In order to get a specific logger do:
+
+```python
+logger = LogStore.get_logger(name='my_logger_name')
+```
+
+In order to get the logging level from the logger do:
+
+```python
+level = log.getEffectiveLevel()
+```
+
+A context manager is also available, which can be used with:
+
+```python
+with LogStore.level('logger_name', 10):
+    log.debug('Debug message')
+```
+
 # Plotting from ROOT dataframes
 
 ## 1D plots
@@ -703,10 +1404,34 @@ definitions:
 plots:
     x :
         binning : [0.98, 0.98, 40] # Here the bounds agree => the tool will calculate bounds making sure that they are the 2% and 98% quantiles
-        yscale  :
+        yscale  : linear # Optional; if not passed, will do linear; can be log
         labels  : ['x', 'Entries'] # Labels are optional, will use the variable name and "Entries" as labels if not present
-        title   :
-        name    :
+        title   : some title can be added for different variable plots
+        name    : plot_of_x # This ensures that one gets plot_of_x.png as a result; if missing, x.png would be saved
+        weights : my_weights # Optional, this is the column in the dataframe with the weights
+        # Can add styling to specific plots, these should be the arguments of
+        # hist.plot(...)
+        styling :
+            # This section will update the styling of each category
+            # The categories (class A, etc) are the keys of the dictionary of dataframes
+            class A:
+                # These are the arguments of plt.hist(...)
+                histtype : fill
+                color    : gray
+                alpha    : 0.3
+            class B:
+                color    : red
+                histtype : step
+                linestyle: '-' # Linestyle is by default 'none',
+                               # needs to be overridden to see _steps_
+        # This will add vertical lines to plots, the arguments are the same
+        # as the ones passed to axvline
+        vline :
+            x     : 0
+            label : label
+            ls    : --
+            c     : blue
+            lw    : 1
     y :
         binning : [-5.0, 8.0, 40]
         yscale  : 'linear'
@@ -725,11 +1450,52 @@ style:
     # The line below would place the legend outside the figure to avoid overlaps with the histogram
     bbox_to_anchor : [1.2, 1]
 stats:
-
+    sumw : '{:.2f}' # This will add the sum of weights to the label. Without weights, it will be the number of entries
 ```
 
 it's up to the user to build this dictionary and load it.
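
A minimal sketch of loading such a config and running the plotter (assuming the `Plotter1D` interface from `dmu/plotting/plotter_1d.py`; the dictionary keys play the category role used by the styling section):

```python
import yaml

from dmu.plotting.plotter_1d import Plotter1D as Plotter

with open('plots.yaml', encoding='utf-8') as ifile:
    cfg = yaml.safe_load(ifile)

# Keys ('class A', 'class B') are the categories styled above
d_rdf = {'class A' : rdf_a, 'class B' : rdf_b}

ptr = Plotter(d_rdf=d_rdf, cfg=cfg)
ptr.run()
```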
 
+### Plugins
+
+Extra functionality can be _plugged_ into the code by using the plugins section, as below.
+
+#### FWHM
+
+```yaml
+plugin:
+    fwhm:
+        # Can control each variable's fit separately
+        x :
+            plot   : true
+            obs    : [-2, 4]
+            format : FWHM={:.3f}
+            add_std: True
+        y :
+            plot   : true
+            obs    : [-4, 8]
+            format : FWHM={:.3f}
+            add_std: True
+```
+
+where the section will:
+
+- Use a KDE to fit the distribution and plot it on top of the histogram
+- Add the value of the Full Width at Half Maximum to the title, for each distribution, with a specific formatting (see the note after this list).
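
For reference, when `add_std` is used to show both quantities: for a Gaussian distribution the FWHM relates to the standard deviation as the standard result

$\mathrm{FWHM} = 2\sqrt{2\ln 2}\,\sigma \approx 2.355\,\sigma$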
+
+#### stats
+
+```yaml
+plugin:
+    stats:
+        x :
+            mean : $\mu$={:.2f}
+            rms  : $\sigma$={:.2f}
+            sum  : $\Sigma$={:.0f}
+```
+
+This can be used to print statistics, i.e. the mean, the RMS and the weighted sum of entries, for each distribution.
+
 ## 2D plots
 
 For the 2D case it would look like: