data-manipulation-utilities 0.2.8.dev720__py3-none-any.whl → 0.2.8.dev730__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.8.dev720.dist-info → data_manipulation_utilities-0.2.8.dev730.dist-info}/METADATA +2 -1
- data_manipulation_utilities-0.2.8.dev730.dist-info/RECORD +82 -0
- dmu/plotting/plotter.py +23 -11
- dmu_data/__init__.py +0 -0
- dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
- dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
- dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
- dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
- dmu_data/ml/tests/train_mva.yaml +60 -0
- dmu_data/ml/tests/train_mva_def.yaml +75 -0
- dmu_data/ml/tests/train_mva_with_diagnostics.yaml +87 -0
- dmu_data/ml/tests/train_mva_with_preffix.yaml +58 -0
- dmu_data/plotting/tests/2d.yaml +24 -0
- dmu_data/plotting/tests/fig_size.yaml +13 -0
- dmu_data/plotting/tests/high_stat.yaml +22 -0
- dmu_data/plotting/tests/legend.yaml +12 -0
- dmu_data/plotting/tests/line.yaml +15 -0
- dmu_data/plotting/tests/name.yaml +14 -0
- dmu_data/plotting/tests/no_bounds.yaml +12 -0
- dmu_data/plotting/tests/normalized.yaml +9 -0
- dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
- dmu_data/plotting/tests/plug_stats.yaml +19 -0
- dmu_data/plotting/tests/simple.yaml +9 -0
- dmu_data/plotting/tests/stats.yaml +9 -0
- dmu_data/plotting/tests/styling.yaml +18 -0
- dmu_data/plotting/tests/title.yaml +14 -0
- dmu_data/plotting/tests/weights.yaml +13 -0
- dmu_data/rfile/friends.yaml +13 -0
- dmu_data/stats/fitter/test_simple.yaml +28 -0
- dmu_data/stats/kde_optimizer/control.json +1 -0
- dmu_data/stats/kde_optimizer/signal.json +1 -0
- dmu_data/stats/parameters/data.yaml +178 -0
- dmu_data/tests/config.json +6 -0
- dmu_data/tests/config.yaml +4 -0
- dmu_data/tests/pdf_to_tex.txt +34 -0
- dmu_data/text/transform.toml +4 -0
- dmu_data/text/transform.txt +6 -0
- dmu_data/text/transform_set.toml +8 -0
- dmu_data/text/transform_set.txt +6 -0
- dmu_data/text/transform_trf.txt +12 -0
- data_manipulation_utilities-0.2.8.dev720.dist-info/RECORD +0 -45
- {data_manipulation_utilities-0.2.8.dev720.dist-info → data_manipulation_utilities-0.2.8.dev730.dist-info}/WHEEL +0 -0
- {data_manipulation_utilities-0.2.8.dev720.dist-info → data_manipulation_utilities-0.2.8.dev730.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: data-manipulation-utilities
|
3
|
-
Version: 0.2.8.dev720
|
3
|
+
Version: 0.2.8.dev730
|
4
4
|
Summary: Project storing utilities needed to reduce boilerplate code when analyzing data
|
5
5
|
Requires-Python: >=3.10,<3.13
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
@@ -1483,6 +1483,7 @@ stats:
|
|
1483
1483
|
```
|
1484
1484
|
|
1485
1485
|
it's up to the user to build this dictionary and load it.
|
1486
|
+
this can also be a `DictConfig` from the `OmegaConf` project.
|
1486
1487
|
|
1487
1488
|
### Pluggins
|
1488
1489
|
|
@@ -0,0 +1,82 @@
|
|
1
|
+
dmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
|
3
|
+
dmu/generic/hashing.py,sha256=QR5Gbv6-ANvi5hL232UNMrw9DONpU27BWTynXGxQLGU,1806
|
4
|
+
dmu/generic/utilities.py,sha256=0tT93vF_x0q8STRrTD0GvBEpALz-mqE-vJyen4zWCO8,6861
|
5
|
+
dmu/generic/version_management.py,sha256=j0ImlAq6SVNjTh3xRsF6G7DSoyr1w8kTRY84dNriGRE,3750
|
6
|
+
dmu/logging/log_store.py,sha256=eRSy8Y4fuiDFJK02Z6fq67XQzOrhQ7GMr2LvvJQbJ40,5172
|
7
|
+
dmu/logging/messages.py,sha256=Oj3O5EO2KOPtffyVq2P7RPzjpoXtxZ6yXO5HwTftVcM,2903
|
8
|
+
dmu/ml/cv_classifier.py,sha256=6rjezMahwL-WzLGKU-fzMzNxJZAGbM7YAbhaZVcJ3F0,4258
|
9
|
+
dmu/ml/cv_diagnostics.py,sha256=PLh41mSVE8Kagp9KcuRDN_7tDL9MjPxQzuewY8jDnNo,7600
|
10
|
+
dmu/ml/cv_performance.py,sha256=q9sLxIx7GP-dand3tnhHCBJnT6xqssNdRYv_TVjYWUM,1910
|
11
|
+
dmu/ml/cv_predict.py,sha256=0sc_OqwOewKvipcMyi3QqkgG30nkpZZjE-SOhHWHMd0,10778
|
12
|
+
dmu/ml/train_mva.py,sha256=7KAFX_zOx8MGbYx62U81JbdBkrZvqclSSkgmYvWX-60,34861
|
13
|
+
dmu/ml/utilities.py,sha256=A9j3tBh-jfaFdwwLUleo1QnttfawN7XDiQRh4VTvqVY,4597
|
14
|
+
dmu/pdataframe/utilities.py,sha256=xl6iLVKUccqVXYjuHsDUZ6UrCKQPw1k8D-f6407Yq30,2742
|
15
|
+
dmu/plotting/fwhm.py,sha256=4e8n6624pxWLcOOtayCQ_hDSSMKU21-3UsdmbkX1ojk,1949
|
16
|
+
dmu/plotting/matrix.py,sha256=s_5W8O3yXF3u8OX3f4J4hCoxIVZt1TF8S-qJsFBh2Go,5005
|
17
|
+
dmu/plotting/plotter.py,sha256=5N5mLdQAqOUs43ukX5mT9nRaYD5dkn_sED5NoZJV5A0,8483
|
18
|
+
dmu/plotting/plotter_1d.py,sha256=Kyoyh-QyZLXXqX19wqEDUWCD1nJEvEonGp9nlgEaoZE,10936
|
19
|
+
dmu/plotting/plotter_2d.py,sha256=dXC-7Rsquibe5cn7622ryoKpuv7KCAmouIIXwQ_VEFM,3172
|
20
|
+
dmu/plotting/utilities.py,sha256=SI9dvtZq2gr-PXVz71KE4o0i09rZOKgqJKD1jzf6KXk,1167
|
21
|
+
dmu/rdataframe/atr_mgr.py,sha256=FdhaQWVpsm4OOe1IRbm7rfrq8VenTNdORyI-lZ2Bs1M,2386
|
22
|
+
dmu/rdataframe/utilities.py,sha256=cY1Na8HbJ7kB2dwmBagRdsRyCA4ZT_vyIU86ewREj2Y,5322
|
23
|
+
dmu/rfile/ddfgetter.py,sha256=0jfNzpv72_NQUKOK5SBsn289rUqVt2BMvuL-Ro5oY7I,3316
|
24
|
+
dmu/rfile/rfprinter.py,sha256=mp5jd-oCJAnuokbdmGyL9i6tK2lY72jEfROuBIZ_ums,3941
|
25
|
+
dmu/rfile/utilities.py,sha256=XuYY7HuSBj46iSu3c60UYBHtI6KIPoJU_oofuhb-be0,945
|
26
|
+
dmu/stats/fit_stats.py,sha256=wzkQT9U32ljGe4azUj1Fj0ECF3zmnH2Ncn0O-_Pl1zQ,4070
|
27
|
+
dmu/stats/fitter.py,sha256=rm_fwjkq-0LSjXB_gt3y6BnHoK8Xvd4gHYwKBUJaItQ,19603
|
28
|
+
dmu/stats/function.py,sha256=yzi_Fvp_ASsFzbWFivIf-comquy21WoeY7is6dgY0Go,9491
|
29
|
+
dmu/stats/gof_calculator.py,sha256=63zNJJGKPy-j_hPNPfu9qNlhrHjYIgJOyL8-VDtbwuI,4894
|
30
|
+
dmu/stats/minimizers.py,sha256=db9R2G0SOV-k0BKi6m4EyB_yp6AtZdP23_28B0315oo,7094
|
31
|
+
dmu/stats/model_factory.py,sha256=0_o5OmiX0cNhp9_cNqBOYfasBgKlQkQPiy5nqi9qQKA,18966
|
32
|
+
dmu/stats/parameters.py,sha256=9lycexTT5ZcxXciiQY9HoJV8O1ahrTEkagd7dYXcfj8,3224
|
33
|
+
dmu/stats/utilities.py,sha256=7_tr1j-dl3lLNpxIMWruZs4yUtlNuUTknwGMERpfLhs,17338
|
34
|
+
dmu/stats/wdata.py,sha256=IbjZFU9SHTLSYfaBgqamDvqy1K7-3-SaKbU4bGsamK0,6799
|
35
|
+
dmu/stats/zfit.py,sha256=aSZj_4IHi9IBthfqlNJeA8YSoMmXO5WipgiKnXKGbnM,286
|
36
|
+
dmu/stats/zfit_models.py,sha256=SI61KJ-OG1UAabDICU1iTh6JPKM3giR2ErDraRjkCV8,1842
|
37
|
+
dmu/stats/zfit_plotter.py,sha256=gbN5KxhJcP4ItCi98c-fj5_UtvVWL_NA9jkTHiRjvnE,23854
|
38
|
+
dmu/testing/utilities.py,sha256=WYlz7Ve5lQjuWhhNL4gWe6_qcByBLV762Lhrc6A0P9E,7421
|
39
|
+
dmu/text/transformer.py,sha256=4lrGknbAWRm0-rxbvgzOO-eR1-9bkYk61boJUEV3cQ0,6100
|
40
|
+
dmu/workflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
41
|
+
dmu/workflow/cache.py,sha256=CtkGwxuF4UJlD55SmUJcRgWYLsbZOyUvYLI8oTVzk_g,8768
|
42
|
+
dmu_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
43
|
+
dmu_data/ml/tests/diagnostics_from_file.yaml,sha256=quvXOPkRducnBsctyape_Rn5_aqMEpPo6nO_UweMORo,404
|
44
|
+
dmu_data/ml/tests/diagnostics_from_model.yaml,sha256=rtCQlmGS9ld2xoQJEE35nA07yfRMklEfQEW0w3gRv2A,261
|
45
|
+
dmu_data/ml/tests/diagnostics_multiple_methods.yaml,sha256=w8Fpmr7kX1Jsb_h6LL2hiuYKf5lYpckFCpYKzWetbA0,265
|
46
|
+
dmu_data/ml/tests/diagnostics_overlay.yaml,sha256=ZVOsxLL8_JQtf41n8Ct-M9Ch10xBwHK54q1fttWPDlE,866
|
47
|
+
dmu_data/ml/tests/train_mva.yaml,sha256=KArbTkaj6FqerrUhlkgyBde_4DfkpVza6kCMgMQPi9g,1388
|
48
|
+
dmu_data/ml/tests/train_mva_def.yaml,sha256=UyPMo-9nshoB8BHxm9E6S0xd9ngRARdgUq6vnuMlhwI,1765
|
49
|
+
dmu_data/ml/tests/train_mva_with_diagnostics.yaml,sha256=-2KKIJ8CiNgMlgpCXkmZRdPEo-sJmAqr01vizfeqkj0,2098
|
50
|
+
dmu_data/ml/tests/train_mva_with_preffix.yaml,sha256=Q9SsJSXGbkHWGBvMZIkTZlKNUz5ZcSVBscrKgeMWBvE,1386
|
51
|
+
dmu_data/plotting/tests/2d.yaml,sha256=40wKQmNbIabZ7CI8-2QnD6mG1a_B7vEcPdzvehHkseY,520
|
52
|
+
dmu_data/plotting/tests/fig_size.yaml,sha256=7ROq49nwZ1A2EbPiySmu6n3G-Jq6YAOkc3d2X3YNZv0,294
|
53
|
+
dmu_data/plotting/tests/high_stat.yaml,sha256=bLglBLCZK6ft0xMhQ5OltxE76cWsBMPMjO6GG0OkDr8,522
|
54
|
+
dmu_data/plotting/tests/legend.yaml,sha256=wGpj58ig-GOlqbWoN894zrCet2Fj9f5QtY0rig_UC-c,213
|
55
|
+
dmu_data/plotting/tests/line.yaml,sha256=EERDeTctbauwqAvmKFXC4Ot3Tgx-8kcIniGbepXwsKs,305
|
56
|
+
dmu_data/plotting/tests/name.yaml,sha256=mkcPAVg8wBAmlSbSRQ1bcaMl4vOS6LXMtpqQeDrrtO4,312
|
57
|
+
dmu_data/plotting/tests/no_bounds.yaml,sha256=8e1QdphBjz-suDr857DoeUC2DXiy6SE-gvkORJQYv80,257
|
58
|
+
dmu_data/plotting/tests/normalized.yaml,sha256=Y0eKtyV5pvlSxvqfsLjytYtv8xYF3HZ5WEdCJdeHGQI,193
|
59
|
+
dmu_data/plotting/tests/plug_fwhm.yaml,sha256=xl5LXc9Nt66anM-HOXAxCtlaxWNM7zzIXf1Y6U8M4Wg,449
|
60
|
+
dmu_data/plotting/tests/plug_stats.yaml,sha256=ROO8soYXBbZIFYZcGngA_K5XHgIAFCmuAGfZCJgMmd0,384
|
61
|
+
dmu_data/plotting/tests/simple.yaml,sha256=Xc59Pjfb3BKMicLVBxODVqomHFupcb5GvefKbKHCQWQ,195
|
62
|
+
dmu_data/plotting/tests/stats.yaml,sha256=fSZjoV-xPnukpCH2OAXsz_SNPjI113qzDg8Ln3spaaA,165
|
63
|
+
dmu_data/plotting/tests/styling.yaml,sha256=ZglA4fG6gr5Q_K2VinwVDPjIitiFizCzxr-KsHw2ERI,370
|
64
|
+
dmu_data/plotting/tests/title.yaml,sha256=bawKp9aGpeRrHzv69BOCbFX8sq9bb3Es9tdsPTE7jIk,333
|
65
|
+
dmu_data/plotting/tests/weights.yaml,sha256=RWQ1KxbCq-uO62WJ2AoY4h5Umc37zG35s-TpKnNMABI,312
|
66
|
+
dmu_data/rfile/friends.yaml,sha256=sEGKFKK0q1U6b9qlfHUFBLZW0FeruR1t2LCOo6Ck1Rg,264
|
67
|
+
dmu_data/stats/fitter/test_simple.yaml,sha256=lBw6igBT57BZnuG3GgoxOiXTMFHfs5LchbI3Ubb8Qz0,1549
|
68
|
+
dmu_data/stats/kde_optimizer/control.json,sha256=EiArsHUAHBmzw4gmaNyOOW1ziYtNhdelIAqc3EH0K_M,1327616
|
69
|
+
dmu_data/stats/kde_optimizer/signal.json,sha256=MocwnYizcKki4dlxEIsWwE8HzY-ZBQaUo-lrCR5N3Tw,1327616
|
70
|
+
dmu_data/stats/parameters/data.yaml,sha256=lNmuolhUQmwB6sxHQvBRm-Kz5MUW_H1qAouynzBiWvs,2087
|
71
|
+
dmu_data/tests/config.json,sha256=QSfx-irgPV-BHAVe1Xe1dgiVkZGPp0fxb9OhXeVaEBg,60
|
72
|
+
dmu_data/tests/config.yaml,sha256=rFTk9PSFOgEVEcGDxr4K9vFIUrCVhbEMUoj683Py1AQ,38
|
73
|
+
dmu_data/tests/pdf_to_tex.txt,sha256=yzzH1L7P2SOFrVxS737Ykg1SlcD0jhrrBwQGsui2oAQ,3854
|
74
|
+
dmu_data/text/transform.toml,sha256=R-832BZalzHZ6c5gD6jtT_Hj8BCsM5vxa1v6oeiwaP4,94
|
75
|
+
dmu_data/text/transform.txt,sha256=EX760da6Vkf-_EPxnQlC5hGSkfFhJCCGCD19NU-1Qto,44
|
76
|
+
dmu_data/text/transform_set.toml,sha256=Jeh7BTz82idqvbOQJtl9-ur56mZkzDn5WtvmIb48LoE,150
|
77
|
+
dmu_data/text/transform_set.txt,sha256=1KivMoP9LxPn9955QrRmOzjEqduEjhTetQ9MXykO5LY,46
|
78
|
+
dmu_data/text/transform_trf.txt,sha256=zxBRTgcSmX7RdqfmWF88W1YqbyNHa4Ccruf1MmnYv2A,74
|
79
|
+
data_manipulation_utilities-0.2.8.dev730.dist-info/METADATA,sha256=p6CwOJiDGK5kQwCGIg9i6AoNvSE9VDKFbGa7OG_od0M,51215
|
80
|
+
data_manipulation_utilities-0.2.8.dev730.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
81
|
+
data_manipulation_utilities-0.2.8.dev730.dist-info/entry_points.txt,sha256=M0C8_u9B_xSmyfemdPwdIBh9QuPIkjhEpG060Y5_Pjw,321
|
82
|
+
data_manipulation_utilities-0.2.8.dev730.dist-info/RECORD,,
|
dmu/plotting/plotter.py
CHANGED
@@ -5,12 +5,12 @@ Module containing plotter class
|
|
5
5
|
import os
|
6
6
|
import json
|
7
7
|
import math
|
8
|
-
from typing import Union
|
9
8
|
|
10
9
|
import numpy
|
11
10
|
import matplotlib.pyplot as plt
|
12
11
|
|
13
|
-
from ROOT import RDataFrame
|
12
|
+
from ROOT import RDataFrame, RDF
|
13
|
+
from omegaconf import DictConfig
|
14
14
|
from dmu.logging.log_store import LogStore
|
15
15
|
|
16
16
|
log = LogStore.add_logger('dmu:plotting:Plotter')
|
@@ -20,16 +20,28 @@ class Plotter:
|
|
20
20
|
Base class of Plotter1D and Plotter2D
|
21
21
|
'''
|
22
22
|
#-------------------------------------
|
23
|
-
def __init__(
|
24
|
-
|
23
|
+
def __init__(
|
24
|
+
self,
|
25
|
+
d_rdf: dict|None =None,
|
26
|
+
cfg : dict|DictConfig|None =None):
|
27
|
+
'''
|
28
|
+
Parameters
|
29
|
+
--------------
|
30
|
+
d_rdf: Dictionary where
|
31
|
+
key : Identifier of dataset
|
32
|
+
value: ROOT dataframe representing dataset
|
33
|
+
|
34
|
+
cfg : Dictionary or DictConfig instance holding configuration
|
35
|
+
'''
|
36
|
+
if not isinstance( cfg, (dict,DictConfig)):
|
25
37
|
raise ValueError('Config dictionary not passed')
|
26
38
|
|
27
39
|
if not isinstance(d_rdf, dict):
|
28
40
|
raise ValueError('Dataframe dictionary not passed')
|
29
41
|
|
30
42
|
self._d_cfg = cfg
|
31
|
-
self._d_rdf : dict[str,
|
32
|
-
self._d_wgt :
|
43
|
+
self._d_rdf : dict[str, RDF.RNode] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
|
44
|
+
self._d_wgt : dict[str, numpy.ndarray|None] | None
|
33
45
|
|
34
46
|
self._title : str = ''
|
35
47
|
#-------------------------------------
|
@@ -68,9 +80,9 @@ class Plotter:
|
|
68
80
|
|
69
81
|
return minx, maxx
|
70
82
|
#-------------------------------------
|
71
|
-
def _preprocess_rdf(self, rdf :
|
83
|
+
def _preprocess_rdf(self, rdf : RDF.RNode) -> RDF.RNode:
|
72
84
|
'''
|
73
|
-
rdf (
|
85
|
+
rdf (RDF.RNode): ROOT dataframe
|
74
86
|
|
75
87
|
returns preprocessed dataframe
|
76
88
|
'''
|
@@ -146,7 +158,7 @@ class Plotter:
|
|
146
158
|
|
147
159
|
return rdf
|
148
160
|
# --------------------------------------------
|
149
|
-
def _print_weights(self, arr_wgt :
|
161
|
+
def _print_weights(self, arr_wgt : numpy.ndarray|None, var : str, sample : str) -> None:
|
150
162
|
if arr_wgt is None:
|
151
163
|
log.debug(f'Not using weights for {sample}:{var}')
|
152
164
|
return
|
@@ -171,7 +183,7 @@ class Plotter:
|
|
171
183
|
|
172
184
|
return fig_size
|
173
185
|
#-------------------------------------
|
174
|
-
def _get_weights(self, var) ->
|
186
|
+
def _get_weights(self, var) -> dict[str, numpy.ndarray|None]| None:
|
175
187
|
d_cfg = self._d_cfg['plots'][var]
|
176
188
|
if 'weights' not in d_cfg:
|
177
189
|
return None
|
@@ -186,7 +198,7 @@ class Plotter:
|
|
186
198
|
|
187
199
|
return d_weight
|
188
200
|
# --------------------------------------------
|
189
|
-
def _read_weights(self, name : str, rdf :
|
201
|
+
def _read_weights(self, name : str, rdf : RDF.RNode) -> numpy.ndarray:
|
190
202
|
v_col = rdf.GetColumnNames()
|
191
203
|
l_col = [ col.c_str() for col in v_col ]
|
192
204
|
|
dmu_data/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,13 @@
|
|
1
|
+
output : /tmp/tests/dmu/ml/cv_diagnostics/from_rdf
|
2
|
+
# Will assume that the target is already in the input dataframe
|
3
|
+
# and will use it, instead of evaluating models
|
4
|
+
score_from_rdf : w
|
5
|
+
correlations:
|
6
|
+
# Variables with respect to which the correlations with the features will be measured
|
7
|
+
target :
|
8
|
+
name : z
|
9
|
+
methods:
|
10
|
+
- Pearson
|
11
|
+
figure:
|
12
|
+
title: Scores from file
|
13
|
+
size : [10, 8]
|
@@ -0,0 +1,33 @@
|
|
1
|
+
output : /tmp/tests/dmu/ml/cv_diagnostics/overlay
|
2
|
+
# Will assume that the target is already in the input dataframe
|
3
|
+
# and will use it, instead of evaluating models
|
4
|
+
score_from_rdf : w
|
5
|
+
correlations:
|
6
|
+
# Variables with respect to which the correlations with the features will be measured
|
7
|
+
target :
|
8
|
+
name : z
|
9
|
+
overlay :
|
10
|
+
wp :
|
11
|
+
- 0.2
|
12
|
+
- 0.5
|
13
|
+
- 0.7
|
14
|
+
- 0.9
|
15
|
+
general:
|
16
|
+
size : [12, 10]
|
17
|
+
saving:
|
18
|
+
plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/overlay
|
19
|
+
plots:
|
20
|
+
z :
|
21
|
+
binning : [-4, 4, 10]
|
22
|
+
yscale : 'linear'
|
23
|
+
labels : ['$z$', 'Entries']
|
24
|
+
normalized : true
|
25
|
+
styling :
|
26
|
+
linestyle: '-'
|
27
|
+
methods:
|
28
|
+
- Pearson
|
29
|
+
- Kendall-$\tau$
|
30
|
+
figure:
|
31
|
+
title : Scores from file
|
32
|
+
size : [12, 10]
|
33
|
+
xlabelsize: 30
|
@@ -0,0 +1,60 @@
|
|
1
|
+
dataset:
|
2
|
+
define :
|
3
|
+
r : z + x
|
4
|
+
nan :
|
5
|
+
x : -3
|
6
|
+
y : -3
|
7
|
+
training :
|
8
|
+
nfold : 3
|
9
|
+
features :
|
10
|
+
- x
|
11
|
+
- y
|
12
|
+
- r
|
13
|
+
rdm_stat : 1
|
14
|
+
hyper :
|
15
|
+
loss : log_loss
|
16
|
+
max_features : sqrt
|
17
|
+
n_estimators : 100
|
18
|
+
max_depth : 5
|
19
|
+
min_samples_split : 2
|
20
|
+
subsample : 0.8
|
21
|
+
learning_rate : 0.1
|
22
|
+
saving:
|
23
|
+
output : /tmp/tests/dmu/ml/train_mva
|
24
|
+
plotting:
|
25
|
+
roc :
|
26
|
+
min : [0.0, 0.0]
|
27
|
+
max : [1.2, 1.2]
|
28
|
+
annotate:
|
29
|
+
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
30
|
+
form : '{:.2f}'
|
31
|
+
color: 'green'
|
32
|
+
xoff : -15
|
33
|
+
yoff : -15
|
34
|
+
size : 10
|
35
|
+
correlation:
|
36
|
+
title : 'Correlation matrix'
|
37
|
+
size : [10, 10]
|
38
|
+
mask_value : 0
|
39
|
+
features:
|
40
|
+
plots:
|
41
|
+
r :
|
42
|
+
binning : [-6, 6, 100]
|
43
|
+
yscale : 'linear'
|
44
|
+
labels : ['$r$', '']
|
45
|
+
w :
|
46
|
+
binning : [-4, 4, 100]
|
47
|
+
yscale : 'linear'
|
48
|
+
labels : ['$w$', '']
|
49
|
+
x :
|
50
|
+
binning : [-4, 4, 100]
|
51
|
+
yscale : 'linear'
|
52
|
+
labels : ['$x$', '']
|
53
|
+
y :
|
54
|
+
binning : [-4, 4, 100]
|
55
|
+
yscale : 'linear'
|
56
|
+
labels : ['$y$', '']
|
57
|
+
z :
|
58
|
+
binning : [-4, 4, 100]
|
59
|
+
yscale : 'linear'
|
60
|
+
labels : ['$z$', '']
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# This config file is used for testing training and evaluation
|
2
|
+
# when there is a variable that is defined in different ways
|
3
|
+
# for the `sig` and `bkg` samples
|
4
|
+
|
5
|
+
dataset:
|
6
|
+
samples:
|
7
|
+
sig :
|
8
|
+
definitions:
|
9
|
+
n : x + y
|
10
|
+
bkg :
|
11
|
+
definitions:
|
12
|
+
n : x - y
|
13
|
+
define :
|
14
|
+
r : z + x
|
15
|
+
nan :
|
16
|
+
n : -3
|
17
|
+
y : -3
|
18
|
+
training :
|
19
|
+
nfold : 3
|
20
|
+
features :
|
21
|
+
- n
|
22
|
+
- y
|
23
|
+
- r
|
24
|
+
rdm_stat : 1
|
25
|
+
hyper :
|
26
|
+
loss : log_loss
|
27
|
+
max_features : sqrt
|
28
|
+
n_estimators : 100
|
29
|
+
max_depth : 5
|
30
|
+
min_samples_split : 2
|
31
|
+
subsample : 0.8
|
32
|
+
learning_rate : 0.1
|
33
|
+
saving:
|
34
|
+
output : /tmp/tests/dmu/ml/train_mva
|
35
|
+
plotting:
|
36
|
+
roc :
|
37
|
+
min : [0.0, 0.0]
|
38
|
+
max : [1.2, 1.2]
|
39
|
+
annotate:
|
40
|
+
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
41
|
+
form : '{:.2f}'
|
42
|
+
color: 'green'
|
43
|
+
xoff : -15
|
44
|
+
yoff : -15
|
45
|
+
size : 10
|
46
|
+
correlation:
|
47
|
+
title : 'Correlation matrix'
|
48
|
+
size : [10, 10]
|
49
|
+
mask_value : 0
|
50
|
+
features:
|
51
|
+
plots:
|
52
|
+
r :
|
53
|
+
binning : [-6, 6, 100]
|
54
|
+
yscale : 'linear'
|
55
|
+
labels : ['$r$', '']
|
56
|
+
n :
|
57
|
+
binning : [-4, 4, 100]
|
58
|
+
yscale : 'linear'
|
59
|
+
labels : ['$n$', '']
|
60
|
+
w :
|
61
|
+
binning : [-4, 4, 100]
|
62
|
+
yscale : 'linear'
|
63
|
+
labels : ['$w$', '']
|
64
|
+
x :
|
65
|
+
binning : [-4, 4, 100]
|
66
|
+
yscale : 'linear'
|
67
|
+
labels : ['$x$', '']
|
68
|
+
y :
|
69
|
+
binning : [-4, 4, 100]
|
70
|
+
yscale : 'linear'
|
71
|
+
labels : ['$y$', '']
|
72
|
+
z :
|
73
|
+
binning : [-4, 4, 100]
|
74
|
+
yscale : 'linear'
|
75
|
+
labels : ['$z$', '']
|
@@ -0,0 +1,87 @@
|
|
1
|
+
dataset:
|
2
|
+
define :
|
3
|
+
r : z + x
|
4
|
+
nan :
|
5
|
+
x : -3
|
6
|
+
y : -3
|
7
|
+
training :
|
8
|
+
nfold : 3
|
9
|
+
features : [x, y, r]
|
10
|
+
rdm_stat : 1
|
11
|
+
hyper :
|
12
|
+
loss : log_loss
|
13
|
+
n_estimators : 100
|
14
|
+
max_depth : 3
|
15
|
+
learning_rate : 0.1
|
16
|
+
min_samples_split : 2
|
17
|
+
saving:
|
18
|
+
output : /tmp/tests/dmu/ml/train_mva
|
19
|
+
plotting:
|
20
|
+
roc :
|
21
|
+
min : [0.0, 0.0]
|
22
|
+
max : [1.2, 1.2]
|
23
|
+
annotate:
|
24
|
+
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
25
|
+
form : '{:.2f}'
|
26
|
+
color: 'green'
|
27
|
+
xoff : -15
|
28
|
+
yoff : -15
|
29
|
+
size : 10
|
30
|
+
correlation:
|
31
|
+
title : 'Correlation matrix'
|
32
|
+
size : [10, 10]
|
33
|
+
mask_value : 0
|
34
|
+
val_dir : '/tmp/tests/dmu/ml/train_mva'
|
35
|
+
features:
|
36
|
+
saving:
|
37
|
+
plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
|
38
|
+
plots:
|
39
|
+
r :
|
40
|
+
binning : [-6, 6, 100]
|
41
|
+
yscale : 'linear'
|
42
|
+
labels : ['$r$', '']
|
43
|
+
w :
|
44
|
+
binning : [-4, 4, 100]
|
45
|
+
yscale : 'linear'
|
46
|
+
labels : ['$w$', '']
|
47
|
+
x :
|
48
|
+
binning : [-4, 4, 100]
|
49
|
+
yscale : 'linear'
|
50
|
+
labels : ['$x$', '']
|
51
|
+
y :
|
52
|
+
binning : [-4, 4, 100]
|
53
|
+
yscale : 'linear'
|
54
|
+
labels : ['$y$', '']
|
55
|
+
z :
|
56
|
+
binning : [-4, 4, 100]
|
57
|
+
yscale : 'linear'
|
58
|
+
labels : ['$z$', '']
|
59
|
+
diagnostics:
|
60
|
+
output : /tmp/tests/dmu/ml/train_mva/diagnostics
|
61
|
+
correlations:
|
62
|
+
target :
|
63
|
+
name : z
|
64
|
+
overlay :
|
65
|
+
wp :
|
66
|
+
- 0.2
|
67
|
+
- 0.6
|
68
|
+
- 0.8
|
69
|
+
- 0.9
|
70
|
+
general:
|
71
|
+
size : [20, 10]
|
72
|
+
saving:
|
73
|
+
plt_dir : /tmp/tests/dmu/ml/train_mva/diagnostics
|
74
|
+
plots:
|
75
|
+
z :
|
76
|
+
binning : [-4, +4, 30]
|
77
|
+
yscale : 'linear'
|
78
|
+
labels : ['z', 'Entries']
|
79
|
+
normalized : true
|
80
|
+
styling :
|
81
|
+
linestyle: '-'
|
82
|
+
methods:
|
83
|
+
- Pearson
|
84
|
+
- Kendall-$\tau$
|
85
|
+
figure:
|
86
|
+
title: Training diagnostics
|
87
|
+
size : [10, 8]
|
@@ -0,0 +1,58 @@
|
|
1
|
+
dataset:
|
2
|
+
define :
|
3
|
+
r : z + preffix.x.suffix
|
4
|
+
nan :
|
5
|
+
preffix.x.suffix : -3
|
6
|
+
y : -3
|
7
|
+
training :
|
8
|
+
nfold : 2
|
9
|
+
features :
|
10
|
+
- preffix.x.suffix
|
11
|
+
- y
|
12
|
+
- r
|
13
|
+
rdm_stat : 1
|
14
|
+
hyper :
|
15
|
+
loss : log_loss
|
16
|
+
n_estimators : 100
|
17
|
+
max_depth : 3
|
18
|
+
learning_rate : 0.1
|
19
|
+
min_samples_split : 2
|
20
|
+
saving:
|
21
|
+
output : /tmp/tests/dmu/ml/train_mva
|
22
|
+
plotting:
|
23
|
+
roc :
|
24
|
+
min : [0.0, 0.0]
|
25
|
+
max : [1.2, 1.2]
|
26
|
+
annotate:
|
27
|
+
sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
|
28
|
+
form : '{:.2f}'
|
29
|
+
color: 'green'
|
30
|
+
xoff : -15
|
31
|
+
yoff : -15
|
32
|
+
size : 10
|
33
|
+
correlation:
|
34
|
+
title : 'Correlation matrix'
|
35
|
+
size : [10, 10]
|
36
|
+
mask_value : 0
|
37
|
+
features:
|
38
|
+
plots:
|
39
|
+
r :
|
40
|
+
binning : [-6, 6, 100]
|
41
|
+
yscale : 'linear'
|
42
|
+
labels : ['$r$', '']
|
43
|
+
w :
|
44
|
+
binning : [-4, 4, 100]
|
45
|
+
yscale : 'linear'
|
46
|
+
labels : ['$w$', '']
|
47
|
+
preffix.x.suffix :
|
48
|
+
binning : [-4, 4, 100]
|
49
|
+
yscale : 'linear'
|
50
|
+
labels : ['$x$', '']
|
51
|
+
y :
|
52
|
+
binning : [-4, 4, 100]
|
53
|
+
yscale : 'linear'
|
54
|
+
labels : ['$y$', '']
|
55
|
+
z :
|
56
|
+
binning : [-4, 4, 100]
|
57
|
+
yscale : 'linear'
|
58
|
+
labels : ['$z$', '']
|
@@ -0,0 +1,24 @@
|
|
1
|
+
saving:
|
2
|
+
plt_dir : /tmp/tests/dmu/plotting
|
3
|
+
selection:
|
4
|
+
cuts:
|
5
|
+
xlow : x > -1.5
|
6
|
+
definitions:
|
7
|
+
z : x + y
|
8
|
+
general:
|
9
|
+
size : [20, 10]
|
10
|
+
plots_2d:
|
11
|
+
- [x, y, weights, 'xy_wgt_lin', false]
|
12
|
+
- [x, z, weights, 'xz_wgt_log', true]
|
13
|
+
- [x, y, null, 'xy_raw_lin', false]
|
14
|
+
- [x, z, null, 'xz_raw_log', true]
|
15
|
+
axes:
|
16
|
+
x :
|
17
|
+
binning : [-3.0, 3.0, 40]
|
18
|
+
label : 'x'
|
19
|
+
y :
|
20
|
+
binning : [-5.0, 8.0, 40]
|
21
|
+
label : 'y'
|
22
|
+
z :
|
23
|
+
binning : [-5.0, 16.0, 40]
|
24
|
+
label : 'z'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
selection:
|
2
|
+
max_ran_entries : 50000
|
3
|
+
cuts:
|
4
|
+
z : 'z > 0'
|
5
|
+
saving:
|
6
|
+
plt_dir : tests/plotting/high_stat
|
7
|
+
definitions:
|
8
|
+
z : 'x + y'
|
9
|
+
plots:
|
10
|
+
x :
|
11
|
+
binning : [-5.0, 8.0, 40]
|
12
|
+
yscale : 'linear'
|
13
|
+
labels : ['x', 'Entries']
|
14
|
+
y :
|
15
|
+
binning : [-5.0, 8.0, 40]
|
16
|
+
yscale : 'linear'
|
17
|
+
labels : ['y', 'Entries']
|
18
|
+
z :
|
19
|
+
binning : [-5.0, 8.0, 40]
|
20
|
+
yscale : 'linear'
|
21
|
+
labels : ['x + y', 'Normalized']
|
22
|
+
normalized : true
|