data-manipulation-utilities 0.2.8.dev720__py3-none-any.whl → 0.2.8.dev730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {data_manipulation_utilities-0.2.8.dev720.dist-info → data_manipulation_utilities-0.2.8.dev730.dist-info}/METADATA +2 -1
  2. data_manipulation_utilities-0.2.8.dev730.dist-info/RECORD +82 -0
  3. dmu/plotting/plotter.py +23 -11
  4. dmu_data/__init__.py +0 -0
  5. dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
  6. dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
  7. dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
  8. dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
  9. dmu_data/ml/tests/train_mva.yaml +60 -0
  10. dmu_data/ml/tests/train_mva_def.yaml +75 -0
  11. dmu_data/ml/tests/train_mva_with_diagnostics.yaml +87 -0
  12. dmu_data/ml/tests/train_mva_with_preffix.yaml +58 -0
  13. dmu_data/plotting/tests/2d.yaml +24 -0
  14. dmu_data/plotting/tests/fig_size.yaml +13 -0
  15. dmu_data/plotting/tests/high_stat.yaml +22 -0
  16. dmu_data/plotting/tests/legend.yaml +12 -0
  17. dmu_data/plotting/tests/line.yaml +15 -0
  18. dmu_data/plotting/tests/name.yaml +14 -0
  19. dmu_data/plotting/tests/no_bounds.yaml +12 -0
  20. dmu_data/plotting/tests/normalized.yaml +9 -0
  21. dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
  22. dmu_data/plotting/tests/plug_stats.yaml +19 -0
  23. dmu_data/plotting/tests/simple.yaml +9 -0
  24. dmu_data/plotting/tests/stats.yaml +9 -0
  25. dmu_data/plotting/tests/styling.yaml +18 -0
  26. dmu_data/plotting/tests/title.yaml +14 -0
  27. dmu_data/plotting/tests/weights.yaml +13 -0
  28. dmu_data/rfile/friends.yaml +13 -0
  29. dmu_data/stats/fitter/test_simple.yaml +28 -0
  30. dmu_data/stats/kde_optimizer/control.json +1 -0
  31. dmu_data/stats/kde_optimizer/signal.json +1 -0
  32. dmu_data/stats/parameters/data.yaml +178 -0
  33. dmu_data/tests/config.json +6 -0
  34. dmu_data/tests/config.yaml +4 -0
  35. dmu_data/tests/pdf_to_tex.txt +34 -0
  36. dmu_data/text/transform.toml +4 -0
  37. dmu_data/text/transform.txt +6 -0
  38. dmu_data/text/transform_set.toml +8 -0
  39. dmu_data/text/transform_set.txt +6 -0
  40. dmu_data/text/transform_trf.txt +12 -0
  41. data_manipulation_utilities-0.2.8.dev720.dist-info/RECORD +0 -45
  42. {data_manipulation_utilities-0.2.8.dev720.dist-info → data_manipulation_utilities-0.2.8.dev730.dist-info}/WHEEL +0 -0
  43. {data_manipulation_utilities-0.2.8.dev720.dist-info → data_manipulation_utilities-0.2.8.dev730.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: data-manipulation-utilities
3
- Version: 0.2.8.dev720
3
+ Version: 0.2.8.dev730
4
4
  Summary: Project storing utilities needed to reduce boilerplate code when analyzing data
5
5
  Requires-Python: >=3.10,<3.13
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1483,6 +1483,7 @@ stats:
1483
1483
  ```
1484
1484
 
1485
1485
  it's up to the user to build this dictionary and load it.
1486
+ this can also be a `DictConfig` from the `OmegaConf` project.
1486
1487
 
1487
1488
  ### Pluggins
1488
1489
 
@@ -0,0 +1,82 @@
1
+ dmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
3
+ dmu/generic/hashing.py,sha256=QR5Gbv6-ANvi5hL232UNMrw9DONpU27BWTynXGxQLGU,1806
4
+ dmu/generic/utilities.py,sha256=0tT93vF_x0q8STRrTD0GvBEpALz-mqE-vJyen4zWCO8,6861
5
+ dmu/generic/version_management.py,sha256=j0ImlAq6SVNjTh3xRsF6G7DSoyr1w8kTRY84dNriGRE,3750
6
+ dmu/logging/log_store.py,sha256=eRSy8Y4fuiDFJK02Z6fq67XQzOrhQ7GMr2LvvJQbJ40,5172
7
+ dmu/logging/messages.py,sha256=Oj3O5EO2KOPtffyVq2P7RPzjpoXtxZ6yXO5HwTftVcM,2903
8
+ dmu/ml/cv_classifier.py,sha256=6rjezMahwL-WzLGKU-fzMzNxJZAGbM7YAbhaZVcJ3F0,4258
9
+ dmu/ml/cv_diagnostics.py,sha256=PLh41mSVE8Kagp9KcuRDN_7tDL9MjPxQzuewY8jDnNo,7600
10
+ dmu/ml/cv_performance.py,sha256=q9sLxIx7GP-dand3tnhHCBJnT6xqssNdRYv_TVjYWUM,1910
11
+ dmu/ml/cv_predict.py,sha256=0sc_OqwOewKvipcMyi3QqkgG30nkpZZjE-SOhHWHMd0,10778
12
+ dmu/ml/train_mva.py,sha256=7KAFX_zOx8MGbYx62U81JbdBkrZvqclSSkgmYvWX-60,34861
13
+ dmu/ml/utilities.py,sha256=A9j3tBh-jfaFdwwLUleo1QnttfawN7XDiQRh4VTvqVY,4597
14
+ dmu/pdataframe/utilities.py,sha256=xl6iLVKUccqVXYjuHsDUZ6UrCKQPw1k8D-f6407Yq30,2742
15
+ dmu/plotting/fwhm.py,sha256=4e8n6624pxWLcOOtayCQ_hDSSMKU21-3UsdmbkX1ojk,1949
16
+ dmu/plotting/matrix.py,sha256=s_5W8O3yXF3u8OX3f4J4hCoxIVZt1TF8S-qJsFBh2Go,5005
17
+ dmu/plotting/plotter.py,sha256=5N5mLdQAqOUs43ukX5mT9nRaYD5dkn_sED5NoZJV5A0,8483
18
+ dmu/plotting/plotter_1d.py,sha256=Kyoyh-QyZLXXqX19wqEDUWCD1nJEvEonGp9nlgEaoZE,10936
19
+ dmu/plotting/plotter_2d.py,sha256=dXC-7Rsquibe5cn7622ryoKpuv7KCAmouIIXwQ_VEFM,3172
20
+ dmu/plotting/utilities.py,sha256=SI9dvtZq2gr-PXVz71KE4o0i09rZOKgqJKD1jzf6KXk,1167
21
+ dmu/rdataframe/atr_mgr.py,sha256=FdhaQWVpsm4OOe1IRbm7rfrq8VenTNdORyI-lZ2Bs1M,2386
22
+ dmu/rdataframe/utilities.py,sha256=cY1Na8HbJ7kB2dwmBagRdsRyCA4ZT_vyIU86ewREj2Y,5322
23
+ dmu/rfile/ddfgetter.py,sha256=0jfNzpv72_NQUKOK5SBsn289rUqVt2BMvuL-Ro5oY7I,3316
24
+ dmu/rfile/rfprinter.py,sha256=mp5jd-oCJAnuokbdmGyL9i6tK2lY72jEfROuBIZ_ums,3941
25
+ dmu/rfile/utilities.py,sha256=XuYY7HuSBj46iSu3c60UYBHtI6KIPoJU_oofuhb-be0,945
26
+ dmu/stats/fit_stats.py,sha256=wzkQT9U32ljGe4azUj1Fj0ECF3zmnH2Ncn0O-_Pl1zQ,4070
27
+ dmu/stats/fitter.py,sha256=rm_fwjkq-0LSjXB_gt3y6BnHoK8Xvd4gHYwKBUJaItQ,19603
28
+ dmu/stats/function.py,sha256=yzi_Fvp_ASsFzbWFivIf-comquy21WoeY7is6dgY0Go,9491
29
+ dmu/stats/gof_calculator.py,sha256=63zNJJGKPy-j_hPNPfu9qNlhrHjYIgJOyL8-VDtbwuI,4894
30
+ dmu/stats/minimizers.py,sha256=db9R2G0SOV-k0BKi6m4EyB_yp6AtZdP23_28B0315oo,7094
31
+ dmu/stats/model_factory.py,sha256=0_o5OmiX0cNhp9_cNqBOYfasBgKlQkQPiy5nqi9qQKA,18966
32
+ dmu/stats/parameters.py,sha256=9lycexTT5ZcxXciiQY9HoJV8O1ahrTEkagd7dYXcfj8,3224
33
+ dmu/stats/utilities.py,sha256=7_tr1j-dl3lLNpxIMWruZs4yUtlNuUTknwGMERpfLhs,17338
34
+ dmu/stats/wdata.py,sha256=IbjZFU9SHTLSYfaBgqamDvqy1K7-3-SaKbU4bGsamK0,6799
35
+ dmu/stats/zfit.py,sha256=aSZj_4IHi9IBthfqlNJeA8YSoMmXO5WipgiKnXKGbnM,286
36
+ dmu/stats/zfit_models.py,sha256=SI61KJ-OG1UAabDICU1iTh6JPKM3giR2ErDraRjkCV8,1842
37
+ dmu/stats/zfit_plotter.py,sha256=gbN5KxhJcP4ItCi98c-fj5_UtvVWL_NA9jkTHiRjvnE,23854
38
+ dmu/testing/utilities.py,sha256=WYlz7Ve5lQjuWhhNL4gWe6_qcByBLV762Lhrc6A0P9E,7421
39
+ dmu/text/transformer.py,sha256=4lrGknbAWRm0-rxbvgzOO-eR1-9bkYk61boJUEV3cQ0,6100
40
+ dmu/workflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ dmu/workflow/cache.py,sha256=CtkGwxuF4UJlD55SmUJcRgWYLsbZOyUvYLI8oTVzk_g,8768
42
+ dmu_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ dmu_data/ml/tests/diagnostics_from_file.yaml,sha256=quvXOPkRducnBsctyape_Rn5_aqMEpPo6nO_UweMORo,404
44
+ dmu_data/ml/tests/diagnostics_from_model.yaml,sha256=rtCQlmGS9ld2xoQJEE35nA07yfRMklEfQEW0w3gRv2A,261
45
+ dmu_data/ml/tests/diagnostics_multiple_methods.yaml,sha256=w8Fpmr7kX1Jsb_h6LL2hiuYKf5lYpckFCpYKzWetbA0,265
46
+ dmu_data/ml/tests/diagnostics_overlay.yaml,sha256=ZVOsxLL8_JQtf41n8Ct-M9Ch10xBwHK54q1fttWPDlE,866
47
+ dmu_data/ml/tests/train_mva.yaml,sha256=KArbTkaj6FqerrUhlkgyBde_4DfkpVza6kCMgMQPi9g,1388
48
+ dmu_data/ml/tests/train_mva_def.yaml,sha256=UyPMo-9nshoB8BHxm9E6S0xd9ngRARdgUq6vnuMlhwI,1765
49
+ dmu_data/ml/tests/train_mva_with_diagnostics.yaml,sha256=-2KKIJ8CiNgMlgpCXkmZRdPEo-sJmAqr01vizfeqkj0,2098
50
+ dmu_data/ml/tests/train_mva_with_preffix.yaml,sha256=Q9SsJSXGbkHWGBvMZIkTZlKNUz5ZcSVBscrKgeMWBvE,1386
51
+ dmu_data/plotting/tests/2d.yaml,sha256=40wKQmNbIabZ7CI8-2QnD6mG1a_B7vEcPdzvehHkseY,520
52
+ dmu_data/plotting/tests/fig_size.yaml,sha256=7ROq49nwZ1A2EbPiySmu6n3G-Jq6YAOkc3d2X3YNZv0,294
53
+ dmu_data/plotting/tests/high_stat.yaml,sha256=bLglBLCZK6ft0xMhQ5OltxE76cWsBMPMjO6GG0OkDr8,522
54
+ dmu_data/plotting/tests/legend.yaml,sha256=wGpj58ig-GOlqbWoN894zrCet2Fj9f5QtY0rig_UC-c,213
55
+ dmu_data/plotting/tests/line.yaml,sha256=EERDeTctbauwqAvmKFXC4Ot3Tgx-8kcIniGbepXwsKs,305
56
+ dmu_data/plotting/tests/name.yaml,sha256=mkcPAVg8wBAmlSbSRQ1bcaMl4vOS6LXMtpqQeDrrtO4,312
57
+ dmu_data/plotting/tests/no_bounds.yaml,sha256=8e1QdphBjz-suDr857DoeUC2DXiy6SE-gvkORJQYv80,257
58
+ dmu_data/plotting/tests/normalized.yaml,sha256=Y0eKtyV5pvlSxvqfsLjytYtv8xYF3HZ5WEdCJdeHGQI,193
59
+ dmu_data/plotting/tests/plug_fwhm.yaml,sha256=xl5LXc9Nt66anM-HOXAxCtlaxWNM7zzIXf1Y6U8M4Wg,449
60
+ dmu_data/plotting/tests/plug_stats.yaml,sha256=ROO8soYXBbZIFYZcGngA_K5XHgIAFCmuAGfZCJgMmd0,384
61
+ dmu_data/plotting/tests/simple.yaml,sha256=Xc59Pjfb3BKMicLVBxODVqomHFupcb5GvefKbKHCQWQ,195
62
+ dmu_data/plotting/tests/stats.yaml,sha256=fSZjoV-xPnukpCH2OAXsz_SNPjI113qzDg8Ln3spaaA,165
63
+ dmu_data/plotting/tests/styling.yaml,sha256=ZglA4fG6gr5Q_K2VinwVDPjIitiFizCzxr-KsHw2ERI,370
64
+ dmu_data/plotting/tests/title.yaml,sha256=bawKp9aGpeRrHzv69BOCbFX8sq9bb3Es9tdsPTE7jIk,333
65
+ dmu_data/plotting/tests/weights.yaml,sha256=RWQ1KxbCq-uO62WJ2AoY4h5Umc37zG35s-TpKnNMABI,312
66
+ dmu_data/rfile/friends.yaml,sha256=sEGKFKK0q1U6b9qlfHUFBLZW0FeruR1t2LCOo6Ck1Rg,264
67
+ dmu_data/stats/fitter/test_simple.yaml,sha256=lBw6igBT57BZnuG3GgoxOiXTMFHfs5LchbI3Ubb8Qz0,1549
68
+ dmu_data/stats/kde_optimizer/control.json,sha256=EiArsHUAHBmzw4gmaNyOOW1ziYtNhdelIAqc3EH0K_M,1327616
69
+ dmu_data/stats/kde_optimizer/signal.json,sha256=MocwnYizcKki4dlxEIsWwE8HzY-ZBQaUo-lrCR5N3Tw,1327616
70
+ dmu_data/stats/parameters/data.yaml,sha256=lNmuolhUQmwB6sxHQvBRm-Kz5MUW_H1qAouynzBiWvs,2087
71
+ dmu_data/tests/config.json,sha256=QSfx-irgPV-BHAVe1Xe1dgiVkZGPp0fxb9OhXeVaEBg,60
72
+ dmu_data/tests/config.yaml,sha256=rFTk9PSFOgEVEcGDxr4K9vFIUrCVhbEMUoj683Py1AQ,38
73
+ dmu_data/tests/pdf_to_tex.txt,sha256=yzzH1L7P2SOFrVxS737Ykg1SlcD0jhrrBwQGsui2oAQ,3854
74
+ dmu_data/text/transform.toml,sha256=R-832BZalzHZ6c5gD6jtT_Hj8BCsM5vxa1v6oeiwaP4,94
75
+ dmu_data/text/transform.txt,sha256=EX760da6Vkf-_EPxnQlC5hGSkfFhJCCGCD19NU-1Qto,44
76
+ dmu_data/text/transform_set.toml,sha256=Jeh7BTz82idqvbOQJtl9-ur56mZkzDn5WtvmIb48LoE,150
77
+ dmu_data/text/transform_set.txt,sha256=1KivMoP9LxPn9955QrRmOzjEqduEjhTetQ9MXykO5LY,46
78
+ dmu_data/text/transform_trf.txt,sha256=zxBRTgcSmX7RdqfmWF88W1YqbyNHa4Ccruf1MmnYv2A,74
79
+ data_manipulation_utilities-0.2.8.dev730.dist-info/METADATA,sha256=p6CwOJiDGK5kQwCGIg9i6AoNvSE9VDKFbGa7OG_od0M,51215
80
+ data_manipulation_utilities-0.2.8.dev730.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
81
+ data_manipulation_utilities-0.2.8.dev730.dist-info/entry_points.txt,sha256=M0C8_u9B_xSmyfemdPwdIBh9QuPIkjhEpG060Y5_Pjw,321
82
+ data_manipulation_utilities-0.2.8.dev730.dist-info/RECORD,,
dmu/plotting/plotter.py CHANGED
@@ -5,12 +5,12 @@ Module containing plotter class
5
5
  import os
6
6
  import json
7
7
  import math
8
- from typing import Union
9
8
 
10
9
  import numpy
11
10
  import matplotlib.pyplot as plt
12
11
 
13
- from ROOT import RDataFrame
12
+ from ROOT import RDataFrame, RDF
13
+ from omegaconf import DictConfig
14
14
  from dmu.logging.log_store import LogStore
15
15
 
16
16
  log = LogStore.add_logger('dmu:plotting:Plotter')
@@ -20,16 +20,28 @@ class Plotter:
20
20
  Base class of Plotter1D and Plotter2D
21
21
  '''
22
22
  #-------------------------------------
23
- def __init__(self, d_rdf=None, cfg=None):
24
- if not isinstance( cfg, dict):
23
+ def __init__(
24
+ self,
25
+ d_rdf: dict|None =None,
26
+ cfg : dict|DictConfig|None =None):
27
+ '''
28
+ Parameters
29
+ --------------
30
+ d_rdf: Dictionary where
31
+ key : Identifier of dataset
32
+ value: ROOT dataframe representing dataset
33
+
34
+ cfg : Dictionary or DictConfig instance holding configuration
35
+ '''
36
+ if not isinstance( cfg, (dict,DictConfig)):
25
37
  raise ValueError('Config dictionary not passed')
26
38
 
27
39
  if not isinstance(d_rdf, dict):
28
40
  raise ValueError('Dataframe dictionary not passed')
29
41
 
30
42
  self._d_cfg = cfg
31
- self._d_rdf : dict[str, RDataFrame] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
32
- self._d_wgt : Union[dict[str, Union[numpy.ndarray, None]], None]
43
+ self._d_rdf : dict[str, RDF.RNode] = { name : self._preprocess_rdf(rdf) for name, rdf in d_rdf.items()}
44
+ self._d_wgt : dict[str, numpy.ndarray|None] | None
33
45
 
34
46
  self._title : str = ''
35
47
  #-------------------------------------
@@ -68,9 +80,9 @@ class Plotter:
68
80
 
69
81
  return minx, maxx
70
82
  #-------------------------------------
71
- def _preprocess_rdf(self, rdf : RDataFrame) -> RDataFrame:
83
+ def _preprocess_rdf(self, rdf : RDF.RNode) -> RDF.RNode:
72
84
  '''
73
- rdf (RDataFrame): ROOT dataframe
85
+ rdf (RDF.RNode): ROOT dataframe
74
86
 
75
87
  returns preprocessed dataframe
76
88
  '''
@@ -146,7 +158,7 @@ class Plotter:
146
158
 
147
159
  return rdf
148
160
  # --------------------------------------------
149
- def _print_weights(self, arr_wgt : Union[numpy.ndarray, None], var : str, sample : str) -> None:
161
+ def _print_weights(self, arr_wgt : numpy.ndarray|None, var : str, sample : str) -> None:
150
162
  if arr_wgt is None:
151
163
  log.debug(f'Not using weights for {sample}:{var}')
152
164
  return
@@ -171,7 +183,7 @@ class Plotter:
171
183
 
172
184
  return fig_size
173
185
  #-------------------------------------
174
- def _get_weights(self, var) -> Union[dict[str, Union[numpy.ndarray, None]], None]:
186
+ def _get_weights(self, var) -> dict[str, numpy.ndarray|None]| None:
175
187
  d_cfg = self._d_cfg['plots'][var]
176
188
  if 'weights' not in d_cfg:
177
189
  return None
@@ -186,7 +198,7 @@ class Plotter:
186
198
 
187
199
  return d_weight
188
200
  # --------------------------------------------
189
- def _read_weights(self, name : str, rdf : RDataFrame) -> numpy.ndarray:
201
+ def _read_weights(self, name : str, rdf : RDF.RNode) -> numpy.ndarray:
190
202
  v_col = rdf.GetColumnNames()
191
203
  l_col = [ col.c_str() for col in v_col ]
192
204
 
dmu_data/__init__.py ADDED
File without changes
@@ -0,0 +1,13 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/from_rdf
2
+ # Will assume that the target is already in the input dataframe
3
+ # and will use it, instead of evaluating models
4
+ score_from_rdf : w
5
+ correlations:
6
+ # Variables with respect to which the correlations with the features will be measured
7
+ target :
8
+ name : z
9
+ methods:
10
+ - Pearson
11
+ figure:
12
+ title: Scores from file
13
+ size : [10, 8]
@@ -0,0 +1,10 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/from_model
2
+ correlations:
3
+ # Variables with respect to which the correlations with the features will be measured
4
+ target :
5
+ name : z
6
+ methods:
7
+ - Pearson
8
+ figure:
9
+ size : [10, 8]
10
+ rotate: 90
@@ -0,0 +1,10 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/multiple_methods
2
+ correlations:
3
+ # Variables with respect to which the correlations with the features will be measured
4
+ target :
5
+ name : z
6
+ methods:
7
+ - Pearson
8
+ - Kendall-$\tau$
9
+ figure:
10
+ size : [10, 8]
@@ -0,0 +1,33 @@
1
+ output : /tmp/tests/dmu/ml/cv_diagnostics/overlay
2
+ # Will assume that the target is already in the input dataframe
3
+ # and will use it, instead of evaluating models
4
+ score_from_rdf : w
5
+ correlations:
6
+ # Variables with respect to which the correlations with the features will be measured
7
+ target :
8
+ name : z
9
+ overlay :
10
+ wp :
11
+ - 0.2
12
+ - 0.5
13
+ - 0.7
14
+ - 0.9
15
+ general:
16
+ size : [12, 10]
17
+ saving:
18
+ plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/overlay
19
+ plots:
20
+ z :
21
+ binning : [-4, 4, 10]
22
+ yscale : 'linear'
23
+ labels : ['$z$', 'Entries']
24
+ normalized : true
25
+ styling :
26
+ linestyle: '-'
27
+ methods:
28
+ - Pearson
29
+ - Kendall-$\tau$
30
+ figure:
31
+ title : Scores from file
32
+ size : [12, 10]
33
+ xlabelsize: 30
@@ -0,0 +1,60 @@
1
+ dataset:
2
+ define :
3
+ r : z + x
4
+ nan :
5
+ x : -3
6
+ y : -3
7
+ training :
8
+ nfold : 3
9
+ features :
10
+ - x
11
+ - y
12
+ - r
13
+ rdm_stat : 1
14
+ hyper :
15
+ loss : log_loss
16
+ max_features : sqrt
17
+ n_estimators : 100
18
+ max_depth : 5
19
+ min_samples_split : 2
20
+ subsample : 0.8
21
+ learning_rate : 0.1
22
+ saving:
23
+ output : /tmp/tests/dmu/ml/train_mva
24
+ plotting:
25
+ roc :
26
+ min : [0.0, 0.0]
27
+ max : [1.2, 1.2]
28
+ annotate:
29
+ sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
30
+ form : '{:.2f}'
31
+ color: 'green'
32
+ xoff : -15
33
+ yoff : -15
34
+ size : 10
35
+ correlation:
36
+ title : 'Correlation matrix'
37
+ size : [10, 10]
38
+ mask_value : 0
39
+ features:
40
+ plots:
41
+ r :
42
+ binning : [-6, 6, 100]
43
+ yscale : 'linear'
44
+ labels : ['$r$', '']
45
+ w :
46
+ binning : [-4, 4, 100]
47
+ yscale : 'linear'
48
+ labels : ['$w$', '']
49
+ x :
50
+ binning : [-4, 4, 100]
51
+ yscale : 'linear'
52
+ labels : ['$x$', '']
53
+ y :
54
+ binning : [-4, 4, 100]
55
+ yscale : 'linear'
56
+ labels : ['$y$', '']
57
+ z :
58
+ binning : [-4, 4, 100]
59
+ yscale : 'linear'
60
+ labels : ['$z$', '']
@@ -0,0 +1,75 @@
1
+ # This config file is used for testing training and evaluation
2
+ # when there is a variable that is defined in different ways
3
+ # for the `sig` and `bkg` samples
4
+
5
+ dataset:
6
+ samples:
7
+ sig :
8
+ definitions:
9
+ n : x + y
10
+ bkg :
11
+ definitions:
12
+ n : x - y
13
+ define :
14
+ r : z + x
15
+ nan :
16
+ n : -3
17
+ y : -3
18
+ training :
19
+ nfold : 3
20
+ features :
21
+ - n
22
+ - y
23
+ - r
24
+ rdm_stat : 1
25
+ hyper :
26
+ loss : log_loss
27
+ max_features : sqrt
28
+ n_estimators : 100
29
+ max_depth : 5
30
+ min_samples_split : 2
31
+ subsample : 0.8
32
+ learning_rate : 0.1
33
+ saving:
34
+ output : /tmp/tests/dmu/ml/train_mva
35
+ plotting:
36
+ roc :
37
+ min : [0.0, 0.0]
38
+ max : [1.2, 1.2]
39
+ annotate:
40
+ sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
41
+ form : '{:.2f}'
42
+ color: 'green'
43
+ xoff : -15
44
+ yoff : -15
45
+ size : 10
46
+ correlation:
47
+ title : 'Correlation matrix'
48
+ size : [10, 10]
49
+ mask_value : 0
50
+ features:
51
+ plots:
52
+ r :
53
+ binning : [-6, 6, 100]
54
+ yscale : 'linear'
55
+ labels : ['$r$', '']
56
+ n :
57
+ binning : [-4, 4, 100]
58
+ yscale : 'linear'
59
+ labels : ['$n$', '']
60
+ w :
61
+ binning : [-4, 4, 100]
62
+ yscale : 'linear'
63
+ labels : ['$w$', '']
64
+ x :
65
+ binning : [-4, 4, 100]
66
+ yscale : 'linear'
67
+ labels : ['$x$', '']
68
+ y :
69
+ binning : [-4, 4, 100]
70
+ yscale : 'linear'
71
+ labels : ['$y$', '']
72
+ z :
73
+ binning : [-4, 4, 100]
74
+ yscale : 'linear'
75
+ labels : ['$z$', '']
@@ -0,0 +1,87 @@
1
+ dataset:
2
+ define :
3
+ r : z + x
4
+ nan :
5
+ x : -3
6
+ y : -3
7
+ training :
8
+ nfold : 3
9
+ features : [x, y, r]
10
+ rdm_stat : 1
11
+ hyper :
12
+ loss : log_loss
13
+ n_estimators : 100
14
+ max_depth : 3
15
+ learning_rate : 0.1
16
+ min_samples_split : 2
17
+ saving:
18
+ output : /tmp/tests/dmu/ml/train_mva
19
+ plotting:
20
+ roc :
21
+ min : [0.0, 0.0]
22
+ max : [1.2, 1.2]
23
+ annotate:
24
+ sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
25
+ form : '{:.2f}'
26
+ color: 'green'
27
+ xoff : -15
28
+ yoff : -15
29
+ size : 10
30
+ correlation:
31
+ title : 'Correlation matrix'
32
+ size : [10, 10]
33
+ mask_value : 0
34
+ val_dir : '/tmp/tests/dmu/ml/train_mva'
35
+ features:
36
+ saving:
37
+ plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
38
+ plots:
39
+ r :
40
+ binning : [-6, 6, 100]
41
+ yscale : 'linear'
42
+ labels : ['$r$', '']
43
+ w :
44
+ binning : [-4, 4, 100]
45
+ yscale : 'linear'
46
+ labels : ['$w$', '']
47
+ x :
48
+ binning : [-4, 4, 100]
49
+ yscale : 'linear'
50
+ labels : ['$x$', '']
51
+ y :
52
+ binning : [-4, 4, 100]
53
+ yscale : 'linear'
54
+ labels : ['$y$', '']
55
+ z :
56
+ binning : [-4, 4, 100]
57
+ yscale : 'linear'
58
+ labels : ['$z$', '']
59
+ diagnostics:
60
+ output : /tmp/tests/dmu/ml/train_mva/diagnostics
61
+ correlations:
62
+ target :
63
+ name : z
64
+ overlay :
65
+ wp :
66
+ - 0.2
67
+ - 0.6
68
+ - 0.8
69
+ - 0.9
70
+ general:
71
+ size : [20, 10]
72
+ saving:
73
+ plt_dir : /tmp/tests/dmu/ml/train_mva/diagnostics
74
+ plots:
75
+ z :
76
+ binning : [-4, +4, 30]
77
+ yscale : 'linear'
78
+ labels : ['z', 'Entries']
79
+ normalized : true
80
+ styling :
81
+ linestyle: '-'
82
+ methods:
83
+ - Pearson
84
+ - Kendall-$\tau$
85
+ figure:
86
+ title: Training diagnostics
87
+ size : [10, 8]
@@ -0,0 +1,58 @@
1
+ dataset:
2
+ define :
3
+ r : z + preffix.x.suffix
4
+ nan :
5
+ preffix.x.suffix : -3
6
+ y : -3
7
+ training :
8
+ nfold : 2
9
+ features :
10
+ - preffix.x.suffix
11
+ - y
12
+ - r
13
+ rdm_stat : 1
14
+ hyper :
15
+ loss : log_loss
16
+ n_estimators : 100
17
+ max_depth : 3
18
+ learning_rate : 0.1
19
+ min_samples_split : 2
20
+ saving:
21
+ output : /tmp/tests/dmu/ml/train_mva
22
+ plotting:
23
+ roc :
24
+ min : [0.0, 0.0]
25
+ max : [1.2, 1.2]
26
+ annotate:
27
+ sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
28
+ form : '{:.2f}'
29
+ color: 'green'
30
+ xoff : -15
31
+ yoff : -15
32
+ size : 10
33
+ correlation:
34
+ title : 'Correlation matrix'
35
+ size : [10, 10]
36
+ mask_value : 0
37
+ features:
38
+ plots:
39
+ r :
40
+ binning : [-6, 6, 100]
41
+ yscale : 'linear'
42
+ labels : ['$r$', '']
43
+ w :
44
+ binning : [-4, 4, 100]
45
+ yscale : 'linear'
46
+ labels : ['$w$', '']
47
+ preffix.x.suffix :
48
+ binning : [-4, 4, 100]
49
+ yscale : 'linear'
50
+ labels : ['$x$', '']
51
+ y :
52
+ binning : [-4, 4, 100]
53
+ yscale : 'linear'
54
+ labels : ['$y$', '']
55
+ z :
56
+ binning : [-4, 4, 100]
57
+ yscale : 'linear'
58
+ labels : ['$z$', '']
@@ -0,0 +1,24 @@
1
+ saving:
2
+ plt_dir : /tmp/tests/dmu/plotting
3
+ selection:
4
+ cuts:
5
+ xlow : x > -1.5
6
+ definitions:
7
+ z : x + y
8
+ general:
9
+ size : [20, 10]
10
+ plots_2d:
11
+ - [x, y, weights, 'xy_wgt_lin', false]
12
+ - [x, z, weights, 'xz_wgt_log', true]
13
+ - [x, y, null, 'xy_raw_lin', false]
14
+ - [x, z, null, 'xz_raw_log', true]
15
+ axes:
16
+ x :
17
+ binning : [-3.0, 3.0, 40]
18
+ label : 'x'
19
+ y :
20
+ binning : [-5.0, 8.0, 40]
21
+ label : 'y'
22
+ z :
23
+ binning : [-5.0, 16.0, 40]
24
+ label : 'z'
@@ -0,0 +1,13 @@
1
+ saving:
2
+ plt_dir : tests/plotting/fig_size
3
+ general:
4
+ size : [20, 10]
5
+ plots:
6
+ x :
7
+ binning : [-5.0, 8.0, 40]
8
+ yscale : 'linear'
9
+ labels : ['x', 'Entries']
10
+ y :
11
+ binning : [-5.0, 8.0, 40]
12
+ yscale : 'linear'
13
+ labels : ['y', 'Entries']
@@ -0,0 +1,22 @@
1
+ selection:
2
+ max_ran_entries : 50000
3
+ cuts:
4
+ z : 'z > 0'
5
+ saving:
6
+ plt_dir : tests/plotting/high_stat
7
+ definitions:
8
+ z : 'x + y'
9
+ plots:
10
+ x :
11
+ binning : [-5.0, 8.0, 40]
12
+ yscale : 'linear'
13
+ labels : ['x', 'Entries']
14
+ y :
15
+ binning : [-5.0, 8.0, 40]
16
+ yscale : 'linear'
17
+ labels : ['y', 'Entries']
18
+ z :
19
+ binning : [-5.0, 8.0, 40]
20
+ yscale : 'linear'
21
+ labels : ['x + y', 'Normalized']
22
+ normalized : true
@@ -0,0 +1,12 @@
1
+ saving:
2
+ plt_dir : tests/plotting/legend
3
+ general:
4
+ size : [20, 10]
5
+ plots:
6
+ x :
7
+ binning : [-5.0, 8.0, 40]
8
+ y :
9
+ binning : [-5.0, 8.0, 40]
10
+ style:
11
+ legend:
12
+ bbox_to_anchor : [1.2, 1]
@@ -0,0 +1,15 @@
1
+ saving:
2
+ plt_dir : tests/plotting/line
3
+ plots:
4
+ x :
5
+ binning : [-5.0, 8.0, 40]
6
+ title : x distribution
7
+ vline :
8
+ x : 0
9
+ label : label
10
+ ls : --
11
+ c : blue
12
+ lw : 1
13
+ y :
14
+ binning : [-5.0, 8.0, 40]
15
+ title : y distribution
@@ -0,0 +1,14 @@
1
+ saving:
2
+ plt_dir : tests/plotting/name
3
+
4
+ plots:
5
+ x :
6
+ binning : [-5.0, 8.0, 40]
7
+ yscale : 'linear'
8
+ labels : ['x', 'Entries']
9
+ name : 'xvar'
10
+ y :
11
+ binning : [-5.0, 8.0, 40]
12
+ yscale : 'linear'
13
+ labels : ['y', 'Entries']
14
+ name : 'yvar'
@@ -0,0 +1,12 @@
1
+ saving:
2
+ plt_dir : tests/plotting/no_bounds
3
+
4
+ plots:
5
+ x :
6
+ binning : [1, 1, 40]
7
+ yscale : 'linear'
8
+ labels : ['x', 'Entries']
9
+ y :
10
+ binning : [1, 1, 40]
11
+ yscale : 'linear'
12
+ labels : ['y', 'Entries']
@@ -0,0 +1,9 @@
1
+ saving:
2
+ plt_dir : tests/plotting/normalized
3
+ plots:
4
+ x :
5
+ normalized : true
6
+ binning : [-5.0, 8.0, 40]
7
+ y :
8
+ normalized : false
9
+ binning : [-5.0, 8.0, 40]