data-manipulation-utilities 0.1.9__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.1.9/src/data_manipulation_utilities.egg-info → data_manipulation_utilities-0.2.0}/PKG-INFO +29 -2
- data_manipulation_utilities-0.1.9/PKG-INFO → data_manipulation_utilities-0.2.0/README.md +27 -20
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/pyproject.toml +2 -2
- data_manipulation_utilities-0.1.9/README.md → data_manipulation_utilities-0.2.0/src/data_manipulation_utilities.egg-info/PKG-INFO +47 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/data_manipulation_utilities.egg-info/SOURCES.txt +1 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/data_manipulation_utilities.egg-info/requires.txt +1 -1
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/cv_predict.py +2 -1
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/plotting/plotter_1d.py +18 -1
- data_manipulation_utilities-0.2.0/src/dmu_data/plotting/tests/normalized.yaml +9 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/setup.cfg +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/data_manipulation_utilities.egg-info/entry_points.txt +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/arrays/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/generic/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/logging/log_store.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/cv_classifier.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/train_mva.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/plotting/plotter.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/plotting/plotter_2d.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/rdataframe/atr_mgr.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/rdataframe/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/rfile/rfprinter.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/rfile/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/fitter.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/function.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/gof_calculator.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/minimizers.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/model_factory.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/zfit_plotter.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/testing/utilities.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/text/transformer.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/__init__.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/2d.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/name.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/simple.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/title.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/plotting/tests/weights.yaml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/text/transform.toml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/text/transform.txt +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/text/transform_set.toml +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/text/transform_set.txt +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/text/transform_trf.txt +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/git/publish +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/physics/check_truth.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/rfile/print_trees.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/ssh/coned.py +0 -0
- {data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: data_manipulation_utilities
|
3
|
-
Version: 0.1.9
|
3
|
+
Version: 0.2.0
|
4
4
|
Description-Content-Type: text/markdown
|
5
5
|
Requires-Dist: logzero
|
6
6
|
Requires-Dist: PyYAML
|
7
7
|
Requires-Dist: scipy
|
8
|
-
Requires-Dist: awkward
|
8
|
+
Requires-Dist: awkward==2.4.6
|
9
9
|
Requires-Dist: tqdm
|
10
10
|
Requires-Dist: joblib
|
11
11
|
Requires-Dist: scikit-learn
|
@@ -204,6 +204,33 @@ print_pdf(pdf,
|
|
204
204
|
|
205
205
|
The `Fitter` class is a wrapper around zfit, used to make fitting easier.
|
206
206
|
|
207
|
+
### Goodness of fits
|
208
|
+
|
209
|
+
Once a fit has been done, one can use `GofCalculator` to get a rough estimate of the fit quality.
|
210
|
+
This is done by:
|
211
|
+
|
212
|
+
- Binning the data and PDF.
|
213
|
+
- Calculating the reduced $\chi^2$.
|
214
|
+
- Using the $\chi^2$ and the number of degrees of freedom to get the p-value.
|
215
|
+
|
216
|
+
This class is used as shown below:
|
217
|
+
|
218
|
+
```python
|
219
|
+
from dmu.stats.gof_calculator import GofCalculator
|
220
|
+
|
221
|
+
nll = _get_nll()
|
222
|
+
res = Data.minimizer.minimize(nll)
|
223
|
+
|
224
|
+
gcl = GofCalculator(nll, ndof=10)
|
225
|
+
gof = gcl.get_gof(kind='pvalue')
|
226
|
+
```
|
227
|
+
|
228
|
+
where:
|
229
|
+
|
230
|
+
- `ndof` is the number of degrees of freedom used in the reduced $\chi^2$ calculation.
|
231
|
+
It is needed to know how many bins to use to make the histogram. The recommended value is 10.
|
232
|
+
- `kind` can be either `pvalue` or `chi2/ndof`.
|
233
|
+
|
207
234
|
### Simplest fit
|
208
235
|
|
209
236
|
```python
|
@@ -1,23 +1,3 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: data_manipulation_utilities
|
3
|
-
Version: 0.1.9
|
4
|
-
Description-Content-Type: text/markdown
|
5
|
-
Requires-Dist: logzero
|
6
|
-
Requires-Dist: PyYAML
|
7
|
-
Requires-Dist: scipy
|
8
|
-
Requires-Dist: awkward
|
9
|
-
Requires-Dist: tqdm
|
10
|
-
Requires-Dist: joblib
|
11
|
-
Requires-Dist: scikit-learn
|
12
|
-
Requires-Dist: toml
|
13
|
-
Requires-Dist: numpy
|
14
|
-
Requires-Dist: matplotlib
|
15
|
-
Requires-Dist: mplhep
|
16
|
-
Requires-Dist: hist[plot]
|
17
|
-
Requires-Dist: pandas
|
18
|
-
Provides-Extra: dev
|
19
|
-
Requires-Dist: pytest; extra == "dev"
|
20
|
-
|
21
1
|
# D(ata) M(anipulation) U(tilities)
|
22
2
|
|
23
3
|
These are tools that can be used for different data analysis tasks.
|
@@ -204,6 +184,33 @@ print_pdf(pdf,
|
|
204
184
|
|
205
185
|
The `Fitter` class is a wrapper around zfit, used to make fitting easier.
|
206
186
|
|
187
|
+
### Goodness of fits
|
188
|
+
|
189
|
+
Once a fit has been done, one can use `GofCalculator` to get a rough estimate of the fit quality.
|
190
|
+
This is done by:
|
191
|
+
|
192
|
+
- Binning the data and PDF.
|
193
|
+
- Calculating the reduced $\chi^2$.
|
194
|
+
- Using the $\chi^2$ and the number of degrees of freedom to get the p-value.
|
195
|
+
|
196
|
+
This class is used as shown below:
|
197
|
+
|
198
|
+
```python
|
199
|
+
from dmu.stats.gof_calculator import GofCalculator
|
200
|
+
|
201
|
+
nll = _get_nll()
|
202
|
+
res = Data.minimizer.minimize(nll)
|
203
|
+
|
204
|
+
gcl = GofCalculator(nll, ndof=10)
|
205
|
+
gof = gcl.get_gof(kind='pvalue')
|
206
|
+
```
|
207
|
+
|
208
|
+
where:
|
209
|
+
|
210
|
+
- `ndof` is the number of degrees of freedom used in the reduced $\chi^2$ calculation.
|
211
|
+
It is needed to know how many bins to use to make the histogram. The recommended value is 10.
|
212
|
+
- `kind` can be either `pvalue` or `chi2/ndof`.
|
213
|
+
|
207
214
|
### Simplest fit
|
208
215
|
|
209
216
|
```python
|
@@ -1,3 +1,23 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: data_manipulation_utilities
|
3
|
+
Version: 0.2.0
|
4
|
+
Description-Content-Type: text/markdown
|
5
|
+
Requires-Dist: logzero
|
6
|
+
Requires-Dist: PyYAML
|
7
|
+
Requires-Dist: scipy
|
8
|
+
Requires-Dist: awkward==2.4.6
|
9
|
+
Requires-Dist: tqdm
|
10
|
+
Requires-Dist: joblib
|
11
|
+
Requires-Dist: scikit-learn
|
12
|
+
Requires-Dist: toml
|
13
|
+
Requires-Dist: numpy
|
14
|
+
Requires-Dist: matplotlib
|
15
|
+
Requires-Dist: mplhep
|
16
|
+
Requires-Dist: hist[plot]
|
17
|
+
Requires-Dist: pandas
|
18
|
+
Provides-Extra: dev
|
19
|
+
Requires-Dist: pytest; extra == "dev"
|
20
|
+
|
1
21
|
# D(ata) M(anipulation) U(tilities)
|
2
22
|
|
3
23
|
These are tools that can be used for different data analysis tasks.
|
@@ -184,6 +204,33 @@ print_pdf(pdf,
|
|
184
204
|
|
185
205
|
The `Fitter` class is a wrapper around zfit, used to make fitting easier.
|
186
206
|
|
207
|
+
### Goodness of fits
|
208
|
+
|
209
|
+
Once a fit has been done, one can use `GofCalculator` to get a rough estimate of the fit quality.
|
210
|
+
This is done by:
|
211
|
+
|
212
|
+
- Binning the data and PDF.
|
213
|
+
- Calculating the reduced $\chi^2$.
|
214
|
+
- Using the $\chi^2$ and the number of degrees of freedom to get the p-value.
|
215
|
+
|
216
|
+
This class is used as shown below:
|
217
|
+
|
218
|
+
```python
|
219
|
+
from dmu.stats.gof_calculator import GofCalculator
|
220
|
+
|
221
|
+
nll = _get_nll()
|
222
|
+
res = Data.minimizer.minimize(nll)
|
223
|
+
|
224
|
+
gcl = GofCalculator(nll, ndof=10)
|
225
|
+
gof = gcl.get_gof(kind='pvalue')
|
226
|
+
```
|
227
|
+
|
228
|
+
where:
|
229
|
+
|
230
|
+
- `ndof` is the number of degrees of freedom used in the reduced $\chi^2$ calculation.
|
231
|
+
It is needed to know how many bins to use to make the histogram. The recommended value is 10.
|
232
|
+
- `kind` can be either `pvalue` or `chi2/ndof`.
|
233
|
+
|
187
234
|
### Simplest fit
|
188
235
|
|
189
236
|
```python
|
@@ -36,6 +36,7 @@ src/dmu_data/plotting/tests/fig_size.yaml
|
|
36
36
|
src/dmu_data/plotting/tests/high_stat.yaml
|
37
37
|
src/dmu_data/plotting/tests/name.yaml
|
38
38
|
src/dmu_data/plotting/tests/no_bounds.yaml
|
39
|
+
src/dmu_data/plotting/tests/normalized.yaml
|
39
40
|
src/dmu_data/plotting/tests/simple.yaml
|
40
41
|
src/dmu_data/plotting/tests/title.yaml
|
41
42
|
src/dmu_data/plotting/tests/weights.yaml
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/cv_predict.py
RENAMED
@@ -10,8 +10,8 @@ import tqdm
|
|
10
10
|
from ROOT import RDataFrame
|
11
11
|
|
12
12
|
import dmu.ml.utilities as ut
|
13
|
-
import dmu.ml.cv_classifier as CVClassifier
|
14
13
|
|
14
|
+
from dmu.ml.cv_classifier import CVClassifier
|
15
15
|
from dmu.logging.log_store import LogStore
|
16
16
|
|
17
17
|
log = LogStore.add_logger('dmu:ml:cv_predict')
|
@@ -147,6 +147,7 @@ class CVPredict:
|
|
147
147
|
arr_prb = self._predict_with_overlap(df_ft)
|
148
148
|
|
149
149
|
arr_prb = self._patch_probabilities(arr_prb)
|
150
|
+
arr_prb = arr_prb.T[1]
|
150
151
|
|
151
152
|
return arr_prb
|
152
153
|
# ---------------------------------------
|
@@ -2,7 +2,6 @@
|
|
2
2
|
Module containing plotter class
|
3
3
|
'''
|
4
4
|
|
5
|
-
import hist
|
6
5
|
from hist import Hist
|
7
6
|
|
8
7
|
import numpy
|
@@ -79,6 +78,7 @@ class Plotter1D(Plotter):
|
|
79
78
|
l_bc_all = []
|
80
79
|
for name, arr_val in d_data.items():
|
81
80
|
arr_wgt = d_wgt[name] if d_wgt is not None else numpy.ones_like(arr_val)
|
81
|
+
arr_wgt = self._normalize_weights(arr_wgt, var)
|
82
82
|
hst = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x', label=name).Weight()
|
83
83
|
hst.fill(x=arr_val, weight=arr_wgt)
|
84
84
|
hst.plot(label=name)
|
@@ -88,6 +88,23 @@ class Plotter1D(Plotter):
|
|
88
88
|
|
89
89
|
return max_y
|
90
90
|
# --------------------------------------------
|
91
|
+
def _normalize_weights(self, arr_wgt : numpy.ndarray, var : str) -> numpy.ndarray:
|
92
|
+
cfg_var = self._d_cfg['plots'][var]
|
93
|
+
if 'normalized' not in cfg_var:
|
94
|
+
log.debug(f'Not normalizing for variable: {var}')
|
95
|
+
return arr_wgt
|
96
|
+
|
97
|
+
if not cfg_var['normalized']:
|
98
|
+
log.debug(f'Not normalizing for variable: {var}')
|
99
|
+
return arr_wgt
|
100
|
+
|
101
|
+
log.debug(f'Normalizing for variable: {var}')
|
102
|
+
total = numpy.sum(arr_wgt)
|
103
|
+
arr_wgt = arr_wgt / total
|
104
|
+
|
105
|
+
return arr_wgt
|
106
|
+
# --------------------------------------------
|
107
|
+
|
91
108
|
def _style_plot(self, var : str, max_y : float) -> None:
|
92
109
|
d_cfg = self._d_cfg['plots'][var]
|
93
110
|
yscale = d_cfg['yscale' ] if 'yscale' in d_cfg else 'linear'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/arrays/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/generic/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/logging/log_store.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/cv_classifier.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/train_mva.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/ml/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/plotting/plotter.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/rfile/rfprinter.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/rfile/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/fitter.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/function.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/minimizers.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/stats/utilities.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/testing/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu/text/transformer.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_data/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/git/publish
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.1.9 → data_manipulation_utilities-0.2.0}/src/dmu_scripts/ssh/coned.py
RENAMED
File without changes
|
File without changes
|