PyPI - data-manipulation-utilities - Versions diffs - 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

data-manipulation-utilities 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{data_manipulation_utilities-0.1.9.dist-info → data_manipulation_utilities-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.2
 Name: data_manipulation_utilities
-Version: 0.1.9
+Version: 0.2.0
 Description-Content-Type: text/markdown
 Requires-Dist: logzero
 Requires-Dist: PyYAML
 Requires-Dist: scipy
-Requires-Dist: awkward
+Requires-Dist: awkward==2.4.6
 Requires-Dist: tqdm
 Requires-Dist: joblib
 Requires-Dist: scikit-learn
@@ -204,6 +204,33 @@ print_pdf(pdf,
 The `Fitter` class is a wrapper around zfit, used to make fitting easier.
+### Goodness of fit
+Once a fit has been done, one can use `GofCalculator` to get a rough estimate of the fit quality.
+This is done by:
+- Binning the data and PDF.
+- Calculating the reduced $\chi^2$.
+- Using the $\chi^2$ and the number of degrees of freedom to get the p-value.
+This class is used as shown below:
+```python
+from dmu.stats.gof_calculator import GofCalculator
+nll = _get_nll()
+res = Data.minimizer.minimize(nll)
+gcl = GofCalculator(nll, ndof=10)
+gof = gcl.get_gof(kind='pvalue')
+```
+where:
+- `ndof` is the number of degrees of freedom used in the reduced $\chi^2$ calculation.
+It is needed to determine how many bins to use when building the histogram. The recommended value is 10.
+- `kind` can be either `pvalue` or `chi2/ndof`.
 ### Simplest fit
 ```python

{data_manipulation_utilities-0.1.9.dist-info → data_manipulation_utilities-0.2.0.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
-data_manipulation_utilities-0.1.9.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
+data_manipulation_utilities-0.2.0.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
 dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
 dmu/generic/utilities.py,sha256=0Xnq9t35wuebAqKxbyAiMk1ISB7IcXK4cFH25MT1fgw,1741
 dmu/logging/log_store.py,sha256=umdvjNDuV3LdezbG26b0AiyTglbvkxST19CQu9QATbA,4184
 dmu/ml/cv_classifier.py,sha256=n81m7i2M6Zq96AEd9EZGwXSrbG5m9jkS5RdeXvbsAXU,3712
-dmu/ml/cv_predict.py,sha256=Bqxu-f6qquKJokFljhCzL_kiGcjLJLQFhVBD130fsyw,4893
+dmu/ml/cv_predict.py,sha256=AhCsCnHWPWGIRVTdGS1NxA2m4yH7t2lV_OdALwQAcAE,4927
 dmu/ml/train_mva.py,sha256=d_n-A07DFweikz5nXap4OE_Mqx8VprFT7zbxmnQAbac,9638
 dmu/ml/utilities.py,sha256=Nue7O9zi1QXgjGRPH6wnSAW9jusMQ2ZOSDJzBqJKIi0,3687
 dmu/plotting/plotter.py,sha256=ytMxtzHEY8ZFU0ZKEBE-ROjMszXl5kHTMnQnWe173nU,7208
-dmu/plotting/plotter_1d.py,sha256=O7rTgCBlpCko1RSpj2TzcUIfx9sKoz2jAgw73Pz7Ynk,4472
+dmu/plotting/plotter_1d.py,sha256=g6H2xAgsL9a6vRkpbqHICb3qwV_qMiQPZxxw_oOSf9M,5115
 dmu/plotting/plotter_2d.py,sha256=J-gKnagoHGfJFU7HBrhDFpGYH5Rxy0_zF5l8eE_7ZHE,2944
 dmu/rdataframe/atr_mgr.py,sha256=FdhaQWVpsm4OOe1IRbm7rfrq8VenTNdORyI-lZ2Bs1M,2386
 dmu/rdataframe/utilities.py,sha256=x8r379F2-vZPYzAdMFCn_V4Kx2Tx9t9pn_QHcZ1euew,2756
@@ -29,6 +29,7 @@ dmu_data/plotting/tests/fig_size.yaml,sha256=7ROq49nwZ1A2EbPiySmu6n3G-Jq6YAOkc3d
 dmu_data/plotting/tests/high_stat.yaml,sha256=bLglBLCZK6ft0xMhQ5OltxE76cWsBMPMjO6GG0OkDr8,522
 dmu_data/plotting/tests/name.yaml,sha256=mkcPAVg8wBAmlSbSRQ1bcaMl4vOS6LXMtpqQeDrrtO4,312
 dmu_data/plotting/tests/no_bounds.yaml,sha256=8e1QdphBjz-suDr857DoeUC2DXiy6SE-gvkORJQYv80,257
+dmu_data/plotting/tests/normalized.yaml,sha256=Y0eKtyV5pvlSxvqfsLjytYtv8xYF3HZ5WEdCJdeHGQI,193
 dmu_data/plotting/tests/simple.yaml,sha256=N_TvNBh_2dU0-VYgu_LMrtY0kV_hg2HxVuEoDlr1HX8,138
 dmu_data/plotting/tests/title.yaml,sha256=bawKp9aGpeRrHzv69BOCbFX8sq9bb3Es9tdsPTE7jIk,333
 dmu_data/plotting/tests/weights.yaml,sha256=RWQ1KxbCq-uO62WJ2AoY4h5Umc37zG35s-TpKnNMABI,312
@@ -43,8 +44,8 @@ dmu_scripts/rfile/compare_root_files.py,sha256=T8lDnQxsRNMr37x1Y7YvWD8ySHrJOWZki
 dmu_scripts/rfile/print_trees.py,sha256=Ze4Ccl_iUldl4eVEDVnYBoe4amqBT1fSBR1zN5WSztk,941
 dmu_scripts/ssh/coned.py,sha256=lhilYNHWRCGxC-jtyJ3LQ4oUgWW33B2l1tYCcyHHsR0,4858
 dmu_scripts/text/transform_text.py,sha256=9akj1LB0HAyopOvkLjNOJiptZw5XoOQLe17SlcrGMD0,1456
-data_manipulation_utilities-0.1.9.dist-info/METADATA,sha256=sxu2cZc14f4VfDD2J3MLGmW0jRHXJBpmDspXUt1D_0k,23046
-data_manipulation_utilities-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-data_manipulation_utilities-0.1.9.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
-data_manipulation_utilities-0.1.9.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
-data_manipulation_utilities-0.1.9.dist-info/RECORD,,
+data_manipulation_utilities-0.2.0.dist-info/METADATA,sha256=TJhGYcpEMs08J-Jw-Q9UT6PivCSnKo5APqPZLoFOM7g,23800
+data_manipulation_utilities-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+data_manipulation_utilities-0.2.0.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
+data_manipulation_utilities-0.2.0.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
+data_manipulation_utilities-0.2.0.dist-info/RECORD,,

dmu/ml/cv_predict.py CHANGED Viewed

@@ -10,8 +10,8 @@ import tqdm
 from ROOT import RDataFrame
 import dmu.ml.utilities     as ut
-import dmu.ml.cv_classifier as CVClassifier
+from dmu.ml.cv_classifier  import CVClassifier
 from dmu.logging.log_store import LogStore
 log = LogStore.add_logger('dmu:ml:cv_predict')
@@ -147,6 +147,7 @@ class CVPredict:
             arr_prb = self._predict_with_overlap(df_ft)
         arr_prb = self._patch_probabilities(arr_prb)
+        arr_prb = arr_prb.T[1]
         return arr_prb
 # ---------------------------------------

dmu/plotting/plotter_1d.py CHANGED Viewed

@@ -2,7 +2,6 @@
 Module containing plotter class
 '''
-import hist
 from hist import Hist
 import numpy
@@ -79,6 +78,7 @@ class Plotter1D(Plotter):
         l_bc_all = []
         for name, arr_val in d_data.items():
             arr_wgt      = d_wgt[name] if d_wgt is not None else numpy.ones_like(arr_val)
+            arr_wgt      = self._normalize_weights(arr_wgt, var)
             hst          = Hist.new.Reg(bins=bins, start=minx, stop=maxx, name='x', label=name).Weight()
             hst.fill(x=arr_val, weight=arr_wgt)
             hst.plot(label=name)
@@ -88,6 +88,23 @@ class Plotter1D(Plotter):
         return max_y
     # --------------------------------------------
+    def _normalize_weights(self, arr_wgt : numpy.ndarray, var : str) -> numpy.ndarray:
+        cfg_var = self._d_cfg['plots'][var]
+        if 'normalized' not in cfg_var:
+            log.debug(f'Not normalizing for variable: {var}')
+            return arr_wgt
+        if not cfg_var['normalized']:
+            log.debug(f'Not normalizing for variable: {var}')
+            return arr_wgt
+        log.debug(f'Normalizing for variable: {var}')
+        total   = numpy.sum(arr_wgt)
+        arr_wgt = arr_wgt / total
+        return arr_wgt
+    # --------------------------------------------
     def _style_plot(self, var : str, max_y : float) -> None:
         d_cfg  = self._d_cfg['plots'][var]
         yscale = d_cfg['yscale' ] if 'yscale' in d_cfg else 'linear'

dmu_data/plotting/tests/normalized.yaml ADDED Viewed

@@ -0,0 +1,9 @@
+saving:
+    plt_dir : tests/plotting/normalized
+plots:
+    x :
+      normalized : true
+      binning    : [-5.0, 8.0, 40]
+    y :
+      normalized : false
+      binning    : [-5.0, 8.0, 40]

{data_manipulation_utilities-0.1.9.data → data_manipulation_utilities-0.2.0.data}/scripts/publish RENAMED Viewed

File without changes

{data_manipulation_utilities-0.1.9.dist-info → data_manipulation_utilities-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{data_manipulation_utilities-0.1.9.dist-info → data_manipulation_utilities-0.2.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{data_manipulation_utilities-0.1.9.dist-info → data_manipulation_utilities-0.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

data-manipulation-utilities 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl

data-manipulation-utilities 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl