PyPI - data-manipulation-utilities - Versions diffs - 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend

data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/METADATA +177 -8
{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/RECORD +30 -18
{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/WHEEL +1 -1
dmu/generic/hashing.py +44 -0
dmu/generic/utilities.py +14 -1
dmu/generic/version_management.py +3 -5
dmu/ml/cv_diagnostics.py +221 -0
dmu/ml/train_mva.py +124 -31
dmu/pdataframe/utilities.py +36 -3
dmu/plotting/fwhm.py +64 -0
dmu/plotting/plotter.py +2 -0
dmu/plotting/plotter_1d.py +87 -6
dmu/stats/fitter.py +1 -1
dmu/stats/model_factory.py +189 -25
dmu/stats/zfit_models.py +68 -0
dmu/stats/zfit_plotter.py +29 -21
dmu/testing/utilities.py +31 -4
dmu_data/ml/tests/diagnostics_from_file.yaml +13 -0
dmu_data/ml/tests/diagnostics_from_model.yaml +10 -0
dmu_data/ml/tests/diagnostics_multiple_methods.yaml +10 -0
dmu_data/ml/tests/diagnostics_overlay.yaml +33 -0
dmu_data/ml/tests/train_mva.yaml +15 -9
dmu_data/ml/tests/train_mva_with_diagnostics.yaml +82 -0
dmu_data/plotting/tests/plug_fwhm.yaml +24 -0
dmu_data/plotting/tests/plug_stats.yaml +19 -0
dmu_data/plotting/tests/simple.yaml +4 -3
dmu_data/plotting/tests/styling.yaml +11 -0
{data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish +0 -0
{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt +0 -0
{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt +0 -0

dmu_data/ml/tests/diagnostics_multiple_methods.yaml ADDED Viewed

@@ -0,0 +1,10 @@
+output : /tmp/tests/dmu/ml/cv_diagnostics/multiple_methods
+correlations:
+  # Variables with respect to which the correlations with the features will be measured
+  target    :
+    name : z
+  methods:
+    - Pearson
+    - Kendall-$\tau$
+  figure:
+    size : [10, 8]

dmu_data/ml/tests/diagnostics_overlay.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+output         : /tmp/tests/dmu/ml/cv_diagnostics/overlay
+  # Will assume that the target is already in the input dataframe
+  # and will use it, instead of evaluating models
+score_from_rdf : w
+correlations:
+  # Variables with respect to which the correlations with the features will be measured
+  target :
+    name : z
+    overlay :
+      wp :
+        - 0.2
+        - 0.5
+        - 0.7
+        - 0.9
+      general:
+        size : [12, 10]
+      saving:
+        plt_dir : /tmp/tests/dmu/ml/cv_diagnostics/overlay
+      plots:
+        z :
+          binning    : [-4, 4, 10]
+          yscale     : 'linear'
+          labels     : ['$z$', 'Entries']
+          normalized : true
+          styling :
+            linestyle: '-'
+  methods:
+    - Pearson
+    - Kendall-$\tau$
+  figure:
+    title     : Scores from file
+    size      : [12, 10]
+    xlabelsize: 30

dmu_data/ml/tests/train_mva.yaml CHANGED Viewed

@@ -1,10 +1,12 @@
 dataset:
+  define :
+    r : z + x
   nan :
     x : -3
-    y : -3
+    y : -3
 training :
     nfold    : 3
-    features : [x, y, z]
+    features : [x, y, r]
     rdm_stat : 1
     hyper    :
       loss              : log_loss
@@ -13,7 +15,7 @@ training :
       learning_rate     : 0.1
       min_samples_split : 2
 saving:
-    path : '/tmp/dmu/ml/tests/train_mva/model.pkl'
+    path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
 plotting:
     roc     :
         min : [0.0, 0.0]
@@ -29,24 +31,28 @@ plotting:
       title      : 'Correlation matrix'
       size       : [10, 10]
       mask_value : 0
-    val_dir : '/tmp/dmu/ml/tests/train_mva'
+    val_dir : '/tmp/tests/dmu/ml/train_mva'
     features:
         saving:
-            plt_dir : '/tmp/dmu/ml/tests/train_mva/features'
+            plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
         plots:
+          r :
+            binning : [-6, 6, 100]
+            yscale  : 'linear'
+            labels  : ['$r$', '']
           w :
             binning : [-4, 4, 100]
             yscale  : 'linear'
-            labels  : ['w', '']
+            labels  : ['$w$', '']
           x :
             binning : [-4, 4, 100]
             yscale  : 'linear'
-            labels  : ['x', '']
+            labels  : ['$x$', '']
           y :
             binning : [-4, 4, 100]
             yscale  : 'linear'
-            labels  : ['y', '']
+            labels  : ['$y$', '']
           z :
             binning : [-4, 4, 100]
             yscale  : 'linear'
-            labels  : ['z', '']
+            labels  : ['$z$', '']

dmu_data/ml/tests/train_mva_with_diagnostics.yaml ADDED Viewed

@@ -0,0 +1,82 @@
+dataset:
+  define :
+    r : z + x
+  nan :
+    x : -3
+    y : -3
+training :
+    nfold    : 3
+    features : [x, y, r]
+    rdm_stat : 1
+    hyper    :
+      loss              : log_loss
+      n_estimators      : 100
+      max_depth         : 3
+      learning_rate     : 0.1
+      min_samples_split : 2
+saving:
+    path : '/tmp/tests/dmu/ml/train_mva/model.pkl'
+plotting:
+    roc     :
+        min : [0.0, 0.0]
+        max : [1.2, 1.2]
+        annotate:
+          sig_eff : [0.5, 0.6, 0.7, 0.8, 0.9]
+          form : '{:.2f}'
+          color: 'green'
+          xoff : -15
+          yoff : -15
+          size :  10
+    correlation:
+      title      : 'Correlation matrix'
+      size       : [10, 10]
+      mask_value : 0
+    val_dir : '/tmp/tests/dmu/ml/train_mva'
+    features:
+        saving:
+            plt_dir : '/tmp/tests/dmu/ml/train_mva/features'
+        plots:
+          r :
+            binning : [-6, 6, 100]
+            yscale  : 'linear'
+            labels  : ['$r$', '']
+          w :
+            binning : [-4, 4, 100]
+            yscale  : 'linear'
+            labels  : ['$w$', '']
+          x :
+            binning : [-4, 4, 100]
+            yscale  : 'linear'
+            labels  : ['$x$', '']
+          y :
+            binning : [-4, 4, 100]
+            yscale  : 'linear'
+            labels  : ['$y$', '']
+          z :
+            binning : [-4, 4, 100]
+            yscale  : 'linear'
+            labels  : ['$z$', '']
+diagnostics:
+  output         : /tmp/tests/dmu/ml/train_mva/diagnostics
+  correlations:
+    target :
+      name : z
+      overlay :
+        general:
+          size : [20, 10]
+        saving:
+          plt_dir : /tmp/tests/dmu/ml/train_mva/diagnostics
+        plots:
+          z :
+            binning    : [-4, +4, 30]
+            yscale     : 'linear'
+            labels     : ['z', 'Entries']
+            normalized : true
+            styling :
+              linestyle: '-'
+    methods:
+      - Pearson
+      - Kendall-$\tau$
+    figure:
+      title: Training diagnostics
+      size : [10, 8]

dmu_data/plotting/tests/plug_fwhm.yaml ADDED Viewed

@@ -0,0 +1,24 @@
+saving:
+    plt_dir : plotting/pluggins/fwhm
+plots:
+    x :
+      binning : [-5.0, 8.0, 40]
+      title   : x distribution
+    y :
+      binning : [-5.0, 8.0, 40]
+      title   : y distribution
+plugin:
+  fwhm:
+    x :
+      plot   : true
+      obs    : [-2, 4]
+      plot   : true
+      format : FWHM={:.3f}
+      add_std: True
+    y :
+      plot   : true
+      obs    : [-4, 8]
+      plot   : true
+      format : FWHM={:.3f}
+      add_std: True

dmu_data/plotting/tests/plug_stats.yaml ADDED Viewed

@@ -0,0 +1,19 @@
+saving:
+    plt_dir : plotting/pluggins/stats
+plots:
+    x :
+      binning : [-5.0, 8.0, 40]
+      title   : x distribution
+      styling:
+        linestyle : '-'
+    y :
+      binning : [-5.0, 8.0, 40]
+      title   : y distribution
+      styling:
+        linestyle : '-'
+plugin:
+  stats:
+    x :
+      mean : $\mu$={:.2f}
+      rms  : $\sigma$={:.2f}
+      sum  : $\Sigma$={:.0f}

dmu_data/plotting/tests/simple.yaml CHANGED Viewed

@@ -1,8 +1,9 @@
 saving:
     plt_dir : tests/plotting/simple
 plots:
     x :
-        binning : [-5.0, 8.0, 40]
+      binning : [-5.0, 8.0, 40]
+      title   : x distribution
     y :
-        binning : [-5.0, 8.0, 40]
+      binning : [-5.0, 8.0, 40]
+      title   : y distribution

dmu_data/plotting/tests/styling.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+saving:
+    plt_dir : tests/plotting/styling
+plots:
+  x :
+    binning : [-5.0, 8.0, 40]
+    title   : x distribution
+    styling :
+      histtype : step
+  y :
+    binning : [-5.0, 8.0, 40]
+    title   : y distribution

{data_manipulation_utilities-0.2.6.data → data_manipulation_utilities-0.2.7.data}/scripts/publish RENAMED Viewed

File without changes

{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{data_manipulation_utilities-0.2.6.dist-info → data_manipulation_utilities-0.2.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

data-manipulation-utilities 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl