PyPI - data-manipulation-utilities - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

data-manipulation-utilities 0.2.2py3-none-any.whl → 0.2.3py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

{data_manipulation_utilities-0.2.2.dist-info → data_manipulation_utilities-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: data_manipulation_utilities
-Version: 0.2.2
+Version: 0.2.3
 Description-Content-Type: text/markdown
 Requires-Dist: logzero
 Requires-Dist: PyYAML
@@ -578,6 +578,24 @@ These are utility functions meant to be used with ROOT dataframes.
 ## Adding a column from a numpy array
+### With numba
+For this do:
+```python
+import dmu.rdataframe.utilities as ut
+arr_val = numpy.array([10, 20, 30])
+rdf     = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
+```
+where the identifier must be unique every time the function is called.
+This is required because the column is added internally by declaring a numba function, whose name
+cannot be reused within a session, as mentioned
+[here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
+### With awkward
 For this do:
 ```python

{data_manipulation_utilities-0.2.2.dist-info → data_manipulation_utilities-0.2.3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-data_manipulation_utilities-0.2.2.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
+data_manipulation_utilities-0.2.3.data/scripts/publish,sha256=-3K_Y2_4CfWCV50rPB8CRuhjxDu7xMGswinRwPovgLs,1976
 dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
 dmu/generic/utilities.py,sha256=0Xnq9t35wuebAqKxbyAiMk1ISB7IcXK4cFH25MT1fgw,1741
 dmu/logging/log_store.py,sha256=umdvjNDuV3LdezbG26b0AiyTglbvkxST19CQu9QATbA,4184
@@ -13,7 +13,7 @@ dmu/plotting/plotter_1d.py,sha256=g6H2xAgsL9a6vRkpbqHICb3qwV_qMiQPZxxw_oOSf9M,51
 dmu/plotting/plotter_2d.py,sha256=J-gKnagoHGfJFU7HBrhDFpGYH5Rxy0_zF5l8eE_7ZHE,2944
 dmu/plotting/utilities.py,sha256=SI9dvtZq2gr-PXVz71KE4o0i09rZOKgqJKD1jzf6KXk,1167
 dmu/rdataframe/atr_mgr.py,sha256=FdhaQWVpsm4OOe1IRbm7rfrq8VenTNdORyI-lZ2Bs1M,2386
-dmu/rdataframe/utilities.py,sha256=MDY3u_y0s-ANvHAWRzGyeuuZUKoaqilfmb8mqlgfrVc,2771
+dmu/rdataframe/utilities.py,sha256=pNcQARMP7txMhy6k27UnDcYf0buNy5U2fshaJDl_h8o,3661
 dmu/rfile/rfprinter.py,sha256=mp5jd-oCJAnuokbdmGyL9i6tK2lY72jEfROuBIZ_ums,3941
 dmu/rfile/utilities.py,sha256=XuYY7HuSBj46iSu3c60UYBHtI6KIPoJU_oofuhb-be0,945
 dmu/stats/fitter.py,sha256=vHNZ16U3apoQyeyM8evq-if49doF48sKB3q9wmA96Fw,18387
@@ -47,8 +47,8 @@ dmu_scripts/rfile/compare_root_files.py,sha256=T8lDnQxsRNMr37x1Y7YvWD8ySHrJOWZki
 dmu_scripts/rfile/print_trees.py,sha256=Ze4Ccl_iUldl4eVEDVnYBoe4amqBT1fSBR1zN5WSztk,941
 dmu_scripts/ssh/coned.py,sha256=lhilYNHWRCGxC-jtyJ3LQ4oUgWW33B2l1tYCcyHHsR0,4858
 dmu_scripts/text/transform_text.py,sha256=9akj1LB0HAyopOvkLjNOJiptZw5XoOQLe17SlcrGMD0,1456
-data_manipulation_utilities-0.2.2.dist-info/METADATA,sha256=0QwhQmQML65qk2kaXf1znMZOVNuvaY3l35E7cXLRCZ8,27359
-data_manipulation_utilities-0.2.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-data_manipulation_utilities-0.2.2.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
-data_manipulation_utilities-0.2.2.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
-data_manipulation_utilities-0.2.2.dist-info/RECORD,,
+data_manipulation_utilities-0.2.3.dist-info/METADATA,sha256=STJ7vYfcSIM9dtMRzywGLwDzH1sUBE5DL9FqvskMcxo,27923
+data_manipulation_utilities-0.2.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+data_manipulation_utilities-0.2.3.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
+data_manipulation_utilities-0.2.3.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
+data_manipulation_utilities-0.2.3.dist-info/RECORD,,

dmu/rdataframe/utilities.py CHANGED Viewed

@@ -1,6 +1,7 @@
 '''
 Module containing utility functions to be used with ROOT dataframes
 '''
+# pylint: disable=no-name-in-module
 import re
 from dataclasses import dataclass
@@ -10,7 +11,7 @@ import pandas  as pnd
 import awkward as ak
 import numpy
-from ROOT import RDataFrame, RDF
+from ROOT import RDataFrame, RDF, Numba
 from dmu.logging.log_store import LogStore
@@ -34,6 +35,8 @@ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str
          exclude_re : Regex with pattern of column names that we won't pick
     '''
+    log.warning(f'Adding column {name} with awkward')
     d_opt = {} if d_opt is None else d_opt
     if arr_val is None:
         raise ValueError('Array of values not introduced')
@@ -72,6 +75,29 @@ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str
     return rdf
 # ---------------------------------------------------------------------
+def add_column_with_numba(
+        rdf        : RDataFrame,
+        arr_val    : Union[numpy.ndarray,None],
+        name       : str,
+        identifier : str) -> RDataFrame:
+    '''
+    Will take a dataframe, an array of numbers and a string
+    Will add the array as a column to the dataframe
+    The `identifier` argument is a string needed in order to avoid name collisions
+    when using Numba to declare the function that the column values are read from.
+    '''
+    identifier=f'fun_{identifier}'
+    @Numba.Declare(['int'], 'float', name=identifier)
+    def get_value(index):
+        return arr_val[index]
+    log.debug(f'Adding column {name} with numba')
+    rdf = rdf.Define(name, f'Numba::{identifier}(rdfentry_)')
+    return rdf
+# ---------------------------------------------------------------------
 def rdf_report_to_df(rep : RDF.RCutFlowReport) -> pnd.DataFrame:
     '''
     Takes the output of rdf.Report(), i.e. an RDataFrame cutflow report.

{data_manipulation_utilities-0.2.2.data → data_manipulation_utilities-0.2.3.data}/scripts/publish RENAMED Viewed

File without changes

{data_manipulation_utilities-0.2.2.dist-info → data_manipulation_utilities-0.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{data_manipulation_utilities-0.2.2.dist-info → data_manipulation_utilities-0.2.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{data_manipulation_utilities-0.2.2.dist-info → data_manipulation_utilities-0.2.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

data-manipulation-utilities 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

data-manipulation-utilities 0.2.2py3-none-any.whl → 0.2.3py3-none-any.whl