data-manipulation-utilities 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_manipulation_utilities-0.2.2/src/data_manipulation_utilities.egg-info → data_manipulation_utilities-0.2.3}/PKG-INFO +19 -1
- data_manipulation_utilities-0.2.2/PKG-INFO → data_manipulation_utilities-0.2.3/README.md +18 -20
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/pyproject.toml +1 -1
- data_manipulation_utilities-0.2.2/README.md → data_manipulation_utilities-0.2.3/src/data_manipulation_utilities.egg-info/PKG-INFO +38 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rdataframe/utilities.py +27 -1
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/setup.cfg +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/SOURCES.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/entry_points.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/requires.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/arrays/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/generic/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/logging/log_store.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/cv_classifier.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/cv_predict.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/train_mva.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/pdataframe/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/matrix.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter_1d.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter_2d.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rdataframe/atr_mgr.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rfile/rfprinter.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rfile/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/fitter.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/function.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/gof_calculator.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/minimizers.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/model_factory.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/zfit_plotter.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/testing/utilities.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/text/transformer.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/__init__.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/2d.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/name.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/normalized.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/simple.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/title.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/weights.yaml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform.toml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform_set.toml +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform_set.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform_trf.txt +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/git/publish +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/physics/check_truth.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/rfile/print_trees.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/ssh/coned.py +0 -0
- {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: data_manipulation_utilities
|
3
|
-
Version: 0.2.2
|
3
|
+
Version: 0.2.3
|
4
4
|
Description-Content-Type: text/markdown
|
5
5
|
Requires-Dist: logzero
|
6
6
|
Requires-Dist: PyYAML
|
@@ -578,6 +578,24 @@ These are utility functions meant to be used with ROOT dataframes.
|
|
578
578
|
|
579
579
|
## Adding a column from a numpy array
|
580
580
|
|
581
|
+
### With numba
|
582
|
+
|
583
|
+
For this do:
|
584
|
+
|
585
|
+
```python
|
586
|
+
import dmu.rdataframe.utilities as ut
|
587
|
+
|
588
|
+
arr_val = numpy.array([10, 20, 30])
|
589
|
+
rdf = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
|
590
|
+
```
|
591
|
+
|
592
|
+
where the identifier needs to be unique every time the function is called.
|
593
|
+
This is the case because the addition is done internally by declaring a numba function whose name
|
594
|
+
cannot be repeated, as mentioned
|
595
|
+
[here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
|
596
|
+
|
597
|
+
### With awkward
|
598
|
+
|
581
599
|
For this do:
|
582
600
|
|
583
601
|
```python
|
@@ -1,23 +1,3 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: data_manipulation_utilities
|
3
|
-
Version: 0.2.2
|
4
|
-
Description-Content-Type: text/markdown
|
5
|
-
Requires-Dist: logzero
|
6
|
-
Requires-Dist: PyYAML
|
7
|
-
Requires-Dist: scipy
|
8
|
-
Requires-Dist: awkward
|
9
|
-
Requires-Dist: tqdm
|
10
|
-
Requires-Dist: joblib
|
11
|
-
Requires-Dist: scikit-learn
|
12
|
-
Requires-Dist: toml
|
13
|
-
Requires-Dist: numpy
|
14
|
-
Requires-Dist: matplotlib
|
15
|
-
Requires-Dist: mplhep
|
16
|
-
Requires-Dist: hist[plot]
|
17
|
-
Requires-Dist: pandas
|
18
|
-
Provides-Extra: dev
|
19
|
-
Requires-Dist: pytest; extra == "dev"
|
20
|
-
|
21
1
|
# D(ata) M(anipulation) U(tilities)
|
22
2
|
|
23
3
|
These are tools that can be used for different data analysis tasks.
|
@@ -578,6 +558,24 @@ These are utility functions meant to be used with ROOT dataframes.
|
|
578
558
|
|
579
559
|
## Adding a column from a numpy array
|
580
560
|
|
561
|
+
### With numba
|
562
|
+
|
563
|
+
For this do:
|
564
|
+
|
565
|
+
```python
|
566
|
+
import dmu.rdataframe.utilities as ut
|
567
|
+
|
568
|
+
arr_val = numpy.array([10, 20, 30])
|
569
|
+
rdf = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
|
570
|
+
```
|
571
|
+
|
572
|
+
where the identifier needs to be unique every time the function is called.
|
573
|
+
This is the case because the addition is done internally by declaring a numba function whose name
|
574
|
+
cannot be repeated, as mentioned
|
575
|
+
[here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
|
576
|
+
|
577
|
+
### With awkward
|
578
|
+
|
581
579
|
For this do:
|
582
580
|
|
583
581
|
```python
|
@@ -1,3 +1,23 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: data_manipulation_utilities
|
3
|
+
Version: 0.2.3
|
4
|
+
Description-Content-Type: text/markdown
|
5
|
+
Requires-Dist: logzero
|
6
|
+
Requires-Dist: PyYAML
|
7
|
+
Requires-Dist: scipy
|
8
|
+
Requires-Dist: awkward
|
9
|
+
Requires-Dist: tqdm
|
10
|
+
Requires-Dist: joblib
|
11
|
+
Requires-Dist: scikit-learn
|
12
|
+
Requires-Dist: toml
|
13
|
+
Requires-Dist: numpy
|
14
|
+
Requires-Dist: matplotlib
|
15
|
+
Requires-Dist: mplhep
|
16
|
+
Requires-Dist: hist[plot]
|
17
|
+
Requires-Dist: pandas
|
18
|
+
Provides-Extra: dev
|
19
|
+
Requires-Dist: pytest; extra == "dev"
|
20
|
+
|
1
21
|
# D(ata) M(anipulation) U(tilities)
|
2
22
|
|
3
23
|
These are tools that can be used for different data analysis tasks.
|
@@ -558,6 +578,24 @@ These are utility functions meant to be used with ROOT dataframes.
|
|
558
578
|
|
559
579
|
## Adding a column from a numpy array
|
560
580
|
|
581
|
+
### With numba
|
582
|
+
|
583
|
+
For this do:
|
584
|
+
|
585
|
+
```python
|
586
|
+
import dmu.rdataframe.utilities as ut
|
587
|
+
|
588
|
+
arr_val = numpy.array([10, 20, 30])
|
589
|
+
rdf = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
|
590
|
+
```
|
591
|
+
|
592
|
+
where the identifier needs to be unique every time the function is called.
|
593
|
+
This is the case because the addition is done internally by declaring a numba function whose name
|
594
|
+
cannot be repeated, as mentioned
|
595
|
+
[here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
|
596
|
+
|
597
|
+
### With awkward
|
598
|
+
|
561
599
|
For this do:
|
562
600
|
|
563
601
|
```python
|
@@ -1,6 +1,7 @@
|
|
1
1
|
'''
|
2
2
|
Module containing utility functions to be used with ROOT dataframes
|
3
3
|
'''
|
4
|
+
# pylint: disable=no-name-in-module
|
4
5
|
|
5
6
|
import re
|
6
7
|
from dataclasses import dataclass
|
@@ -10,7 +11,7 @@ import pandas as pnd
|
|
10
11
|
import awkward as ak
|
11
12
|
import numpy
|
12
13
|
|
13
|
-
from ROOT import RDataFrame, RDF
|
14
|
+
from ROOT import RDataFrame, RDF, Numba
|
14
15
|
|
15
16
|
from dmu.logging.log_store import LogStore
|
16
17
|
|
@@ -34,6 +35,8 @@ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str
|
|
34
35
|
exclude_re : Regex with pattern of column names that we won't pick
|
35
36
|
'''
|
36
37
|
|
38
|
+
log.warning(f'Adding column {name} with awkward')
|
39
|
+
|
37
40
|
d_opt = {} if d_opt is None else d_opt
|
38
41
|
if arr_val is None:
|
39
42
|
raise ValueError('Array of values not introduced')
|
@@ -72,6 +75,29 @@ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str
|
|
72
75
|
|
73
76
|
return rdf
|
74
77
|
# ---------------------------------------------------------------------
|
78
|
+
def add_column_with_numba(
|
79
|
+
rdf : RDataFrame,
|
80
|
+
arr_val : Union[numpy.ndarray,None],
|
81
|
+
name : str,
|
82
|
+
identifier : str) -> RDataFrame:
|
83
|
+
'''
|
84
|
+
Will take a dataframe, an array of numbers and a string
|
85
|
+
Will add the array as a column to the dataframe
|
86
|
+
|
87
|
+
The `identifier` argument is a string needed in order to avoid collisions
|
88
|
+
when using Numba to define a function to get the value from.
|
89
|
+
'''
|
90
|
+
identifier=f'fun_{identifier}'
|
91
|
+
|
92
|
+
@Numba.Declare(['int'], 'float', name=identifier)
|
93
|
+
def get_value(index):
|
94
|
+
return arr_val[index]
|
95
|
+
|
96
|
+
log.debug(f'Adding column {name} with numba')
|
97
|
+
rdf = rdf.Define(name, f'Numba::{identifier}(rdfentry_)')
|
98
|
+
|
99
|
+
return rdf
|
100
|
+
# ---------------------------------------------------------------------
|
75
101
|
def rdf_report_to_df(rep : RDF.RCutFlowReport) -> pnd.DataFrame:
|
76
102
|
'''
|
77
103
|
Takes the output of rdf.Report(), i.e. an RDataFrame cutflow report.
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/arrays/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/generic/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/logging/log_store.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/cv_classifier.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/cv_predict.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/train_mva.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/utilities.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/matrix.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rfile/rfprinter.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rfile/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/fitter.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/function.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/minimizers.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/utilities.py
RENAMED
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/testing/utilities.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/text/transformer.py
RENAMED
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/git/publish
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/ssh/coned.py
RENAMED
File without changes
|
File without changes
|