data-manipulation-utilities 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {data_manipulation_utilities-0.2.2/src/data_manipulation_utilities.egg-info → data_manipulation_utilities-0.2.3}/PKG-INFO +19 -1
  2. data_manipulation_utilities-0.2.2/PKG-INFO → data_manipulation_utilities-0.2.3/README.md +18 -20
  3. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/pyproject.toml +1 -1
  4. data_manipulation_utilities-0.2.2/README.md → data_manipulation_utilities-0.2.3/src/data_manipulation_utilities.egg-info/PKG-INFO +38 -0
  5. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rdataframe/utilities.py +27 -1
  6. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/setup.cfg +0 -0
  7. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/SOURCES.txt +0 -0
  8. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
  9. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/entry_points.txt +0 -0
  10. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/requires.txt +0 -0
  11. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
  12. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/arrays/utilities.py +0 -0
  13. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/generic/utilities.py +0 -0
  14. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/logging/log_store.py +0 -0
  15. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/cv_classifier.py +0 -0
  16. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/cv_predict.py +0 -0
  17. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/train_mva.py +0 -0
  18. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/ml/utilities.py +0 -0
  19. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/pdataframe/utilities.py +0 -0
  20. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/matrix.py +0 -0
  21. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter.py +0 -0
  22. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter_1d.py +0 -0
  23. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/plotter_2d.py +0 -0
  24. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/plotting/utilities.py +0 -0
  25. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rdataframe/atr_mgr.py +0 -0
  26. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rfile/rfprinter.py +0 -0
  27. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/rfile/utilities.py +0 -0
  28. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/fitter.py +0 -0
  29. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/function.py +0 -0
  30. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/gof_calculator.py +0 -0
  31. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/minimizers.py +0 -0
  32. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/model_factory.py +0 -0
  33. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/utilities.py +0 -0
  34. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/stats/zfit_plotter.py +0 -0
  35. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/testing/utilities.py +0 -0
  36. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu/text/transformer.py +0 -0
  37. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/__init__.py +0 -0
  38. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
  39. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/2d.yaml +0 -0
  40. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
  41. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
  42. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/name.yaml +0 -0
  43. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
  44. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/normalized.yaml +0 -0
  45. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/simple.yaml +0 -0
  46. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/title.yaml +0 -0
  47. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/plotting/tests/weights.yaml +0 -0
  48. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform.toml +0 -0
  49. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform.txt +0 -0
  50. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform_set.toml +0 -0
  51. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform_set.txt +0 -0
  52. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_data/text/transform_trf.txt +0 -0
  53. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/git/publish +0 -0
  54. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/physics/check_truth.py +0 -0
  55. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
  56. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/rfile/print_trees.py +0 -0
  57. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/ssh/coned.py +0 -0
  58. {data_manipulation_utilities-0.2.2 → data_manipulation_utilities-0.2.3}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: data_manipulation_utilities
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: logzero
6
6
  Requires-Dist: PyYAML
@@ -578,6 +578,24 @@ These are utility functions meant to be used with ROOT dataframes.
578
578
 
579
579
  ## Adding a column from a numpy array
580
580
 
581
+ ### With numba
582
+
583
+ For this do:
584
+
585
+ ```python
586
+ import dmu.rdataframe.utilities as ut
587
+
588
+ arr_val = numpy.array([10, 20, 30])
589
+ rdf = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
590
+ ```
591
+
592
+ where the identifier needs to be unique every time the function is called.
593
+ This is the case, because the addition is done internally by declaring a numba function whose name
594
+ cannot be repeated as mentioned
595
+ [here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
596
+
597
+ ### With awkward
598
+
581
599
  For this do:
582
600
 
583
601
  ```python
@@ -1,23 +1,3 @@
1
- Metadata-Version: 2.2
2
- Name: data_manipulation_utilities
3
- Version: 0.2.2
4
- Description-Content-Type: text/markdown
5
- Requires-Dist: logzero
6
- Requires-Dist: PyYAML
7
- Requires-Dist: scipy
8
- Requires-Dist: awkward
9
- Requires-Dist: tqdm
10
- Requires-Dist: joblib
11
- Requires-Dist: scikit-learn
12
- Requires-Dist: toml
13
- Requires-Dist: numpy
14
- Requires-Dist: matplotlib
15
- Requires-Dist: mplhep
16
- Requires-Dist: hist[plot]
17
- Requires-Dist: pandas
18
- Provides-Extra: dev
19
- Requires-Dist: pytest; extra == "dev"
20
-
21
1
  # D(ata) M(anipulation) U(tilities)
22
2
 
23
3
  These are tools that can be used for different data analysis tasks.
@@ -578,6 +558,24 @@ These are utility functions meant to be used with ROOT dataframes.
578
558
 
579
559
  ## Adding a column from a numpy array
580
560
 
561
+ ### With numba
562
+
563
+ For this do:
564
+
565
+ ```python
566
+ import dmu.rdataframe.utilities as ut
567
+
568
+ arr_val = numpy.array([10, 20, 30])
569
+ rdf = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
570
+ ```
571
+
572
+ where the identifier needs to be unique every time the function is called.
573
+ This is the case, because the addition is done internally by declaring a numba function whose name
574
+ cannot be repeated as mentioned
575
+ [here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
576
+
577
+ ### With awkward
578
+
581
579
  For this do:
582
580
 
583
581
  ```python
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = 'data_manipulation_utilities'
3
- version = '0.2.2'
3
+ version = '0.2.3'
4
4
  readme = 'README.md'
5
5
  dependencies= [
6
6
  'logzero',
@@ -1,3 +1,23 @@
1
+ Metadata-Version: 2.2
2
+ Name: data_manipulation_utilities
3
+ Version: 0.2.3
4
+ Description-Content-Type: text/markdown
5
+ Requires-Dist: logzero
6
+ Requires-Dist: PyYAML
7
+ Requires-Dist: scipy
8
+ Requires-Dist: awkward
9
+ Requires-Dist: tqdm
10
+ Requires-Dist: joblib
11
+ Requires-Dist: scikit-learn
12
+ Requires-Dist: toml
13
+ Requires-Dist: numpy
14
+ Requires-Dist: matplotlib
15
+ Requires-Dist: mplhep
16
+ Requires-Dist: hist[plot]
17
+ Requires-Dist: pandas
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest; extra == "dev"
20
+
1
21
  # D(ata) M(anipulation) U(tilities)
2
22
 
3
23
  These are tools that can be used for different data analysis tasks.
@@ -558,6 +578,24 @@ These are utility functions meant to be used with ROOT dataframes.
558
578
 
559
579
  ## Adding a column from a numpy array
560
580
 
581
+ ### With numba
582
+
583
+ For this do:
584
+
585
+ ```python
586
+ import dmu.rdataframe.utilities as ut
587
+
588
+ arr_val = numpy.array([10, 20, 30])
589
+ rdf = ut.add_column_with_numba(rdf, arr_val, 'values', identifier='some_name')
590
+ ```
591
+
592
+ where the identifier needs to be unique every time the function is called.
593
+ This is the case, because the addition is done internally by declaring a numba function whose name
594
+ cannot be repeated as mentioned
595
+ [here](https://root-forum.cern.ch/t/ways-to-work-around-the-redefinition-of-compiled-functions-in-one-single-notebook-session/41442/1)
596
+
597
+ ### With awkward
598
+
561
599
  For this do:
562
600
 
563
601
  ```python
@@ -1,6 +1,7 @@
1
1
  '''
2
2
  Module containing utility functions to be used with ROOT dataframes
3
3
  '''
4
+ # pylint: disable=no-name-in-module
4
5
 
5
6
  import re
6
7
  from dataclasses import dataclass
@@ -10,7 +11,7 @@ import pandas as pnd
10
11
  import awkward as ak
11
12
  import numpy
12
13
 
13
- from ROOT import RDataFrame, RDF
14
+ from ROOT import RDataFrame, RDF, Numba
14
15
 
15
16
  from dmu.logging.log_store import LogStore
16
17
 
@@ -34,6 +35,8 @@ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str
34
35
  exclude_re : Regex with pattern of column names that we won't pick
35
36
  '''
36
37
 
38
+ log.warning(f'Adding column {name} with awkward')
39
+
37
40
  d_opt = {} if d_opt is None else d_opt
38
41
  if arr_val is None:
39
42
  raise ValueError('Array of values not introduced')
@@ -72,6 +75,29 @@ def add_column(rdf : RDataFrame, arr_val : Union[numpy.ndarray,None], name : str
72
75
 
73
76
  return rdf
74
77
  # ---------------------------------------------------------------------
78
+ def add_column_with_numba(
79
+ rdf : RDataFrame,
80
+ arr_val : Union[numpy.ndarray,None],
81
+ name : str,
82
+ identifier : str) -> RDataFrame:
83
+ '''
84
+ Will take a dataframe, an array of numbers and a string
85
+ Will add the array as a column to the dataframe
86
+
87
+ The `identifier` argument is a string needed in order to avoid collisions
88
+ when using Numba to define a function to get the value from.
89
+ '''
90
+ identifier=f'fun_{identifier}'
91
+
92
+ @Numba.Declare(['int'], 'float', name=identifier)
93
+ def get_value(index):
94
+ return arr_val[index]
95
+
96
+ log.debug(f'Adding column {name} with numba')
97
+ rdf = rdf.Define(name, f'Numba::{identifier}(rdfentry_)')
98
+
99
+ return rdf
100
+ # ---------------------------------------------------------------------
75
101
  def rdf_report_to_df(rep : RDF.RCutFlowReport) -> pnd.DataFrame:
76
102
  '''
77
103
  Takes the output of rdf.Report(), i.e. an RDataFrame cutflow report.