data-manipulation-utilities 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/PKG-INFO +35 -1
  2. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/README.md +34 -0
  3. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/pyproject.toml +1 -1
  4. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/data_manipulation_utilities.egg-info/PKG-INFO +35 -1
  5. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/data_manipulation_utilities.egg-info/SOURCES.txt +1 -0
  6. data_manipulation_utilities-0.2.4/src/dmu/generic/version_management.py +132 -0
  7. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/setup.cfg +0 -0
  8. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
  9. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/data_manipulation_utilities.egg-info/entry_points.txt +0 -0
  10. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/data_manipulation_utilities.egg-info/requires.txt +0 -0
  11. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
  12. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/arrays/utilities.py +0 -0
  13. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/generic/utilities.py +0 -0
  14. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/logging/log_store.py +0 -0
  15. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/ml/cv_classifier.py +0 -0
  16. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/ml/cv_predict.py +0 -0
  17. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/ml/train_mva.py +0 -0
  18. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/ml/utilities.py +0 -0
  19. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/pdataframe/utilities.py +0 -0
  20. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/plotting/matrix.py +0 -0
  21. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/plotting/plotter.py +0 -0
  22. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/plotting/plotter_1d.py +0 -0
  23. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/plotting/plotter_2d.py +0 -0
  24. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/plotting/utilities.py +0 -0
  25. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/rdataframe/atr_mgr.py +0 -0
  26. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/rdataframe/utilities.py +0 -0
  27. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/rfile/rfprinter.py +0 -0
  28. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/rfile/utilities.py +0 -0
  29. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/fitter.py +0 -0
  30. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/function.py +0 -0
  31. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/gof_calculator.py +0 -0
  32. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/minimizers.py +0 -0
  33. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/model_factory.py +0 -0
  34. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/utilities.py +0 -0
  35. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/stats/zfit_plotter.py +0 -0
  36. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/testing/utilities.py +0 -0
  37. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu/text/transformer.py +0 -0
  38. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/__init__.py +0 -0
  39. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
  40. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/2d.yaml +0 -0
  41. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
  42. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
  43. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/name.yaml +0 -0
  44. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
  45. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/normalized.yaml +0 -0
  46. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/simple.yaml +0 -0
  47. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/title.yaml +0 -0
  48. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/plotting/tests/weights.yaml +0 -0
  49. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/text/transform.toml +0 -0
  50. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/text/transform.txt +0 -0
  51. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/text/transform_set.toml +0 -0
  52. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/text/transform_set.txt +0 -0
  53. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_data/text/transform_trf.txt +0 -0
  54. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_scripts/git/publish +0 -0
  55. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_scripts/physics/check_truth.py +0 -0
  56. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
  57. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_scripts/rfile/print_trees.py +0 -0
  58. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_scripts/ssh/coned.py +0 -0
  59. {data_manipulation_utilities-0.2.3 → data_manipulation_utilities-0.2.4}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: data_manipulation_utilities
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: logzero
6
6
  Requires-Dist: PyYAML
@@ -856,6 +856,40 @@ Trees only in file_2.root:
856
856
  - Hlt2RD_BsToPhiMuMu_MVA/DecayTree
857
857
  ```
858
858
 
859
+ # File system
860
+
861
+ ## Versions
862
+
863
+ The utilities below allow the user to deal with versioned files and directories
864
+
865
+ ```python
866
+ from dmu.generic.version_management import get_last_version
867
+ from dmu.generic.version_management import get_next_version
868
+ from dmu.generic.version_management import get_latest_file
869
+
870
+ # get_next_version will take a version and provide the next one, e.g.
871
+ get_next_version('v1') # -> 'v2'
872
+ get_next_version('v1.1') # -> 'v2.1'
873
+ get_next_version('v10.1') # -> 'v11.1'
874
+
875
+ get_next_version('/a/b/c/v1') # -> '/a/b/c/v2'
876
+ get_next_version('/a/b/c/v1.1') # -> '/a/b/c/v2.1'
877
+ get_next_version('/a/b/c/v10.1') # -> '/a/b/c/v11.1'
878
+
879
+ # `get_latest_file` will return the path to the file with the highest version
880
+ # in the `dir_path` directory that matches a wildcard, e.g.:
881
+
882
+ last_file = get_latest_file(dir_path = file_dir, wc='name_*.txt')
883
+
884
+ # `get_last_version` will return the string with the latest version
885
+ # of directories in `dir_path`, e.g.:
886
+
887
+ oversion=get_last_version(dir_path=dir_path, version_only=True) # This will return only the version, e.g. v3.2
888
+ oversion=get_last_version(dir_path=dir_path, version_only=False) # This will return full path, e.g. /a/b/c/v3.2
889
+ ```
890
+
891
+ The functions above should work for both numeric (e.g. `v1.2`) and non-numeric (e.g. `va`, `vb`) versions.
892
+
859
893
  # Text manipulation
860
894
 
861
895
  ## Transformations
@@ -836,6 +836,40 @@ Trees only in file_2.root:
836
836
  - Hlt2RD_BsToPhiMuMu_MVA/DecayTree
837
837
  ```
838
838
 
839
+ # File system
840
+
841
+ ## Versions
842
+
843
+ The utilities below allow the user to deal with versioned files and directories
844
+
845
+ ```python
846
+ from dmu.generic.version_management import get_last_version
847
+ from dmu.generic.version_management import get_next_version
848
+ from dmu.generic.version_management import get_latest_file
849
+
850
+ # get_next_version will take a version and provide the next one, e.g.
851
+ get_next_version('v1') # -> 'v2'
852
+ get_next_version('v1.1') # -> 'v2.1'
853
+ get_next_version('v10.1') # -> 'v11.1'
854
+
855
+ get_next_version('/a/b/c/v1') # -> '/a/b/c/v2'
856
+ get_next_version('/a/b/c/v1.1') # -> '/a/b/c/v2.1'
857
+ get_next_version('/a/b/c/v10.1') # -> '/a/b/c/v11.1'
858
+
859
+ # `get_latest_file` will return the path to the file with the highest version
860
+ # in the `dir_path` directory that matches a wildcard, e.g.:
861
+
862
+ last_file = get_latest_file(dir_path = file_dir, wc='name_*.txt')
863
+
864
+ # `get_last_version` will return the string with the latest version
865
+ # of directories in `dir_path`, e.g.:
866
+
867
+ oversion=get_last_version(dir_path=dir_path, version_only=True) # This will return only the version, e.g. v3.2
868
+ oversion=get_last_version(dir_path=dir_path, version_only=False) # This will return full path, e.g. /a/b/c/v3.2
869
+ ```
870
+
871
+ The functions above should work for both numeric (e.g. `v1.2`) and non-numeric (e.g. `va`, `vb`) versions.
872
+
839
873
  # Text manipulation
840
874
 
841
875
  ## Transformations
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = 'data_manipulation_utilities'
3
- version = '0.2.3'
3
+ version = '0.2.4'
4
4
  readme = 'README.md'
5
5
  dependencies= [
6
6
  'logzero',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: data_manipulation_utilities
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: logzero
6
6
  Requires-Dist: PyYAML
@@ -856,6 +856,40 @@ Trees only in file_2.root:
856
856
  - Hlt2RD_BsToPhiMuMu_MVA/DecayTree
857
857
  ```
858
858
 
859
+ # File system
860
+
861
+ ## Versions
862
+
863
+ The utilities below allow the user to deal with versioned files and directories
864
+
865
+ ```python
866
+ from dmu.generic.version_management import get_last_version
867
+ from dmu.generic.version_management import get_next_version
868
+ from dmu.generic.version_management import get_latest_file
869
+
870
+ # get_next_version will take a version and provide the next one, e.g.
871
+ get_next_version('v1') # -> 'v2'
872
+ get_next_version('v1.1') # -> 'v2.1'
873
+ get_next_version('v10.1') # -> 'v11.1'
874
+
875
+ get_next_version('/a/b/c/v1') # -> '/a/b/c/v2'
876
+ get_next_version('/a/b/c/v1.1') # -> '/a/b/c/v2.1'
877
+ get_next_version('/a/b/c/v10.1') # -> '/a/b/c/v11.1'
878
+
879
+ # `get_latest_file` will return the path to the file with the highest version
880
+ # in the `dir_path` directory that matches a wildcard, e.g.:
881
+
882
+ last_file = get_latest_file(dir_path = file_dir, wc='name_*.txt')
883
+
884
+ # `get_last_version` will return the string with the latest version
885
+ # of directories in `dir_path`, e.g.:
886
+
887
+ oversion=get_last_version(dir_path=dir_path, version_only=True) # This will return only the version, e.g. v3.2
888
+ oversion=get_last_version(dir_path=dir_path, version_only=False) # This will return full path, e.g. /a/b/c/v3.2
889
+ ```
890
+
891
+ The functions above should work for both numeric (e.g. `v1.2`) and non-numeric (e.g. `va`, `vb`) versions.
892
+
859
893
  # Text manipulation
860
894
 
861
895
  ## Transformations
@@ -8,6 +8,7 @@ src/data_manipulation_utilities.egg-info/requires.txt
8
8
  src/data_manipulation_utilities.egg-info/top_level.txt
9
9
  src/dmu/arrays/utilities.py
10
10
  src/dmu/generic/utilities.py
11
+ src/dmu/generic/version_management.py
11
12
  src/dmu/logging/log_store.py
12
13
  src/dmu/ml/cv_classifier.py
13
14
  src/dmu/ml/cv_predict.py
@@ -0,0 +1,132 @@
1
+ '''
2
+ Module containing functions used to find latest, next version, etc of a path.
3
+ '''
4
+
5
+ import glob
6
+ import os
7
+ import re
8
+
9
+ from dmu.logging.log_store import LogStore
10
+
11
+ log=LogStore.add_logger('dmu:version_management')
12
+ #---------------------------------------
13
+ def _get_numeric_version(version : str) -> int:
14
+ '''
15
+ Takes string with numbers at the end (padded or not)
16
+ Returns integer version of numbers
17
+ '''
18
+ #Skip these directories
19
+ if version in ['__pycache__']:
20
+ return -1
21
+
22
+ regex=r'[a-z]+(\d+)'
23
+ mtch =re.match(regex, version)
24
+ if not mtch:
25
+ log.debug(f'Cannot extract numeric version from: {version}')
26
+ return -1
27
+
28
+ str_val = mtch.group(1)
29
+ val = int(str_val)
30
+
31
+ return val
32
+ #---------------------------------------
33
def _get_version_key(name : str) -> tuple:
    '''
    Builds the key used to rank a directory name as a version

    Parameters
    ---------------------
    name (str): Directory name, e.g. v3, v1.2, va

    Returns
    ---------------------
    Tuple (rank, numbers, name) where:

    - rank is 1 for numeric versions, 0 for any other name and -1 for __pycache__,
      so numeric versions always win over the rest.
    - numbers holds the dot separated integers, e.g. v1.2 -> (1, 2). Comparing tuples
      of integers makes v3 newer than v1.2 and v10 newer than v9.
    - name breaks ties and orders non-numeric versions (va < vb) alphabetically.
    '''
    if name == '__pycache__':
        return (-1, (), name)

    mtch = re.match(r'[a-z]+(\d+(?:\.\d+)*)', name)
    if not mtch:
        log.debug(f'Cannot extract numeric version from: {name}')
        return (0, (), name)

    numbers = tuple(int(part) for part in mtch.group(1).split('.'))

    return (1, numbers, name)
#---------------------------------------
def get_last_version(dir_path : str, version_only : bool = True, main_only : bool = False) -> str:
    '''Returns path or just version associated to latest version found in given path

    Parameters
    ---------------------
    dir_path     (str) : Path to directory where versioned subdirectories exist
    version_only (bool): Returns only vxxxx if True, otherwise, full path to directory
    main_only    (bool): Returns vX where X is a number. Otherwise it will return vx.y in case version has subversion

    Returns
    ---------------------
    Name of the latest versioned subdirectory, or its path if version_only is False

    Raises
    ---------------------
    ValueError: If dir_path is empty or does not exist
    IndexError: If dir_path contains no subdirectories
    '''
    l_obj = glob.glob(f'{dir_path}/*')
    if not l_obj:
        log.error(f'Nothing found in {dir_path}')
        raise ValueError(f'Nothing found in {dir_path}')

    # Only directories are considered versions, plain files are ignored
    l_dir = [ obj for obj in l_obj if os.path.isdir(obj) ]
    if not l_dir:
        log.error(f'Cannot find path in: {dir_path}')
        raise IndexError(f'No subdirectories found in: {dir_path}')

    # Rank by (major, minor, ...) instead of by the digits with the dots removed,
    # which made v1.2 (read as 12) newer than v3 and let v1.2 and v12 overwrite each other
    path = max(l_dir, key=lambda obj : _get_version_key(os.path.basename(obj)))

    name = os.path.basename(path)
    dirn = os.path.dirname(path)

    # Drop the subversion, i.e. v3.2 -> v3
    if main_only and '.' in name:
        name = name[:name.index('.')]

    if version_only:
        return name

    return f'{dirn}/{name}'
70
+ #---------------------------------------
71
def get_latest_file(dir_path : str, wc : str) -> str:
    '''Will find latest file in a given directory

    Parameters
    --------------------
    dir_path (str): Directory where files are found
    wc (str): Wildcard associated to files, e.g. file_*.txt

    Returns
    --------------------
    Path to latest file, according to version. Numbers in the path are compared
    as integers, so file_v10.txt is newer than file_v9.txt

    Raises
    --------------------
    ValueError: If no path matches the wildcard
    '''
    l_path = glob.glob(f'{dir_path}/{wc}')
    if not l_path:
        log.error(f'Cannot find files in: {dir_path}/{wc}')
        raise ValueError(f'Cannot find files in: {dir_path}/{wc}')

    def _natural_key(path : str) -> tuple:
        # Splitting on a captured group of digits alternates text (even index)
        # and digits (odd index), so the elements compared are always of the same type
        l_token = re.split(r'(\d+)', path)
        l_value = [ int(token) if index % 2 else token for index, token in enumerate(l_token) ]

        # The path itself breaks ties between e.g. v01 and v1
        return (l_value, path)

    # A plain string sort would place file_v10.txt before file_v9.txt
    return max(l_path, key=_natural_key)
91
+ #---------------------------------------
92
def get_next_version(version : str) -> str:
    '''Pick up string symbolizing version and return next version

    Parameters
    -------------------------
    version (str) : Of the form vx.y or vx where x and y are integers. It can also be a full path

    Returns
    -------------------------
    String equal to the argument, but with the main version augmented by 1, e.g. vx+1.y

    Raises
    -------------------------
    ValueError: If the version (or the last element of the path) does not start with v followed by digits

    Examples:
    -------------------------

    get_next_version('v1.1')      = 'v2.1'
    get_next_version('v1'  )      = 'v2'
    get_next_version('/a/b/v1.1') = '/a/b/v2.1'
    '''
    if '/' in version:
        dirname = os.path.dirname(version)
        version = os.path.basename(version)
    else:
        dirname = None

    rgx  = r'v(\d+)(\.\d+)?'
    mtch = re.match(rgx, version)
    if not mtch:
        log.error(f'Cannot match {version} with {rgx}')
        raise ValueError(f'Cannot match {version} with {rgx}')

    ver_nxt = int(mtch.group(1)) + 1

    # Rewrite only the matched main version; str.replace would also alter
    # later occurrences, e.g. v1_rev1 -> v2_rev2
    version = f'v{ver_nxt}{version[mtch.end(1):]}'

    if dirname is not None:
        # os.path.join avoids the double slash when the directory is the root, e.g. /v1 -> /v2
        version = os.path.join(dirname, version)

    return version
132
+ #---------------------------------------