data-manipulation-utilities 0.0.4__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/PKG-INFO +28 -1
  2. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/README.md +27 -0
  3. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/pyproject.toml +15 -13
  4. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/data_manipulation_utilities.egg-info/PKG-INFO +28 -1
  5. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/data_manipulation_utilities.egg-info/SOURCES.txt +3 -0
  6. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/data_manipulation_utilities.egg-info/entry_points.txt +1 -0
  7. data_manipulation_utilities-0.0.8/src/dmu/physics/utilities.py +57 -0
  8. data_manipulation_utilities-0.0.8/src/dmu_scripts/git/publish +88 -0
  9. data_manipulation_utilities-0.0.8/src/dmu_scripts/physics/update_decinfo.py +111 -0
  10. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/setup.cfg +0 -0
  11. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/data_manipulation_utilities.egg-info/dependency_links.txt +0 -0
  12. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/data_manipulation_utilities.egg-info/requires.txt +0 -0
  13. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/data_manipulation_utilities.egg-info/top_level.txt +0 -0
  14. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/arrays/utilities.py +0 -0
  15. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/generic/utilities.py +0 -0
  16. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/logging/log_store.py +0 -0
  17. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/ml/cv_classifier.py +0 -0
  18. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/ml/cv_predict.py +0 -0
  19. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/ml/train_mva.py +0 -0
  20. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/ml/utilities.py +0 -0
  21. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/plotting/plotter.py +0 -0
  22. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/plotting/plotter_1d.py +0 -0
  23. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/plotting/plotter_2d.py +0 -0
  24. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/rdataframe/atr_mgr.py +0 -0
  25. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/rdataframe/utilities.py +0 -0
  26. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/rfile/rfprinter.py +0 -0
  27. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/rfile/utilities.py +0 -0
  28. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/stats/fitter.py +0 -0
  29. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/stats/function.py +0 -0
  30. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/stats/utilities.py +0 -0
  31. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/testing/utilities.py +0 -0
  32. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu/text/transformer.py +0 -0
  33. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/__init__.py +0 -0
  34. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/ml/tests/train_mva.yaml +0 -0
  35. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/2d.yaml +0 -0
  36. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/fig_size.yaml +0 -0
  37. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/high_stat.yaml +0 -0
  38. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/name.yaml +0 -0
  39. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/no_bounds.yaml +0 -0
  40. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/simple.yaml +0 -0
  41. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/title.yaml +0 -0
  42. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/plotting/tests/weights.yaml +0 -0
  43. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/text/transform.toml +0 -0
  44. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/text/transform.txt +0 -0
  45. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/text/transform_set.toml +0 -0
  46. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/text/transform_set.txt +0 -0
  47. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_data/text/transform_trf.txt +0 -0
  48. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_scripts/physics/check_truth.py +0 -0
  49. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_scripts/rfile/compare_root_files.py +0 -0
  50. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_scripts/rfile/print_trees.py +0 -0
  51. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_scripts/ssh/coned.py +0 -0
  52. {data_manipulation_utilities-0.0.4 → data_manipulation_utilities-0.0.8}/src/dmu_scripts/text/transform_text.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: data_manipulation_utilities
3
- Version: 0.0.4
3
+ Version: 0.0.8
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: zfit
6
6
  Requires-Dist: logzero
@@ -24,6 +24,33 @@ Requires-Dist: pytest; extra == "dev"
24
24
 
25
25
  These are tools that can be used for different data analysis tasks.
26
26
 
27
+ For LHCb specific tools (not useful for people outside LHCb) see [this](doc/lhcb.md)
28
+
29
+ # GIT
30
+
31
+ ## Pushing
32
+
33
+ From the root directory of a version controlled project (i.e. a directory with the `.git` subdirectory)
34
+ using a `pyproject.toml` file, run:
35
+
36
+ ```bash
37
+ publish
38
+ ```
39
+
40
+ such that:
41
+
42
+ 1. The `pyproject.toml` file is checked and the version of the project is extracted.
43
+ 1. If a tag named after the version already exists, no new tag is created.
44
+ 1. If it does not exist, a new tag named after the version is created.
45
+
46
+ Then, for each remote it pushes the tags and the commits.
47
+
48
+ *Why?*
49
+
50
+ 1. Tags should be named as the project's version
51
+ 1. As soon as a new version is created, that version needs to be tagged.
52
+ 1. In GitHub, one can configure actions to publish projects when the commits are tagged.
53
+
27
54
  # Generic
28
55
 
29
56
  This section describes generic tools that could not be put in a specific category, but tend to be useful.
@@ -2,6 +2,33 @@
2
2
 
3
3
  These are tools that can be used for different data analysis tasks.
4
4
 
5
+ For LHCb specific tools (not useful for people outside LHCb) see [this](doc/lhcb.md)
6
+
7
+ # GIT
8
+
9
+ ## Pushing
10
+
11
+ From the root directory of a version controlled project (i.e. a directory with the `.git` subdirectory)
12
+ using a `pyproject.toml` file, run:
13
+
14
+ ```bash
15
+ publish
16
+ ```
17
+
18
+ such that:
19
+
20
+ 1. The `pyproject.toml` file is checked and the version of the project is extracted.
21
+ 1. If a tag named after the version already exists, no new tag is created.
22
+ 1. If it does not exist, a new tag named after the version is created.
23
+
24
+ Then, for each remote it pushes the tags and the commits.
25
+
26
+ *Why?*
27
+
28
+ 1. Tags should be named as the project's version
29
+ 1. As soon as a new version is created, that version needs to be tagged.
30
+ 1. In GitHub, one can configure actions to publish projects when the commits are tagged.
31
+
5
32
  # Generic
6
33
 
7
34
  This section describes generic tools that could not be put in a specific category, but tend to be useful.
@@ -1,22 +1,22 @@
1
1
  [project]
2
2
  name = 'data_manipulation_utilities'
3
- version = '0.0.4'
3
+ version = '0.0.8'
4
4
  readme = 'README.md'
5
5
  dependencies= [
6
- 'zfit',
6
+ 'zfit',
7
7
  'logzero',
8
- 'PyYAML',
9
- 'scipy',
10
- 'awkward',
11
- 'tqdm',
12
- 'joblib',
13
- 'scikit-learn',
14
- 'toml',
15
- 'numpy',
16
- 'matplotlib',
17
- 'mplhep',
8
+ 'PyYAML',
9
+ 'scipy',
10
+ 'awkward',
11
+ 'tqdm',
12
+ 'joblib',
13
+ 'scikit-learn',
14
+ 'toml',
15
+ 'numpy',
16
+ 'matplotlib',
17
+ 'mplhep',
18
18
  'hist[plot]',
19
- 'polars',
19
+ 'polars',
20
20
  'pandas']
21
21
 
22
22
  [project.optional-dependencies]
@@ -29,6 +29,7 @@ where = ['src']
29
29
  transform_text ='dmu_scripts.text.transform_text:main'
30
30
  coned ='dmu_scripts.ssh.coned:main'
31
31
  check_truth ='dmu_scripts.physics.check_truth:main'
32
+ update_decinfo ='dmu_scripts.physics.update_decinfo:main'
32
33
  print_trees ='dmu_scripts.rfile.print_trees:main'
33
34
  compare_root_files='dmu_scripts.rfile.compare_root_files:main'
34
35
 
@@ -36,3 +37,4 @@ compare_root_files='dmu_scripts.rfile.compare_root_files:main'
36
37
  dmu_data=['text/*.txt', 'text/*.toml', 'ml/*/*.yaml', 'plotting/*/*.yaml']
37
38
 
38
39
  [tool.setuptools]
40
+ script-files=['src/dmu_scripts/git/publish']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: data_manipulation_utilities
3
- Version: 0.0.4
3
+ Version: 0.0.8
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: zfit
6
6
  Requires-Dist: logzero
@@ -24,6 +24,33 @@ Requires-Dist: pytest; extra == "dev"
24
24
 
25
25
  These are tools that can be used for different data analysis tasks.
26
26
 
27
+ For LHCb specific tools (not useful for people outside LHCb) see [this](doc/lhcb.md)
28
+
29
+ # GIT
30
+
31
+ ## Pushing
32
+
33
+ From the root directory of a version controlled project (i.e. a directory with the `.git` subdirectory)
34
+ using a `pyproject.toml` file, run:
35
+
36
+ ```bash
37
+ publish
38
+ ```
39
+
40
+ such that:
41
+
42
+ 1. The `pyproject.toml` file is checked and the version of the project is extracted.
43
+ 1. If a tag named after the version already exists, no new tag is created.
44
+ 1. If it does not exist, a new tag named after the version is created.
45
+
46
+ Then, for each remote it pushes the tags and the commits.
47
+
48
+ *Why?*
49
+
50
+ 1. Tags should be named as the project's version
51
+ 1. As soon as a new version is created, that version needs to be tagged.
52
+ 1. In GitHub, one can configure actions to publish projects when the commits are tagged.
53
+
27
54
  # Generic
28
55
 
29
56
  This section describes generic tools that could not be put in a specific category, but tend to be useful.
@@ -13,6 +13,7 @@ src/dmu/ml/cv_classifier.py
13
13
  src/dmu/ml/cv_predict.py
14
14
  src/dmu/ml/train_mva.py
15
15
  src/dmu/ml/utilities.py
16
+ src/dmu/physics/utilities.py
16
17
  src/dmu/plotting/plotter.py
17
18
  src/dmu/plotting/plotter_1d.py
18
19
  src/dmu/plotting/plotter_2d.py
@@ -40,7 +41,9 @@ src/dmu_data/text/transform.txt
40
41
  src/dmu_data/text/transform_set.toml
41
42
  src/dmu_data/text/transform_set.txt
42
43
  src/dmu_data/text/transform_trf.txt
44
+ src/dmu_scripts/git/publish
43
45
  src/dmu_scripts/physics/check_truth.py
46
+ src/dmu_scripts/physics/update_decinfo.py
44
47
  src/dmu_scripts/rfile/compare_root_files.py
45
48
  src/dmu_scripts/rfile/print_trees.py
46
49
  src/dmu_scripts/ssh/coned.py
@@ -4,3 +4,4 @@ compare_root_files = dmu_scripts.rfile.compare_root_files:main
4
4
  coned = dmu_scripts.ssh.coned:main
5
5
  print_trees = dmu_scripts.rfile.print_trees:main
6
6
  transform_text = dmu_scripts.text.transform_text:main
7
+ update_decinfo = dmu_scripts.physics.update_decinfo:main
@@ -0,0 +1,57 @@
1
+ '''
2
+ Module containing utility functions
3
+ '''
4
+ from importlib.resources import files
5
+ from functools import cache
6
+
7
+ import yaml
8
+
9
+ # ---------------------------------
10
@cache
def _get_evt_name() -> dict[str,str]:
    '''
    Returns the content of the `physics/evt_name.yaml` resource of the
    `dmu_data` package, as parsed by `yaml.safe_load`

    The function is decorated with `functools.cache`, therefore the file
    is read at most once per process and later calls reuse the same object
    '''
    yaml_path = str(files('dmu_data').joinpath('physics/evt_name.yaml'))

    with open(yaml_path, encoding='utf-8') as stream:
        return yaml.safe_load(stream)
18
+ # ---------------------------------
19
+ def _format_nickname(nickname : str, style : str) -> str:
20
+ if style == 'literal':
21
+ return nickname
22
+
23
+ if style != 'safe_1':
24
+ raise ValueError(f'Invalid style: {style}')
25
+
26
+ nickname = nickname.replace('.', 'p')
27
+ nickname = nickname.replace('-', 'mn')
28
+ nickname = nickname.replace('+', 'pl')
29
+ nickname = nickname.replace('=', '_eq_')
30
+ nickname = nickname.replace(',', '_')
31
+
32
+ return nickname
33
+ # ---------------------------------
34
def read_decay_name(event_type : str, style : str = 'safe_1') -> str:
    '''
    Looks up the nickname associated to an event type and returns it formatted

    event_type: Key used to search in the event type -> nickname mapping
    style     : Formatting applied to the nickname before returning it

    Available styles:

    literal         : The nickname is returned as stored
    safe_1 (default): The following replacements are made:
        . -> p
        = -> _eq_
        - -> mn
        + -> pl
        , -> _

    Raises ValueError if the event type is not in the mapping or the style is not valid
    '''
    d_nickname = _get_evt_name()

    if event_type in d_nickname:
        return _format_nickname(d_nickname[event_type], style)

    raise ValueError(f'Event type {event_type} not found')
57
+ # ---------------------------------
@@ -0,0 +1,88 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # --------------------------
4
display_help()
{
    # Prints the usage summary to standard output, one argument per line
    printf '%s\n' \
        "Script meant to:" \
        "" \
        "1. Check if version in pyproject.toml has been modified" \
        "2. If it has create new tag following version name" \
        "3. Push to remote "
}
12
+ # --------------------------
13
get_opts()
{
    # Parses the command line flags passed as arguments to this function.
    #   -h        : prints the help and exits with status 0
    #   -f <arg>  : accepted by the option string, but no action is attached to it
    # An unknown flag, or a flag missing its argument, prints a message
    # followed by the help and exits with status 1.
    local flag

    while getopts :hf: flag; do
        if [[ "${flag}" == "h" ]]; then
            display_help
            exit 0
        elif [[ "${flag}" == "?" ]]; then
            # With a leading ':' in the option string, getopts reports unknown flags as '?'
            echo "Invalid option: -${OPTARG}"
            display_help
            exit 1
        elif [[ "${flag}" == ":" ]]; then
            # ':' is reported when a flag that requires an argument did not get one
            echo "$0: Arguments needed"
            display_help
            exit 1
        fi
    done
}
32
+ # --------------------------
33
# Picks up version from pyproject.toml
get_version()
{
    # Reads the project version from ./pyproject.toml and stores it in the
    # global variable VERSION (the matched line is kept in VERSION_LINE).
    # Exits with status 1 if the file is missing or no version can be extracted.
    if [[ ! -f pyproject.toml ]];then
        echo "Cannot find pyproject.toml"
        exit 1
    fi

    # Only a line assigning the `version` key is used, lines that merely
    # contain the word (e.g. python_version) are skipped. The first match wins.
    VERSION_LINE=$(grep -m 1 -E '^[[:space:]]*version[[:space:]]*=' pyproject.toml)

    if [[ $? -ne 0 ]];then
        echo "Could not extract version from pyproject.toml"
        exit 1
    fi

    # Each component may have several digits (e.g. 0.0.10). The quotes on both
    # sides force the whole value to be numeric, instead of silently truncating it.
    local PATTERN="=[[:space:]]*[\"']([0-9]+(\\.[0-9]+)*)[\"']"

    if [[ "$VERSION_LINE" =~ $PATTERN ]];then
        VERSION=${BASH_REMATCH[1]}
        echo "Using version: $VERSION"
        return
    fi

    echo "Could not extract version from: $VERSION_LINE"
    exit 1
}
57
+ # --------------------------
58
create_tag()
{
    # Creates an annotated tag named after $VERSION, unless a tag with exactly
    # that name already exists in the current repository.
    #
    # The reference is looked up by its full name, so that version 0.0.1 is not
    # taken as present because of a tag 0.0.10 or because the string shows up in
    # the annotation of another tag.
    if git rev-parse --quiet --verify "refs/tags/$VERSION" > /dev/null; then
        echo "Version found among tags, not tagging"
        return
    fi

    echo "Version $VERSION not found among tags, creating new tag"

    git tag -a "$VERSION"
}
71
+ # --------------------------
72
push_all()
{
    # Commits pyproject.toml (plus anything else already staged) and pushes
    # the current HEAD and all the tags to every configured remote.
    #
    # NOTE(review): the script calls create_tag before this function, so a tag
    # made in the same run points to the commit preceding the publication
    # commit. Confirm that this is intended.
    local REMOTES
    REMOTES=$(git remote)

    # Without remotes there is nothing to publish, nothing is committed either
    if [[ -z "$REMOTES" ]]; then
        return
    fi

    # The commit is made once, not once per remote, and only if something is staged,
    # otherwise git commit would fail with "nothing to commit"
    git add pyproject.toml
    if ! git diff --cached --quiet; then
        git commit -m "Publication commit"
    fi

    # Remote names cannot contain whitespace, word splitting is safe here
    for REMOTE in $REMOTES;do
        echo "Pushing tags and commits to remote: $REMOTE"

        git push -u "$REMOTE" HEAD
        git push "$REMOTE" --tags
    done
}
83
+ # --------------------------
84
main()
{
    # Runs the steps of the script in order:
    # parse flags, read the version, tag if needed and push
    get_opts "$@"

    get_version
    create_tag
    push_all
}

main "$@"
@@ -0,0 +1,111 @@
1
+ '''
2
+ Script meant to read information from files in
3
+
4
+ https://gitlab.cern.ch/lhcb-datapkg/Gen/DecFiles
5
+
6
+ and store it in current project as data
7
+ '''
8
+ import os
9
+ import re
10
+ import glob
11
+ from dataclasses import dataclass
12
+ from importlib.resources import files
13
+
14
+ import tqdm
15
+ import yaml
16
+ from dmu.logging.log_store import LogStore
17
+
18
+ log=LogStore.add_logger('dmu_scripts:physics:update_decinfo')
19
+ # ------------------------------
20
@dataclass
class Data:
    '''
    Holder of the values shared among the functions of this script
    '''
    # Path to the root of DecFiles, assigned by _setup from the DECPATH environment variable
    dec_path : str
    # Matches comment lines of the form `# Key: value`, the only group captures the value
    regex    : str = r'#[\s]*[a-zA-Z]+:[\s]*(.*)'
27
+ # ------------------------------
28
def _setup() -> None:
    '''
    Stores the value of the DECPATH environment variable in Data.dec_path

    Raises ValueError if the variable is not defined
    '''
    dec_path = os.environ.get('DECPATH')
    if dec_path is None:
        raise ValueError('DECPATH, path to root of DecFiles, not found')

    Data.dec_path = dec_path
33
+ # ------------------------------
34
+ def _line_from_list(file_path : str, contains : str, l_line : list[str]) -> str:
35
+ l_value = [ line for line in l_line if contains in line ]
36
+
37
+ if len(l_value) == 0:
38
+ log.warning(f'Could not extract {contains} line in: {file_path}')
39
+ return 'not_found'
40
+
41
+ return l_value[0]
42
+ # ------------------------------
43
def _val_from_line(file_path : str, line : str) -> str:
    '''
    Returns the value captured by Data.regex in `line`, with all the spaces removed

    The string 'not_found' is returned when `line` is already 'not_found' or
    when the line does not match, in the latter case a warning is logged
    '''
    if line == 'not_found':
        return line

    if (mtch := re.match(Data.regex, line)) is None:
        log.warning(f'Cannot extract value from \"{line}\" in file {file_path}')
        return 'not_found'

    return mtch.group(1).replace(' ', '')
56
+ # ------------------------------
57
def _get_evt_name(file_path : str) -> tuple[str,str]:
    '''
    Reads the file in `file_path` and returns the tuple (event type, nickname)
    taken from the first lines containing `EventType` and `NickName`

    Either element is 'not_found' when it could not be extracted
    '''
    with open(file_path, encoding='utf-8') as ifile:
        l_line = ifile.read().splitlines()

    # Both lines are searched before any value is extracted
    l_found = [ _line_from_list(file_path, key, l_line) for key in ('EventType', 'NickName') ]
    evt_type, nickname = [ _val_from_line(file_path, found) for found in l_found ]

    return evt_type, nickname
68
+ # ------------------------------
69
def _read_info() -> dict[str,str]:
    '''
    Reads every file matching `dkfiles/*.dec` under Data.dec_path and returns
    a dictionary mapping event type to nickname

    Raises ValueError if no file is found
    '''
    dec_file_wc = f'{Data.dec_path}/dkfiles/*.dec'
    # glob returns the paths in arbitrary order. Sorting makes the result reproducible
    # when several files declare the same event type and the last one read wins
    l_dec_file  = sorted(glob.glob(dec_file_wc))
    nfiles      = len(l_dec_file)
    if nfiles == 0:
        raise ValueError(f'No dec file found in {dec_file_wc}')

    log.info(f'Found {nfiles} decay files')

    l_evt_name = [ _get_evt_name(file_path) for file_path in tqdm.tqdm(l_dec_file, ascii=' -') ]
    d_evt_name = _dict_from_tup_list(l_evt_name)

    return d_evt_name
82
+ # ------------------------------
83
+ def _dict_from_tup_list(l_evt_name : list[tuple[str,str]]) -> dict[str,str]:
84
+ d_res = {}
85
+ for key, val in l_evt_name:
86
+ if key in d_res:
87
+ old_val = d_res[key]
88
+ log.warning(f'Key {key} with value {old_val} already found, overriding with {val}')
89
+
90
+ d_res[key] = val
91
+
92
+ return d_res
93
+ # ------------------------------
94
def _dump_info(d_evt_name : dict[str,str]) -> None:
    '''
    Saves the dictionary as YAML in the `physics/evt_name.yaml` file,
    located inside the directory of the `dmu_data` package
    '''
    yaml_path = str(files('dmu_data').joinpath('physics/evt_name.yaml'))

    # The physics directory might not exist yet inside dmu_data,
    # without it open would raise FileNotFoundError
    os.makedirs(os.path.dirname(yaml_path), exist_ok=True)

    log.info(f'Saving to: {yaml_path}')
    with open(yaml_path, 'w', encoding='utf-8') as ofile:
        yaml.dump(d_evt_name, ofile)
101
+ # ------------------------------
102
def main():
    '''
    Entry point of the script: picks up the path to the decay files,
    reads the event types with their nicknames and saves them as YAML
    '''
    _setup()
    _dump_info(_read_info())
# ------------------------------
if __name__ == '__main__':
    main()