data-manipulation-utilities 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,9 +69,20 @@ create_tag()
69
69
  git tag -a $VERSION
70
70
  }
71
71
  # --------------------------
72
# Commits the version file and pushes the current branch and all tags
# to every configured remote.
push_all()
{
    # Stage and commit once, before looping over the remotes. Committing
    # inside the loop makes every iteration after the first fail with
    # "nothing to commit".
    git add pyproject.toml

    # `git diff --cached --quiet` exits non-zero only when something is staged,
    # so the commit is skipped when there is nothing to record.
    if ! git diff --cached --quiet; then
        git commit -m "Publication commit"
    fi

    for REMOTE in $(git remote); do
        echo "Pushing tags and commits to remote: $REMOTE"

        # Quote the remote name so it is always passed as a single argument
        git push -u "$REMOTE" HEAD
        git push "$REMOTE" --tags
    done
}
83
+ # --------------------------
72
84
  get_opts "$@"
73
85
 
74
86
  get_version
75
87
  create_tag
76
- git push -u origin HEAD
77
- git push --tags
88
+ push_all
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: data_manipulation_utilities
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Description-Content-Type: text/markdown
5
5
  Requires-Dist: zfit
6
6
  Requires-Dist: logzero
@@ -18,12 +18,39 @@ Requires-Dist: hist[plot]
18
18
  Requires-Dist: polars
19
19
  Requires-Dist: pandas
20
20
  Provides-Extra: dev
21
- Requires-Dist: pytest ; extra == 'dev'
21
+ Requires-Dist: pytest; extra == "dev"
22
22
 
23
23
  # D(ata) M(anipulation) U(tilities)
24
24
 
25
25
  These are tools that can be used for different data analysis tasks.
26
26
 
27
+ For LHCb-specific tools (not useful for people outside LHCb), see [the LHCb documentation](doc/lhcb.md).
28
+
29
+ # GIT
30
+
31
+ ## Pushing
32
+
33
+ From the root directory of a version-controlled project (i.e. a directory containing a `.git` subdirectory)
34
+ using a `pyproject.toml` file, run:
35
+
36
+ ```bash
37
+ publish
38
+ ```
39
+
40
+ such that:
41
+
42
+ 1. The `pyproject.toml` file is checked and the version of the project is extracted.
43
+ 1. If a tag named after the version already exists, skip to the steps below.
44
+ 1. If it does not, a new tag named after the version is created.
45
+
46
+ Then, for each remote, the tags and the commits are pushed.
47
+
48
+ *Why?*
49
+
50
+ 1. Tags should be named after the project's version.
51
+ 1. As soon as a new version is created, that version needs to be tagged.
52
+ 1. In GitHub, one can configure actions to publish projects when the commits are tagged.
53
+
27
54
  # Generic
28
55
 
29
56
  This section describes generic tools that could not be put in a specific category, but tend to be useful.
@@ -1,4 +1,4 @@
1
- data_manipulation_utilities-0.0.6.data/scripts/publish,sha256=9DPzH1NcyuyXo6uHjQitIce192jugsoCCjRY7HR7mCg,1684
1
+ data_manipulation_utilities-0.0.8.data/scripts/publish,sha256=MHDsn93GTeJxpsM3hNbOJ7JEtsugX21WOFQ2PGWairU,1943
2
2
  dmu/arrays/utilities.py,sha256=PKoYyybPptA2aU-V3KLnJXBudWxTXu4x1uGdIMQ49HY,1722
3
3
  dmu/generic/utilities.py,sha256=0Xnq9t35wuebAqKxbyAiMk1ISB7IcXK4cFH25MT1fgw,1741
4
4
  dmu/logging/log_store.py,sha256=v0tiNz-6ktT_afD5DuvCZ8Nmr82JKQOPli8hgd28P1Q,3960
@@ -6,6 +6,7 @@ dmu/ml/cv_classifier.py,sha256=n81m7i2M6Zq96AEd9EZGwXSrbG5m9jkS5RdeXvbsAXU,3712
6
6
  dmu/ml/cv_predict.py,sha256=Bqxu-f6qquKJokFljhCzL_kiGcjLJLQFhVBD130fsyw,4893
7
7
  dmu/ml/train_mva.py,sha256=d_n-A07DFweikz5nXap4OE_Mqx8VprFT7zbxmnQAbac,9638
8
8
  dmu/ml/utilities.py,sha256=Nue7O9zi1QXgjGRPH6wnSAW9jusMQ2ZOSDJzBqJKIi0,3687
9
+ dmu/physics/utilities.py,sha256=SGUNZT9qHvo6hpS12857cJRd4BPEwGDLpWaO5BhUJDk,1579
9
10
  dmu/plotting/plotter.py,sha256=laa6Kl7P-ZOIhaOFBVjOH4XQ4kPCV7wBNvLIMBnyCwM,7181
10
11
  dmu/plotting/plotter_1d.py,sha256=G-i94uzm2TjNaog1A4agAKar_G0qNdkAqIPCmzhe85Y,3660
11
12
  dmu/plotting/plotter_2d.py,sha256=SWPKns-CfpUZHgBXvwm3gceH3k2eL_mKGXQ8sWpZJB0,2919
@@ -33,14 +34,15 @@ dmu_data/text/transform.txt,sha256=EX760da6Vkf-_EPxnQlC5hGSkfFhJCCGCD19NU-1Qto,4
33
34
  dmu_data/text/transform_set.toml,sha256=Jeh7BTz82idqvbOQJtl9-ur56mZkzDn5WtvmIb48LoE,150
34
35
  dmu_data/text/transform_set.txt,sha256=1KivMoP9LxPn9955QrRmOzjEqduEjhTetQ9MXykO5LY,46
35
36
  dmu_data/text/transform_trf.txt,sha256=zxBRTgcSmX7RdqfmWF88W1YqbyNHa4Ccruf1MmnYv2A,74
36
- dmu_scripts/git/publish,sha256=9DPzH1NcyuyXo6uHjQitIce192jugsoCCjRY7HR7mCg,1684
37
+ dmu_scripts/git/publish,sha256=MHDsn93GTeJxpsM3hNbOJ7JEtsugX21WOFQ2PGWairU,1943
37
38
  dmu_scripts/physics/check_truth.py,sha256=b1P_Pa9ef6VcFtyY6Y9KS9Om9L-QrCBjDKp4dqca0PQ,3964
39
+ dmu_scripts/physics/update_decinfo.py,sha256=Y5cO3GdUWtVHeXVd37Q04xlFtQBMTLlqCF-medL8SM8,3322
38
40
  dmu_scripts/rfile/compare_root_files.py,sha256=T8lDnQxsRNMr37x1Y7YvWD8ySHrJOWZki7ZQynxXX9Q,9540
39
41
  dmu_scripts/rfile/print_trees.py,sha256=Ze4Ccl_iUldl4eVEDVnYBoe4amqBT1fSBR1zN5WSztk,941
40
42
  dmu_scripts/ssh/coned.py,sha256=lhilYNHWRCGxC-jtyJ3LQ4oUgWW33B2l1tYCcyHHsR0,4858
41
43
  dmu_scripts/text/transform_text.py,sha256=9akj1LB0HAyopOvkLjNOJiptZw5XoOQLe17SlcrGMD0,1456
42
- data_manipulation_utilities-0.0.6.dist-info/METADATA,sha256=t5uc_pnOn1_UBjs2HCrpiNY5Kg4saETW4BsbdVOAvQo,19299
43
- data_manipulation_utilities-0.0.6.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
44
- data_manipulation_utilities-0.0.6.dist-info/entry_points.txt,sha256=1TIZDed651KuOH-DgaN5AoBdirKmrKE_oM1b6b7zTUU,270
45
- data_manipulation_utilities-0.0.6.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
46
- data_manipulation_utilities-0.0.6.dist-info/RECORD,,
44
+ data_manipulation_utilities-0.0.8.dist-info/METADATA,sha256=sILwDHBZyO7OvdF1XkfC4fMzeZquVmZoHea3ytZz8lY,20074
45
+ data_manipulation_utilities-0.0.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
46
+ data_manipulation_utilities-0.0.8.dist-info/entry_points.txt,sha256=-nNgwig7t-dba8EXsI5TxmvLaDet9_qQAbpusuq64Xc,327
47
+ data_manipulation_utilities-0.0.8.dist-info/top_level.txt,sha256=n_x5J6uWtSqy9mRImKtdA2V2NJNyU8Kn3u8DTOKJix0,25
48
+ data_manipulation_utilities-0.0.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -4,3 +4,4 @@ compare_root_files = dmu_scripts.rfile.compare_root_files:main
4
4
  coned = dmu_scripts.ssh.coned:main
5
5
  print_trees = dmu_scripts.rfile.print_trees:main
6
6
  transform_text = dmu_scripts.text.transform_text:main
7
+ update_decinfo = dmu_scripts.physics.update_decinfo:main
@@ -0,0 +1,57 @@
1
+ '''
2
+ Module containing utility functions
3
+ '''
4
+ from importlib.resources import files
5
+ from functools import cache
6
+
7
+ import yaml
8
+
9
+ # ---------------------------------
10
@cache
def _get_evt_name() -> dict[str,str]:
    '''
    Loads `physics/evt_name.yaml` from the `dmu_data` package and returns its parsed content.

    The result is cached, so the file is read only once per process and every
    caller receives the same object.
    '''
    resource = files('dmu_data').joinpath('physics/evt_name.yaml')

    # Open through the resource object rather than through str(path), so that
    # the lookup does not depend on the package living on the plain filesystem
    with resource.open(encoding='utf-8') as ifile:
        d_data = yaml.safe_load(ifile)

    return d_data
18
+ # ---------------------------------
19
+ def _format_nickname(nickname : str, style : str) -> str:
20
+ if style == 'literal':
21
+ return nickname
22
+
23
+ if style != 'safe_1':
24
+ raise ValueError(f'Invalid style: {style}')
25
+
26
+ nickname = nickname.replace('.', 'p')
27
+ nickname = nickname.replace('-', 'mn')
28
+ nickname = nickname.replace('+', 'pl')
29
+ nickname = nickname.replace('=', '_eq_')
30
+ nickname = nickname.replace(',', '_')
31
+
32
+ return nickname
33
+ # ---------------------------------
34
def read_decay_name(event_type : str, style : str = 'safe_1') -> str:
    '''
    Takes event type, and style strings, returns nickname of decay as defined in DecFiles package

    Styles:

    literal         : No change is made to nickname
    safe_1 (default): With following replacements:
        . -> p
        = -> _eq_
        - -> mn
        + -> pl
        , -> _

    Raises ValueError if the event type is not among the stored ones
    or if the style is not one of the above.
    '''
    d_evt_name = _get_evt_name()

    if event_type not in d_evt_name:
        raise ValueError(f'Event type {event_type} not found')

    nickname = d_evt_name[event_type]

    return _format_nickname(nickname, style)
57
+ # ---------------------------------
dmu_scripts/git/publish CHANGED
@@ -69,9 +69,20 @@ create_tag()
69
69
  git tag -a $VERSION
70
70
  }
71
71
  # --------------------------
72
# Commits the version file and pushes the current branch and all tags
# to every configured remote.
push_all()
{
    # Stage and commit once, before looping over the remotes. Committing
    # inside the loop makes every iteration after the first fail with
    # "nothing to commit".
    git add pyproject.toml

    # `git diff --cached --quiet` exits non-zero only when something is staged,
    # so the commit is skipped when there is nothing to record.
    if ! git diff --cached --quiet; then
        git commit -m "Publication commit"
    fi

    for REMOTE in $(git remote); do
        echo "Pushing tags and commits to remote: $REMOTE"

        # Quote the remote name so it is always passed as a single argument
        git push -u "$REMOTE" HEAD
        git push "$REMOTE" --tags
    done
}
83
+ # --------------------------
72
84
  get_opts "$@"
73
85
 
74
86
  get_version
75
87
  create_tag
76
- git push -u origin HEAD
77
- git push --tags
88
+ push_all
@@ -0,0 +1,111 @@
1
+ '''
2
+ Script meant to read information from files in
3
+
4
+ https://gitlab.cern.ch/lhcb-datapkg/Gen/DecFiles
5
+
6
+ and store it in current project as data
7
+ '''
8
+ import os
9
+ import re
10
+ import glob
11
+ from dataclasses import dataclass
12
+ from importlib.resources import files
13
+
14
+ import tqdm
15
+ import yaml
16
+ from dmu.logging.log_store import LogStore
17
+
18
+ log=LogStore.add_logger('dmu_scripts:physics:update_decinfo')
19
+ # ------------------------------
20
@dataclass
class Data:
    '''
    Class used to store shared data
    '''
    # Path to the root of DecFiles, assigned on the class by _setup from the DECPATH variable
    dec_path : str
    # Matches lines of the form "#<spaces><letters>:<spaces><value>" and captures <value>
    regex    : str = r'#[\s]*[a-zA-Z]+:[\s]*(.*)'
27
+ # ------------------------------
28
def _setup() -> None:
    '''
    Reads the DECPATH environment variable and stores it in Data.dec_path

    Raises ValueError if the variable is not set
    '''
    try:
        Data.dec_path = os.environ['DECPATH']
    except KeyError as exc:
        raise ValueError('DECPATH, path to root of DecFiles, not found') from exc
33
+ # ------------------------------
34
+ def _line_from_list(file_path : str, contains : str, l_line : list[str]) -> str:
35
+ l_value = [ line for line in l_line if contains in line ]
36
+
37
+ if len(l_value) == 0:
38
+ log.warning(f'Could not extract {contains} line in: {file_path}')
39
+ return 'not_found'
40
+
41
+ return l_value[0]
42
+ # ------------------------------
43
def _val_from_line(file_path : str, line : str) -> str:
    '''
    Extracts the value from a line, using the pattern in Data.regex, and returns it without spaces.

    file_path: Only used to build the warning message
    line     : Line to parse, the placeholder `not_found` is returned untouched

    If the line does not match the pattern, a warning is logged and `not_found` is returned
    '''
    if line == 'not_found':
        return line

    # re.match anchors at the beginning of the line
    mtch = re.match(Data.regex, line)
    if mtch is None:
        log.warning(f'Cannot extract value from \"{line}\" in file {file_path}')
        return 'not_found'

    return mtch.group(1).replace(' ', '')
56
+ # ------------------------------
57
def _get_evt_name(file_path : str) -> tuple[str,str]:
    '''
    Reads a file and returns the tuple (event type, nickname) extracted from
    the first lines containing `EventType` and `NickName` respectively.

    Either entry is the string `not_found` when it cannot be extracted
    '''
    with open(file_path, encoding='utf-8') as ifile:
        l_line = ifile.read().splitlines()

    # Both lines are looked up before any value is parsed, this fixes the order of the warnings
    evt_line = _line_from_list(file_path, 'EventType', l_line)
    nam_line = _line_from_list(file_path, 'NickName' , l_line)

    return _val_from_line(file_path, evt_line), _val_from_line(file_path, nam_line)
68
+ # ------------------------------
69
def _read_info() -> dict[str,str]:
    '''
    Scans the `dkfiles/*.dec` files under Data.dec_path and returns a dictionary
    mapping event type to nickname.

    Raises ValueError if no file is found
    '''
    dec_file_wc = f'{Data.dec_path}/dkfiles/*.dec'

    # glob returns paths in arbitrary order. Sorting makes the result reproducible
    # when two files share a key, given that the last one read overrides the others
    l_dec_file  = sorted(glob.glob(dec_file_wc))
    nfiles      = len(l_dec_file)
    if nfiles == 0:
        raise ValueError(f'No dec file found in {dec_file_wc}')

    log.info(f'Found {nfiles} decay files')

    l_evt_name = [ _get_evt_name(file_path) for file_path in tqdm.tqdm(l_dec_file, ascii=' -') ]

    return _dict_from_tup_list(l_evt_name)
82
+ # ------------------------------
83
+ def _dict_from_tup_list(l_evt_name : list[tuple[str,str]]) -> dict[str,str]:
84
+ d_res = {}
85
+ for key, val in l_evt_name:
86
+ if key in d_res:
87
+ old_val = d_res[key]
88
+ log.warning(f'Key {key} with value {old_val} already found, overriding with {val}')
89
+
90
+ d_res[key] = val
91
+
92
+ return d_res
93
+ # ------------------------------
94
def _dump_info(d_evt_name : dict[str,str]) -> None:
    '''
    Saves the dictionary as YAML in `physics/evt_name.yaml`, inside the `dmu_data` package
    '''
    yaml_path = files('dmu_data').joinpath('physics/evt_name.yaml')
    yaml_path = str(yaml_path)

    # Opening for writing fails if the parent directory is missing, make sure it exists
    os.makedirs(os.path.dirname(yaml_path), exist_ok=True)

    log.info(f'Saving to: {yaml_path}')
    with open(yaml_path, 'w', encoding='utf-8') as ofile:
        yaml.dump(d_evt_name, ofile)
101
+ # ------------------------------
102
def main() -> None:
    '''
    Script starts here

    Reads DECPATH, extracts event types and nicknames from the decay files
    and stores them as package data
    '''
    _setup()
    d_evt_name = _read_info()
    _dump_info(d_evt_name)
# ------------------------------
if __name__ == '__main__':
    main()