halib 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +94 -0
- halib/common/__init__.py +0 -0
- halib/common/common.py +326 -0
- halib/common/rich_color.py +285 -0
- halib/common.py +151 -0
- halib/csvfile.py +48 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/exp/__init__.py +0 -0
- halib/exp/core/__init__.py +0 -0
- halib/exp/core/base_config.py +167 -0
- halib/exp/core/base_exp.py +147 -0
- halib/exp/core/param_gen.py +170 -0
- halib/exp/core/wandb_op.py +117 -0
- halib/exp/data/__init__.py +0 -0
- halib/exp/data/dataclass_util.py +41 -0
- halib/exp/data/dataset.py +208 -0
- halib/exp/data/torchloader.py +165 -0
- halib/exp/perf/__init__.py +0 -0
- halib/exp/perf/flop_calc.py +190 -0
- halib/exp/perf/gpu_mon.py +58 -0
- halib/exp/perf/perfcalc.py +470 -0
- halib/exp/perf/perfmetrics.py +137 -0
- halib/exp/perf/perftb.py +778 -0
- halib/exp/perf/profiler.py +507 -0
- halib/exp/viz/__init__.py +0 -0
- halib/exp/viz/plot.py +754 -0
- halib/filesys.py +117 -0
- halib/filetype/__init__.py +0 -0
- halib/filetype/csvfile.py +192 -0
- halib/filetype/ipynb.py +61 -0
- halib/filetype/jsonfile.py +19 -0
- halib/filetype/textfile.py +12 -0
- halib/filetype/videofile.py +266 -0
- halib/filetype/yamlfile.py +87 -0
- halib/gdrive.py +179 -0
- halib/gdrive_mkdir.py +41 -0
- halib/gdrive_test.py +37 -0
- halib/jsonfile.py +22 -0
- halib/listop.py +13 -0
- halib/online/__init__.py +0 -0
- halib/online/gdrive.py +229 -0
- halib/online/gdrive_mkdir.py +53 -0
- halib/online/gdrive_test.py +50 -0
- halib/online/projectmake.py +131 -0
- halib/online/tele_noti.py +165 -0
- halib/plot.py +301 -0
- halib/projectmake.py +115 -0
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +157 -0
- halib/research/benchquery.py +131 -0
- halib/research/core/__init__.py +0 -0
- halib/research/core/base_config.py +144 -0
- halib/research/core/base_exp.py +157 -0
- halib/research/core/param_gen.py +108 -0
- halib/research/core/wandb_op.py +117 -0
- halib/research/data/__init__.py +0 -0
- halib/research/data/dataclass_util.py +41 -0
- halib/research/data/dataset.py +208 -0
- halib/research/data/torchloader.py +165 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +137 -0
- halib/research/mics.py +74 -0
- halib/research/params_gen.py +108 -0
- halib/research/perf/__init__.py +0 -0
- halib/research/perf/flop_calc.py +190 -0
- halib/research/perf/gpu_mon.py +58 -0
- halib/research/perf/perfcalc.py +363 -0
- halib/research/perf/perfmetrics.py +137 -0
- halib/research/perf/perftb.py +778 -0
- halib/research/perf/profiler.py +301 -0
- halib/research/perfcalc.py +361 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/viz/__init__.py +0 -0
- halib/research/viz/plot.py +754 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/__init__.py +0 -0
- halib/sys/cmd.py +8 -0
- halib/sys/filesys.py +124 -0
- halib/system/__init__.py +0 -0
- halib/system/_list_pc.csv +6 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +164 -0
- halib/system/path.py +106 -0
- halib/tele_noti.py +166 -0
- halib/textfile.py +13 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict.py +317 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/list.py +17 -0
- halib/utils/listop.py +13 -0
- halib/utils/slack.py +86 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +139 -0
- halib-0.2.30.dist-info/METADATA +237 -0
- halib-0.2.30.dist-info/RECORD +110 -0
- halib-0.2.30.dist-info/WHEEL +5 -0
- halib-0.2.30.dist-info/licenses/LICENSE.txt +17 -0
- halib-0.2.30.dist-info/top_level.txt +1 -0
halib/projectmake.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
from argparse import ArgumentParser
|
|
6
|
+
from io import BytesIO
|
|
7
|
+
import subprocess
|
|
8
|
+
|
|
9
|
+
import certifi
|
|
10
|
+
import pycurl
|
|
11
|
+
|
|
12
|
+
from halib.filetype import jsonfile
|
|
13
|
+
from halib.system import filesys
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_curl(url, user_and_pass, verbose=True):
    """Create a pycurl handle pre-configured for an authenticated HTTPS request.

    :param url: target URL.
    :param user_and_pass: 'user:password' string for HTTP basic auth.
    :param verbose: enable libcurl verbose logging.
    :return: a configured ``pycurl.Curl`` handle (caller must perform/close it).
    """
    handle = pycurl.Curl()
    handle.setopt(pycurl.VERBOSE, verbose)
    # Use certifi's CA bundle so TLS verification works regardless of OS store.
    handle.setopt(pycurl.CAINFO, certifi.where())
    handle.setopt(pycurl.URL, url)
    handle.setopt(pycurl.USERPWD, user_and_pass)
    return handle
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_user_and_pass(username, appPass):
    """Return the 'user:password' credential string used for HTTP basic auth."""
    return ':'.join((username, appPass))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def create_repo(username, appPass, repo_name, workspace,
                proj_name, template_repo='py-proj-template'):
    """Create a Bitbucket repository via the REST API, optionally seeding it
    from a template repository.

    :param username: Bitbucket username.
    :param appPass: Bitbucket app password.
    :param repo_name: slug of the repository to create.
    :param workspace: Bitbucket workspace id.
    :param proj_name: Bitbucket project key the repo belongs to.
    :param template_repo: template repo slug to copy from; falsy disables templating.
    :return: (successful, msg) — bool status and the raw API response body.
    """
    buffer = BytesIO()
    url = f'https://api.bitbucket.org/2.0/repositories/{workspace}/{repo_name}'
    data = json.dumps({'scm': 'git', 'project': {'key': f'{proj_name}'}})

    user_and_pass = get_user_and_pass(username, appPass)
    c = get_curl(url, user_and_pass)
    c.setopt(pycurl.WRITEDATA, buffer)
    c.setopt(pycurl.POST, 1)
    c.setopt(pycurl.POSTFIELDS, data)
    c.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])
    c.perform()
    RESPOND_CODE = c.getinfo(pycurl.HTTP_CODE)
    c.close()
    # log info
    body = buffer.getvalue()
    # iso-8859-1 decodes any byte sequence, so this never raises.
    msg = body.decode('iso-8859-1')
    # NOTE(review): only HTTP 200 is treated as success; Bitbucket may answer
    # 201 for resource creation — confirm against the API before relying on this.
    successful = True if str(RESPOND_CODE) == '200' else False

    if successful and template_repo:
        # Clone the template repo into the current working directory.
        template_repo_url = f'https://{username}:{appPass}@bitbucket.org/{workspace}/{template_repo}.git'
        git_clone(template_repo_url)
        template_folder = f'./{template_repo}'

        # Clone the freshly created (empty) repo next to it.
        created_repo_url = f'https://{username}:{appPass}@bitbucket.org/{workspace}/{repo_name}.git'
        git_clone(created_repo_url)
        created_folder = f'./{repo_name}'
        # Copy template content into the new clone, keeping the new repo's .git.
        shutil.copytree(template_folder, created_folder,
                        dirs_exist_ok=True,
                        ignore=shutil.ignore_patterns(".git"))
        # NOTE(review): Windows-only cleanup; 'rmdir /S /Q' fails on other OSes.
        os.system('rmdir /S /Q "{}"'.format(template_folder))
        # Template ships a placeholder package dir literally named 'project_name'.
        project_folder = 'project_name'

        # Side effect: changes the process's current working directory.
        filesys.change_current_dir(created_folder)
        filesys.rename_dir_or_file(project_folder, repo_name)
        # push to remote
        # NOTE(review): hard-coded machine-specific batch script path — will
        # raise FileNotFoundError anywhere this file doesn't exist.
        subprocess.check_call(["C:/batch/gitp.bat", "init proj from template"])

    return successful, msg
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def parse_args(argv=None):
    """Parse command-line arguments for the Bitbucket repo-creation tool.

    :param argv: optional argument list (defaults to ``sys.argv[1:]``);
        added for testability, backward compatible with no-arg calls.
    :return: argparse.Namespace with authFile, repoName, templateRepo.
    """
    parser = ArgumentParser(
        # Fixed: description was copy-pasted from a Google Drive upload script.
        description="Create a Bitbucket repository, optionally from a template repo")
    parser.add_argument('-a', '--authFile', type=str,
                        help='authenticate file (json) to Bitbucket', default='bitbucket.json')
    parser.add_argument('-r', '--repoName', type=str,
                        help='Repository name', default='hahv-proj')
    # String flag: only the literal "true" (any case) enables templating downstream.
    parser.add_argument('-t', '--templateRepo', type=str,
                        help='template repo to fork',
                        default='True')
    return parser.parse_args(argv)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def git_clone(url):
    """Clone *url* into the current directory; raises CalledProcessError on failure."""
    command = ["git", "clone", url]
    subprocess.check_call(command)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def main():
    """CLI entry point: read Bitbucket credentials from a JSON auth file and
    create the requested repository, optionally seeded from a template."""
    args = parse_args()
    authFile = args.authFile
    repo_name = args.repoName

    # Auth file must contain username/appPass/workspace_id/project_id
    # (and template_repo when templating is enabled).
    authInfo = jsonfile.read(authFile)
    username = authInfo['username']
    appPass = authInfo['appPass']
    workspace_id = authInfo['workspace_id']
    project_id = authInfo['project_id']
    # --templateRepo is a string flag; only the literal "true" (any case) enables it.
    use_template = (args.templateRepo.lower() == "true")
    template_repo = authInfo['template_repo'] if use_template else ''

    extra_info = f'[Use template project {template_repo}]' if use_template else ''
    print(f'[BitBucket] creating {repo_name} Project in Bitbucket {extra_info}')

    successful, msg = create_repo(username, appPass,
                                  repo_name, workspace_id, project_id,
                                  template_repo=template_repo)
    if successful:
        print(f'[Bitbucket] {repo_name} created successfully.{extra_info}')
    else:
        # On failure, msg holds the raw API response — pretty-print it.
        formatted_msg = jsonfile.beautify(msg)
        print(f'[Bitbucket] {repo_name} created failed. Details:\n{formatted_msg}')


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.pretty import pprint
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from dataclass_wizard import YAMLWizard
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class NamedConfig(ABC):
    """Abstract base for configurations that expose a human-readable name."""

    @abstractmethod
    def get_name(self):
        """Return the configuration's name. Subclasses must implement this."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ExpBaseConfig(ABC, YAMLWizard):
    """
    Base class for configuration objects.
    What a cfg class must have:
    1 - a dataset cfg
    2 - a metric cfg
    3 - a method cfg
    """

    # Save to yaml file
    def save_to_outdir(
        self, filename: str = "__config.yaml", outdir=None, override: bool = False
    ) -> None:
        """
        Save the configuration to the output directory as YAML.

        :param filename: name of the YAML file to write.
        :param outdir: target directory; falls back to ``get_outdir()`` when None.
        :param override: overwrite an existing file when True.
        :raises ValueError: if no output directory can be determined.
        """
        output_dir = outdir if outdir is not None else self.get_outdir()
        # Fixed: validate BEFORE os.makedirs — previously a None directory hit
        # os.makedirs(None) and raised an opaque TypeError; the old assert was
        # also stripped under `python -O`.
        if output_dir is None:
            raise ValueError(
                "No output directory: pass 'outdir' or implement get_outdir()."
            )
        os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.join(output_dir, filename)
        if os.path.exists(file_path) and not override:
            pprint(
                f"File '{file_path}' already exists. Use 'override=True' to overwrite."
            )
        else:
            # method of YAMLWizard to_yaml_file
            self.to_yaml_file(file_path)

    @classmethod
    @abstractmethod
    # load from a custom YAML file
    def from_custom_yaml_file(cls, yaml_file: str):
        """Load a configuration from a custom YAML file."""
        pass

    @abstractmethod
    def get_cfg_name(self):
        """
        Get the name of the configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_outdir(self):
        """
        Get the output directory for the configuration.
        This method should be implemented in subclasses.
        """
        return None

    @abstractmethod
    def get_general_cfg(self):
        """
        Get the general configuration like output directory, log settings, SEED, etc.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_dataset_cfg(self) -> NamedConfig:
        """
        Get the dataset configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_metric_cfg(self) -> NamedConfig:
        """
        Get the metric configuration.
        This method should be implemented in subclasses.
        """
        pass
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Tuple, Any, Optional
|
|
3
|
+
from ..research.base_config import ExpBaseConfig
|
|
4
|
+
from ..research.perfcalc import PerfCalc
|
|
5
|
+
from ..research.metrics import MetricsBackend
|
|
6
|
+
|
|
7
|
+
# ! SEE https://github.com/hahv/base_exp for sample usage
class BaseExperiment(PerfCalc, ABC):
    """
    Base class for experiments.
    Orchestrates the experiment pipeline using a pluggable metrics backend.

    Subclasses implement the abstract steps (init_general, prepare_dataset,
    prepare_metrics, before_exec_exp_once, exec_exp, exec_eval); run_exp /
    eval_exp drive the pipeline and forward results to PerfCalc.calc_perfs.
    """

    def __init__(self, config: ExpBaseConfig):
        self.config = config
        # Lazily created by get_metric_backend() / _prepare_environment().
        self.metric_backend = None
        # Flag to track if init_general/prepare_dataset has run
        self._is_env_ready = False

    # -----------------------
    # PerfCalc Required Methods
    # -----------------------
    def get_dataset_name(self):
        # Delegates to the dataset sub-config's name.
        return self.config.get_dataset_cfg().get_name()

    def get_experiment_name(self):
        return self.config.get_cfg_name()

    def get_metric_backend(self):
        # Lazy init: note this uses truthiness, so a falsy backend object
        # would be re-created on every call.
        if not self.metric_backend:
            self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())
        return self.metric_backend

    # -----------------------
    # Abstract Experiment Steps
    # -----------------------
    @abstractmethod
    def init_general(self, general_cfg):
        """Setup general settings like SEED, logging, env variables."""
        pass

    @abstractmethod
    def prepare_dataset(self, dataset_cfg):
        """Load/prepare dataset."""
        pass

    @abstractmethod
    def prepare_metrics(self, metric_cfg) -> MetricsBackend:
        """
        Prepare the metrics for the experiment.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def before_exec_exp_once(self, *args, **kwargs):
        """Optional: any setup before exec_exp. Note this is called once per run_exp."""
        pass

    @abstractmethod
    def exec_exp(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
        """Run experiment process, e.g.: training/evaluation loop.
        Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
        """
        pass

    @abstractmethod
    def exec_eval(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
        """Run evaluation process.
        Return: either `None` or a tuple of (raw_metrics_data, extra_data) for calc_and_save_exp_perfs
        """
        pass

    # -----------------------
    # Internal Helpers
    # -----------------------
    def _validate_and_unpack(self, results):
        """Return (metrics_data, extra_data) from an exec result, or None.

        Raises ValueError when results is neither None nor a 2-element
        tuple/list.
        """
        if results is None:
            return None
        if not isinstance(results, (tuple, list)) or len(results) != 2:
            raise ValueError("exec must return (metrics_data, extra_data)")
        return results[0], results[1]

    def _prepare_environment(self, force_reload: bool = False):
        """
        Common setup. Skips if already initialized, unless force_reload is True.
        """
        if self._is_env_ready and not force_reload:
            # Environment is already prepared, skipping setup.
            return

        # 1. Run Setup
        self.init_general(self.config.get_general_cfg())
        self.prepare_dataset(self.config.get_dataset_cfg())

        # 2. Update metric backend (refresh if needed)
        self.metric_backend = self.prepare_metrics(self.config.get_metric_cfg())

        # 3. Mark as ready
        self._is_env_ready = True

    # -----------------------
    # Main Experiment Runner
    # -----------------------
    def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
        """
        Run the whole experiment pipeline.
        :param reload_env: If True, forces dataset/general init to run again.
        :param should_calc_metrics: Whether to calculate and save metrics after execution.
        :kwargs Params:
            + 'outfile' to save csv file results,
            + 'outdir' to set output directory for experiment results.
            + 'return_df' to return a DataFrame of results instead of a dictionary.

        Full pipeline:
        1. Init
        2. Prepare Environment (General + Dataset + Metrics)
        3. Save Config
        4. Execute
        5. Calculate & Save Metrics
        """
        self._prepare_environment(force_reload=reload_env)

        # Any pre-exec setup (loading models, etc)
        self.before_exec_exp_once(*args, **kwargs)
        # Save config before running
        self.config.save_to_outdir()

        # Execute experiment
        results = self.exec_exp(*args, **kwargs)

        if should_calc_metrics and results is not None:
            metrics_data, extra_data = self._validate_and_unpack(results)
            # Calculate & Save metrics
            # NOTE(review): calc_perfs is inherited from PerfCalc (not visible
            # here) — confirm it accepts these keyword args plus *args/**kwargs.
            perf_results = self.calc_perfs(
                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
            )
            return perf_results
        else:
            # Either metrics were skipped or exec_exp returned None.
            return results

    # -----------------------
    # Main Experiment Evaluator
    # -----------------------
    def eval_exp(self, reload_env=False, *args, **kwargs):
        """
        Run evaluation only.
        :param reload_env: If True, forces dataset/general init to run again.
        """
        self._prepare_environment(force_reload=reload_env)
        results = self.exec_eval(*args, **kwargs)
        if results is not None:
            metrics_data, extra_data = self._validate_and_unpack(results)
            return self.calc_perfs(
                raw_metrics_data=metrics_data, extra_data=extra_data, *args, **kwargs
            )
        return None
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from rich.pretty import pprint
|
|
3
|
+
from argparse import ArgumentParser
|
|
4
|
+
|
|
5
|
+
def cols_to_col_groups(df):
    """
    Partition *df*'s columns into common columns and per-database metric columns.

    Consecutive columns belong to one group when a column is a pandas
    'Unnamed: N' placeholder or shares the base name (text before the first
    '_', lowercased) with its predecessor. A group containing an 'unnamed'
    column is a database block: its columns are renamed '<named>_<i>'.
    All other groups are common columns.

    :return: {"common_cols": [...], "db_cols": [...]}
    """
    headers = list(df.columns)

    def contains_unnamed(group):
        return any("unnamed" in name.lower() for name in group)

    def base_name(name):
        return name.split("_")[0].lower() if "_" in name else name.lower()

    groups = []
    bucket = []
    for idx, name in enumerate(headers):
        if not bucket:
            # First column seeds the first group.
            bucket.append(name)
            continue
        previous = headers[idx - 1]
        same_base = base_name(name) == base_name(previous)
        if "unnamed" in name.lower() or same_base:
            bucket.append(name)
        else:
            groups.append(bucket)
            bucket = [name]
    if bucket:
        groups.append(bucket)

    meta = {"common_cols": [], "db_cols": []}
    for group in groups:
        if not contains_unnamed(group):
            meta["common_cols"].extend(group)
        else:
            # The group's single named column labels the whole db block.
            named = next((c for c in group if "unnamed" not in c.lower()), None)
            meta["db_cols"].extend(f"{named}_{i}" for i in range(len(group)))
    return meta
|
|
53
|
+
|
|
54
|
+
# def bech_by_db_name(df, db_list="db1, db2", key_metrics="p, r, f1, acc"):
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def str_2_list(input_str, sep=","):
    """Split *input_str* on *sep*, strip whitespace, and drop empty pieces.

    Empty or whitespace-only input yields []. Input without the separator
    yields a single-element list.
    """
    # One comprehension covers all branches of the original logic:
    # ''.split(',') -> [''] and the filter removes blank pieces.
    return [piece.strip() for piece in input_str.split(sep) if piece.strip()]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def filter_bech_df_by_db_and_metrics(df, db_list="", key_metrics=""):
    """
    Filter a benchmark DataFrame by database names and metric names.

    The DataFrame is expected to have common columns followed by per-database
    metric columns (pandas 'Unnamed: N' continuation headers), with the first
    data row holding each metric's name.

    :param df: raw benchmark DataFrame (as read from the bench CSV).
    :param db_list: comma-separated database names; empty keeps all.
    :param key_metrics: comma-separated metric names; empty keeps all.
    :return: filtered copy with columns renamed to '<db>_<i>'.
    """
    meta_cols_dict = cols_to_col_groups(df)
    op_df = df.copy()
    op_df.columns = (
        meta_cols_dict["common_cols"].copy() + meta_cols_dict["db_cols"].copy()
    )
    filtered_cols = list(meta_cols_dict["common_cols"])

    selected_db_list = str_2_list(db_list)
    if selected_db_list:
        # Fixed: iterate the cleaned list instead of re-splitting db_list —
        # a trailing comma used to yield "" which substring-matched EVERY column.
        # Substring match: one db name selects all of its '<db>_<i>' columns.
        db_filtered_cols = [
            col_name
            for db_name in selected_db_list
            for col_name in meta_cols_dict["db_cols"]
            if db_name.lower() in col_name.lower()
        ]
    else:
        db_filtered_cols = meta_cols_dict["db_cols"]

    filtered_cols.extend(db_filtered_cols)
    # (A dead no-op statement `df_filtered` was removed here.)
    df_filtered = op_df[filtered_cols].copy()

    selected_metrics_ls = [m.strip().lower() for m in str_2_list(key_metrics)]
    if selected_metrics_ls:
        # The first data row holds the metric name for each db column.
        metrics_row = df_filtered.iloc[0]
        metrics_values = metrics_row[db_filtered_cols].values
        keep_metrics_cols = [
            col_name
            for metric, col_name in zip(metrics_values, db_filtered_cols)
            if metric.lower() in selected_metrics_ls
        ]
    else:
        pprint("No metrics selected, keeping all db columns")
        keep_metrics_cols = db_filtered_cols

    final_cols = meta_cols_dict["common_cols"].copy() + keep_metrics_cols
    return df_filtered[final_cols].copy()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def parse_args(argv=None):
    """Parse CLI arguments for the benchmark-query tool.

    :param argv: optional argument list (defaults to ``sys.argv[1:]``);
        added for testability, backward compatible with no-arg calls.
    """
    parser = ArgumentParser(
        # Fixed: description was the placeholder "desc text".
        description="Filter a benchmark CSV by database names and metrics")
    parser.add_argument('-csv', '--csv', type=str, help='CSV file path',
                        default=r"E:\Dev\__halib\test\bench.csv")
    return parser.parse_args(argv)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def main():
    """Demo entry point: load the benchmark CSV and print a sample filter."""
    args = parse_args()
    csv_file = args.csv
    # Bench files are semicolon-separated.
    df = pd.read_csv(csv_file, sep=";", encoding="utf-8")
    # Hard-coded demo query: database 'bowfire', metric 'acc'.
    filtered_df = filter_bech_df_by_db_and_metrics(df, "bowfire", "acc")
    print(filtered_df)


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.pretty import pprint
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import List, Optional, TypeVar, Generic
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from dataclass_wizard import YAMLWizard
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NamedConfig(ABC):
    """Abstract base for configurations identified by a name."""

    @abstractmethod
    def get_name(self):
        """Return this configuration's name; implemented by subclasses."""
        return None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class AutoNamedConfig(YAMLWizard, NamedConfig):
    """
    Mixin that automatically implements get_name() by returning self.name.
    Classes using this MUST have a 'name' field.
    """

    # Optional so YAML deserialization can construct the object before the
    # name is known; callers are expected to set it before use.
    name: Optional[str] = None

    def get_name(self):
        """Return the configured name (may be None if not yet set)."""
        return self.name

    def __post_init__(self):
        # Enforce the "MUST" rule here
        if self.name is None:
            # We allow None during initial load, but it must be set before usage
            # or handled by the loader.
            # (Deliberately a no-op: validation is deferred to the loader.)
            pass
|
|
44
|
+
|
|
45
|
+
# Type parameter for selector configs; items must carry a 'name' attribute.
T = TypeVar("T", bound=AutoNamedConfig)


class BaseSelectorConfig(Generic[T]):
    """
    Base class to handle the logic of selecting an item from a list by name.
    """

    def _resolve_selection(self, items: List[T], selected_name: str, context: str) -> T:
        """Return the item whose .name equals *selected_name*.

        :raises ValueError: when no name is selected or no item matches.
        """
        if selected_name is None:
            raise ValueError(f"No {context} selected in the configuration.")

        # Linear scan is fine here; configuration lists are short.
        match = next((item for item in items if item.name == selected_name), None)
        if match is None:
            raise ValueError(
                f"{context.capitalize()} '{selected_name}' not found in the configuration list."
            )
        return match
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class ExpBaseConfig(ABC, YAMLWizard):
    """
    Base class for configuration objects.
    What a cfg class must have:
    1 - a dataset cfg
    2 - a metric cfg
    3 - a method cfg
    """

    # Save to yaml file
    def save_to_outdir(
        self, filename: str = "__config.yaml", outdir=None, override: bool = False
    ) -> None:
        """
        Save the configuration to the output directory as YAML.

        :param filename: name of the YAML file to write.
        :param outdir: target directory; falls back to ``get_outdir()`` when None.
        :param override: overwrite an existing file when True.
        :raises ValueError: if no output directory can be determined.
        """
        output_dir = outdir if outdir is not None else self.get_outdir()
        # Fixed: validate BEFORE os.makedirs — previously a None directory hit
        # os.makedirs(None) and raised an opaque TypeError; the old assert was
        # also stripped under `python -O`.
        if output_dir is None:
            raise ValueError(
                "No output directory: pass 'outdir' or implement get_outdir()."
            )
        os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.join(output_dir, filename)
        if os.path.exists(file_path) and not override:
            pprint(
                f"File '{file_path}' already exists. Use 'override=True' to overwrite."
            )
        else:
            # method of YAMLWizard to_yaml_file
            self.to_yaml_file(file_path)

    @classmethod
    @abstractmethod
    # load from a custom YAML file
    def from_custom_yaml_file(cls, yaml_file: str):
        """Load a configuration from a custom YAML file."""
        pass

    @abstractmethod
    def get_cfg_name(self):
        """
        Get the name of the configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_outdir(self):
        """
        Get the output directory for the configuration.
        This method should be implemented in subclasses.
        """
        return None

    @abstractmethod
    def get_general_cfg(self):
        """
        Get the general configuration like output directory, log settings, SEED, etc.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_dataset_cfg(self) -> NamedConfig:
        """
        Get the dataset configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_metric_cfg(self) -> NamedConfig:
        """
        Get the metric configuration.
        This method should be implemented in subclasses.
        """
        pass
|