halib 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. halib/__init__.py +3 -3
  2. halib/common/__init__.py +0 -0
  3. halib/common/common.py +178 -0
  4. halib/common/rich_color.py +285 -0
  5. halib/filetype/csvfile.py +3 -9
  6. halib/filetype/ipynb.py +3 -5
  7. halib/filetype/jsonfile.py +0 -3
  8. halib/filetype/textfile.py +0 -1
  9. halib/filetype/videofile.py +91 -2
  10. halib/filetype/yamlfile.py +3 -3
  11. halib/online/projectmake.py +7 -6
  12. halib/online/tele_noti.py +165 -0
  13. halib/research/core/__init__.py +0 -0
  14. halib/research/core/base_config.py +144 -0
  15. halib/research/core/base_exp.py +157 -0
  16. halib/research/core/param_gen.py +108 -0
  17. halib/research/core/wandb_op.py +117 -0
  18. halib/research/data/__init__.py +0 -0
  19. halib/research/data/dataclass_util.py +41 -0
  20. halib/research/data/dataset.py +208 -0
  21. halib/research/data/torchloader.py +165 -0
  22. halib/research/perf/__init__.py +0 -0
  23. halib/research/perf/flop_calc.py +190 -0
  24. halib/research/perf/gpu_mon.py +58 -0
  25. halib/research/perf/perfcalc.py +363 -0
  26. halib/research/perf/perfmetrics.py +137 -0
  27. halib/research/perf/perftb.py +778 -0
  28. halib/research/perf/profiler.py +301 -0
  29. halib/research/viz/__init__.py +0 -0
  30. halib/research/viz/plot.py +754 -0
  31. halib/system/filesys.py +60 -20
  32. halib/system/path.py +73 -0
  33. halib/utils/dict.py +9 -0
  34. halib/utils/list.py +12 -0
  35. {halib-0.2.1.dist-info → halib-0.2.2.dist-info}/METADATA +4 -1
  36. {halib-0.2.1.dist-info → halib-0.2.2.dist-info}/RECORD +39 -14
  37. {halib-0.2.1.dist-info → halib-0.2.2.dist-info}/WHEEL +0 -0
  38. {halib-0.2.1.dist-info → halib-0.2.2.dist-info}/licenses/LICENSE.txt +0 -0
  39. {halib-0.2.1.dist-info → halib-0.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,165 @@
1
+ # Watch a log file and send a telegram message when train reaches a certain epoch or end
2
+
3
+ import os
4
+ import yaml
5
+ import asyncio
6
+ import telegram
7
+ import pandas as pd
8
+
9
+ from rich.pretty import pprint
10
+ from rich.console import Console
11
+ import plotly.graph_objects as go
12
+
13
+ from ..system import filesys as fs
14
+ from ..filetype import textfile, csvfile
15
+
16
+ from argparse import ArgumentParser
17
+
18
+ tele_console = Console()
19
+
20
+
21
def parse_args():
    """Parse the command-line options of the Telegram notifier.

    Returns:
        argparse.Namespace: namespace whose ``cfg`` attribute is the path of
        the YAML file describing the bot credentials and watcher settings.
    """
    arg_parser = ArgumentParser(description="desc text")
    cfg_option = dict(
        type=str,
        help="yaml file for tele",
        # NOTE(review): this default is an absolute path on the author's
        # machine; callers on other machines have to pass --cfg explicitly.
        default=r"E:\Dev\__halib\halib\online\tele_noti_cfg.yaml",
    )
    arg_parser.add_argument("-cfg", "--cfg", **cfg_option)
    return arg_parser.parse_args()
32
+
33
def get_watcher_message_df(target_file, num_last_lines):
    """Return the tail of the watched file as a DataFrame.

    For ``.csv`` files the last ``num_last_lines`` rows are returned as read
    by pandas. For ``.txt``/``.log`` files the result is a one-column table
    (column ``"line"``) whose first row is the most recent line containing
    the word "epoch" (case-insensitive, ``"Epoch: n/a"`` when there is none),
    followed by the last ``num_last_lines`` lines of the file.

    Args:
        target_file: path of the file to inspect; its extension selects the
            reader.
        num_last_lines: how many trailing lines/rows to keep. Values larger
            than the file are clamped to the file length.

    Returns:
        pandas.DataFrame with the selected rows.

    Raises:
        AssertionError: if the file extension is not .txt, .log or .csv.
    """
    file_ext = fs.get_file_name(target_file, split_file_ext=True)[1]
    supported_ext = [".txt", ".log", ".csv"]
    assert (
        file_ext in supported_ext
    ), f"File extension {file_ext} not supported. Supported extensions are {supported_ext}"

    if file_ext == ".csv":
        df = pd.read_csv(target_file)
        return df.tail(min(num_last_lines, len(df)))

    lines = textfile.read_line_by_line(target_file)
    num_last_lines = min(num_last_lines, len(lines))
    # lines[-0:] is the WHOLE list, so a request for zero lines must be
    # special-cased to stay consistent with df.tail(0) in the CSV branch.
    last_line_arr = lines[-num_last_lines:] if num_last_lines else []

    # Most recent line mentioning "epoch", searched from the end of the file.
    epoch_info = next(
        (line for line in reversed(lines) if "epoch" in line.lower()),
        "Epoch: n/a",
    )
    last_line_arr.insert(0, epoch_info)  # the epoch line always comes first

    # Build the single-column table through halib's DFCreator helper.
    dfCreator = csvfile.DFCreator()
    dfCreator.create_table("last_lines", ["line"])
    dfCreator.insert_rows("last_lines", [[line] for line in last_line_arr])
    dfCreator.fill_table_from_row_pool("last_lines")
    return dfCreator["last_lines"].copy()
65
+
66
+
67
def df2img(df: pd.DataFrame, output_img_dir, decimal_places, out_img_scale):
    """Render a DataFrame as a table image and return the image path.

    Args:
        df: table to render; values are rounded to ``decimal_places`` first.
        output_img_dir: directory receiving ``last_lines.png``; created when
            it does not exist yet.
        decimal_places: number of decimals passed to ``DataFrame.round``.
        out_img_scale: ``scale`` argument forwarded to plotly's
            ``Figure.write_image``.

    Returns:
        Normalized path of the written ``last_lines.png``.
    """
    rounded = df.round(decimal_places)

    header_spec = dict(values=list(rounded.columns), align="center")
    cell_spec = dict(
        # plotly expects one sequence per column, hence the transpose
        values=rounded.values.transpose(),
        fill_color=[["white", "lightgrey"] * rounded.shape[0]],
        align="center",
    )
    fig = go.Figure(data=[go.Table(header=header_spec, cells=cell_spec)])

    if not os.path.exists(output_img_dir):
        os.makedirs(output_img_dir)

    img_path = os.path.normpath(os.path.join(output_img_dir, "last_lines.png"))
    fig.write_image(img_path, scale=out_img_scale)
    return img_path
86
+
87
+
88
def compose_message_and_img_path(
    target_file, project, num_last_lines, decimal_places, out_img_scale, output_img_dir
):
    """Build the text header of a notification and the table image path.

    Args:
        target_file: file being watched.
        project: project label shown in the message.
        num_last_lines: number of trailing lines/rows to report.
        decimal_places: rounding applied before rendering the table.
        out_img_scale: scale factor of the rendered image.
        output_img_dir: directory where the image is written.

    Returns:
        Tuple ``(context_msg, img_path)``; ``img_path`` is ``None`` when the
        image could not be rendered (the error is printed, not raised).
    """
    context_msg = f">> Project: {project} \n>> File: {target_file} \n>> Last {num_last_lines} lines:"
    tail_df = get_watcher_message_df(target_file, num_last_lines)

    # Rendering is best-effort: a failure only drops the picture.
    img_path = None
    try:
        img_path = df2img(tail_df, output_img_dir, decimal_places, out_img_scale)
    except Exception as e:
        pprint(f"Error: {e}")
    return context_msg, img_path
99
+
100
+
101
async def send_to_telegram(cfg_dict, interval_in_sec):
    """Send one status notification (text, table image, next-send time).

    Args:
        cfg_dict: parsed configuration. Reads ``telegram.token``,
            ``telegram.chat_id`` and, under ``noti_settings``: ``project``,
            ``target_file``, ``num_last_lines``, ``output_img_dir``,
            ``decimal_places`` and ``out_img_scale``.
        interval_in_sec: delay until the next notification; only used to
            compute the "next message" timestamp shown to the user.

    Any error raised while composing or sending is printed and swallowed so
    that a periodic caller keeps running after a transient failure.
    """
    token = cfg_dict["telegram"]["token"]
    chat_id = cfg_dict["telegram"]["chat_id"]

    noti_settings = cfg_dict["noti_settings"]
    project = noti_settings["project"]
    target_file = noti_settings["target_file"]
    num_last_lines = noti_settings["num_last_lines"]
    output_img_dir = noti_settings["output_img_dir"]
    decimal_places = noti_settings["decimal_places"]
    out_img_scale = noti_settings["out_img_scale"]

    time_fmt = "%Y-%m-%d %H:%M:%S"
    bot = telegram.Bot(token=token)
    async with bot:
        try:
            context_msg, img_path = compose_message_and_img_path(
                target_file,
                project,
                num_last_lines,
                decimal_places,
                out_img_scale,
                output_img_dir,
            )
            now = pd.Timestamp.now()
            time_now = now.strftime(time_fmt)
            sep_line = "-" * 50
            context_msg = f"{sep_line}\n>> Time: {time_now}\n{context_msg}"
            # calculate the next time to send message
            next_time = (now + pd.Timedelta(seconds=interval_in_sec)).strftime(time_fmt)
            next_time_info = f"Next msg: {next_time}"
            tele_console.rule()
            tele_console.print("[green] Send message to telegram [/green]")
            tele_console.print(
                f"[red] Next message will be sent at <{next_time}> [/red]"
            )
            await bot.send_message(text=context_msg, chat_id=chat_id)
            if img_path:
                # Close the image handle after upload; this function runs in
                # an endless loop, so a leaked handle per call accumulates.
                with open(img_path, "rb") as photo:
                    await bot.send_photo(chat_id=chat_id, photo=photo)
            await bot.send_message(text=next_time_info, chat_id=chat_id)
        except Exception as e:
            # Deliberate best-effort: report and let the caller try again later.
            pprint(f"Error: {e}")
            pprint("Message not sent to telegram")
144
+
145
+
146
async def run_forever(cfg_path):
    """Load the YAML configuration and send notifications periodically.

    Args:
        cfg_path: path of the YAML file; ``noti_settings.interval_in_min``
            gives the period between two notifications, in minutes.

    This coroutine never returns: it sends a notification, sleeps for the
    configured interval and repeats.
    """
    # Use a context manager so the config file handle is closed immediately.
    with open(cfg_path, "r") as cfg_file:
        cfg_dict = yaml.safe_load(cfg_file)

    interval_in_min = cfg_dict["noti_settings"]["interval_in_min"]
    interval_in_sec = int(interval_in_min * 60)
    pprint(
        f"Message will be sent every {interval_in_min} minutes or {interval_in_sec} seconds"
    )
    while True:
        await send_to_telegram(cfg_dict, interval_in_sec)
        await asyncio.sleep(interval_in_sec)
157
+
158
+
159
async def main():
    """Entry coroutine: read the CLI options and start the notification loop."""
    cfg_path = parse_args().cfg
    await run_forever(cfg_path)


if __name__ == "__main__":
    # Script entry point: drive the coroutine with a fresh event loop.
    asyncio.run(main())
File without changes
@@ -0,0 +1,144 @@
1
+ import os
2
+ from rich.pretty import pprint
3
+ from abc import ABC, abstractmethod
4
+ from typing import List, Optional, TypeVar, Generic
5
+
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from dataclass_wizard import YAMLWizard
9
+
10
+
11
class NamedConfig(ABC):
    """Abstract base for configuration objects that are identified by a name."""

    @abstractmethod
    def get_name(self):
        """Return the name of this configuration.

        Concrete subclasses have to override this method; the base
        implementation does nothing and returns ``None``.
        """
        return None
24
+
25
+
26
@dataclass
class AutoNamedConfig(YAMLWizard, NamedConfig):
    """Dataclass mixin whose ``get_name()`` simply returns the ``name`` field.

    Classes built on this mixin are expected to carry a ``name``. The field
    defaults to ``None`` and nothing here rejects a missing name: it is up to
    the loader or the caller to set it before the configuration is used.
    """

    name: Optional[str] = None

    def __post_init__(self):
        # Intentionally a no-op: ``name`` may still be None right after the
        # initial load, so no validation is performed at construction time.
        return None

    def get_name(self):
        """Return the value of the ``name`` field (possibly ``None``)."""
        return self.name
44
+
45
+ T = TypeVar("T", bound=AutoNamedConfig)
46
+
47
class BaseSelectorConfig(Generic[T]):
    """Helper base providing name-based lookup of one item in a list."""

    def _resolve_selection(self, items: List[T], selected_name: str, context: str) -> T:
        """Return the first item of ``items`` whose ``name`` equals ``selected_name``.

        Args:
            items: candidates; each one exposes a ``name`` attribute.
            selected_name: name to look for.
            context: human-readable kind of item, used in error messages.

        Raises:
            ValueError: if ``selected_name`` is ``None`` or matches no item.
        """
        if selected_name is None:
            raise ValueError(f"No {context} selected in the configuration.")

        # Linear scan; the first match wins.
        found = next((entry for entry in items if entry.name == selected_name), None)
        if found is None:
            raise ValueError(
                f"{context.capitalize()} '{selected_name}' not found in the configuration list."
            )
        return found
64
+
65
+
66
class ExpBaseConfig(ABC, YAMLWizard):
    """Abstract base class for experiment configuration objects.

    A concrete configuration is expected to provide:
      1 - a dataset cfg
      2 - a metric cfg
      3 - a method cfg

    Accessors are declared here for the general, dataset and metric
    configurations, plus the configuration name and output directory.
    """

    # Save to yaml file
    def save_to_outdir(
        self, filename: str = "__config.yaml", outdir=None, override: bool = False
    ) -> None:
        """Write this configuration as YAML into the output directory.

        Args:
            filename: name of the YAML file inside the output directory.
            outdir: target directory; when ``None`` the value returned by
                ``get_outdir()`` is used. The directory is created if needed.
            override: when ``False`` an existing file is left untouched and a
                notice is printed instead.

        Raises:
            ValueError: if neither ``outdir`` nor ``get_outdir()`` yields a
                directory.
        """
        output_dir = outdir if outdir is not None else self.get_outdir()
        # Validate BEFORE touching the filesystem: os.makedirs(None) would
        # fail with an opaque TypeError.
        if output_dir is None:
            raise ValueError(
                "Output directory is not set: pass 'outdir' or make get_outdir() return a path."
            )
        # With exist_ok=True this either leaves a directory in place or
        # raises (e.g. when the path is an existing file), so no separate
        # is-a-directory check is needed afterwards.
        os.makedirs(output_dir, exist_ok=True)

        file_path = os.path.join(output_dir, filename)
        if os.path.exists(file_path) and not override:
            pprint(
                f"File '{file_path}' already exists. Use 'override=True' to overwrite."
            )
            return
        # to_yaml_file is provided by YAMLWizard
        self.to_yaml_file(file_path)

    @classmethod
    @abstractmethod
    def from_custom_yaml_file(cls, yaml_file: str):
        """Load a configuration from a custom YAML file."""
        pass

    @abstractmethod
    def get_cfg_name(self):
        """
        Get the name of the configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_outdir(self):
        """
        Get the output directory for the configuration.
        This method should be implemented in subclasses.
        """
        return None

    @abstractmethod
    def get_general_cfg(self):
        """
        Get the general configuration like output directory, log settings, SEED, etc.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_dataset_cfg(self) -> NamedConfig:
        """
        Get the dataset configuration.
        This method should be implemented in subclasses.
        """
        pass

    @abstractmethod
    def get_metric_cfg(self) -> NamedConfig:
        """
        Get the metric configuration.
        This method should be implemented in subclasses.
        """
        pass
@@ -0,0 +1,157 @@
1
from abc import ABC, abstractmethod
from typing import Tuple, Any, Optional

# base_config is a sibling module inside this package, so it has to be
# imported relatively; a bare ``from base_config import ...`` only resolves
# when this directory itself happens to be on sys.path.
from .base_config import ExpBaseConfig
from ..perf.perfcalc import PerfCalc
from ..perf.perfmetrics import MetricsBackend
6
+
7
+ # ! SEE https://github.com/hahv/base_exp for sample usage
8
class BaseExperiment(PerfCalc, ABC):
    """Template for an experiment driven by an ``ExpBaseConfig``.

    Subclasses implement the individual steps (general init, dataset and
    metrics preparation, execution, evaluation); ``run_exp`` and ``eval_exp``
    chain them together and hand the raw results to ``calc_perfs``, which is
    inherited from ``PerfCalc``.
    """

    def __init__(self, config: ExpBaseConfig):
        self.config = config
        # Created lazily by get_metric_backend() or by _prepare_environment()
        self.metric_backend = None
        # True once init_general/prepare_dataset/prepare_metrics have run
        self._is_env_ready = False

    # -----------------------
    # PerfCalc Required Methods
    # -----------------------
    def get_dataset_name(self):
        """Return the name of the configured dataset."""
        dataset_cfg = self.config.get_dataset_cfg()
        return dataset_cfg.get_name()

    def get_experiment_name(self):
        """Return the configuration name, used as the experiment name."""
        return self.config.get_cfg_name()

    def get_metric_backend(self):
        """Return the metrics backend, building it on first use."""
        if not self.metric_backend:
            metric_cfg = self.config.get_metric_cfg()
            self.metric_backend = self.prepare_metrics(metric_cfg)
        return self.metric_backend

    # -----------------------
    # Abstract Experiment Steps
    # -----------------------
    @abstractmethod
    def init_general(self, general_cfg):
        """Setup general settings like SEED, logging, env variables."""
        pass

    @abstractmethod
    def prepare_dataset(self, dataset_cfg):
        """Load/prepare dataset."""
        pass

    @abstractmethod
    def prepare_metrics(self, metric_cfg) -> MetricsBackend:
        """Build and return the metrics backend for ``metric_cfg``."""
        pass

    @abstractmethod
    def before_exec_exp_once(self, *args, **kwargs):
        """Hook for any setup before exec_exp; called once per run_exp."""
        pass

    @abstractmethod
    def exec_exp(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
        """Run the experiment process, e.g. a training/evaluation loop.

        Return: either ``None`` or a tuple ``(raw_metrics_data, extra_data)``
        that is forwarded to the metrics calculation.
        """
        pass

    @abstractmethod
    def exec_eval(self, *args, **kwargs) -> Optional[Tuple[Any, Any]]:
        """Run the evaluation process.

        Return: either ``None`` or a tuple ``(raw_metrics_data, extra_data)``
        that is forwarded to the metrics calculation.
        """
        pass

    # -----------------------
    # Internal Helpers
    # -----------------------
    def _validate_and_unpack(self, results):
        """Check that ``results`` is a 2-item tuple/list and return its items.

        ``None`` is passed through unchanged; any other shape raises
        ``ValueError``.
        """
        if results is None:
            return None
        is_pair = isinstance(results, (tuple, list)) and len(results) == 2
        if not is_pair:
            raise ValueError("exec must return (metrics_data, extra_data)")
        return results[0], results[1]

    def _prepare_environment(self, force_reload: bool = False):
        """Run the common setup once; run it again only if ``force_reload``."""
        needs_setup = force_reload or not self._is_env_ready
        if not needs_setup:
            return

        cfg = self.config
        # 1. General settings, then dataset
        self.init_general(cfg.get_general_cfg())
        self.prepare_dataset(cfg.get_dataset_cfg())

        # 2. (Re)build the metric backend
        self.metric_backend = self.prepare_metrics(cfg.get_metric_cfg())

        # 3. Remember that setup has been done
        self._is_env_ready = True

    # -----------------------
    # Main Experiment Runner
    # -----------------------
    def run_exp(self, should_calc_metrics=True, reload_env=False, *args, **kwargs):
        """
        Run the whole experiment pipeline.
        :param should_calc_metrics: Whether to calculate and save metrics after execution.
        :param reload_env: If True, forces dataset/general init to run again.
        :kwargs Params:
            + 'outfile' to save csv file results,
            + 'outdir' to set output directory for experiment results.
            + 'return_df' to return a DataFrame of results instead of a dictionary.

        Pipeline:
            1. Prepare environment (general + dataset + metrics)
            2. Pre-execution hook
            3. Save config
            4. Execute
            5. Calculate & save metrics

        Returns the output of ``calc_perfs`` when metrics are requested and
        ``exec_exp`` produced results; otherwise whatever ``exec_exp`` returned.
        """
        self._prepare_environment(force_reload=reload_env)

        # Pre-exec setup (loading models, etc.), then persist the config
        self.before_exec_exp_once(*args, **kwargs)
        self.config.save_to_outdir()

        results = self.exec_exp(*args, **kwargs)
        if not should_calc_metrics or results is None:
            return results

        metrics_data, extra_data = self._validate_and_unpack(results)
        perf_results = self.calc_perfs(
            *args, raw_metrics_data=metrics_data, extra_data=extra_data, **kwargs
        )
        return perf_results

    # -----------------------
    # Main Experiment Evaluator
    # -----------------------
    def eval_exp(self, reload_env=False, *args, **kwargs):
        """
        Run evaluation only.
        :param reload_env: If True, forces dataset/general init to run again.

        Returns the output of ``calc_perfs``, or ``None`` when ``exec_eval``
        returned nothing.
        """
        self._prepare_environment(force_reload=reload_env)
        results = self.exec_eval(*args, **kwargs)
        if results is None:
            return None
        metrics_data, extra_data = self._validate_and_unpack(results)
        return self.calc_perfs(
            *args, raw_metrics_data=metrics_data, extra_data=extra_data, **kwargs
        )
@@ -0,0 +1,108 @@
1
+ import os
2
+ import yaml
3
+ import numpy as np
4
+ from typing import Dict, Any, List
5
+
6
+ from ...common.common import *
7
+ from ...filetype import yamlfile
8
+
9
class ParamGen:
    """Expand a YAML parameter specification into explicit lists of values.

    The YAML file maps each parameter name to a config dict with a ``type``
    (``"list"`` or ``"range"``) and its ``values``. After construction,
    ``self.params`` maps every parameter name to its expanded list.
    """

    @staticmethod
    def build_from_file(params_file):
        """Return the expanded parameter dictionary built from ``params_file``."""
        return ParamGen(params_file).params

    def __init__(self, params_file=None):
        """Load and expand ``params_file``.

        Raises:
            AssertionError: if ``params_file`` is ``None`` or not an existing
                file.
        """
        self.params = {}
        # Explicit raise instead of ``assert`` so the check survives ``-O``
        # and a missing argument (None) gets the same clear message instead
        # of a TypeError from os.path.isfile. AssertionError is kept for
        # backward compatibility with the previous assert-based check.
        if params_file is None or not os.path.isfile(params_file):
            raise AssertionError(f"params_file not found: {params_file}")
        self.params = self._build(params_file)

    def _expand_param(self, param_name: str, config: Dict[str, Any]) -> List[Any]:
        """
        Validates and expands the values for a single parameter configuration.

        Args:
            param_name: The name of the parameter being processed.
            config: The configuration dictionary for this parameter.

        Returns:
            A list of the expanded values for the parameter.

        Raises:
            TypeError: If the configuration or its values have an incorrect type.
            ValueError: If the configuration is missing keys or has an invalid structure.
        """
        # 1. Validate the configuration structure
        if not isinstance(config, dict):
            raise TypeError(f"Config for '{param_name}' must be a dictionary.")

        if "type" not in config or "values" not in config:
            raise ValueError(
                f"Config for '{param_name}' must contain 'type' and 'values' keys."
            )

        gen_type = config["type"]
        values = config["values"]

        # 2. Handle the generation based on type
        if gen_type == "list":
            # A single scalar is wrapped so the result is always a list
            return values if isinstance(values, list) else [values]

        if gen_type != "range":
            raise ValueError(
                f"Invalid 'type' for '{param_name}': '{gen_type}'. Must be 'list' or 'range'."
            )

        if not isinstance(values, list) or len(values) != 3:
            raise ValueError(
                f"For 'range' type on '{param_name}', 'values' must be a list of 3 numbers "
                f"[start, end, step], but got: {values}"
            )

        start, end, step = values
        if all(isinstance(v, int) for v in values):
            # Pure integer spec: the built-in range; ``end`` is exclusive
            return list(range(start, end, step))
        if all(isinstance(v, (int, float)) for v in values):
            # Floating point spec: numpy handles fractional steps; convert
            # back to plain Python floats (not numpy scalars)
            return [float(v) for v in np.arange(start, end, step)]
        raise TypeError(
            f"All 'values' for 'range' on '{param_name}' must be numbers."
        )

    def _build(self, params_file):
        """
        Builds the full parameter dictionary by expanding each entry of the YAML file.

        The file content must be a mapping where each key is a parameter name and each
        value is a config dict specifying the 'type' ('list' or 'range') and 'values'.

        Args:
            params_file: Path of the YAML file to load.
                Example content (shown as a dict):
                {
                    "learning_rate": {"type": "range", "values": [0.01, 0.1, 0.01]},
                    "optimizer": {"type": "list", "values": ["adam", "sgd"]},
                    "epochs": {"type": "list", "values": 100}
                }

        Returns:
            A dictionary with parameter names mapped to their fully expanded list of values.

        Raises:
            TypeError: If the loaded YAML content is not a dictionary.
        """
        cfg_raw_dict = yamlfile.load_yaml(params_file, to_dict=True)
        if not isinstance(cfg_raw_dict, dict):
            raise TypeError("The entire opt_cfg must be a dictionary.")

        return {
            param_name: self._expand_param(param_name, config)
            for param_name, config in cfg_raw_dict.items()
        }

    def save(self, outfile):
        """Dump the expanded parameters to ``outfile`` as YAML."""
        with open(outfile, "w") as f:
            yaml.dump(self.params, f)
@@ -0,0 +1,117 @@
1
+ import os
2
+ import glob
3
+ import wandb
4
+ import argparse
5
+ import subprocess
6
+
7
+ from tqdm import tqdm
8
+ from rich.console import Console
9
+
10
+ console = Console()
11
+
12
def sync_runs(outdir):
    """Run ``wandb sync`` on every offline run found below ``outdir``.

    Each direct sub-directory of ``outdir`` is treated as an experiment
    directory and searched for ``wandb/*run-*`` folders. Every match is
    synced in turn through the ``wandb`` command line, echoing its output.

    Args:
        outdir: parent directory holding one sub-directory per experiment.

    Raises:
        AssertionError: if ``outdir`` does not exist or has no sub-directory.
    """
    outdir = os.path.abspath(outdir)
    assert os.path.exists(outdir), f"Output directory {outdir} does not exist."
    sub_dirs = [
        entry for entry in os.listdir(outdir) if os.path.isdir(os.path.join(outdir, entry))
    ]
    assert len(sub_dirs) > 0, f"No subdirectories found in {outdir}."
    console.rule("Parent Directory")
    console.print(f"[yellow]{outdir}[/yellow]")

    # Collect the run folders of every experiment directory, in listing order
    wandb_dirs = [
        run_dir
        for sub_dir in sub_dirs
        for run_dir in glob.glob(f"{os.path.join(outdir, sub_dir)}/wandb/*run-*")
    ]
    total = len(wandb_dirs)
    if total == 0:
        console.print(f"No wandb runs found in {outdir}.")
        return
    console.print(f"Found [bold]{total}[/bold] wandb runs in {outdir}.")

    for position, wandb_dir in enumerate(wandb_dirs, start=1):
        console.rule(f"Syncing wandb run {position}/{total}")
        console.print(f"Syncing: {wandb_dir}")
        process = subprocess.Popen(
            ["wandb", "sync", wandb_dir],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge stderr into the echoed stream
            text=True,
        )

        for line in process.stdout:
            console.print(line.strip())
            # Stop reading as soon as the CLI reports an API error
            if " ERROR Error while calling W&B API" in line:
                break
        process.stdout.close()
        process.wait()

        if process.returncode == 0:
            console.print(f"Successfully synced {wandb_dir}.")
        else:
            console.print(f"[red]Error syncing {wandb_dir}. Return code: {process.returncode}[/red]")
49
+
50
def delete_runs(project, pattern=None):
    """Delete the runs of a W&B project after an interactive confirmation.

    Args:
        project: W&B project path passed to ``wandb.Api().runs``.
        pattern: optional substring; when given, only runs whose name
            contains it are deleted. ``None`` deletes every run.

    Nothing is deleted unless the user answers ``y`` at the prompt.
    """
    console.rule("Delete W&B Runs")
    prompt_parts = [
        "Are you sure you want to delete all runs in",
        f" \n\tproject: [red]{project}[/red]",
    ]
    if pattern:
        prompt_parts.append(f"\n\tpattern: [blue]{pattern}[/blue]")
    console.print("".join(prompt_parts))

    answer = input("This action cannot be undone. [y/N]: ").strip().lower()
    if answer != "y":
        print("Cancelled.")
        return

    print("Confirmed. Proceeding...")
    runs = wandb.Api().runs(project)

    deleted = 0
    console.rule("Deleting W&B Runs")
    if len(runs) == 0:
        print("No runs found in the project.")
        return

    for run in tqdm(runs):
        matches = pattern is None or pattern in run.name
        if not matches:
            continue
        run.delete()
        console.print(f"Deleted run: [red]{run.name}[/red]")
        deleted += 1

    console.print(f"Total runs deleted: {deleted}")
79
+
80
+
81
def valid_argument(args):
    """Validate the parsed command-line arguments for the chosen operation.

    Args:
        args: namespace with ``op`` and, depending on it, ``outdir``
            (for ``"sync"``) or ``project`` (for ``"delete"``).

    Raises:
        ValueError: if ``op`` is neither ``"sync"`` nor ``"delete"``.
        AssertionError: if the output directory does not exist (sync) or the
            project name is not a non-empty string (delete).
    """
    op = args.op
    if op not in ("sync", "delete"):
        raise ValueError(f"Unknown operation: {args.op}")

    if op == "sync":
        assert os.path.exists(args.outdir), f"Output directory {args.outdir} does not exist."
        return

    project = args.project
    has_project = isinstance(project, str) and len(project.strip()) > 0
    assert has_project, "Project name must be a non-empty string."
88
+
89
def parse_args():
    """Parse the command-line options of the W&B maintenance tool.

    Returns:
        argparse.Namespace with ``op`` ("sync" or "delete"), ``project``,
        ``outdir`` and ``pattern``.
    """
    parser = argparse.ArgumentParser(description="Operations on W&B runs")
    # (flags, keyword arguments) of every supported option
    option_specs = (
        (
            ("-op", "--op"),
            dict(type=str, help="Operation to perform", default="sync", choices=["delete", "sync"]),
        ),
        (
            ("-prj", "--project"),
            dict(type=str, default="fire-paper2-2025", help="W&B project name"),
        ),
        (
            ("-outdir", "--outdir"),
            dict(type=str, help="arg1 description", default="./zout/train"),
        ),
        (
            ("-pt", "--pattern"),
            dict(type=str, default=None, help="Run name pattern to match for deletion"),
        ),
    )
    for flags, spec in option_specs:
        parser.add_argument(*flags, **spec)

    return parser.parse_args()
101
+
102
+
103
def main():
    """Command-line entry point: parse, validate and dispatch the operation."""
    args = parse_args()
    # Validate arguments, stop if invalid
    valid_argument(args)

    op = args.op
    # One deferred call per supported operation
    handlers = {
        "sync": lambda: sync_runs(args.outdir),
        "delete": lambda: delete_runs(args.project, args.pattern),
    }
    handler = handlers.get(op)
    if handler is None:
        raise ValueError(f"Unknown operation: {op}")
    handler()


if __name__ == "__main__":
    main()
File without changes