halib 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +94 -0
- halib/common/__init__.py +0 -0
- halib/common/common.py +326 -0
- halib/common/rich_color.py +285 -0
- halib/common.py +151 -0
- halib/csvfile.py +48 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/exp/__init__.py +0 -0
- halib/exp/core/__init__.py +0 -0
- halib/exp/core/base_config.py +167 -0
- halib/exp/core/base_exp.py +147 -0
- halib/exp/core/param_gen.py +170 -0
- halib/exp/core/wandb_op.py +117 -0
- halib/exp/data/__init__.py +0 -0
- halib/exp/data/dataclass_util.py +41 -0
- halib/exp/data/dataset.py +208 -0
- halib/exp/data/torchloader.py +165 -0
- halib/exp/perf/__init__.py +0 -0
- halib/exp/perf/flop_calc.py +190 -0
- halib/exp/perf/gpu_mon.py +58 -0
- halib/exp/perf/perfcalc.py +470 -0
- halib/exp/perf/perfmetrics.py +137 -0
- halib/exp/perf/perftb.py +778 -0
- halib/exp/perf/profiler.py +507 -0
- halib/exp/viz/__init__.py +0 -0
- halib/exp/viz/plot.py +754 -0
- halib/filesys.py +117 -0
- halib/filetype/__init__.py +0 -0
- halib/filetype/csvfile.py +192 -0
- halib/filetype/ipynb.py +61 -0
- halib/filetype/jsonfile.py +19 -0
- halib/filetype/textfile.py +12 -0
- halib/filetype/videofile.py +266 -0
- halib/filetype/yamlfile.py +87 -0
- halib/gdrive.py +179 -0
- halib/gdrive_mkdir.py +41 -0
- halib/gdrive_test.py +37 -0
- halib/jsonfile.py +22 -0
- halib/listop.py +13 -0
- halib/online/__init__.py +0 -0
- halib/online/gdrive.py +229 -0
- halib/online/gdrive_mkdir.py +53 -0
- halib/online/gdrive_test.py +50 -0
- halib/online/projectmake.py +131 -0
- halib/online/tele_noti.py +165 -0
- halib/plot.py +301 -0
- halib/projectmake.py +115 -0
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +157 -0
- halib/research/benchquery.py +131 -0
- halib/research/core/__init__.py +0 -0
- halib/research/core/base_config.py +144 -0
- halib/research/core/base_exp.py +157 -0
- halib/research/core/param_gen.py +108 -0
- halib/research/core/wandb_op.py +117 -0
- halib/research/data/__init__.py +0 -0
- halib/research/data/dataclass_util.py +41 -0
- halib/research/data/dataset.py +208 -0
- halib/research/data/torchloader.py +165 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +137 -0
- halib/research/mics.py +74 -0
- halib/research/params_gen.py +108 -0
- halib/research/perf/__init__.py +0 -0
- halib/research/perf/flop_calc.py +190 -0
- halib/research/perf/gpu_mon.py +58 -0
- halib/research/perf/perfcalc.py +363 -0
- halib/research/perf/perfmetrics.py +137 -0
- halib/research/perf/perftb.py +778 -0
- halib/research/perf/profiler.py +301 -0
- halib/research/perfcalc.py +361 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/viz/__init__.py +0 -0
- halib/research/viz/plot.py +754 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/__init__.py +0 -0
- halib/sys/cmd.py +8 -0
- halib/sys/filesys.py +124 -0
- halib/system/__init__.py +0 -0
- halib/system/_list_pc.csv +6 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +164 -0
- halib/system/path.py +106 -0
- halib/tele_noti.py +166 -0
- halib/textfile.py +13 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict.py +317 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/list.py +17 -0
- halib/utils/listop.py +13 -0
- halib/utils/slack.py +86 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +139 -0
- halib-0.2.30.dist-info/METADATA +237 -0
- halib-0.2.30.dist-info/RECORD +110 -0
- halib-0.2.30.dist-info/WHEEL +5 -0
- halib-0.2.30.dist-info/licenses/LICENSE.txt +17 -0
- halib-0.2.30.dist-info/top_level.txt +1 -0
halib/utils/dict.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
from future.utils.surrogateescape import fn
|
|
2
|
+
import copy
|
|
3
|
+
import json
|
|
4
|
+
import hashlib
|
|
5
|
+
from rich.pretty import pprint
|
|
6
|
+
from typing import Dict, Any, Callable, Optional, List, Tuple
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DictUtils:
    """
    General-purpose dictionary manipulation utilities.

    All methods are static and operate on plain ``dict`` objects, addressing
    nested keys with dot-notation strings (e.g. ``"model.backbone.lr"``).
    """

    @staticmethod
    def flatten(
        d: Dict[str, Any],
        parent_key: str = "",
        sep: str = ".",
        is_leaf_predicate: Optional[Callable[[Any], bool]] = None,
    ) -> Dict[str, Any]:
        """
        Recursively flattens a nested dictionary.

        Args:
            d: The dictionary to flatten.
            parent_key: Prefix for keys (used during recursion).
            sep: Separator for dot-notation keys.
            is_leaf_predicate: Optional function that returns True if a value should
                be treated as a leaf (value) rather than a branch to recurse.
                Useful if you have dicts you don't want flattened.

        Returns:
            A single-level dict mapping dot-notation paths to leaf values.
        """
        items: List[Tuple[str, Any]] = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k

            # Custom leaf detection takes precedence over recursion.
            if is_leaf_predicate and is_leaf_predicate(v):
                items.append((new_key, v))
            elif isinstance(v, dict):
                items.extend(
                    DictUtils.flatten(
                        v, new_key, sep=sep, is_leaf_predicate=is_leaf_predicate
                    ).items()
                )
            else:
                items.append((new_key, v))
        return dict(items)

    @staticmethod
    def unflatten(flat_dict: Dict[str, Any], sep: str = ".") -> Dict[str, Any]:
        """
        Converts flat dot-notation keys back to nested dictionaries.
        e.g., {'a.b': 1} -> {'a': {'b': 1}}
        """
        nested: Dict[str, Any] = {}
        for key, value in flat_dict.items():
            DictUtils.deep_set(nested, key, value, sep=sep)
        return nested

    @staticmethod
    def deep_update(base: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
        """
        Recursively merges 'update' dict into 'base' dict.

        Unlike the standard `dict.update()`, which replaces nested dictionaries entirely,
        this method enters nested dictionaries and updates them key-by-key. This preserves
        existing keys in 'base' that are not present in 'update'.

        Args:
            base: The original dictionary to modify (modified in place).
            update: The dictionary containing new values.

        Returns:
            The modified 'base' dictionary.

        Example:
            >>> base = {'model': {'name': 'v1', 'dropout': 0.5}}
            >>> new_vals = {'model': {'name': 'v2'}}
            >>> DictUtils.deep_update(base, new_vals)
            {'model': {'name': 'v2', 'dropout': 0.5}}
        """
        for k, v in update.items():
            # Only recurse when BOTH sides hold a dict; otherwise replace.
            if isinstance(v, dict) and k in base and isinstance(base[k], dict):
                DictUtils.deep_update(base[k], v)
            else:
                base[k] = v
        return base

    @staticmethod
    def deep_set(d: Dict[str, Any], dot_key: str, value: Any, sep: str = ".") -> None:
        """
        Sets a value in a nested dictionary using a dot-notation key path.
        Automatically creates any missing intermediate dictionaries, and
        replaces a non-dict value that blocks the path.

        Args:
            d: The dictionary to modify (in place).
            dot_key: The path to the value (e.g., "model.backbone.layers").
            value: The value to set.
            sep: The separator used in the key (default is ".").

        Example:
            >>> cfg = {}
            >>> DictUtils.deep_set(cfg, "a.b.c", 10)
            >>> print(cfg)
            {'a': {'b': {'c': 10}}}
        """
        parts = dot_key.split(sep)
        target = d
        for part in parts[:-1]:
            # Create a missing intermediate, and also overwrite a leaf that
            # conflicts with the path. (The previous implementation rebound
            # the LOCAL variable `target = {}` on conflict, which detached it
            # from the parent dict and silently dropped the write.)
            if part not in target or not isinstance(target[part], dict):
                target[part] = {}
            target = target[part]
        target[parts[-1]] = value

    @staticmethod
    def get_unique_hash(input_dict, length=12):
        """
        Returns a unique hash string for a dictionary.

        The dict must be JSON-serializable; keys are sorted so logically
        equal dicts hash identically regardless of insertion order.

        :param input_dict: The dictionary params
        :param length: The desired length of the hash string (default 12)
        """
        assert length >= 12, "Hash length must be at least 12 to ensure uniqueness."
        # 1. Sort keys to ensure {a:1, b:2} == {b:2, a:1}
        config_str = json.dumps(input_dict, sort_keys=True)

        # 2. Generate full SHA-256 hash (64 chars long)
        full_hash = hashlib.sha256(config_str.encode("utf-8")).hexdigest()

        # 3. Truncate to desired length
        return full_hash[:length]

    @staticmethod
    def deep_exclude(
        d: Dict[str, Any],
        keys_to_exclude: List[str],
        in_place: bool = False,
        sep: str = ".",
    ) -> Dict[str, Any]:
        """
        Removes keys from a nested dictionary based on a list of dot-notation paths.

        Args:
            d: The dictionary to filter.
            keys_to_exclude: A list of flattened keys to exclude (e.g., ['model.layers.dropout']).
            in_place: If True, modifies the dictionary directly.
                If False, creates and modifies a deep copy, leaving the original untouched.
            sep: Separator used in the dot-notation keys (default: ".").

        Returns:
            The modified dictionary (either the original object or the new copy).

        Example:
            >>> data = {'a': {'b': 1, 'c': 2}}
            >>> DictUtils.deep_exclude(data, ['a.b'], in_place=False)
            {'a': {'c': 2}}
        """
        # 1. Handle the copy logic based on the in_place flag
        target_dict = d if in_place else copy.deepcopy(d)

        # 2. Iterate over each dot-notation key we want to delete
        for flat_key in keys_to_exclude:
            parts = flat_key.split(sep)

            # 3. Traverse to the parent container of the key we want to delete
            current_level = target_dict
            parent_found = True

            # Loop through path parts up to the second-to-last item (the parent)
            for part in parts[:-1]:
                if isinstance(current_level, dict) and part in current_level:
                    current_level = current_level[part]
                else:
                    # The path doesn't exist in this dict, safely skip deletion
                    parent_found = False
                    break

            # 4. Delete the final key (leaf) if the parent was found
            if parent_found and isinstance(current_level, dict):
                leaf_key = parts[-1]
                if leaf_key in current_level:
                    del current_level[leaf_key]

        return target_dict

    @staticmethod
    def deep_include(
        d: Dict[str, Any],
        keys_to_include: List[str],
        in_place: bool = False,
        sep: str = ".",
    ) -> Dict[str, Any]:
        """
        Filters a nested dictionary to keep ONLY the specified dot-notation paths.

        Args:
            d: The dictionary to filter.
            keys_to_include: A list of flattened keys to include (e.g., ['a.b.c']).
            in_place: If True, modifies the original dictionary.
            sep: Separator used in the dot-notation keys.

        Returns:
            The filtered dictionary.
        """
        # Build a fresh container holding only the kept paths; cleaner than
        # deleting everything else from a copy.
        new_dict: Dict[str, Any] = {}

        for flat_key in keys_to_include:
            parts = flat_key.split(sep)

            # Pointers to traverse both dictionaries in lockstep.
            current_source = d
            current_target = new_dict

            for i, part in enumerate(parts):
                if isinstance(current_source, dict) and part in current_source:
                    # Move down the source
                    current_source = current_source[part]

                    # If we are at the leaf of the 'keep' path, copy the value
                    if i == len(parts) - 1:
                        current_target[part] = copy.deepcopy(current_source)
                    else:
                        # If the path doesn't exist in our new_dict yet, create it
                        if part not in current_target or not isinstance(
                            current_target[part], dict
                        ):
                            current_target[part] = {}
                        current_target = current_target[part]
                else:
                    # The path to keep doesn't exist in the source, skip it
                    break

        # Honor in_place by replacing d's contents with the filtered result.
        if in_place:
            d.clear()
            d.update(new_dict)
            return d

        return new_dict

    @staticmethod
    def apply_exclusion_mask(
        d: Dict[str, Any],
        config_mask: Dict[str, Any],
        in_place: bool = False,
        sep: str = ".",
    ) -> Dict[str, Any]:
        """
        Uses a dictionary 'mask' to define what to throw away.
        The mask's flattened key paths are removed from `d`.
        """
        flatten_dict = DictUtils.flatten(config_mask, sep=sep)
        paths_to_exclude = list(flatten_dict.keys())
        return DictUtils.deep_exclude(d, paths_to_exclude, in_place=in_place, sep=sep)

    @staticmethod
    def apply_inclusion_mask(
        d: Dict[str, Any],
        config_mask: Dict[str, Any],
        in_place: bool = False,
        sep: str = ".",
    ) -> Dict[str, Any]:
        """
        Uses a dictionary 'mask' to define what to allow.
        (Renamed from 'deep_keep_by_config'.) Only the mask's flattened key
        paths are retained in `d`.
        """
        flatten_dict = DictUtils.flatten(config_mask, sep=sep)
        paths_to_include = list(flatten_dict.keys())
        return DictUtils.deep_include(d, paths_to_include, in_place=in_place, sep=sep)

    @staticmethod
    def prune(d: Any, prune_values: Tuple[Any, ...] = (None, {}, [], "")) -> Any:
        """
        Recursively removes keys where values match any item in 'prune_values'.

        Args:
            d: The dictionary or list to clean.
            prune_values: A tuple of values to be removed.
                Default is (None, {}, [], "") which removes all empty types.
                Pass specific values (e.g., ({}, "")) to keep None or [].
                NOTE: matching uses equality (`in`), so e.g. False would match
                a 0 in prune_values; the defaults are unaffected.

        Returns:
            The cleaned structure (a new object; `d` is not modified).
        """
        if isinstance(d, dict):
            new_dict = {}
            for k, v in d.items():
                # 1. Recursively clean children first, so a branch that
                #    becomes empty is itself pruned.
                cleaned_v = DictUtils.prune(v, prune_values)

                # 2. Keep the entry only if the CLEANED value is not pruned.
                if cleaned_v not in prune_values:
                    new_dict[k] = cleaned_v
            return new_dict

        elif isinstance(d, list):
            new_list = []
            for v in d:
                cleaned_v = DictUtils.prune(v, prune_values)
                if cleaned_v not in prune_values:
                    new_list.append(cleaned_v)
            return new_list

        else:
            return d
|
halib/utils/dict_op.py
ADDED
halib/utils/gpu_mon.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# install `pynvml_utils` package first
|
|
2
|
+
# see this repo: https://github.com/gpuopenanalytics/pynvml
|
|
3
|
+
from pynvml_utils import nvidia_smi
|
|
4
|
+
import time
|
|
5
|
+
import threading
|
|
6
|
+
from rich.pretty import pprint
|
|
7
|
+
|
|
8
|
+
class GPUMonitor:
    """Samples GPU power draw and memory usage on a background thread.

    Requires the `pynvml_utils` package
    (see https://github.com/gpuopenanalytics/pynvml).

    Usage: start() -> run workload -> stop() -> get_stats().
    """

    def __init__(self, gpu_index=0, interval=0.01):
        # NVML query handle shared by all samples.
        self.nvsmi = nvidia_smi.getInstance()
        self.gpu_index = gpu_index
        self.interval = interval  # seconds between samples
        self.gpu_stats = []  # one dict per sample: power/memory + units
        self._running = False
        self._thread = None

    def _monitor(self):
        """Polling loop executed on the background thread until stop()."""
        while self._running:
            stats = self.nvsmi.DeviceQuery("power.draw, memory.used")["gpu"][
                self.gpu_index
            ]
            self.gpu_stats.append(
                {
                    "power": stats["power_readings"]["power_draw"],
                    "power_unit": stats["power_readings"]["unit"],
                    "memory": stats["fb_memory_usage"]["used"],
                    "memory_unit": stats["fb_memory_usage"]["unit"],
                }
            )
            time.sleep(self.interval)

    def start(self):
        """Begin sampling; discards stats from any previous run."""
        if not self._running:
            self._running = True
            # clear previous stats
            self.gpu_stats.clear()
            # daemon=True so a forgotten stop() cannot block interpreter
            # exit (previously a non-daemon thread would hang the process).
            self._thread = threading.Thread(target=self._monitor, daemon=True)
            self._thread.start()

    def stop(self):
        """Stop sampling and join the worker thread."""
        if self._running:
            self._running = False
            if self._thread is not None:
                self._thread.join()
            # clear the thread reference
            self._thread = None

    def get_stats(self):
        """Aggregate the collected samples.

        Must be called after stop(). None readings are skipped.

        Returns:
            dict with 'gpu_avg_power' (mean power draw, 0 if no samples) and
            'gpu_avg_max_memory' (peak memory used, 0 if no samples). Values
            are in whatever units NVML reported (typically W / MiB).
        """
        assert self._running is False, "GPU monitor is still running. Stop it first."

        powers = [s["power"] for s in self.gpu_stats if s["power"] is not None]
        memories = [s["memory"] for s in self.gpu_stats if s["memory"] is not None]
        avg_power = sum(powers) / len(powers) if powers else 0
        max_memory = max(memories) if memories else 0
        return {"gpu_avg_power": avg_power, "gpu_avg_max_memory": max_memory}
|
halib/utils/list.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
class ListUtils:
    """Utility functions for list operations."""

    @staticmethod
    def subtract(list_a, list_b):
        """Return the items of list_a not present in list_b, order preserved."""
        return [element for element in list_a if element not in list_b]

    @staticmethod
    def union(list_a, list_b, no_duplicate=False):
        """Combine two lists.

        With no_duplicate=True the result is deduplicated (unordered, via
        set union); otherwise it is a simple concatenation.
        """
        if not no_duplicate:
            return list_a + list_b
        return list(set(list_a) | set(list_b))

    @staticmethod
    def intersection(list_a, list_b):
        """Return the (unordered) elements common to both lists."""
        return list(set(list_a) & set(list_b))
|
halib/utils/listop.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
def subtract(list_a, list_b):
    """Return the elements of list_a that are absent from list_b, in order."""
    result = []
    for item in list_a:
        if item not in list_b:
            result.append(item)
    return result
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def union(list_a, list_b, no_duplicate=False):
    """Combine two lists; with no_duplicate=True return an unordered dedup."""
    if not no_duplicate:
        return list_a + list_b
    return list(set(list_a) | set(list_b))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def intersection(list_a, list_b):
    """Return the (unordered) elements common to both lists."""
    common = set(list_a) & set(list_b)
    return list(common)
|
halib/utils/slack.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from slack_sdk import WebClient
|
|
3
|
+
from slack_sdk.errors import SlackApiError
|
|
4
|
+
from rich.pretty import pprint
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
Utilities for interacting with Slack for experiment notification via Wandb Logger.
|
|
8
|
+
"""
|
|
9
|
+
class SlackUtils:
    """Singleton wrapper around the Slack WebClient for experiment-notification
    housekeeping (currently: bulk-clearing a channel)."""

    # Shared singleton instance; created on first construction.
    _instance = None

    def __new__(cls, token=None):
        """
        Singleton __new__ method.
        Ensures only one instance of SlackUtils exists.

        The token is only required (and only used) on the FIRST call; later
        calls ignore it and return the existing instance.
        """
        if cls._instance is None:
            if token is None:
                raise ValueError(
                    "A Slack Token is required for the first initialization."
                )

            # Create the instance
            cls._instance = super(SlackUtils, cls).__new__(cls)

            # Initialize the WebClient only once
            cls._instance.client = WebClient(token=token)
            cls._instance.token = token

        return cls._instance

    def clear_channel(self, channel_id, sleep_interval=1.0):
        """
        Fetches and deletes all messages in a specified channel.

        Iterates the channel history in pages of 100 and deletes each message,
        sleeping `sleep_interval` seconds between deletions to stay under
        Slack's rate limits. Messages the bot may not delete are skipped.
        """
        cursor = None
        deleted_count = 0

        pprint(f"--- Starting cleanup for Channel ID: {channel_id} ---")

        while True:
            try:
                # Fetch history in batches of 100
                response = self.client.conversations_history(  # ty:ignore[unresolved-attribute]
                    channel=channel_id, cursor=cursor, limit=100
                )

                messages = response.get("messages", [])

                if not messages:
                    pprint("No more messages found to delete.")
                    break

                for msg in messages:
                    # 'ts' (timestamp) is the message's unique id within the channel.
                    ts = msg.get("ts")

                    try:
                        # Attempt delete
                        self.client.chat_delete(  # ty:ignore[unresolved-attribute]
                            channel=channel_id, ts=ts
                        )
                        pprint(f"Deleted: {ts}")
                        deleted_count += 1

                        # Rate limit protection (Tier 3 limit)
                        time.sleep(sleep_interval)

                    except SlackApiError as e:
                        # Per-message failures are logged and skipped so one
                        # undeletable message doesn't abort the whole sweep.
                        error_code = e.response["error"]
                        if error_code == "cant_delete_message":
                            pprint(f"Skipped (Permission denied): {ts}")
                        elif error_code == "message_not_found":
                            pprint(f"Skipped (Already deleted): {ts}")
                        else:
                            pprint(f"Error deleting {ts}: {error_code}")
                # Check for pagination
                if response["has_more"]:
                    cursor = response["response_metadata"]["next_cursor"]
                else:
                    break

            except SlackApiError as e:
                # A failure fetching history is fatal for the sweep.
                print(f"Critical API Error fetching history: {e.response['error']}")
                break

        print(f"--- Completed. Total messages deleted: {deleted_count} ---")
|
halib/utils/tele_noti.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Watch a log file and send a telegram message when train reaches a certain epoch or end
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import yaml
|
|
5
|
+
import asyncio
|
|
6
|
+
import telegram
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from rich.pretty import pprint
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
import plotly.graph_objects as go
|
|
12
|
+
|
|
13
|
+
from ..system import filesys as fs
|
|
14
|
+
from ..filetype import textfile, csvfile
|
|
15
|
+
|
|
16
|
+
from argparse import ArgumentParser
|
|
17
|
+
|
|
18
|
+
tele_console = Console()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def parse_args():
    """Parse command-line arguments; returns a namespace with `.cfg` (YAML path)."""
    parser = ArgumentParser(description="desc text")
    parser.add_argument(
        "-cfg",
        "--cfg",
        type=str,
        default=r"E:\Dev\halib\cfg_tele_noti.yaml",
        help="yaml file for tele",
    )
    return parser.parse_args()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_watcher_message_df(target_file, num_last_lines):
    """Build a DataFrame holding the tail of the watched file.

    For .csv files this is the last `num_last_lines` rows. For .txt/.log
    files it is the last `num_last_lines` lines, prefixed with the most
    recent line mentioning "epoch" (or "Epoch: n/a" if none).
    """
    file_ext = fs.get_file_name(target_file, split_file_ext=True)[1]
    supported_ext = [".txt", ".log", ".csv"]
    assert (
        file_ext in supported_ext
    ), f"File extension {file_ext} not supported. Supported extensions are {supported_ext}"

    if file_ext == ".csv":
        df = pd.read_csv(target_file)
        tail_count = min(num_last_lines, len(df))
        return df.tail(tail_count)

    # .txt / .log branch: collect trailing lines plus the latest epoch marker.
    lines = textfile.read_line_by_line(target_file)
    tail_count = min(num_last_lines, len(lines))
    last_line_arr = lines[-tail_count:]

    # Find the most recent line that mentions "epoch" (scan from the end).
    epoch_line = "Epoch: n/a"
    for line in reversed(lines):
        if "epoch" in line.lower():
            epoch_line = line
            break
    last_line_arr.insert(0, epoch_line)  # insert at the beginning

    creator = csvfile.DFCreator()
    creator.create_table("last_lines", ["line"])
    creator.insert_rows("last_lines", [[line] for line in last_line_arr])
    creator.fill_table_from_row_pool("last_lines")
    return creator["last_lines"].copy()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def df2img(df: pd.DataFrame, output_img_dir, decimal_places, out_img_scale):
    """Render *df* as a plotly table image and return the written image path."""
    rounded = df.round(decimal_places)
    table = go.Table(
        header=dict(values=list(rounded.columns), align="center"),
        cells=dict(
            values=rounded.values.transpose(),
            fill_color=[["white", "lightgrey"] * rounded.shape[0]],
            align="center",
        ),
    )
    fig = go.Figure(data=[table])
    if not os.path.exists(output_img_dir):
        os.makedirs(output_img_dir)
    img_path = os.path.normpath(os.path.join(output_img_dir, "last_lines.png"))
    fig.write_image(img_path, scale=out_img_scale)
    return img_path
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def compose_message_and_img_path(
    target_file, project, num_last_lines, decimal_places, out_img_scale, output_img_dir
):
    """Build the notification text and, best-effort, a table image path.

    Returns (context_msg, img_path); img_path is None if rendering failed.
    """
    context_msg = f">> Project: {project} \n>> File: {target_file} \n>> Last {num_last_lines} lines:"
    msg_df = get_watcher_message_df(target_file, num_last_lines)
    img_path = None
    try:
        img_path = df2img(msg_df, output_img_dir, decimal_places, out_img_scale)
    except Exception as e:
        # Image rendering is optional; log and fall back to text-only.
        pprint(f"Error: {e}")
    return context_msg, img_path
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
async def send_to_telegram(cfg_dict, interval_in_sec):
    """Send the watcher summary (text plus optional table image) to Telegram.

    Args:
        cfg_dict: Parsed YAML config with a 'telegram' section (token,
            chat_id) and a 'noti_settings' section (project, target_file,
            num_last_lines, output_img_dir, decimal_places, out_img_scale).
        interval_in_sec: Seconds until the next message; used only to tell
            the recipient when to expect the next update.

    Any exception is caught and logged so the notification loop keeps running.
    """
    # pprint(cfg_dict)
    token = cfg_dict["telegram"]["token"]
    chat_id = cfg_dict["telegram"]["chat_id"]

    noti_settings = cfg_dict["noti_settings"]
    project = noti_settings["project"]
    target_file = noti_settings["target_file"]
    num_last_lines = noti_settings["num_last_lines"]
    output_img_dir = noti_settings["output_img_dir"]
    decimal_places = noti_settings["decimal_places"]
    out_img_scale = noti_settings["out_img_scale"]

    bot = telegram.Bot(token=token)
    async with bot:
        try:
            context_msg, img_path = compose_message_and_img_path(
                target_file,
                project,
                num_last_lines,
                decimal_places,
                out_img_scale,
                output_img_dir,
            )
            time_now = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
            sep_line = "-" * 50
            context_msg = f"{sep_line}\n>> Time: {time_now}\n{context_msg}"
            # calculate the next time to send message
            next_time = pd.Timestamp.now() + pd.Timedelta(seconds=interval_in_sec)
            next_time = next_time.strftime("%Y-%m-%d %H:%M:%S")
            next_time_info = f"Next msg: {next_time}"
            tele_console.rule()
            tele_console.print("[green] Send message to telegram [/green]")
            tele_console.print(
                f"[red] Next message will be sent at <{next_time}> [/red]"
            )
            await bot.send_message(text=context_msg, chat_id=chat_id)
            if img_path:
                # Open via a context manager so the file handle is closed
                # deterministically (a bare open() leaked it before).
                with open(img_path, "rb") as img_file:
                    await bot.send_photo(chat_id=chat_id, photo=img_file)
            await bot.send_message(text=next_time_info, chat_id=chat_id)
        except Exception as e:
            pprint(f"Error: {e}")
            pprint("Message not sent to telegram")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
async def run_forever(cfg_path):
    """Load the YAML config at `cfg_path` and send notifications forever.

    The interval comes from noti_settings.interval_in_min (converted to
    seconds). This coroutine never returns; cancel the task to stop it.
    """
    # Use a context manager so the config file handle is closed
    # (previously a bare open() leaked it).
    with open(cfg_path, "r") as cfg_file:
        cfg_dict = yaml.safe_load(cfg_file)
    noti_settings = cfg_dict["noti_settings"]
    interval_in_min = noti_settings["interval_in_min"]
    interval_in_sec = int(interval_in_min * 60)
    pprint(
        f"Message will be sent every {interval_in_min} minutes or {interval_in_sec} seconds"
    )
    while True:
        await send_to_telegram(cfg_dict, interval_in_sec)
        await asyncio.sleep(interval_in_sec)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
async def main():
    """Entry point: parse CLI args and run the notification loop indefinitely."""
    args = parse_args()
    await run_forever(args.cfg)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Script entry point: watch the configured log file and push Telegram updates.
if __name__ == "__main__":
    asyncio.run(main())
|