halib 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +94 -0
- halib/common/__init__.py +0 -0
- halib/common/common.py +326 -0
- halib/common/rich_color.py +285 -0
- halib/common.py +151 -0
- halib/csvfile.py +48 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/exp/__init__.py +0 -0
- halib/exp/core/__init__.py +0 -0
- halib/exp/core/base_config.py +167 -0
- halib/exp/core/base_exp.py +147 -0
- halib/exp/core/param_gen.py +170 -0
- halib/exp/core/wandb_op.py +117 -0
- halib/exp/data/__init__.py +0 -0
- halib/exp/data/dataclass_util.py +41 -0
- halib/exp/data/dataset.py +208 -0
- halib/exp/data/torchloader.py +165 -0
- halib/exp/perf/__init__.py +0 -0
- halib/exp/perf/flop_calc.py +190 -0
- halib/exp/perf/gpu_mon.py +58 -0
- halib/exp/perf/perfcalc.py +470 -0
- halib/exp/perf/perfmetrics.py +137 -0
- halib/exp/perf/perftb.py +778 -0
- halib/exp/perf/profiler.py +507 -0
- halib/exp/viz/__init__.py +0 -0
- halib/exp/viz/plot.py +754 -0
- halib/filesys.py +117 -0
- halib/filetype/__init__.py +0 -0
- halib/filetype/csvfile.py +192 -0
- halib/filetype/ipynb.py +61 -0
- halib/filetype/jsonfile.py +19 -0
- halib/filetype/textfile.py +12 -0
- halib/filetype/videofile.py +266 -0
- halib/filetype/yamlfile.py +87 -0
- halib/gdrive.py +179 -0
- halib/gdrive_mkdir.py +41 -0
- halib/gdrive_test.py +37 -0
- halib/jsonfile.py +22 -0
- halib/listop.py +13 -0
- halib/online/__init__.py +0 -0
- halib/online/gdrive.py +229 -0
- halib/online/gdrive_mkdir.py +53 -0
- halib/online/gdrive_test.py +50 -0
- halib/online/projectmake.py +131 -0
- halib/online/tele_noti.py +165 -0
- halib/plot.py +301 -0
- halib/projectmake.py +115 -0
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +157 -0
- halib/research/benchquery.py +131 -0
- halib/research/core/__init__.py +0 -0
- halib/research/core/base_config.py +144 -0
- halib/research/core/base_exp.py +157 -0
- halib/research/core/param_gen.py +108 -0
- halib/research/core/wandb_op.py +117 -0
- halib/research/data/__init__.py +0 -0
- halib/research/data/dataclass_util.py +41 -0
- halib/research/data/dataset.py +208 -0
- halib/research/data/torchloader.py +165 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +137 -0
- halib/research/mics.py +74 -0
- halib/research/params_gen.py +108 -0
- halib/research/perf/__init__.py +0 -0
- halib/research/perf/flop_calc.py +190 -0
- halib/research/perf/gpu_mon.py +58 -0
- halib/research/perf/perfcalc.py +363 -0
- halib/research/perf/perfmetrics.py +137 -0
- halib/research/perf/perftb.py +778 -0
- halib/research/perf/profiler.py +301 -0
- halib/research/perfcalc.py +361 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/viz/__init__.py +0 -0
- halib/research/viz/plot.py +754 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/__init__.py +0 -0
- halib/sys/cmd.py +8 -0
- halib/sys/filesys.py +124 -0
- halib/system/__init__.py +0 -0
- halib/system/_list_pc.csv +6 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +164 -0
- halib/system/path.py +106 -0
- halib/tele_noti.py +166 -0
- halib/textfile.py +13 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict.py +317 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/list.py +17 -0
- halib/utils/listop.py +13 -0
- halib/utils/slack.py +86 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +139 -0
- halib-0.2.30.dist-info/METADATA +237 -0
- halib-0.2.30.dist-info/RECORD +110 -0
- halib-0.2.30.dist-info/WHEEL +5 -0
- halib-0.2.30.dist-info/licenses/LICENSE.txt +17 -0
- halib-0.2.30.dist-info/top_level.txt +1 -0
halib/filesys.py
ADDED
@@ -0,0 +1,117 @@
import glob
import os
import shutil


def is_exit(path):
    return os.path.exists(path)


def is_directory(path):
    return os.path.isdir(path)


def get_current_dir():
    return os.getcwd()


def change_current_dir(new_dir):
    if is_directory(new_dir):
        os.chdir(new_dir)


def get_dir_name(directory):
    return os.path.basename(os.path.normpath(directory))


def get_parent_dir(directory, return_full_path=False):
    if not return_full_path:
        return os.path.basename(os.path.dirname(directory))
    else:
        return os.path.dirname(directory)


def make_dir(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)


def copy_dir(src_dir, dst_dir, dirs_exist_ok=True, ignore_patterns=None):
    shutil.copytree(src_dir, dst_dir, dirs_exist_ok=dirs_exist_ok,
                    ignore=ignore_patterns)


def delete_dir(directory):
    shutil.rmtree(directory)


def list_dirs(directory):
    folders = list(filter(lambda x: os.path.isdir(os.path.join(directory, x)),
                          os.listdir(directory)))
    return folders


def list_files(directory):
    files = list(filter(lambda x: os.path.isfile(os.path.join(directory, x)),
                        os.listdir(directory)))
    return files


def filter_files_by_extension(directory, ext, recursive=True):
    if is_directory(directory):
        result_files = []
        if isinstance(ext, list):
            ext_list = ext
        else:
            ext_list = [ext]
        if not recursive:
            filter_pattern = f'{directory}/*'
        else:
            filter_pattern = f'{directory}/**/*'

        for ext_item in ext_list:
            # match on suffix so both "mp4" and ".mp4" work
            files = glob.glob(filter_pattern, recursive=recursive)
            files = [f for f in files if is_file(f) and f.endswith(ext_item)]
            result_files.extend(files)
        return result_files
    else:
        raise OSError("Directory does not exist")


def is_file(path):
    return os.path.isfile(path)


def get_file_name(file_path, split_file_ext=False):
    if is_file(file_path):
        if split_file_ext:
            filename, file_extension = os.path.splitext(os.path.basename(file_path))
            return filename, file_extension
        else:
            return os.path.basename(file_path)
    else:
        raise OSError("Not a file")


def get_absolute_path(file_path):
    return os.path.abspath(file_path)


# dest can be a directory
def copy_file(source, dest):
    shutil.copy2(source, dest)


def delete_file(path):
    if is_file(path):
        os.remove(path)


def rename_dir_or_file(old, new):
    os.renames(old, new)


def move_dir_or_file(source, destination):
    shutil.move(source, destination)
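A minimal usage sketch for these helpers (hypothetical paths; assumes the wheel is installed so `halib.filesys` is importable):

    from halib import filesys

    filesys.make_dir("out/run1")  # creates the directory tree if missing
    videos = filesys.filter_files_by_extension("data", "mp4", recursive=True)
    for v in videos:
        name, ext = filesys.get_file_name(v, split_file_ext=True)
        filesys.copy_file(v, "out/run1")  # dest can be a directory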
halib/filetype/__init__.py
File without changes
halib/filetype/csvfile.py
ADDED
@@ -0,0 +1,192 @@
import csv
import textwrap
import pandas as pd
import pygwalker as pyg
from tabulate import tabulate
from rich.console import Console
from itables import init_notebook_mode, show

console = Console()


def read(file, separator=","):
    df = pd.read_csv(file, sep=separator)
    return df


def read_auto_sep(filepath, sample_size=2048, **kwargs):
    """
    Read a CSV file with automatic delimiter detection.

    Parameters
    ----------
    filepath : str
        Path to the CSV file.
    sample_size : int, optional
        Number of bytes to read for delimiter sniffing.
    **kwargs : dict
        Extra keyword args passed to pandas.read_csv.

    Returns
    -------
    df : pandas.DataFrame
    """
    with open(filepath, "r", newline="", encoding=kwargs.get("encoding", "utf-8")) as f:
        sample = f.read(sample_size)
        f.seek(0)
        try:
            dialect = csv.Sniffer().sniff(sample, delimiters=[",", ";", "\t", "|", ":"])
            sep = dialect.delimiter
        except csv.Error:
            sep = ","  # fallback if detection fails

    return pd.read_csv(filepath, sep=sep, **kwargs)


# for append, mode = 'a'
def fn_write(df, outfile, mode="w", header=True, index_label=None):
    if not outfile.endswith(".csv"):
        outfile = f"{outfile}.csv"
    if index_label is not None:
        df.to_csv(outfile, mode=mode, header=header, index_label=index_label)
    else:
        df.to_csv(outfile, mode=mode, header=header, index=False)


def fn_make_df_with_columns(columns):
    df = pd.DataFrame(columns=columns)
    return df


def fn_insert_rows(df, singleRow_or_rowList):
    row_data = (
        singleRow_or_rowList
        if type(singleRow_or_rowList[0]) is list
        else [singleRow_or_rowList]
    )
    new_row_df = pd.DataFrame(row_data, columns=df.columns)
    df = pd.concat([df, new_row_df], ignore_index=True)
    return df


# Auto-wrap function for each cell
def auto_wrap(cell, width=40):
    return textwrap.fill(str(cell), width=width)


def fn_display_df(df, max_col_width=40):
    # Wrap each cell, then print with a boxed "grid" layout
    # wrapped_df = df.applymap(lambda x: auto_wrap(x, width=max_col_width))
    # use apply + Series.map to avoid the FutureWarning on applymap
    wrapped_df = df.apply(
        lambda col: col.map(lambda x: auto_wrap(x, width=max_col_width))
    )
    print(tabulate(wrapped_df, headers="keys", tablefmt="grid", numalign="right"))


def showdf(df, display_mode="itable", in_jupyter=True, all_interactive=False):
    if display_mode == "itable":
        if in_jupyter:
            init_notebook_mode(all_interactive=all_interactive)
            show(
                df,
                # layout={"top1": "searchPanes"},
                # searchPanes={"layout": "column-3", "cascadePanes": True},
                caption="table caption",
                layout={"top1": "searchBuilder"},
                buttons=["csvHtml5", "excelHtml5", "colvis"],
                search={"regex": True, "caseInsensitive": True},
                paging=False,  # no paging
                scrollY="300px",  # height of table
                scrollCollapse=True,
                showIndex=True,  # show row no.
                select=True,  # allow row selection
                keys=True,  # enable navigation using arrow keys
            )
    elif display_mode == "pygwalker":
        return pyg.walk(df)
    else:
        raise ValueError("Invalid display mode; currently supported: [itable, pygwalker]")


def fn_config_display_pd(
    max_rows=None,
    max_columns=None,
    display_width=1000,
    col_header_justify="center",
    precision=10,
):
    pd.set_option("display.max_rows", max_rows)
    pd.set_option("display.max_columns", max_columns)
    pd.set_option("display.width", display_width)
    pd.set_option("display.colheader_justify", col_header_justify)
    pd.set_option("display.precision", precision)


class DFCreator(dict):
    """Dict of named DataFrames with a row pool for fast batched inserts."""

    def __init__(self, *arg, **kw):
        super(DFCreator, self).__init__(*arg, **kw)
        self.row_pool_dict = {}

    def create_table(self, table_name, columns):
        self[table_name] = pd.DataFrame(columns=columns)
        self.row_pool_dict[table_name] = []

    """Instead of inserting into the dataframe, insert into a row pool for fast computation"""

    def insert_rows(self, table_name, singleRow_or_rowList):
        rows_data = (
            singleRow_or_rowList
            if type(singleRow_or_rowList[0]) is list
            else [singleRow_or_rowList]
        )
        self.row_pool_dict[table_name].extend(rows_data)

    """Fill from row pool to actual table dataframe"""

    def fill_table_from_row_pool(self, table_name):
        if len(self.row_pool_dict[table_name]) > 0:
            # concat row pool to table dataframe
            self[table_name] = fn_insert_rows(
                self[table_name], self.row_pool_dict[table_name]
            )
            # free the pool
            self.row_pool_dict[table_name] = []

    def write_table(
        self,
        table_name,
        output_dir,
        out_file_name=None,
        mode="w",
        header=True,
        index_label=None,
    ):
        self.fill_table_from_row_pool(table_name)

        if not out_file_name:
            outfile = f"{output_dir}/{table_name}.csv"
        else:
            outfile = f"{output_dir}/{out_file_name}.csv"

        fn_write(self[table_name], outfile, mode, header, index_label)

    def write_all_table(self, output_dir, mode="w", header=True, index_label=None):
        for table_name in self.keys():
            self.fill_table_from_row_pool(table_name)  # flush pending rows first
            outfile = f"{output_dir}/{table_name}.csv"
            fn_write(self[table_name], outfile, mode, header, index_label)

    def display_table(self, table_name):
        self.fill_table_from_row_pool(table_name)
        fn_display_df(self[table_name])

    def display_table_schema(self, table_name):
        columns = list(self[table_name].columns)
        console.print(f"TABLE {table_name}: {columns}", style="bold blue")

    def display_all_table_schema(self):
        table_names = list(self.keys())
        for table_name in table_names:
            self.display_table_schema(table_name)

    def display_all_table(self):
        for table_name in self.keys():
            console.rule(table_name)
            self.display_table(table_name)
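The DFCreator comments above describe a row-pool pattern: rows are buffered in a plain list and concatenated into the DataFrame once, instead of growing the DataFrame row by row. A short sketch with hypothetical table names and values:

    from halib.filetype import csvfile

    dfmk = csvfile.DFCreator()
    dfmk.create_table("scores", ["model", "acc"])
    dfmk.insert_rows("scores", [["resnet18", 0.91], ["vit_b16", 0.94]])  # buffered in the pool
    dfmk.fill_table_from_row_pool("scores")  # single pd.concat
    dfmk.write_table("scores", output_dir=".")  # writes ./scores.csv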
halib/filetype/ipynb.py
ADDED
@@ -0,0 +1,61 @@
import ipynbname
from pathlib import Path
from contextlib import contextmanager

from ..common.common import now_str


@contextmanager
def gen_ipynb_name(
    filename,
    add_time_stamp=False,
    nb_prefix="nb__",
    separator="__",
):
    """
    Context manager that prefixes the filename with the notebook name.
    Output: {nb_prefix}{NotebookName}{separator}{OriginalStem}[{separator}{timestamp}].ext
    """
    try:
        nb_name = ipynbname.name()
    except FileNotFoundError:
        nb_name = "script"  # fallback when not running inside a notebook

    p = Path(filename)

    # 1. Split the original name into stem and extension
    original_stem = p.stem  # e.g. "results" (no extension)
    extension = p.suffix  # e.g. ".csv"

    now_string = now_str() if add_time_stamp else ""

    # 2. Construct the base name (prefix + notebook name + separator + original stem)
    base_name = f"{nb_prefix}{nb_name}{separator}{original_stem}"

    # 3. Append the timestamp if requested
    if now_string:
        base_name = f"{base_name}{separator}{now_string}"

    # 4. Add the extension at the very end
    new_filename = f"{base_name}{extension}"

    final_path = p.parent / new_filename

    yield str(final_path)


if __name__ == "__main__":
    # --- Usage example ---
    # Assume the notebook name is "MyThesisWork"
    filename = "results.csv"
    with gen_ipynb_name(filename) as filename_ipynb:
        # filename_ipynb is now: "nb__MyThesisWork__results.csv"
        print(f"File to save: {filename_ipynb}")
        # df.to_csv(filename_ipynb)
halib/filetype/jsonfile.py
ADDED
@@ -0,0 +1,19 @@
import json


def read(file):
    with open(file) as f:
        data = json.load(f)
    return data


def write(data_dict, outfile):
    with open(outfile, "w") as json_file:
        json.dump(data_dict, json_file)


def beautify(json_str):
    formatted_json = json_str
    try:
        parsed = json.loads(json_str)
        formatted_json = json.dumps(parsed, indent=4, sort_keys=True)
    except json.JSONDecodeError:
        pass  # return the input unchanged if it is not valid JSON
    return formatted_json
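A round-trip sketch for this module, with a hypothetical file name:

    from halib.filetype import jsonfile

    jsonfile.write({"lr": 0.001, "epochs": 10}, "config.json")
    cfg = jsonfile.read("config.json")
    print(jsonfile.beautify('{"epochs": 10, "lr": 0.001}'))  # indented, keys sorted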
halib/filetype/textfile.py
ADDED
@@ -0,0 +1,12 @@
def read_line_by_line(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
        lines = [line.rstrip() for line in lines]
    return lines


def write(lines, outfile, append=False):
    mode = "a" if append else "w"
    with open(outfile, mode, encoding="utf-8") as f:
        for line in lines:
            f.write(line)
            f.write("\n")
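A quick sketch of the write/append/read cycle, with a hypothetical file name:

    from halib.filetype import textfile

    textfile.write(["line 1", "line 2"], "notes.txt")
    textfile.write(["line 3"], "notes.txt", append=True)
    assert textfile.read_line_by_line("notes.txt") == ["line 1", "line 2", "line 3"]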
halib/filetype/videofile.py
ADDED
@@ -0,0 +1,266 @@
import os
import cv2
import enlighten

from enum import Enum
from tube_dl import Youtube, Playlist
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

from . import textfile
from . import csvfile
from ..system import filesys


class VideoUtils:
    @staticmethod
    def _default_meta_extractor(video_path):
        """Default video metadata extractor function."""
        # Open the video file
        cap = cv2.VideoCapture(video_path)

        # Check if the video was opened successfully
        if not cap.isOpened():
            print(f"Error: Could not open video file {video_path}")
            return None

        # Get the frame count
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Get the FPS
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Get the frame size
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Release the video capture object
        cap.release()

        meta_dict = {
            "video_path": video_path,
            "width": width,
            "height": height,
            "frame_count": frame_count,
            "fps": fps,
        }
        return meta_dict

    @staticmethod
    def get_video_meta_dict(video_path, meta_dict_extractor_func=None):
        assert os.path.exists(video_path), f"Video file {video_path} does not exist"
        if meta_dict_extractor_func and callable(meta_dict_extractor_func):
            assert (
                meta_dict_extractor_func.__code__.co_argcount == 1
            ), "meta_dict_extractor_func must take exactly one argument (video_path)"
            meta_dict = meta_dict_extractor_func(video_path)
            assert isinstance(
                meta_dict, dict
            ), "meta_dict_extractor_func must return a dictionary"
            assert "video_path" in meta_dict, "meta_dict must contain 'video_path'"
        else:
            meta_dict = VideoUtils._default_meta_extractor(video_path=video_path)
        return meta_dict

    @staticmethod
    def get_video_dir_meta_df(
        video_dir,
        video_exts=[".mp4", ".avi", ".mov", ".mkv"],
        search_recursive=False,
        csv_outfile=None,
    ):
        assert os.path.exists(video_dir), f"Video directory {video_dir} does not exist"
        video_files = filesys.filter_files_by_extension(
            video_dir, video_exts, recursive=search_recursive
        )
        assert (
            len(video_files) > 0
        ), f"No video files found in {video_dir} with extensions {video_exts}"
        video_meta_list = []
        for vfile in video_files:
            meta_dict = VideoUtils.get_video_meta_dict(vfile)
            if meta_dict:
                video_meta_list.append(meta_dict)
        dfmk = csvfile.DFCreator()
        columns = list(video_meta_list[0].keys())
        assert len(columns) > 0, "No video metadata found"
        assert "video_path" in columns, "video_path column not found in video metadata"
        # move video_path to the first column
        columns.remove("video_path")
        columns.insert(0, "video_path")
        dfmk.create_table("video_meta", columns)
        rows = [[meta[col] for col in columns] for meta in video_meta_list]
        dfmk.insert_rows("video_meta", rows)
        dfmk.fill_table_from_row_pool("video_meta")

        if csv_outfile:
            dfmk["video_meta"].to_csv(csv_outfile, index=False, sep=";")
        return dfmk["video_meta"].copy()

    # -----------------------------
    # FFmpeg Horizontal Stack
    # -----------------------------
    @staticmethod
    def hstack(video_files, output_file):
        """Horizontally stack multiple videos using FFmpeg."""
        try:
            # one -i per input; build the [0:v][1:v]... pad list to match the input count
            inputs = " ".join(f'-i "{video}"' for video in video_files)
            pads = "".join(f"[{i}:v]" for i in range(len(video_files)))
            ffmpeg_cmd = (
                f"ffmpeg {inputs} "
                f'-filter_complex "{pads}hstack=inputs={len(video_files)}[v]" '
                f'-map "[v]" -c:v libx264 -preset fast -crf 22 {output_file}'
            )
            os.system(ffmpeg_cmd)
            print(f"[INFO] Video stacked successfully: {output_file}")
        except Exception as e:
            print(f"[ERROR] Video stacking failed: {e}")


class VideoResolution(Enum):
    VR480p = "720x480"
    VR576p = "720x576"
    VR720p_hd = "1280x720"
    VR1080p_full_hd = "1920x1080"
    VR4K_uhd = "3840x2160"
    VR8K_uhd = "7680x4320"

    def __str__(self):
        return "%s" % self.value


def get_video_resolution_size(video_resolution):
    separator = "x"
    resolution_str = str(video_resolution)
    info_arr = resolution_str.split(separator)
    width, height = int(info_arr[0]), int(info_arr[1])
    return width, height


def get_videos_by_resolution(
    directory, video_resolution, video_ext="mp4", include_better=True
):
    video_paths = filesys.filter_files_by_extension(directory, video_ext)
    filtered_video_paths = []
    for path in video_paths:
        vid = cv2.VideoCapture(path)
        height = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        width = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        valid = False
        video_width, video_height = get_video_resolution_size(video_resolution)
        if not include_better:
            if width == video_width and height == video_height:
                valid = True
        else:
            if width >= video_width and height >= video_height:
                valid = True

        if valid:
            filtered_video_paths.append(path)
    return filtered_video_paths


# time in seconds
def trim_video(source, destination, start_time, end_time):
    ffmpeg_extract_subclip(source, start_time, end_time, targetname=destination)


progress_bar = None


def on_progress(bytes_done, total_bytes):
    global progress_bar
    if progress_bar is None:
        progress_bar = enlighten.get_manager().counter(
            total=20, desc="Downloading", unit="byte", color="blue"
        )

    progress_bar.total = total_bytes
    progress_bar.count = bytes_done
    progress_bar.update(incr=0)
    if bytes_done >= total_bytes:
        progress_bar.close()
        progress_bar = None


def get_youtube_url(full_url_or_video_code):
    if "youtube" in full_url_or_video_code:
        url = full_url_or_video_code
    else:
        url = f"https://youtube.com/watch?v={full_url_or_video_code}"
    return url


def download_yt_video(
    full_url_or_video_code,
    save_folder="./",
    report_progress=False,
    video_idx="1",
    total_video="1",
):
    url = get_youtube_url(full_url_or_video_code)
    filesys.make_dir(save_folder)
    filesys.change_current_dir(save_folder)
    try:
        yt = Youtube(url)
        title_en = yt.title.encode("ascii", "ignore")
        file_download = yt.formats.first()
        if report_progress:
            print(f"\n[{video_idx}/{total_video}][DOWNLOAD]{title_en}")
        file_download.download(onprogress=on_progress, skip_existing=True)
    except TypeError:
        print(f"[ERROR] download {url}")


def download_playlist(
    playlist_url, save_folder="./", report_progress=False, start_pattern=None
):
    print(f"[DOWNLOAD PLAYLIST] {playlist_url}")
    pl = Playlist(playlist_url).videos
    total_video = len(pl)
    should_start = False
    url = None
    count = 0
    for idx, code in enumerate(pl):
        try:
            url = f"https://youtube.com/watch?v={code}"
            yt = Youtube(url)
            count += 1
            if start_pattern is None:
                should_start = True
            elif start_pattern in yt.title:
                should_start = True
            if should_start:
                download_yt_video(
                    url,
                    save_folder,
                    report_progress,
                    video_idx=str(count),
                    total_video=str(total_video),
                )

        except TypeError:
            print(f"[ERROR] download {url}")
    enlighten.get_manager().stop()


# Pntt https://www.youtube.com/playlist?list=PLYaaU301HUe06Zlf3qv9q2dnVulj35gOb
# Format line: playlist_save_folder_path [SPACE] playlist_url
def download_multiple_playlist_in_files(text_file, report_progress=False):
    playlists = textfile.read_line_by_line(text_file)
    for folder_plUrl in playlists:
        folder = folder_plUrl.split()[0]
        plUrl = folder_plUrl.split()[1]
        download_playlist(plUrl, save_folder=folder, report_progress=report_progress)


# test code
# pl = 'https://youtube.com/playlist?list=PLYaaU301HUe03PabLEGbMGB8nhHgq58Zr'
# download_playlist(pl, './test', report_progress=True)
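A usage sketch for the metadata helpers above, assuming opencv-python is installed, ffmpeg is on PATH, and the clip paths are hypothetical:

    from halib.filetype.videofile import VideoUtils, trim_video

    # collect width/height/fps/frame_count for every clip in a folder
    meta_df = VideoUtils.get_video_dir_meta_df("clips", csv_outfile="clips_meta.csv")
    print(meta_df[["video_path", "fps", "frame_count"]])

    # cut seconds 5..10 of one clip (times are in seconds)
    trim_video("clips/a.mp4", "clips/a_5to10.mp4", start_time=5, end_time=10)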