halib 0.1.7__py3-none-any.whl → 0.1.99__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halib/__init__.py +84 -0
- halib/common.py +151 -0
- halib/cuda.py +39 -0
- halib/dataset.py +209 -0
- halib/filetype/csvfile.py +151 -45
- halib/filetype/ipynb.py +63 -0
- halib/filetype/jsonfile.py +1 -1
- halib/filetype/textfile.py +4 -4
- halib/filetype/videofile.py +44 -33
- halib/filetype/yamlfile.py +95 -0
- halib/gdrive.py +1 -1
- halib/online/gdrive.py +104 -54
- halib/online/gdrive_mkdir.py +29 -17
- halib/online/gdrive_test.py +31 -18
- halib/online/projectmake.py +58 -43
- halib/plot.py +296 -11
- halib/projectmake.py +1 -1
- halib/research/__init__.py +0 -0
- halib/research/base_config.py +100 -0
- halib/research/base_exp.py +100 -0
- halib/research/benchquery.py +131 -0
- halib/research/dataset.py +208 -0
- halib/research/flop_csv.py +34 -0
- halib/research/flops.py +156 -0
- halib/research/metrics.py +133 -0
- halib/research/mics.py +68 -0
- halib/research/params_gen.py +108 -0
- halib/research/perfcalc.py +336 -0
- halib/research/perftb.py +780 -0
- halib/research/plot.py +758 -0
- halib/research/profiler.py +300 -0
- halib/research/torchloader.py +162 -0
- halib/research/wandb_op.py +116 -0
- halib/rich_color.py +285 -0
- halib/sys/filesys.py +17 -10
- halib/system/__init__.py +0 -0
- halib/system/cmd.py +8 -0
- halib/system/filesys.py +124 -0
- halib/tele_noti.py +166 -0
- halib/torchloader.py +162 -0
- halib/utils/__init__.py +0 -0
- halib/utils/dataclass_util.py +40 -0
- halib/utils/dict_op.py +9 -0
- halib/utils/gpu_mon.py +58 -0
- halib/utils/listop.py +13 -0
- halib/utils/tele_noti.py +166 -0
- halib/utils/video.py +82 -0
- halib/videofile.py +1 -1
- halib-0.1.99.dist-info/METADATA +209 -0
- halib-0.1.99.dist-info/RECORD +64 -0
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/WHEEL +1 -1
- halib-0.1.7.dist-info/METADATA +0 -59
- halib-0.1.7.dist-info/RECORD +0 -30
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info/licenses}/LICENSE.txt +0 -0
- {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/top_level.txt +0 -0
halib/filetype/csvfile.py
CHANGED
@@ -6,87 +6,193 @@ from rich import inspect
 from rich.pretty import pprint
 from tqdm import tqdm
 from loguru import logger
+from itables import init_notebook_mode, show
+import pygwalker as pyg
+import textwrap
+import csv
 
 console = Console()
 
+
 def read(file, separator=","):
     df = pd.read_csv(file, separator)
     return df
 
 
+def read_auto_sep(filepath, sample_size=2048, **kwargs):
+    """
+    Read a CSV file with automatic delimiter detection.
+
+    Parameters
+    ----------
+    filepath : str
+        Path to the CSV file.
+    sample_size : int, optional
+        Number of bytes to read for delimiter sniffing.
+    **kwargs : dict
+        Extra keyword args passed to pandas.read_csv.
+
+    Returns
+    -------
+    df : pandas.DataFrame
+    """
+    with open(filepath, "r", newline="", encoding=kwargs.get("encoding", "utf-8")) as f:
+        sample = f.read(sample_size)
+        f.seek(0)
+    try:
+        dialect = csv.Sniffer().sniff(sample, delimiters=[",", ";", "\t", "|", ":"])
+        sep = dialect.delimiter
+    except csv.Error:
+        sep = ","  # fallback if detection fails
+
+    return pd.read_csv(filepath, sep=sep, **kwargs)
+
 # for append, mode = 'a'
-def
-    if not outfile.endswith(
-        outfile = f
+def fn_write(df, outfile, mode="w", header=True, index_label=None):
+    if not outfile.endswith(".csv"):
+        outfile = f"{outfile}.csv"
     if index_label is not None:
         df.to_csv(outfile, mode=mode, header=header, index_label=index_label)
     else:
         df.to_csv(outfile, mode=mode, header=header, index=False)
 
 
-def
+def fn_make_df_with_columns(columns):
     df = pd.DataFrame(columns=columns)
     return df
 
 
-def
-    row_data =
+def fn_insert_rows(df, singleRow_or_rowList):
+    row_data = (
+        singleRow_or_rowList
+        if type(singleRow_or_rowList[0]) is list
+        else [singleRow_or_rowList]
+    )
     new_row_df = pd.DataFrame(row_data, columns=df.columns)
     df = pd.concat([df, new_row_df], ignore_index=True)
     return df
 
+# Auto-wrap function for each cell
+def auto_wrap(cell, width=40):
+    return textwrap.fill(str(cell), width=width)
+
+def fn_display_df(df, max_col_width=40):
+    # Apply wrapping; tablefmt="psql" for PostgreSQL-like output
+    # wrapped_df = df.applymap(lambda x: auto_wrap(x, width=max_col_width))
+    # fix the future warning of applymap
+    wrapped_df = df.apply(
+        lambda col: col.map(lambda x: auto_wrap(x, width=max_col_width))
+    )
+    print(tabulate(wrapped_df, headers="keys", tablefmt="grid", numalign="right"))
+
+def showdf(df, display_mode="itable", in_jupyter=True, all_interactive=False):
+    if display_mode == "itable":
+        if in_jupyter:
+            init_notebook_mode(all_interactive=all_interactive)
+        show(
+            df,
+            # layout={"top1": "searchPanes"},
+            # searchPanes={"layout": "column-3", "cascadePanes": True},
+            caption="table caption",
+            layout={"top1": "searchBuilder"},
+            buttons=["csvHtml5", "excelHtml5", "colvis"],
+            search={"regex": True, "caseInsensitive": True},
+            paging=False,  # no paging
+            scrollY="300px",  # height of table
+            scrollCollapse=True,
+            showIndex=True,  # show row no.
+            select=True,  # allow row selected
+            keys=True,  # enable navigate using arrow keys
+        )
+    elif display_mode == "pygwalker":
+        return pyg.walk(df)
+    else:
+        raise ValueError("Invalid display mode, current support [itable, pygwalker]")
 
-def display_df(df):
-    print(tabulate(df, headers='keys', tablefmt='psql', numalign="right"))
-
-
-def config_display_pd(max_rows=None, max_columns=None,
-                      display_width=1000, col_header_justify='center',
-                      precision=10):
-    pd.set_option('display.max_rows', max_rows)
-    pd.set_option('display.max_columns', max_columns)
-    pd.set_option('display.width', display_width)
-    pd.set_option('display.colheader_justify', col_header_justify)
-    pd.set_option('display.precision', precision)
 
+def fn_config_display_pd(
+    max_rows=None,
+    max_columns=None,
+    display_width=1000,
+    col_header_justify="center",
+    precision=10,
+):
+    pd.set_option("display.max_rows", max_rows)
+    pd.set_option("display.max_columns", max_columns)
+    pd.set_option("display.width", display_width)
+    pd.set_option("display.colheader_justify", col_header_justify)
+    pd.set_option("display.precision", precision)
 
 
 class DFCreator(dict):
     """docstring for ClassName."""
-
-    def __init__(self
-
-
+
+    def __init__(self, *arg, **kw):
+        super(DFCreator, self).__init__(*arg, **kw)
+        self.row_pool_dict = {}
+
     def create_table(self, table_name, columns):
         self[table_name] = pd.DataFrame(columns=columns)
-
+        self.row_pool_dict[table_name] = []
+
+    """Instead of inserting to dataframe, insert to row pool for fast computation"""
+
     def insert_rows(self, table_name, singleRow_or_rowList):
-
-
+        rows_data = (
+            singleRow_or_rowList
+            if type(singleRow_or_rowList[0]) is list
+            else [singleRow_or_rowList]
+        )
+        self.row_pool_dict[table_name].extend(rows_data)
+
+    """Fill from row pool to actual table dataframe"""
+
+    def fill_table_from_row_pool(self, table_name):
+        if len(self.row_pool_dict[table_name]) > 0:
+            # concat row pool to table dataframe
+            self[table_name] = fn_insert_rows(
+                self[table_name], self.row_pool_dict[table_name]
+            )
+            # free the pool
+            self.row_pool_dict[table_name] = []
+
+    def write_table(
+        self,
+        table_name,
+        output_dir,
+        out_file_name=None,
+        mode="w",
+        header=True,
+        index_label=None,
+    ):
+        self.fill_table_from_row_pool(table_name)
+
+        if not out_file_name:
+            outfile = f"{output_dir}/{table_name}.csv"
+        else:
+            outfile = f"{output_dir}/{out_file_name}.csv"
+
+        fn_write(self[table_name], outfile, mode, header, index_label)
+
+    def write_all_table(self, output_dir, mode="w", header=True, index_label=None):
+        for table_name in self.keys():
+            outfile = f"{output_dir}/{table_name}.csv"
+            fn_write(self[table_name], outfile, mode, header, index_label)
+
     def display_table(self, table_name):
-
-
-
-        write(self[table_name], outfile, mode, header, index_label)
-
-
+        self.fill_table_from_row_pool(table_name)
+        fn_display_df(self[table_name])
+
     def display_table_schema(self, table_name):
         columns = list(self[table_name].columns)
-        console.print(f
-
+        console.print(f"TABLE {table_name}: {columns}", style="bold blue")
+
     def display_all_table_schema(self):
-
-
-
-
+        table_names = list(self.keys())
+        for table_name in table_names:
+            self.display_table_schema(table_name)
+
     def display_all_table(self):
         for table_name in self.keys():
             console.rule(table_name)
-
-    def write_all_table(self, output_dir, mode='w', header=True, index_label=None):
-        for table_name in self.keys():
-            outfile = f'{output_dir}/{table_name}.csv'
-            write(self[table_name], outfile, mode, header, index_label)
-
-
-
+            self.display_table(table_name)
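Usage note (not part of the diff): the new csvfile API above adds delimiter sniffing via read_auto_sep and a row-pool workflow on DFCreator, where rows are buffered and only concatenated into the DataFrame when a table is written or displayed. A minimal sketch of how these pieces combine; the file name, column names, and output directory are placeholders, and the output directory is assumed to already exist.

from halib.filetype import csvfile

# Delimiter is sniffed from the first 2048 bytes; falls back to "," on failure.
df = csvfile.read_auto_sep("results.csv")

# Buffer rows in the pool, then flush and write in one step.
dfc = csvfile.DFCreator()
dfc.create_table("metrics", columns=["epoch", "acc"])
dfc.insert_rows("metrics", [[1, 0.91], [2, 0.93]])   # a list of rows
dfc.write_table("metrics", output_dir="./out")       # flushes the pool, writes ./out/metrics.csv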
halib/filetype/ipynb.py
ADDED
@@ -0,0 +1,63 @@
+from contextlib import contextmanager
+from pathlib import Path
+
+import ipynbname
+
+from ..common import console, now_str
+
+
+@contextmanager
+def gen_ipynb_name(
+    filename,
+    add_time_stamp=False,
+    nb_prefix="nb__",
+    separator="__",
+):
+    """
+    Context manager that prefixes the filename with the notebook name.
+    Output: NotebookName_OriginalName.ext
+    """
+    try:
+        nb_name = ipynbname.name()
+    except FileNotFoundError:
+        nb_name = "script"  # Fallback
+
+    p = Path(filename)
+
+    # --- FIX START ---
+
+    # 1. Get the parts separately
+    original_stem = p.stem  # "test" (no extension)
+    extension = p.suffix  # ".csv"
+
+    now_string = now_str() if add_time_stamp else ""
+
+    # 2. Construct the base name (Notebook + Separator + OriginalName)
+    base_name = f"{nb_prefix}{nb_name}{separator}{original_stem}"
+
+    # 3. Append timestamp if needed
+    if now_string:
+        base_name = f"{base_name}{separator}{now_string}"
+
+    # 4. Add the extension at the VERY END
+    new_filename = f"{base_name}{extension}"
+
+    # --- FIX END ---
+
+    final_path = p.parent / new_filename
+
+    # Assuming you use 'rich' console based on your snippet
+    # console.rule()
+    # print(f"📝 Saving as: {final_path}")
+
+    yield str(final_path)
+
+
+if __name__ == "__main__":
+    # --- Usage Example ---
+    # Assume Notebook Name is: "MyThesisWork"
+    filename = "results.csv"
+    with gen_ipynb_name(filename) as filename_ipynb:
+        # filename_ipynb is now: "MyThesisWork_results.csv"
+        print(f"File to save: {filename_ipynb}")
+        # df.to_csv(filename_ipynb)
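For context (not part of the diff): gen_ipynb_name is a context manager that yields a path prefixed with the current notebook's name (falling back to "script" outside Jupyter). A short sketch mirroring the module's own usage example; the notebook name, DataFrame, and file name are placeholders.

import pandas as pd
from halib.filetype.ipynb import gen_ipynb_name

df = pd.DataFrame({"a": [1, 2]})
# In a notebook called "analysis", this yields something like
# "nb__analysis__results.csv"; with add_time_stamp=True a timestamp is appended.
with gen_ipynb_name("results.csv", add_time_stamp=True) as out_path:
    df.to_csv(out_path, index=False)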
halib/filetype/jsonfile.py
CHANGED
halib/filetype/textfile.py
CHANGED
@@ -1,13 +1,13 @@
 def read_line_by_line(file_path):
-    with open(file_path,
+    with open(file_path, "r") as file:
         lines = file.readlines()
         lines = [line.rstrip() for line in lines]
         return lines
 
 
 def write(lines, outfile, append=False):
-    mode =
-    with open(outfile, mode, encoding=
+    mode = "a" if append else "w"
+    with open(outfile, mode, encoding="utf-8") as f:
         for line in lines:
             f.write(line)
-            f.write(
+            f.write("\n")
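As a reminder of the call signatures touched above, a minimal round trip; the file name and line contents are illustrative only.

from halib.filetype import textfile

lines = ["first line", "second line"]
textfile.write(lines, "notes.txt")                      # mode "w"; each line gets a trailing "\n"
textfile.write(["appended"], "notes.txt", append=True)  # mode "a"
print(textfile.read_line_by_line("notes.txt"))          # ['first line', 'second line', 'appended']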
halib/filetype/videofile.py
CHANGED
@@ -1,36 +1,35 @@
-from enum import Enum
-
 import cv2
+import textfile
 import enlighten
-from
+from enum import Enum
+from ..system import filesys
 from tube_dl import Youtube, Playlist
-
-from halib.sys import filesys
-from halib.filetype import textfile
+from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 
 
 class VideoResolution(Enum):
-    VR480p =
-    VR576p =
-    VR720p_hd =
-    VR1080p_full_hd =
-    VR4K_uhd =
-    VR8K_uhd =
+    VR480p = "720x480"
+    VR576p = "1280x720"
+    VR720p_hd = "1280x720"
+    VR1080p_full_hd = "1920x1080 "
+    VR4K_uhd = "3840x2160"
+    VR8K_uhd = "7680x4320"
 
     def __str__(self):
-        return
+        return "%s" % self.value
 
 
 def get_video_resolution_size(video_resolution):
-    separator =
+    separator = "x"
     resolution_str = str(video_resolution)
     info_arr = resolution_str.split(separator)
     width, height = int(info_arr[0]), int(info_arr[1])
     return width, height
 
 
-def get_videos_by_resolution(
-
+def get_videos_by_resolution(
+    directory, video_resolution, video_ext="mp4", include_better=True
+):
     video_paths = filesys.filter_files_by_extension(directory, video_ext)
     filtered_video_paths = []
     for path in video_paths:
@@ -62,7 +61,9 @@ progress_bar = None
 def on_progress(bytes_done, total_bytes):
     global progress_bar
     if progress_bar is None:
-        progress_bar = enlighten.get_manager().counter(
+        progress_bar = enlighten.get_manager().counter(
+            total=20, desc="Downloading", unit="byte", color="blue"
+        )
 
     progress_bar.total = total_bytes
     progress_bar.count = bytes_done
@@ -74,33 +75,38 @@ def on_progress(bytes_done, total_bytes):
 
 
 def get_youtube_url(full_url_or_video_code):
-    if
+    if "youtube" in full_url_or_video_code:
         url = full_url_or_video_code
     else:
-        url = f
+        url = f"https://youtube.com/watch?v={full_url_or_video_code}"
     return url
 
 
-def download_yt_video(
-
+def download_yt_video(
+    full_url_or_video_code,
+    save_folder="./",
+    report_progress=False,
+    video_idx="1",
+    total_video="1",
+):
     url = get_youtube_url(full_url_or_video_code)
     filesys.make_dir(save_folder)
     filesys.change_current_dir(save_folder)
     try:
         yt = Youtube(url)
-        title_en = yt.title.encode(
+        title_en = yt.title.encode("ascii", "ignore")
         file_download = yt.formats.first()
         if report_progress:
-            print(f
+            print(f"\n[{video_idx}/{total_video}][DOWNLOAD]{title_en}")
         file_download.download(onprogress=on_progress, skip_existing=True)
     except TypeError:
-        print(f
+        print(f"[ERROR] download {url}")
 
 
-def download_playlist(
-
-
-    print(f
+def download_playlist(
+    playlist_url, save_folder="./", report_progress=False, start_pattern=None
+):
+    print(f"[DOWNLOAD PLAYLIST] {playlist_url}")
     pl = Playlist(playlist_url).videos
     total_video = len(pl)
     should_start = False
@@ -108,7 +114,7 @@ def download_playlist(playlist_url, save_folder='./',
     count = 0
     for idx, code in enumerate(pl):
         try:
-            url = f
+            url = f"https://youtube.com/watch?v={code}"
             yt = Youtube(url)
             count += 1
             if start_pattern is None:
@@ -116,12 +122,16 @@ def download_playlist(playlist_url, save_folder='./',
             elif start_pattern in yt.title:
                 should_start = True
             if should_start:
-                download_yt_video(
-
-
+                download_yt_video(
+                    url,
+                    save_folder,
+                    report_progress,
+                    video_idx=str(count),
+                    total_video=str(total_video),
+                )
 
         except TypeError:
-            print(f
+            print(f"[ERROR] download {url}")
     enlighten.get_manager().stop()
 
 
@@ -134,6 +144,7 @@ def download_multiple_playlist_in_files(text_file, report_progress=False):
         plUrl = folder_plUrl.split()[1]
         download_playlist(plUrl, save_folder=folder, report_progress=report_progress)
 
+
 # test code
 # pl = 'https://youtube.com/playlist?list=PLYaaU301HUe03PabLEGbMGB8nhHgq58Zr'
 # download_playlist(pl, './test', report_progress=True)
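A hedged sketch of the download helpers as refactored above, with signatures taken from the diff; the video code and save folder are placeholders, the playlist URL is the one from the module's own test comment, and actual downloads depend on the tube_dl package working against current YouTube.

from halib.filetype import videofile

# Single video: accepts a full URL or just the video code.
videofile.download_yt_video("dQw4w9WgXcQ", save_folder="./videos", report_progress=True)

# Whole playlist; start_pattern skips videos until a title containing it is seen.
videofile.download_playlist(
    "https://youtube.com/playlist?list=PLYaaU301HUe03PabLEGbMGB8nhHgq58Zr",
    save_folder="./videos",
    report_progress=True,
)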
halib/filetype/yamlfile.py
ADDED
@@ -0,0 +1,95 @@
+import time
+import networkx as nx
+from rich import inspect
+from rich.pretty import pprint
+from omegaconf import OmegaConf
+from rich.console import Console
+from argparse import ArgumentParser
+
+from ..research.mics import *
+
+console = Console()
+
+
+def _load_yaml_recursively(
+    yaml_file, yaml_files=[], share_nx_graph=nx.DiGraph(), log_info=False
+):
+    conf = OmegaConf.load(yaml_file)
+    yaml_files.append(yaml_file)
+    if "__base__" in conf:
+        parent = conf["__base__"]
+        if isinstance(parent, str):
+            parent = [parent]
+        for p in parent:
+            edge = (yaml_file, p)
+            share_nx_graph.add_edge(*edge)
+            for cycle in nx.simple_cycles(share_nx_graph):
+                assert False, f"Cyclic dependency detected: {cycle}"
+            # update conf with parent; BY loading parent and merging with conf (the child)
+            conf = OmegaConf.merge(
+                _load_yaml_recursively(p, yaml_files, share_nx_graph), conf
+            )
+    if log_info:
+        console.rule()
+        console.print(f"current yaml_file: {yaml_file}")
+        inspect(yaml_files)
+        pprint(OmegaConf.to_container(conf, resolve=True))
+        time.sleep(1)
+    return conf
+
+
+def load_yaml(yaml_file, to_dict=False, log_info=False):
+    yaml_files = []
+    share_nx_graph = nx.DiGraph()
+    omgconf = _load_yaml_recursively(
+        yaml_file,
+        yaml_files=yaml_files,
+        share_nx_graph=share_nx_graph,
+        log_info=log_info,
+    )
+
+    if to_dict:
+        return OmegaConf.to_container(omgconf, resolve=True)
+    else:
+        return omgconf
+
+def load_yaml_with_PC_abbr(
+    yaml_file, pc_abbr_to_working_disk=DEFAULT_ABBR_WORKING_DISK
+):
+    # current PC abbreviation
+    pc_abbr = get_PC_abbr_name()
+
+    # current plaftform: windows or linux
+    current_platform = platform.system().lower()
+
+    assert pc_abbr in pc_abbr_to_working_disk, f"The is no mapping for {pc_abbr} to <working_disk>"
+
+    # working disk
+    working_disk = pc_abbr_to_working_disk.get(pc_abbr)
+
+    # load yaml file
+    data_dict = load_yaml(yaml_file=yaml_file, to_dict=True)
+
+    # Normalize paths in the loaded data
+    data_dict = normalize_paths(data_dict, working_disk, current_platform)
+    return data_dict
+
+
+def parse_args():
+    parser = ArgumentParser(description="desc text")
+    parser.add_argument(
+        "-cfg", "--cfg", type=str, help="cfg file", default="cfg__default.yaml"
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    cfg_file = args.cfg
+    cfg = load_yaml(cfg_file, to_dict=True)
+    console.rule()
+    pprint(cfg)
+
+
+if __name__ == "__main__":
+    main()
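The yamlfile module above implements config inheritance: a child YAML names its parent(s) under __base__, parents are loaded recursively and merged with OmegaConf (child values win), and a networkx graph guards against cyclic includes. A small illustration under two hypothetical config files placed side by side; the keys and values are made up.

# base.yaml contains:
#     lr: 0.001
#     batch_size: 32
#
# exp.yaml contains:
#     __base__: base.yaml
#     batch_size: 64

from halib.filetype import yamlfile

cfg = yamlfile.load_yaml("exp.yaml", to_dict=True)
# The child overrides batch_size and inherits lr; the __base__ key remains in the merge:
# {'__base__': 'base.yaml', 'lr': 0.001, 'batch_size': 64}
print(cfg)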
halib/gdrive.py
CHANGED
@@ -12,7 +12,7 @@ from pydrive.auth import GoogleAuth
 from pydrive.drive import GoogleDrive
 from pydrive.files import GoogleDriveFileList
 
-from halib.
+from halib.system import filesys
 from halib.filetype import textfile
 
 # Import general libraries
|