halib 0.1.7__py3-none-any.whl → 0.1.99__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. halib/__init__.py +84 -0
  2. halib/common.py +151 -0
  3. halib/cuda.py +39 -0
  4. halib/dataset.py +209 -0
  5. halib/filetype/csvfile.py +151 -45
  6. halib/filetype/ipynb.py +63 -0
  7. halib/filetype/jsonfile.py +1 -1
  8. halib/filetype/textfile.py +4 -4
  9. halib/filetype/videofile.py +44 -33
  10. halib/filetype/yamlfile.py +95 -0
  11. halib/gdrive.py +1 -1
  12. halib/online/gdrive.py +104 -54
  13. halib/online/gdrive_mkdir.py +29 -17
  14. halib/online/gdrive_test.py +31 -18
  15. halib/online/projectmake.py +58 -43
  16. halib/plot.py +296 -11
  17. halib/projectmake.py +1 -1
  18. halib/research/__init__.py +0 -0
  19. halib/research/base_config.py +100 -0
  20. halib/research/base_exp.py +100 -0
  21. halib/research/benchquery.py +131 -0
  22. halib/research/dataset.py +208 -0
  23. halib/research/flop_csv.py +34 -0
  24. halib/research/flops.py +156 -0
  25. halib/research/metrics.py +133 -0
  26. halib/research/mics.py +68 -0
  27. halib/research/params_gen.py +108 -0
  28. halib/research/perfcalc.py +336 -0
  29. halib/research/perftb.py +780 -0
  30. halib/research/plot.py +758 -0
  31. halib/research/profiler.py +300 -0
  32. halib/research/torchloader.py +162 -0
  33. halib/research/wandb_op.py +116 -0
  34. halib/rich_color.py +285 -0
  35. halib/sys/filesys.py +17 -10
  36. halib/system/__init__.py +0 -0
  37. halib/system/cmd.py +8 -0
  38. halib/system/filesys.py +124 -0
  39. halib/tele_noti.py +166 -0
  40. halib/torchloader.py +162 -0
  41. halib/utils/__init__.py +0 -0
  42. halib/utils/dataclass_util.py +40 -0
  43. halib/utils/dict_op.py +9 -0
  44. halib/utils/gpu_mon.py +58 -0
  45. halib/utils/listop.py +13 -0
  46. halib/utils/tele_noti.py +166 -0
  47. halib/utils/video.py +82 -0
  48. halib/videofile.py +1 -1
  49. halib-0.1.99.dist-info/METADATA +209 -0
  50. halib-0.1.99.dist-info/RECORD +64 -0
  51. {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/WHEEL +1 -1
  52. halib-0.1.7.dist-info/METADATA +0 -59
  53. halib-0.1.7.dist-info/RECORD +0 -30
  54. {halib-0.1.7.dist-info → halib-0.1.99.dist-info/licenses}/LICENSE.txt +0 -0
  55. {halib-0.1.7.dist-info → halib-0.1.99.dist-info}/top_level.txt +0 -0
halib/torchloader.py ADDED
@@ -0,0 +1,162 @@
1
+ """
2
+ * @author Hoang Van-Ha
3
+ * @email hoangvanhauit@gmail.com
4
+ * @create date 2024-03-27 15:40:22
5
+ * @modify date 2024-03-27 15:40:22
6
+ * @desc this module works as a utility tool for finding the best configuration for dataloader (num_workers, batch_size, pin_memory, etc.) that fits your hardware.
7
+ """
8
+ from argparse import ArgumentParser
9
+ from .common import *
10
+ from .filetype import csvfile
11
+ from .filetype.yamlfile import load_yaml
12
+ from rich import inspect
13
+ from torch.utils.data import DataLoader
14
+ from torchvision import datasets, transforms
15
+ from tqdm import tqdm
16
+ from typing import Union
17
+ import itertools as it # for cartesian product
18
+ import os
19
+ import time
20
+ import traceback
21
+
22
+
23
def parse_args():
    """Parse CLI arguments; -cfg/--cfg gives the search-space config file."""
    arg_parser = ArgumentParser(description="desc text")
    arg_parser.add_argument("-cfg", "--cfg", type=str, help="cfg file for searching")
    return arg_parser.parse_args()
27
+
28
+
29
def get_test_range(cfg: dict, search_item="num_workers"):
    """Resolve the list of candidate values for *search_item* from the config.

    The value at ``cfg["search_space"][search_item]`` may be:
      * a list   -> returned as-is;
      * a dict   -> must carry a "mode" key ("range" or "list") plus the
                    matching data ("range": [start, stop, step], or "list");
      * a scalar -> wrapped in a one-element list (int, float, str, bool, ...).

    Args:
        cfg: parsed YAML config containing a "search_space" section.
        search_item: key inside "search_space" to resolve.

    Returns:
        list: candidate values to test for the search item.

    Raises:
        ValueError: if the item is absent, a dict entry has no "mode" key,
            or the mode's data is missing.
    """
    item_search_cfg = cfg["search_space"].get(search_item, None)
    if item_search_cfg is None:
        raise ValueError(f"search_item: {search_item} not found in cfg")
    if isinstance(item_search_cfg, list):
        return item_search_cfg
    elif isinstance(item_search_cfg, dict):
        # BUG FIX: a dict without a "mode" key previously fell through and
        # returned None silently; fail loudly instead.
        if "mode" not in item_search_cfg:
            raise ValueError(f"dict entry <{search_item}> must define a 'mode' key")
        mode = item_search_cfg["mode"]
        assert mode in ["range", "list"], f"mode: {mode} not supported"
        value_in_mode = item_search_cfg.get(mode, None)
        if value_in_mode is None:
            raise ValueError(f"mode<{mode}>: data not found in <{search_item}>")
        if mode == "range":
            assert len(value_in_mode) == 3, "range must have 3 values: start, stop, step"
            start, stop, step = value_in_mode
            return list(range(start, stop, step))
        else:  # mode == "list"
            return item_search_cfg["list"]
    else:
        return [item_search_cfg]  # for int, float, str, bool, etc.
52
+
53
+
54
def load_an_batch(loader_iter):
    """Pull one batch from *loader_iter* and return the elapsed time in seconds.

    Uses time.perf_counter() instead of time.time(): it is monotonic and has
    higher resolution, so short batch-load timings are not skewed by system
    clock adjustments.

    Args:
        loader_iter: iterator over a DataLoader; advanced by one batch.

    Returns:
        float: seconds spent in next(loader_iter).
    """
    start = time.perf_counter()
    next(loader_iter)
    end = time.perf_counter()
    return end - start
59
+
60
+
61
def test_dataloader_with_cfg(origin_dataloader: DataLoader, cfg: Union[dict, str]):
    """Grid-search DataLoader settings and report average batch-load times.

    For every combination of (batch_size, num_workers, persistent_workers,
    pin_memory) described in *cfg*, a new DataLoader is built over the SAME
    dataset as *origin_dataloader* and the average wall time to load
    ``batch_limit`` batches is measured. Results are rendered as a table and
    optionally written to CSV.

    Args:
        origin_dataloader: loader whose dataset is reused for every trial.
        cfg: config dict, or path to a YAML file (loaded via load_yaml).
    """
    try:
        if isinstance(cfg, str):
            cfg = load_yaml(cfg, to_dict=True)
        dfmk = csvfile.DFCreator()
        search_items = ["batch_size", "num_workers", "persistent_workers", "pin_memory"]
        batch_limit = cfg["general"]["batch_limit"]
        csv_cfg = cfg["general"]["to_csv"]
        log_batch_info = cfg["general"]["log_batch_info"]

        save_to_csv = csv_cfg["enabled"]
        log_dir = csv_cfg["log_dir"]
        filename = csv_cfg["filename"]
        # BUG FIX: the configured filename was immediately overwritten and
        # discarded; keep it, prefixed with a timestamp so runs don't collide.
        filename = f"{now_str()}_{filename}.csv"
        outfile = os.path.join(log_dir, filename)

        dfmk.create_table(
            "cfg_search",
            (search_items + ["avg_time_taken"]),
        )
        # Per search item, build a list of (item, value) pairs; the cartesian
        # product over all items enumerates every configuration to try.
        ls_range_test = []
        for item in search_items:
            range_test = get_test_range(cfg, search_item=item)
            ls_range_test.append([(item, v) for v in range_test])

        all_combinations = list(it.product(*ls_range_test))

        rows = []
        for cfg_idx, combine in enumerate(all_combinations):
            console.rule(f"Testing cfg {cfg_idx+1}/{len(all_combinations)}")
            inspect(combine)
            batch_size = combine[search_items.index("batch_size")][1]
            num_workers = combine[search_items.index("num_workers")][1]
            persistent_workers = combine[search_items.index("persistent_workers")][1]
            pin_memory = combine[search_items.index("pin_memory")][1]

            # Rebuild the loader on the same dataset with candidate settings.
            test_dataloader = DataLoader(
                origin_dataloader.dataset,
                batch_size=batch_size,
                num_workers=num_workers,
                persistent_workers=persistent_workers,
                pin_memory=pin_memory,
                shuffle=True,
            )
            row = [batch_size, num_workers, persistent_workers, pin_memory, 0.0]

            # calculate the avg time taken to load the data for <batch_limit> batches
            trainiter = iter(test_dataloader)
            time_elapsed = 0
            pprint('Start testing...')
            for i in tqdm(range(batch_limit)):
                single_batch_time = load_an_batch(trainiter)
                if log_batch_info:
                    pprint(f"Batch {i+1} took {single_batch_time:.4f} seconds to load")
                time_elapsed += single_batch_time
            row[-1] = time_elapsed / batch_limit
            rows.append(row)
        dfmk.insert_rows('cfg_search', rows)
        dfmk.fill_table_from_row_pool('cfg_search')
        with ConsoleLog("results"):
            csvfile.fn_display_df(dfmk['cfg_search'])
        if save_to_csv:
            dfmk["cfg_search"].to_csv(outfile, index=False)
            console.print(f"[red] Data saved to <{outfile}> [/red]")

    except Exception as e:
        traceback.print_exc()
        print(e)
        # Point the user to the reference config shipped next to this module.
        current_dir = os.path.dirname(os.path.realpath(__file__))
        standar_cfg_path = os.path.join(current_dir, "torchloader_search.yaml")
        pprint(
            f"Make sure you get the right <cfg.yaml> file. An example of <cfg.yaml> file can be found at this path: {standar_cfg_path}"
        )
        return
136
+
137
def main():
    """CLI entry: load the search config and benchmark loaders on CIFAR-10."""
    cli_args = parse_args()
    cfg_dict = load_yaml(cli_args.cfg, to_dict=True)

    # Augmentation + normalization pipeline for the probe dataset.
    transform = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),  # random horizontal flip
            transforms.RandomRotation(10),      # random rotation up to 10 degrees
            transforms.ToTensor(),              # PIL image -> tensor
            # Normalize pixel values to [-1, 1]
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )
    test_dataset = datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform
    )
    train_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
    test_dataloader_with_cfg(train_loader, cfg_dict)


if __name__ == "__main__":
    main()
halib/utils/dataclass_util.py ADDED
@@ -0,0 +1,40 @@
1
+ import yaml
2
+ from typing import Any
3
+ from rich.pretty import pprint
4
+ from ..filetype import yamlfile
5
+ # from halib.filetype import yamlfile
6
+ from dataclasses import make_dataclass
7
+
8
def dict_to_dataclass(name: str, data: dict):
    """Recursively build a dynamic dataclass instance mirroring *data*.

    Nested dicts become nested dataclasses named after their (capitalized)
    key; None values are typed as Any.
    """
    field_specs = []
    init_kwargs = {}

    for key, value in data.items():
        if isinstance(value, dict):
            nested = dict_to_dataclass(key.capitalize(), value)
            field_specs.append((key, type(nested)))
            init_kwargs[key] = nested
        else:
            field_specs.append((key, Any if value is None else type(value)))
            init_kwargs[key] = value

    cls = make_dataclass(name.capitalize(), field_specs)
    return cls(**init_kwargs)
24
+
25
def yaml_to_dataclass(name: str, yaml_str: str):
    """Parse a YAML string and convert the resulting mapping to a dataclass."""
    parsed = yaml.safe_load(yaml_str)
    return dict_to_dataclass(name, parsed)
28
+
29
+
30
def yamlfile_to_dataclass(name: str, file_path: str):
    """Load a YAML file and convert it to a dataclass instance.

    The '__base__' key (config-inheritance metadata) is dropped before
    conversion, as it is not a real config field.
    """
    data_dict = yamlfile.load_yaml(file_path, to_dict=True)
    data_dict.pop("__base__", None)
    return dict_to_dataclass(name, data_dict)
35
+
36
# Manual smoke test: convert a sample YAML config into a nested dataclass.
if __name__ == "__main__":
    cfg = yamlfile_to_dataclass("Config", "test/dataclass_util_test_cfg.yaml")

    # NOTE: after printing this dataclass, the output can be copied into an
    # LLM (e.g. ChatGPT) to generate the equivalent static dataclass
    # definitions using `from dataclass_wizard import YAMLWizard`.
    pprint(cfg)
halib/utils/dict_op.py ADDED
@@ -0,0 +1,9 @@
1
def flatten_dict(d, parent_key="", sep="."):
    """Flatten a nested dict into one level, joining key paths with *sep*.

    Example: {"a": {"b": 1}} -> {"a.b": 1}.
    """
    flat = {}
    for key, value in d.items():
        full_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_dict(value, full_key, sep=sep))
        else:
            flat[full_key] = value
    return flat
halib/utils/gpu_mon.py ADDED
@@ -0,0 +1,58 @@
1
+ # install `pynvml_utils` package first
2
+ # see this repo: https://github.com/gpuopenanalytics/pynvml
3
+ from pynvml_utils import nvidia_smi
4
+ import time
5
+ import threading
6
+ from rich.pretty import pprint
7
+
8
class GPUMonitor:
    """Background sampler of GPU power draw and memory usage via nvidia-smi.

    A worker thread polls ``nvidia_smi.DeviceQuery`` every *interval* seconds
    between start() and stop(); get_stats() aggregates the collected samples.
    """

    def __init__(self, gpu_index=0, interval=0.01):
        self.nvsmi = nvidia_smi.getInstance()
        self.gpu_index = gpu_index
        self.interval = interval      # seconds between samples
        self.gpu_stats = []           # samples: power/power_unit/memory/memory_unit
        self._running = False
        self._thread = None

    def _monitor(self):
        """Polling loop executed on the worker thread."""
        while self._running:
            gpu_info = self.nvsmi.DeviceQuery("power.draw, memory.used")["gpu"][
                self.gpu_index
            ]
            sample = {
                "power": gpu_info["power_readings"]["power_draw"],
                "power_unit": gpu_info["power_readings"]["unit"],
                "memory": gpu_info["fb_memory_usage"]["used"],
                "memory_unit": gpu_info["fb_memory_usage"]["unit"],
            }
            self.gpu_stats.append(sample)
            time.sleep(self.interval)

    def start(self):
        """Begin sampling; any stats from a previous run are discarded."""
        if self._running:
            return
        self._running = True
        self.gpu_stats.clear()
        self._thread = threading.Thread(target=self._monitor)
        self._thread.start()

    def stop(self):
        """Stop sampling and join the worker thread."""
        if not self._running:
            return
        self._running = False
        self._thread.join()
        self._thread = None

    def get_stats(self):
        """Aggregate samples into average power and peak memory.

        Must be called after stop(). Samples with None readings are ignored.

        Returns:
            dict with keys "gpu_avg_power" and "gpu_avg_max_memory"
            (0 when no usable samples exist).
        """
        assert self._running is False, "GPU monitor is still running. Stop it first."

        powers = [s["power"] for s in self.gpu_stats if s["power"] is not None]
        memories = [s["memory"] for s in self.gpu_stats if s["memory"] is not None]
        avg_power = sum(powers) / len(powers) if powers else 0
        max_memory = max(memories) if memories else 0
        return {"gpu_avg_power": avg_power, "gpu_avg_max_memory": max_memory}
halib/utils/listop.py ADDED
@@ -0,0 +1,13 @@
1
def subtract(list_a, list_b):
    """Return elements of list_a that do not appear in list_b (order kept)."""
    remaining = []
    for item in list_a:
        if item not in list_b:
            remaining.append(item)
    return remaining
3
+
4
+
5
def union(list_a, list_b, no_duplicate=False):
    """Concatenate two lists; deduplicate (unordered) when no_duplicate is True."""
    if not no_duplicate:
        return list_a + list_b
    return list(set(list_a) | set(list_b))
10
+
11
+
12
def intersection(list_a, list_b):
    """Return the (unordered, deduplicated) common elements of both lists."""
    return list(set(list_a).intersection(set(list_b)))
@@ -0,0 +1,166 @@
1
+ # Watch a log file and send a telegram message when train reaches a certain epoch or end
2
+
3
+ import os
4
+ import yaml
5
+ import asyncio
6
+ import telegram
7
+ import pandas as pd
8
+
9
+ from rich.pretty import pprint
10
+ from rich.console import Console
11
+ import plotly.graph_objects as go
12
+
13
+ from ..system import filesys as fs
14
+ from ..filetype import textfile, csvfile
15
+
16
+ from argparse import ArgumentParser
17
+
18
+ tele_console = Console()
19
+
20
+
21
def parse_args():
    """Parse CLI arguments; -cfg/--cfg points to the notifier YAML config."""
    arg_parser = ArgumentParser(description="desc text")
    arg_parser.add_argument(
        "-cfg",
        "--cfg",
        type=str,
        help="yaml file for tele",
        default=r"E:\Dev\halib\cfg_tele_noti.yaml",
    )
    return arg_parser.parse_args()
32
+
33
+
34
def get_watcher_message_df(target_file, num_last_lines):
    """Build a DataFrame holding the last *num_last_lines* lines of *target_file*.

    Text/log files yield one row per line, with the most recent line
    mentioning "epoch" prepended as a status row; CSV files yield the last
    rows of the parsed table.

    Args:
        target_file: path to a .txt, .log or .csv file being watched.
        num_last_lines: number of trailing lines/rows to keep; clamped to
            the file length.

    Returns:
        pandas.DataFrame with the selected lines/rows.
    """
    # fs.get_file_name(..., split_file_ext=True) — presumably returns
    # (name, ext); index [1] takes the extension. TODO confirm against filesys.
    file_ext = fs.get_file_name(target_file, split_file_ext=True)[1]
    supported_ext = [".txt", ".log", ".csv"]
    assert (
        file_ext in supported_ext
    ), f"File extension {file_ext} not supported. Supported extensions are {supported_ext}"
    last_lines_df = None
    if file_ext in [".txt", ".log"]:
        lines = textfile.read_line_by_line(target_file)
        # clamp to the number of available lines
        if num_last_lines > len(lines):
            num_last_lines = len(lines)
        last_line_arr = lines[-num_last_lines:]
        # add a line start with word "epoch"
        epoch_info_list = "Epoch: n/a"
        # scan backwards so we pick up the MOST RECENT epoch mention
        for line in reversed(lines):
            if "epoch" in line.lower():
                epoch_info_list = line
                break
        last_line_arr.insert(0, epoch_info_list)  # insert at the beginning
        dfCreator = csvfile.DFCreator()
        dfCreator.create_table("last_lines", ["line"])
        # DFCreator consumes rows (lists), so wrap each line in a 1-item list
        last_line_arr = [[line] for line in last_line_arr]
        dfCreator.insert_rows("last_lines", last_line_arr)
        dfCreator.fill_table_from_row_pool("last_lines")
        last_lines_df = dfCreator["last_lines"].copy()
    else:
        # CSV: take the trailing rows of the parsed table
        df = pd.read_csv(target_file)
        num_rows = len(df)
        if num_last_lines > num_rows:
            num_last_lines = num_rows
        last_lines_df = df.tail(num_last_lines)
    return last_lines_df
66
+
67
+
68
def df2img(df: pd.DataFrame, output_img_dir, decimal_places, out_img_scale):
    """Render *df* as a plotly table image and return the written PNG path."""
    rounded = df.round(decimal_places)
    table = go.Table(
        header=dict(values=list(rounded.columns), align="center"),
        cells=dict(
            values=rounded.values.transpose(),
            # alternating white/lightgrey row striping
            fill_color=[["white", "lightgrey"] * rounded.shape[0]],
            align="center",
        ),
    )
    fig = go.Figure(data=[table])
    if not os.path.exists(output_img_dir):
        os.makedirs(output_img_dir)
    img_path = os.path.normpath(os.path.join(output_img_dir, "last_lines.png"))
    fig.write_image(img_path, scale=out_img_scale)
    return img_path
87
+
88
+
89
def compose_message_and_img_path(
    target_file, project, num_last_lines, decimal_places, out_img_scale, output_img_dir
):
    """Build the notification text and, best-effort, a rendered table image.

    Returns (context_msg, img_path); img_path is None when rendering fails.
    """
    context_msg = f">> Project: {project} \n>> File: {target_file} \n>> Last {num_last_lines} lines:"
    msg_df = get_watcher_message_df(target_file, num_last_lines)
    img_path = None
    try:
        img_path = df2img(msg_df, output_img_dir, decimal_places, out_img_scale)
    except Exception as e:
        # image rendering is optional; fall back to a text-only message
        pprint(f"Error: {e}")
    return context_msg, img_path
100
+
101
+
102
async def send_to_telegram(cfg_dict, interval_in_sec):
    """Send one status update (text plus optional table image) to Telegram.

    Reads bot credentials and watcher settings from *cfg_dict*, composes the
    message from the watched file's tail, announces when the next message is
    due (*interval_in_sec* seconds from now), and sends via python-telegram-bot.
    All errors are logged; nothing is raised to the caller.

    Args:
        cfg_dict: parsed YAML config with "telegram" and "noti_settings" sections.
        interval_in_sec: seconds until the next scheduled message.
    """
    # pprint(cfg_dict)
    token = cfg_dict["telegram"]["token"]
    chat_id = cfg_dict["telegram"]["chat_id"]

    noti_settings = cfg_dict["noti_settings"]
    project = noti_settings["project"]
    target_file = noti_settings["target_file"]
    num_last_lines = noti_settings["num_last_lines"]
    output_img_dir = noti_settings["output_img_dir"]
    decimal_places = noti_settings["decimal_places"]
    out_img_scale = noti_settings["out_img_scale"]

    bot = telegram.Bot(token=token)
    async with bot:
        try:
            context_msg, img_path = compose_message_and_img_path(
                target_file,
                project,
                num_last_lines,
                decimal_places,
                out_img_scale,
                output_img_dir,
            )
            # NOTE(review): `next_time` assigned here is overwritten below;
            # only `time_now` is actually used from this line.
            time_now = next_time = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
            sep_line = "-" * 50
            context_msg = f"{sep_line}\n>> Time: {time_now}\n{context_msg}"
            # calculate the next time to send message
            next_time = pd.Timestamp.now() + pd.Timedelta(seconds=interval_in_sec)
            next_time = next_time.strftime("%Y-%m-%d %H:%M:%S")
            next_time_info = f"Next msg: {next_time}"
            tele_console.rule()
            tele_console.print("[green] Send message to telegram [/green]")
            tele_console.print(
                f"[red] Next message will be sent at <{next_time}> [/red]"
            )
            await bot.send_message(text=context_msg, chat_id=chat_id)
            if img_path:
                # NOTE(review): file handle passed to send_photo is never
                # explicitly closed — left to garbage collection.
                await bot.send_photo(chat_id=chat_id, photo=open(img_path, "rb"))
            await bot.send_message(text=next_time_info, chat_id=chat_id)
        except Exception as e:
            pprint(f"Error: {e}")
            pprint("Message not sent to telegram")
145
+
146
+
147
async def run_forever(cfg_path):
    """Send a Telegram status message every configured interval, forever.

    Args:
        cfg_path: path to the YAML config holding "telegram" and
            "noti_settings" (including "interval_in_min").
    """
    # BUG FIX: the config file handle was opened inline and never closed;
    # use a context manager so it is released deterministically.
    with open(cfg_path, "r") as cfg_file:
        cfg_dict = yaml.safe_load(cfg_file)
    noti_settings = cfg_dict["noti_settings"]
    interval_in_min = noti_settings["interval_in_min"]
    interval_in_sec = int(interval_in_min * 60)
    pprint(
        f"Message will be sent every {interval_in_min} minutes or {interval_in_sec} seconds"
    )
    while True:
        await send_to_telegram(cfg_dict, interval_in_sec)
        await asyncio.sleep(interval_in_sec)
158
+
159
+
160
async def main():
    """Async entry point: parse CLI args and run the notification loop."""
    cli_args = parse_args()
    await run_forever(cli_args.cfg)


if __name__ == "__main__":
    asyncio.run(main())
halib/utils/video.py ADDED
@@ -0,0 +1,82 @@
1
+ import os
2
+ import cv2
3
+ from ..filetype import csvfile
4
+ from ..system import filesys as fs
5
+
6
+
7
class VideoUtils:
    """Helpers for extracting metadata from video files and directories."""

    @staticmethod
    def _default_meta_extractor(video_path):
        """Read basic metadata (size, frame count, fps) via OpenCV.

        Returns None when the file cannot be opened.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error: Could not open video file {video_path}")
            return None
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cap.release()
        return {
            "video_path": video_path,
            "width": width,
            "height": height,
            "frame_count": frame_count,
            "fps": fps
        }

    @staticmethod
    def get_video_meta_dict(video_path, meta_dict_extractor_func=None):
        """Extract a metadata dict for one video, optionally via a custom extractor.

        A custom extractor must accept exactly one argument (the path) and
        return a dict that contains at least 'video_path'.
        """
        assert os.path.exists(video_path), f"Video file {video_path} does not exist"
        if meta_dict_extractor_func and callable(meta_dict_extractor_func):
            assert meta_dict_extractor_func.__code__.co_argcount == 1, "meta_dict_extractor_func must take exactly one argument (video_path)"
            custom_meta = meta_dict_extractor_func(video_path)
            assert isinstance(custom_meta, dict), "meta_dict_extractor_func must return a dictionary"
            assert 'video_path' in custom_meta, "meta_dict must contain 'video_path'"
            return custom_meta
        return VideoUtils._default_meta_extractor(video_path=video_path)

    @staticmethod
    def get_video_dir_meta_df(video_dir, video_exts=['.mp4', '.avi', '.mov', '.mkv'], search_recursive=False, csv_outfile=None):
        """Collect metadata for every video under *video_dir* into a DataFrame.

        Optionally writes the table to *csv_outfile* (';'-separated).
        Returns a copy of the assembled DataFrame.
        """
        assert os.path.exists(video_dir), f"Video directory {video_dir} does not exist"
        video_files = fs.filter_files_by_extension(video_dir, video_exts, recursive=search_recursive)
        assert len(video_files) > 0, f"No video files found in {video_dir} with extensions {video_exts}"
        # skip files whose metadata could not be extracted (None)
        meta_rows = [m for m in (VideoUtils.get_video_meta_dict(v) for v in video_files) if m]
        dfmk = csvfile.DFCreator()
        columns = list(meta_rows[0].keys())
        assert len(columns) > 0, "No video metadata found"
        assert 'video_path' in columns, "video_path column not found in video metadata"
        # keep video_path as the leading column
        columns.remove('video_path')
        columns.insert(0, 'video_path')
        dfmk.create_table("video_meta", columns)
        dfmk.insert_rows("video_meta", [[meta[col] for col in columns] for meta in meta_rows])
        dfmk.fill_table_from_row_pool("video_meta")

        if csv_outfile:
            dfmk["video_meta"].to_csv(csv_outfile, index=False, sep=";")
        return dfmk["video_meta"].copy()
78
+
79
+
80
+
81
+
82
+
halib/videofile.py CHANGED
@@ -5,7 +5,7 @@ import enlighten
5
5
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
6
6
  from tube_dl import Youtube, Playlist
7
7
 
8
- from halib.sys import filesys
8
+ from halib.system import filesys
9
9
  from halib.filetype import textfile
10
10
 
11
11