simba-uw-tf-dev 4.7.2__py3-none-any.whl → 4.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. simba/SimBA.py +1178 -1171
  2. simba/assets/icons/left_arrow_green.png +0 -0
  3. simba/assets/icons/left_arrow_red.png +0 -0
  4. simba/assets/icons/right_arrow_green.png +0 -0
  5. simba/assets/icons/right_arrow_red.png +0 -0
  6. simba/assets/lookups/yolo_schematics/yolo_mitra.csv +1 -1
  7. simba/mixins/image_mixin.py +129 -4
  8. simba/model/yolo_fit.py +22 -15
  9. simba/model/yolo_pose_inference.py +7 -2
  10. simba/roi_tools/roi_utils.py +2 -2
  11. simba/sandbox/convert_h264_to_mp4_lossless.py +129 -0
  12. simba/sandbox/extract_and_convert_videos.py +257 -0
  13. simba/sandbox/remove_end_of_video.py +80 -0
  14. simba/sandbox/video_timelaps.py +291 -0
  15. simba/ui/pop_ups/run_machine_models_popup.py +2 -2
  16. simba/ui/pop_ups/video_processing_pop_up.py +3637 -3469
  17. simba/ui/tkinter_functions.py +3 -1
  18. simba/ui/video_timelaps.py +332 -0
  19. simba/utils/lookups.py +67 -1
  20. simba/utils/read_write.py +10 -3
  21. simba/video_processors/batch_process_create_ffmpeg_commands.py +0 -1
  22. simba/video_processors/video_processing.py +5385 -5264
  23. {simba_uw_tf_dev-4.7.2.dist-info → simba_uw_tf_dev-4.7.4.dist-info}/METADATA +1 -1
  24. {simba_uw_tf_dev-4.7.2.dist-info → simba_uw_tf_dev-4.7.4.dist-info}/RECORD +28 -19
  25. {simba_uw_tf_dev-4.7.2.dist-info → simba_uw_tf_dev-4.7.4.dist-info}/LICENSE +0 -0
  26. {simba_uw_tf_dev-4.7.2.dist-info → simba_uw_tf_dev-4.7.4.dist-info}/WHEEL +0 -0
  27. {simba_uw_tf_dev-4.7.2.dist-info → simba_uw_tf_dev-4.7.4.dist-info}/entry_points.txt +0 -0
  28. {simba_uw_tf_dev-4.7.2.dist-info → simba_uw_tf_dev-4.7.4.dist-info}/top_level.txt +0 -0
Binary file
Binary file
Binary file
Binary file
@@ -6,4 +6,4 @@ right_side
6
6
  tail_base
7
7
  center
8
8
  tail_center
9
- tail_tip
9
+ tail_tip
@@ -18,6 +18,7 @@ from collections import ChainMap
18
18
  import cv2
19
19
  import pandas as pd
20
20
  from numba import float64, int64, jit, njit, prange, uint8
21
+ from PIL import Image, ImageDraw, ImageFont
21
22
  from shapely.geometry import Polygon
22
23
  from skimage.metrics import structural_similarity
23
24
 
@@ -30,11 +31,12 @@ from simba.utils.checks import (check_file_exist_and_readable, check_float,
30
31
  from simba.utils.data import terminate_cpu_pool
31
32
  from simba.utils.enums import Defaults, Formats, GeometryEnum, Options
32
33
  from simba.utils.errors import ArrayError, FrameRangeError, InvalidInputError
34
+ from simba.utils.lookups import get_fonts
33
35
  from simba.utils.printing import SimbaTimer, stdout_success
34
36
  from simba.utils.read_write import (find_core_cnt,
35
37
  find_files_of_filetypes_in_directory,
36
38
  get_fn_ext, get_video_meta_data,
37
- read_frm_of_video)
39
+ read_frm_of_video, seconds_to_timestamp)
38
40
 
39
41
 
40
42
  class ImageMixin(object):
@@ -2052,18 +2054,141 @@ class ImageMixin(object):
2052
2054
 
2053
2055
  return denoised_img
2054
2056
 
2057
@staticmethod
def get_timelapse_img(video_path: Union[str, os.PathLike],
                      frame_cnt: int = 25,
                      size: Optional[int] = None,
                      crop_ratio: int = 50) -> np.ndarray:
    """
    Create a timelapse image from a video.

    ``frame_cnt`` frames are read at evenly spaced frame indices, resized with a single uniform scale factor,
    cropped with :func:`ImageMixin.segment_img_vertical` (``pct=crop_ratio``, ``left=True``) and concatenated
    horizontally.

    .. image:: _static/img/get_timelapse_img.png
       :width: 600
       :align: center

    :param Union[str, os.PathLike] video_path: Path to the video to create the timelapse image from.
    :param int frame_cnt: Number of frames to grab from the video. There will be an even interval between each frame. Must be 1 or greater.
    :param Optional[int] size: The total width in pixels of the final timelapse image. If None, uses the video width (adjusted for crop_ratio). The final width is approximate as per-frame widths are rounded when resizing.
    :param int crop_ratio: The percent of each original video (from the left) to show.
    :return np.ndarray: The timelapse image as a numpy array

    :example:
    >>> img = ImageMixin.get_timelapse_img(video_path='path/to/video.mp4', size=100)
    """

    # Reject frame_cnt < 1 up-front; it would otherwise surface as a ZeroDivisionError further down.
    check_int(name='frame_cnt', value=frame_cnt, min_value=1, raise_error=True)
    video_meta = get_video_meta_data(video_path=video_path, raise_error=True)
    frm_ids = [int(i * video_meta['frame_count'] / frame_cnt) for i in range(frame_cnt)]
    cap = cv2.VideoCapture(video_path)
    try:
        frms = [read_frm_of_video(video_path=cap, frame_index=x, use_ffmpeg=False) for x in frm_ids]
    finally:
        # Always hand the capture handle back, also if a frame read raises.
        cap.release()

    effective_video_width = int(video_meta['width'] * (crop_ratio / 100))
    if size is None:
        size = effective_video_width
    # Width each frame occupies in the final image, and the width it needs before cropping to end up there.
    per_frame_width_after_crop = size / frame_cnt
    per_frame_width_before_crop = per_frame_width_after_crop / (crop_ratio / 100)
    scale_factor = per_frame_width_before_crop / frms[0].shape[1]
    scaled_frms = [cv2.resize(x, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) for x in frms]
    scaled_frms = [ImageMixin.segment_img_vertical(img=x, pct=crop_ratio, left=True) for x in scaled_frms]

    return cv2.hconcat(scaled_frms)
2060
2096
 
2097
@staticmethod
def create_time_ruler(width: int,
                      video_path: Union[str, os.PathLike],
                      height: int = 60,
                      num_divisions: int = 6,
                      font: str = 'Arial',
                      bg_color: Tuple[int, int, int] = (255, 255, 255),
                      line_color: Tuple[int, int, int] = (128, 128, 128),
                      text_color: Tuple[int, int, int] = (0, 0, 0),
                      padding: int = 60,
                      show_time: bool = True) -> np.ndarray:
    """
    Draw a horizontal ruler with major/minor tick marks and a label under every major tick.

    .. image:: _static/img/create_time_ruler.png
       :width: 600
       :align: center

    :param int width: Width of the ruled span in pixels (should match timelapse image width if one is used).
    :param Union[str, os.PathLike] video_path: Path to video file whose metadata (length in seconds, frame count) is used for the labels.
    :param int height: Height of the ruler in pixels. Default 60.
    :param int num_divisions: Number of major divisions on the ruler. Default 6.
    :param str font: Font name to use for labels. Default 'Arial'. Falls back to the PIL default font if the name is not found or the font file cannot be loaded.
    :param Tuple[int, int, int] bg_color: Background color (R, G, B). Default white.
    :param Tuple[int, int, int] line_color: Color for tick marks and lines (R, G, B). Default grey.
    :param Tuple[int, int, int] text_color: Color for text labels (R, G, B). Default black.
    :param int padding: Blank margin in pixels added on both the left and right side of the ruled span. Default 60.
    :param bool show_time: If True, show time labels, else show frame numbers. Default True.
    :return: Ruler image of size ``height`` x ``width + 2 * padding`` as numpy array (BGR format for OpenCV compatibility)
    :rtype: np.ndarray

    :example:
    >>> ruler = ImageMixin.create_time_ruler(width=1920, video_path='path/to/video.mp4', height=60, num_divisions=6)
    """

    fn_name = ImageMixin.create_time_ruler.__name__
    check_file_exist_and_readable(file_path=video_path)
    for arg_name, arg_value, lowest in (('width', width, 1), ('height', height, 1), ('num_divisions', num_divisions, 1), ('padding', padding, 0)):
        check_int(name=arg_name, value=arg_value, min_value=lowest, raise_error=True)
    check_str(name='font', value=font, allow_blank=False, raise_error=True)
    for rgb in (bg_color, line_color, text_color):
        check_if_valid_rgb_tuple(data=rgb, raise_error=True, source=fn_name)

    meta = get_video_meta_data(video_path=video_path, raise_error=True)
    canvas_width = width + (2 * padding)

    canvas = Image.new('RGB', (canvas_width, height), color=bg_color)
    pen = ImageDraw.Draw(canvas)
    available_fonts = get_fonts()
    try:
        label_font = ImageFont.truetype(available_fonts[font], size=12)
    except (KeyError, OSError):
        label_font = ImageFont.load_default()

    major_len = height * 0.6
    label_y = major_len + 5
    # (fraction of a division, tick length), listed in drawing order: half, quarters, then eighths.
    minor_ticks = ((0.5, height * 0.4),
                   (0.25, height * 0.25), (0.75, height * 0.25),
                   (0.125, height * 0.15), (0.375, height * 0.15), (0.625, height * 0.15), (0.875, height * 0.15))

    for div in range(num_divisions + 1):
        tick_x = padding + int(div * width / num_divisions)
        pen.line([(tick_x, 0), (tick_x, major_len)], fill=line_color, width=2)

        if show_time and meta['video_length_s'] is not None:
            text = seconds_to_timestamp(seconds=div * meta['video_length_s'] / num_divisions)
        elif meta['frame_count'] is not None:
            text = str(int(div * meta['frame_count'] / num_divisions))
        else:
            text = str(div)

        text_left, _, text_right, _ = pen.textbbox((0, 0), text, font=label_font)
        text_w = text_right - text_left
        # First label is left-anchored on its tick, last label right-anchored, the rest centred.
        if div == 0:
            shift = 0
        elif div == num_divisions:
            shift = text_w
        else:
            shift = text_w // 2
        pen.text((tick_x - shift, label_y), text, fill=text_color, font=label_font)

        if div == num_divisions:
            continue  # no minor ticks beyond the final major tick
        for frac, tick_len in minor_ticks:
            minor_x = padding + int((div + frac) * width / num_divisions)
            pen.line([(minor_x, 0), (minor_x, tick_len)], fill=line_color, width=1)

    # Baseline along the bottom edge, spanning the padding as well.
    pen.line([(0, height - 1), (canvas_width, height - 1)], fill=line_color, width=1)
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
2186
+
2187
+
2188
+ # img = ImageMixin.create_time_ruler(width=1920, video_path=r"E:\troubleshooting\mitra_emergence\project_folder\clip_test\Box1_180mISOcontrol_Females_clipped_progress_bar.mp4", height=60, num_divisions=6)
2189
+ #
2190
+ # cv2.imshow('sadasdas', img)
2191
+ # cv2.waitKey(40000)
2067
2192
 
2068
2193
  #x = ImageMixin.get_blob_locations(video_path=r"C:\troubleshooting\RAT_NOR\project_folder\videos\2022-06-20_NOB_DOT_4_downsampled_bg_subtracted.mp4", gpu=True)
2069
2194
  # imgs = ImageMixin().read_all_img_in_dir(dir='/Users/simon/Desktop/envs/simba/troubleshooting/RAT_NOR/project_folder/videos/examples')
simba/model/yolo_fit.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import sys
3
+ from contextlib import redirect_stderr, redirect_stdout
3
4
 
4
5
  os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
5
6
  import argparse
@@ -21,7 +22,8 @@ from simba.utils.checks import (check_file_exist_and_readable,
21
22
  check_valid_boolean, check_valid_device)
22
23
  from simba.utils.enums import Options
23
24
  from simba.utils.errors import SimBAGPUError, SimBAPAckageVersionError
24
- from simba.utils.read_write import find_core_cnt
25
+ from simba.utils.printing import stdout_information
26
+ from simba.utils.read_write import find_core_cnt, get_current_time
25
27
  from simba.utils.yolo import load_yolo_model
26
28
 
27
29
 
@@ -108,20 +110,25 @@ class FitYolo():
108
110
 
109
111
 
110
112
  def run(self):
111
- model = load_yolo_model(weights_path=self.weights_path,
112
- verbose=self.verbose,
113
- format=self.format,
114
- device=self.device)
115
-
116
- model.train(data=self.model_yaml,
117
- epochs=self.epochs,
118
- project=self.save_path,
119
- batch=self.batch,
120
- plots=self.plots,
121
- imgsz=self.imgsz,
122
- workers=self.workers,
123
- device=self.device,
124
- patience=self.patience)
113
+ # Temporarily redirect stdout/stderr to terminal to ensure ultralytics output goes to terminal
114
+ # sys.__stdout__ and sys.__stderr__ are the original terminal streams
115
+ stdout_information(msg=f'[{get_current_time()}] Please follow the YOLO pose model training in the terminal from where SimBA was launched ...', source=self.__class__.__name__)
116
+ stdout_information(msg=f'[{get_current_time()}] Results will be stored in the {self.save_path} directory ..', source=self.__class__.__name__)
117
+ with redirect_stdout(sys.__stdout__), redirect_stderr(sys.__stderr__):
118
+ model = load_yolo_model(weights_path=self.weights_path,
119
+ verbose=self.verbose,
120
+ format=self.format,
121
+ device=self.device)
122
+
123
+ model.train(data=self.model_yaml,
124
+ epochs=self.epochs,
125
+ project=self.save_path,
126
+ batch=self.batch,
127
+ plots=self.plots,
128
+ imgsz=self.imgsz,
129
+ workers=self.workers,
130
+ device=self.device,
131
+ patience=self.patience)
125
132
 
126
133
 
127
134
  if __name__ == "__main__" and not hasattr(sys, 'ps1'):
@@ -34,7 +34,7 @@ from simba.utils.errors import (CountError, InvalidFilepathError,
34
34
  InvalidFileTypeError, SimBAGPUError,
35
35
  SimBAPAckageVersionError)
36
36
  from simba.utils.lookups import get_current_time
37
- from simba.utils.printing import SimbaTimer, stdout_success
37
+ from simba.utils.printing import SimbaTimer, stdout_information, stdout_success
38
38
  from simba.utils.read_write import (find_files_of_filetypes_in_directory,
39
39
  get_video_meta_data, recursive_file_search)
40
40
  from simba.utils.warnings import FileExistWarning, NoDataFoundWarning
@@ -182,7 +182,12 @@ class YOLOPoseInference():
182
182
  results = {}
183
183
  class_dict = self.model.names
184
184
  timer = SimbaTimer(start=True)
185
- print(f'Starting tracking inference for {len(self.video_path)} video(s) ({get_current_time()})... ')
185
+ if self.save_dir is not None:
186
+ msg = f'[{get_current_time()}] Starting tracking inference for {len(self.video_path)} video(s). Results will be saved in {self.save_dir} ... '
187
+ else:
188
+ msg = f'[{get_current_time()}] Starting tracking inference for {len(self.video_path)} video(s) ... '
189
+ stdout_information(msg=msg, source=self.__class__.__name__)
190
+ stdout_information(msg='Follow progress in OS terminal window ...', source=self.__class__.__name__)
186
191
  for video_cnt, path in enumerate(self.video_path):
187
192
  video_timer = SimbaTimer(start=True)
188
193
  _, video_name, _ = get_fn_ext(filepath=path)
@@ -17,8 +17,8 @@ from PIL import ImageTk
17
17
  from scipy.spatial.distance import cdist
18
18
  from shapely.geometry import Polygon
19
19
 
20
- from simba.utils.checks import (check_file_exist_and_readable, check_instance,
21
- check_int, check_str, check_valid_array,
20
+ from simba.utils.checks import (check_file_exist_and_readable, check_int,
21
+ check_str, check_valid_array,
22
22
  check_valid_dataframe, check_valid_tuple,
23
23
  check_video_and_data_frm_count_align)
24
24
  from simba.utils.enums import (ROI_SETTINGS, ConfigKey, Formats, Keys, Options,
@@ -0,0 +1,129 @@
1
+ """
2
+ Convert .h264 files to lossless MP4 format using FFmpeg.
3
+
4
+ This script converts H.264 raw video files to MP4 container format using
5
+ lossless encoding (copy codec) to preserve quality.
6
+ """
7
+ import os
8
+ import subprocess
9
+ import glob
10
+ from pathlib import Path
11
+ from typing import List, Union
12
+
13
def check_ffmpeg_available() -> bool:
    """
    Report whether the ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` with its output captured.

    :return: True if the command exits successfully, False if it exits with an error or the executable cannot be found.
    """
    version_probe = ['ffmpeg', '-version']
    try:
        subprocess.run(version_probe, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    else:
        return True
23
+
24
def convert_h264_to_mp4_lossless(input_path: Union[str, Path],
                                 output_path: Union[str, Path, None] = None) -> bool:
    """
    Convert a single .h264 file to MP4 by copying the streams (no re-encoding).

    Nothing is overwritten: if the output file already exists the file is skipped. If the conversion
    fails, any output file FFmpeg left behind is removed so that a later run does not mistake a partial
    file for a finished conversion.

    :param Union[str, Path] input_path: Path to input .h264 file
    :param Union[str, Path] output_path: Optional output path. If None, creates output in same directory with .mp4 extension
    :return: True if conversion successful, False otherwise (input missing, output already exists, or FFmpeg failed)
    """
    input_path = Path(input_path)

    if not input_path.exists():
        print(f"[ERROR] File not found: {input_path}")
        return False

    output_path = input_path.with_suffix('.mp4') if output_path is None else Path(output_path)

    if output_path.exists():
        print(f"[SKIP] Output file already exists: {output_path}")
        return False

    # FFmpeg command for lossless conversion (copy codec, no re-encoding)
    cmd = [
        'ffmpeg',
        '-i', str(input_path),
        '-c:v', 'copy',  # Copy video stream without re-encoding (lossless)
        '-c:a', 'copy',  # Copy audio stream if present (lossless)
        '-y',  # Never prompt: the output was verified absent above, so this only matters if it appears meanwhile
        str(output_path)
    ]

    try:
        print(f"Converting {input_path.name} -> {output_path.name}...")
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f" [ERROR] FFmpeg error for {input_path.name}: {e.stderr}")
    except Exception as e:
        print(f" [ERROR] Unexpected error for {input_path.name}: {e}")
    else:
        print(f" [OK] Successfully converted {input_path.name}")
        return True

    # Failure path: the output did not exist before this call, so anything at output_path is a partial file.
    try:
        if output_path.exists():
            output_path.unlink()
    except OSError as e:
        print(f" [WARNING] Could not remove incomplete output {output_path}: {e}")
    return False
78
+
79
def convert_all_h264_files(directory: str,
                           pattern: str = None) -> None:
    """
    Convert all .h264 files in a directory to MP4 and print a summary.

    Only files directly inside ``directory`` are considered (no recursion). Files whose ``.mp4``
    counterpart already exists are reported as skipped rather than failed.

    :param str directory: Directory containing .h264 files
    :param str pattern: Optional pattern to match in filename (e.g., '4.03.001_6_2026_01_16_09_15_00_000')
    """
    dir_path = Path(directory)

    if not dir_path.exists():
        print(f"[ERROR] Directory does not exist: {directory}")
        return

    if not check_ffmpeg_available():
        print("[ERROR] FFmpeg is not available. Please install FFmpeg.")
        return

    # Find all .h264 files, optionally keeping only those whose name contains the pattern
    h264_files = sorted(f for f in dir_path.glob('*.h264') if not pattern or pattern in f.name)

    if not h264_files:
        pattern_msg = f" matching pattern '{pattern}'" if pattern else ""
        print(f"No .h264 files found{pattern_msg} in {directory}")
        return

    print(f"Found {len(h264_files)} .h264 file(s) to convert...")

    successful = 0
    failed = 0
    skipped = 0

    for h264_file in h264_files:
        # The converter returns False both for skips and for errors; check beforehand which one applies.
        output_already_present = h264_file.with_suffix('.mp4').exists()
        if convert_h264_to_mp4_lossless(h264_file):
            successful += 1
        elif output_already_present:
            skipped += 1
        else:
            failed += 1

    print(f"\nConversion complete!")
    print(f" Successful: {successful}")
    print(f" Failed: {failed}")
    print(f" Skipped (output exists): {skipped}")
    print(f" Total: {len(h264_files)}")
123
+
124
if __name__ == "__main__":
    # Script entry point: runs the batch conversion on the hard-coded directory below.
    # Convert all .h264 files in the directory
    directory = r"E:\lp_videos_tar"
    pattern = None  # Convert all .h264 files, not just matching a pattern

    convert_all_h264_files(directory, pattern=pattern)
@@ -0,0 +1,257 @@
1
+ """
2
+ Extract tar files and convert all videos to lossless MP4 format.
3
+
4
+ This script:
5
+ 1. Extracts all .tar, .tar.gz, and .tgz files in a directory
6
+ 2. Finds all video files (various formats)
7
+ 3. Converts them to lossless MP4 using FFmpeg stream copy
8
+ """
9
+ import os
10
+ import tarfile
11
+ import glob
12
+ import subprocess
13
+ from pathlib import Path
14
+ from typing import List, Set
15
+
16
+ # Common video file extensions
17
+ VIDEO_EXTENSIONS = {'.h264', '.avi', '.mov', '.mkv', '.flv', '.m4v', '.mp4',
18
+ '.webm', '.wmv', '.mpg', '.mpeg', '.ts', '.mts', '.m2ts'}
19
+
20
def check_ffmpeg_available() -> bool:
    """
    Report whether the ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` with its output captured.

    Returns:
        True if the command exits successfully, False if it exits with an error
        or the executable cannot be found.
    """
    version_probe = ['ffmpeg', '-version']
    try:
        subprocess.run(version_probe, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    else:
        return True
30
+
31
def extract_tar_files(directory_path: str, output_dir: str = None) -> List[Path]:
    """
    Extracts all .tar, .tar.gz, and .tgz files in a given directory.

    Each archive is extracted into its own subdirectory of ``output_dir``, named after the
    archive with the archive extension removed (``session.tar.gz`` -> ``session``).

    Args:
        directory_path (str): The path to the directory containing the tar files.
        output_dir (str, optional): The directory where to extract the contents.
            If None, extracts to the same directory as the tar file.
            Defaults to None.
    Returns:
        List[Path]: List of directories where files were extracted (each listed once).
            Archives that fail to extract are reported on stdout and omitted.
    """
    if output_dir is None:
        output_dir = directory_path

    dir_path = Path(directory_path)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    tar_files = []
    for ext in ['*.tar', '*.tar.gz', '*.tgz']:
        tar_files.extend(dir_path.glob(ext))

    if not tar_files:
        print(f"No tar file(s) found in {directory_path}")
        return []

    print(f"Found {len(tar_files)} tar file(s) to extract...")

    extracted_dirs = []
    for tar_file_path in tar_files:
        tar_file = Path(tar_file_path)
        print(f"Extracting {tar_file.name}...")
        try:
            # Extract to a subdirectory named after the tar file (without extension)
            extract_dir = output_path / _tar_base_name(tar_file.name)
            extract_dir.mkdir(parents=True, exist_ok=True)

            with tarfile.open(tar_file_path, 'r:*') as f:
                if hasattr(tarfile, 'data_filter'):  # Python 3.12+ (and security backports)
                    f.extractall(path=extract_dir, filter='data')
                else:
                    # No extraction filters available: at least refuse entries that resolve outside extract_dir.
                    # NOTE(review): link targets inside the archive are not vetted on this fallback path.
                    f.extractall(path=extract_dir, members=_members_within(f, extract_dir))
            print(f" [OK] Extracted {tar_file.name} to {extract_dir}")
            # "x.tar" and "x.tar.gz" share a target directory; report it once.
            if extract_dir not in extracted_dirs:
                extracted_dirs.append(extract_dir)
        except tarfile.ReadError as e:
            print(f" [ERROR] Error extracting {tar_file.name}: {e} (Not a valid tar file or corrupted)")
        except Exception as e:
            print(f" [ERROR] Error extracting {tar_file.name}: {e}")

    print("\nExtraction complete!")
    return extracted_dirs


def _tar_base_name(file_name: str) -> str:
    """Return file_name without its tar extension; Path.stem alone would turn 'x.tar.gz' into 'x.tar'."""
    lowered = file_name.lower()
    for suffix in ('.tar.gz', '.tgz', '.tar'):
        if lowered.endswith(suffix) and len(file_name) > len(suffix):
            return file_name[:-len(suffix)]
    return Path(file_name).stem


def _members_within(archive, destination: Path):
    """Yield the members of archive whose resolved extraction path stays inside destination."""
    root = destination.resolve()
    for member in archive.getmembers():
        target = (root / member.name).resolve()
        if target == root or root in target.parents:
            yield member
        else:
            print(f" [SKIP] Unsafe path in archive, not extracted: {member.name}")
83
+
84
def find_video_files(directory: Path, recursive: bool = True, extensions: Set[str] = None) -> List[Path]:
    """
    Find all video files in a directory.

    A path is returned when it is a regular file and its final suffix, compared
    case-insensitively, is one of the accepted extensions (so ``clip.MP4`` is found too).

    Args:
        directory: Directory to search
        recursive: If True, search recursively in subdirectories
        extensions: Optional collection of extensions to accept, each including the
            leading dot (e.g. ``{'.avi', '.mov'}``). If None, VIDEO_EXTENSIONS is used.

    Returns:
        Sorted list of video file paths
    """
    accepted = {ext.lower() for ext in (VIDEO_EXTENSIONS if extensions is None else extensions)}
    # One walk of the tree instead of one glob per extension.
    candidates = directory.rglob('*') if recursive else directory.glob('*')
    return sorted(p for p in candidates if p.suffix.lower() in accepted and p.is_file())
105
+
106
def convert_video_to_mp4_lossless(input_path: Path, output_path: Path = None) -> bool:
    """
    Convert a video file to lossless MP4 using FFmpeg stream copy.

    Nothing is overwritten: inputs that already are MP4, and inputs whose output file already
    exists, are skipped. If the conversion fails, any output file FFmpeg left behind is removed
    so that a later run does not mistake a partial file for a finished conversion.

    Args:
        input_path: Path to input video file
        output_path: Optional output path. If None, creates output in same directory with .mp4 extension

    Returns:
        True if conversion successful, False otherwise (input missing, skipped, or FFmpeg failed)
    """
    input_path = Path(input_path)  # tolerate plain string paths

    if not input_path.exists():
        print(f"[ERROR] File not found: {input_path}")
        return False

    output_path = input_path.with_suffix('.mp4') if output_path is None else Path(output_path)

    # Skip if already MP4
    if input_path.suffix.lower() == '.mp4':
        print(f"[SKIP] File is already MP4: {input_path.name}")
        return False

    if output_path.exists():
        print(f"[SKIP] Output file already exists: {output_path.name}")
        return False

    # FFmpeg command for lossless conversion (stream copy)
    cmd = [
        'ffmpeg',
        '-i', str(input_path),
        '-c:v', 'copy',  # Copy video stream without re-encoding (lossless)
        '-c:a', 'copy',  # Copy audio stream if present (lossless)
        '-y',  # Never prompt: the output was verified absent above, so this only matters if it appears meanwhile
        str(output_path)
    ]

    try:
        print(f"Converting {input_path.name} -> {output_path.name}...")
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f" [ERROR] FFmpeg error for {input_path.name}")
        # Print first few lines of stderr for debugging
        for line in (e.stderr or '').split('\n')[:5]:
            if line.strip():
                print(f" {line}")
    except Exception as e:
        print(f" [ERROR] Unexpected error for {input_path.name}: {e}")
    else:
        print(f" [OK] Successfully converted {input_path.name}")
        return True

    # Failure path: the output did not exist before this call, so anything at output_path is a partial file.
    try:
        if output_path.exists():
            output_path.unlink()
    except OSError as e:
        print(f" [WARNING] Could not remove incomplete output {output_path}: {e}")
    return False
170
+
171
def extract_and_convert_videos(directory_path: str,
                               extract_to_subdirs: bool = True,
                               convert_recursive: bool = True) -> None:
    """
    Extract all tar files and convert all videos to lossless MP4.

    Progress and a final summary are printed to stdout. Videos that are already MP4, or whose
    ``.mp4`` output already exists, are reported as skipped rather than failed.

    Args:
        directory_path: Directory containing tar files
        extract_to_subdirs: If True, extract each tar to its own subdirectory, and additionally
            search the top level of ``directory_path`` (non-recursively) for videos.
        convert_recursive: If True, search for videos recursively in extracted directories
    """
    dir_path = Path(directory_path)

    if not dir_path.exists():
        print(f"[ERROR] Directory does not exist: {directory_path}")
        return

    if not check_ffmpeg_available():
        print("[ERROR] FFmpeg is not available. Please install FFmpeg.")
        return

    banner = "=" * 60

    # Step 1: Extract tar files
    print(banner)
    print("STEP 1: Extracting tar files...")
    print(banner)
    # NOTE(review): extract_tar_files treats output_dir=None as directory_path, so both branches
    # currently extract to the same place; kept as-is to preserve the call contract.
    extracted_dirs = extract_tar_files(directory_path,
                                       output_dir=directory_path if extract_to_subdirs else None)

    # Step 2: Find all video files
    print("\n" + banner)
    print("STEP 2: Finding video files...")
    print(banner)

    # Search in extracted directories, or the main directory when nothing was extracted
    search_dirs = extracted_dirs if extracted_dirs else [dir_path]
    all_video_files = []

    for search_dir in search_dirs:
        videos = find_video_files(search_dir, recursive=convert_recursive)
        all_video_files.extend(videos)
        if videos:
            print(f"Found {len(videos)} video file(s) in {search_dir}")

    # Also search in main directory if we extracted to subdirs
    if extract_to_subdirs and dir_path not in search_dirs:
        videos = find_video_files(dir_path, recursive=False)
        all_video_files.extend(videos)
        if videos:
            print(f"Found {len(videos)} video file(s) in {dir_path}")

    # Overlapping search directories must not cause the same file to be processed twice.
    all_video_files = list(dict.fromkeys(all_video_files))

    if not all_video_files:
        print("No video files found to convert.")
        return

    print(f"\nTotal video files found: {len(all_video_files)}")

    # Step 3: Convert videos to MP4
    print("\n" + banner)
    print("STEP 3: Converting videos to lossless MP4...")
    print(banner)

    successful = 0
    failed = 0
    skipped = 0

    for video_file in all_video_files:
        # The converter returns False both for skips and for errors; check beforehand which one applies.
        will_be_skipped = video_file.suffix.lower() == '.mp4' or video_file.with_suffix('.mp4').exists()
        if convert_video_to_mp4_lossless(video_file) is True:
            successful += 1
        elif will_be_skipped:
            skipped += 1
        else:
            failed += 1

    print("\n" + banner)
    print("Conversion complete!")
    print(banner)
    print(f" Successful: {successful}")
    print(f" Failed: {failed}")
    print(f" Skipped (already MP4 or output exists): {skipped}")
    print(f" Total: {len(all_video_files)}")
252
+
253
if __name__ == "__main__":
    # Script entry point: extracts the archives in the hard-coded directory below, then
    # converts the videos that are found to MP4.
    target_directory = r"E:\new_tars"
    extract_and_convert_videos(target_directory,
                               extract_to_subdirs=True,
                               convert_recursive=True)