rcdl 3.0.0b18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rcdl might be problematic. Click here for more details.

@@ -0,0 +1,366 @@
1
+ # core/downloader_subprocess.py
2
+
3
+ """
4
+ Handle all subprocess calls to external tools (yt-dlp, ffmpeg, ...)
5
+ """
6
+
7
+ import subprocess
8
+ import logging
9
+ from pathlib import Path
10
+ import os
11
+
12
+
13
+ from rcdl.interface.ui import UI, NestedProgress
14
+ from rcdl.core import parser
15
+ from rcdl.core.models import Media, Post
16
+ from rcdl.core.config import Config
17
+ from rcdl.utils import bytes_to_str
18
+
19
+
20
def ytdlp_clear_cache():
    """Ask yt-dlp to remove its cache directory.

    Runs ``yt-dlp --rm-cache-dir``. The exit status is not checked.
    """
    subprocess.run(["yt-dlp", "--rm-cache-dir"], check=False)
24
+
25
+
26
def kill_aria2c():
    """Terminate every process whose command line matches ``aria2c``.

    Runs ``pkill -f aria2c``. The exit status is not checked, so finding no
    matching process is not treated as an error.
    """
    subprocess.run(["pkill", "-f", "aria2c"], check=False)
30
+
31
+
32
+ def ytdlp_subprocess(
33
+ url: str,
34
+ filepath: Path | str,
35
+ ):
36
+ """Call yt-dlp in a subprocess to download a video"""
37
+
38
+ cmd = [
39
+ "yt-dlp",
40
+ "-q",
41
+ "--progress",
42
+ url,
43
+ "-o",
44
+ filepath,
45
+ "--external-downloader",
46
+ "aria2c",
47
+ ]
48
+
49
+ logging.info("CMD: %s", " ".join(cmd))
50
+
51
+ result = subprocess.run(cmd, capture_output=True, text=True, check=False)
52
+ if result.returncode != 0:
53
+ logging.error("yt-dlp failed to dl vid: %s", result.stderr)
54
+
55
+ return result.returncode
56
+
57
+
58
+ def ffprobe_get_duration(path: Path) -> int | None:
59
+ """Get duration of a video in seconds with ffprobe
60
+ Return an int or None if command failed"""
61
+ cmd = [
62
+ "ffprobe",
63
+ "-v",
64
+ "error",
65
+ "-show_entries",
66
+ "format=duration",
67
+ "-of",
68
+ "default=noprint_wrappers=1:nokey=1",
69
+ str(path),
70
+ ]
71
+ try:
72
+ result = subprocess.run(
73
+ cmd,
74
+ stdout=subprocess.PIPE,
75
+ stderr=subprocess.DEVNULL,
76
+ text=True,
77
+ check=True,
78
+ )
79
+ return int(float(result.stdout.strip()))
80
+ except subprocess.CalledProcessError as e:
81
+ UI.error(f"Failed to use ffprobe on {path} due to {e}")
82
+ return None
83
+ except (AttributeError, ValueError, OverflowError) as e:
84
+ UI.error(f"Failed to parse duration result of {path} due to {e}")
85
+ return None
86
+
87
+
88
def get_max_width_height(medias: list[Media], post: Post) -> tuple[int, int]:
    """Return the largest video width and height found among *medias*.

    Each file is probed with ffprobe (first video stream only). Every probed
    dimension is capped at 1920x1080 and the largest capped value seen across
    all files is kept, independently for width and height.

    Files that ffprobe rejects, or whose output cannot be parsed, are reported
    through ``UI.error`` and skipped.

    Returns:
        ``(width, height)``; ``(0, 0)`` when no file yielded a usable size.
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=width,height",
        "-of",
        "csv=p=0",
    ]
    max_width = 1920
    max_height = 1080
    width = 0
    height = 0
    for m in medias:
        path = os.path.join(Config.creator_folder(post.user), m.file_path)

        try:
            result = subprocess.run(
                cmd + [path], capture_output=True, text=True, check=True
            )
            w_str, h_str = result.stdout.strip().split(",")
            probed_width = int(w_str)
            probed_height = int(h_str)
        except subprocess.CalledProcessError as e:
            UI.error(f"Fail to use ffprobe to get width, height on {path} due to {e}")
            continue
        except (AttributeError, ValueError, OverflowError) as e:
            UI.error(f"Failed to parse width, height for {path} due to {e}")
            continue

        # Accumulate the maximum instead of overwriting, so the result does
        # not depend on which media happens to be last in the list.
        width = max(width, min(probed_width, max_width))
        height = max(height, min(probed_height, max_height))
    return (width, height)
123
+
124
+
125
def get_total_duration(medias: list[Media], post: Post) -> int:
    """Sum the durations, in milliseconds, of every media in *medias*.

    Each file is probed with ffprobe. A failing probe raises
    ``subprocess.CalledProcessError`` and unparsable output raises
    ``ValueError``; neither is handled here.
    """

    def _probe_ms(video_path: str) -> int:
        """Return the duration of one file in milliseconds."""
        probe = subprocess.run(
            [
                "ffprobe",
                "-v",
                "error",
                "-select_streams",
                "v:0",
                "-show_entries",
                "format=duration",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                video_path,
            ],
            capture_output=True,
            text=True,
            check=True,
        )
        seconds = float(probe.stdout.strip())
        return int(seconds * 1000)

    return sum(
        _probe_ms(os.path.join(Config.creator_folder(post.user), m.file_path))
        for m in medias
    )
152
+
153
+
154
def ffmpeg_concat_build_command(medias: list[Media], post: Post) -> dict:
    """Assemble the ffmpeg command line that merges *medias* into one file.

    Every input video is scaled and padded to a common frame size, set to
    ``Config.FPS`` and passed, together with its audio stream, to the
    ``concat`` filter. The result is encoded with libx264 / aac.

    Returns:
        A dict with keys ``"cmd"`` (the argument list) and ``"output_path"``.
    """
    width, height = get_max_width_height(medias, post)
    logging.info("Found (%s, %s) (width, height) for this group.", width, height)
    # A zero dimension means probing found nothing usable: use the configured size
    width = Config.MAX_WIDTH if width == 0 else width
    height = Config.MAX_HEIGHT if height == 0 else height

    fused_name = parser.get_filename_fuse(post)
    destination = os.path.join(Config.creator_folder(post.user), fused_name)

    args = ["ffmpeg", "-y", "-progress", "pipe:2", "-nostats"]
    for part in medias:
        args += ["-i", os.path.join(Config.creator_folder(post.user), part.file_path)]

    part_count = len(medias)

    # One normalisation chain per input: [i:v] -> [vi]
    chains = [
        f"[{i}:v]"
        f"scale={width}:{height}:force_original_aspect_ratio=decrease,"
        f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2,"
        f"fps={Config.FPS},setsar=1"
        f"[v{i}]"
        for i in range(part_count)
    ]

    # Final chain: interleave normalised video and original audio of each input
    concat_inputs = "".join(f"[v{i}][{i}:a]" for i in range(part_count))
    chains.append(f"{concat_inputs}concat=n={part_count}:v=1:a=1[outv][outa]")

    args += [
        "-filter_complex",
        ";".join(chains),
        "-map",
        "[outv]",
        "-map",
        "[outa]",
        "-c:v",
        "libx264",
        "-preset",
        Config.PRESET,
        "-threads",
        str(Config.THREADS),
        "-c:a",
        "aac",
        "-movflags",
        "+faststart",
        destination,
    ]

    return {"cmd": args, "output_path": destination}
218
+
219
+
220
+ def parse_line_ffmpeg_concat_into_advance(line: str) -> int | None:
221
+ line = line.strip()
222
+ if not line:
223
+ return None
224
+
225
+ progres_key = "out_time_ms"
226
+ if line.startswith(progres_key):
227
+ current_progress_str = line.replace(f"{progres_key}=", "").strip()
228
+ try:
229
+ current_progress_us = int(current_progress_str)
230
+ current_progress_ms = current_progress_us // 1000
231
+ return current_progress_ms
232
+ except ValueError as e:
233
+ logging.warning(
234
+ "Skipping invalid progress line: %r (%s)",
235
+ current_progress_str,
236
+ e,
237
+ )
238
+ return None
239
+ except Exception as e:
240
+ UI.error(f"Unexpected error while updating progress: {e}")
241
+ return None
242
+ return None
243
+
244
+
245
def ffmpeg_concat(medias: list[Media], post: Post, progress: NestedProgress):
    """Run the ffmpeg concat command to merge the videos of a post together.

    ffmpeg's stderr (which carries the ``-progress`` output) is copied line by
    line to ``Config.CACHE_DIR / "ffmpeg.log"`` and used to advance *progress*.

    Args:
        medias: parts to merge, in output order.
        post: post the parts belong to.
        progress: progress display updated while ffmpeg runs.

    Returns:
        The ffmpeg exit code (0 on success). On failure the log file content
        is also written to the application log.

    Raises:
        subprocess.CalledProcessError, ValueError: if probing the total
            duration fails. This happens before ffmpeg is started.
        OSError: if ffmpeg cannot be started or the log cannot be written.
    """
    command_builder = ffmpeg_concat_build_command(medias, post)
    cmd = command_builder["cmd"]

    logging.info("CMD: %s", " ".join(cmd))

    # Probe durations BEFORE spawning ffmpeg: the probe can raise, and doing
    # it afterwards would leave an orphan encoder writing the output file.
    total_duration = get_total_duration(medias, post)

    ffmpeg_log = Config.CACHE_DIR / "ffmpeg.log"
    with open(ffmpeg_log, "w", encoding="utf-8") as log_file:
        print(cmd, file=log_file)
        # run cmd
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
        )
        assert process.stderr is not None

        try:
            progress.start_current(
                description=f"{post.user}->{medias[0].file_path}",
                total=total_duration,
            )

            last_progress = 0
            for line in process.stderr:
                line = line.strip()
                print(line, file=log_file)
                current_progress_ms = parse_line_ffmpeg_concat_into_advance(line)
                if current_progress_ms is None:
                    continue
                # The progress API takes increments, ffmpeg reports absolutes
                progress.advance_current(step=current_progress_ms - last_progress)
                last_progress = current_progress_ms

            process.wait()
        finally:
            # Only true on an abnormal exit from the block above
            if process.poll() is None:
                process.kill()
                process.wait()

    progress.finish_current()

    UI.debug(f"Result: {process.returncode}")
    if process.returncode != 0:
        UI.error(f"Failed to concat videos. See ffmpeg log file {ffmpeg_log}")
        # The log was closed above, so everything written is on disk
        with open(ffmpeg_log, "r", encoding="utf-8") as f:
            lines = f.read()
        logging.warning("---FFMPEG LOG---")
        logging.warning(lines)
        logging.warning("---END FFMPEG LOG---")
        return process.returncode

    return 0
297
+
298
+
299
+ def parse_line_into_pourcent(line: str) -> float | None:
300
+ line = line.strip()
301
+ if not line:
302
+ return None
303
+
304
+ if "%" in line:
305
+ try:
306
+ parts = line.split("%")
307
+ parts = parts[0].strip().split(" ")
308
+ pourcent = parts[-1]
309
+ flt_prcnt = float(pourcent)
310
+ return flt_prcnt
311
+ except Exception as e:
312
+ UI.error(f"Error parsing line {line}: {e}")
313
+ return None
314
+ return None
315
+
316
+
317
def handbrake_optimized(media: Media, user: str, progress: NestedProgress):
    """Re-encode one media file with HandBrake to reduce its size.

    The launcher command comes from ``Config.HANDBRAKE_RUN_CMD`` (split on
    single spaces). The output is written next to the input with the suffix
    ``.opti.mp4``, using the "HQ 1080p30 Surround" preset. Percentages read
    from HandBrake's stdout drive *progress*.

    Returns:
        The HandBrake exit code.
    """
    launcher = Config.HANDBRAKE_RUN_CMD.split(" ")

    source_path = os.path.join(Config.creator_folder(user), media.file_path)
    target_path = f"{source_path}.opti.mp4"

    full_cmd = [
        *launcher,
        "-i",
        source_path,
        "-o",
        target_path,
        "--preset",
        "HQ 1080p30 Surround",
    ]
    UI.debug(f"Running cmd '{full_cmd}'")

    process = subprocess.Popen(
        full_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        text=True,
    )
    assert process.stdout is not None

    progress.start_current(description="Optimizing", total=100)
    progress.set_status(
        f"{user}@({media.service}) -> ",
        f"{media.file_path} ({bytes_to_str(media.file_size)})",
    )

    # HandBrake reports absolute percentages; the progress API takes increments
    previous = 0.0
    for raw_line in process.stdout:
        pourcent = parse_line_into_pourcent(raw_line)
        if pourcent is not None:
            step = pourcent - previous
            previous = pourcent
            progress.advance_current(step=step)

    process.wait()
    progress.finish_current()

    exit_code = process.returncode
    if exit_code == 0:
        UI.debug("Return code: 0")
    else:
        UI.error(f"Return code: {exit_code}")

    return exit_code
rcdl/core/file_io.py ADDED
@@ -0,0 +1,41 @@
1
+ # core/file_io.py
2
+
3
+ """All write/read to file function (excluding sqlite database)"""
4
+
5
+ import json
6
+
7
+
8
def write_json(path, data, mode="w"):
    """Serialise *data* as JSON (4-space indent) into the file at *path*.

    *mode* is passed to ``open``; the file is written as UTF-8.
    """
    with open(path, mode, encoding="utf-8") as handle:
        json.dump(data, handle, indent=4)
12
+
13
+
14
def load_json(path) -> dict:
    """Read the UTF-8 JSON file at *path* and return the decoded content."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)
19
+
20
+
21
def load_txt(path) -> list[str]:
    """Read the UTF-8 text file at *path*.

    Returns one entry per line, each with surrounding whitespace removed
    (blank lines become empty strings).
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw_lines = handle.readlines()
    return [raw.strip() for raw in raw_lines]
29
+
30
+
31
+ def write_txt(path, lines: list[str] | str, mode: str = "a"):
32
+ """Write txt to .txt file"""
33
+ if isinstance(lines, str):
34
+ lines = [lines]
35
+
36
+ with open(path, mode, encoding="utf-8") as f:
37
+ for line in lines:
38
+ if not line.endswith("\n"):
39
+ f.write(line + "\n")
40
+ else:
41
+ f.write(line)
rcdl/core/fuse.py ADDED
@@ -0,0 +1,127 @@
1
+ # core/fuse.py
2
+
3
+ """Handle merging videos from the same post"""
4
+
5
+ import os
6
+ import subprocess
7
+
8
+ from rcdl.core.config import Config
9
+ from rcdl.core.db import DB
10
+ from rcdl.core.models import FusedStatus, Status, FusedMedia, Media, Post
11
+ from rcdl.interface.ui import UI, NestedProgress
12
+ import rcdl.core.downloader_subprocess as dls
13
+ from rcdl.utils import get_media_metadata, get_date_now
14
+
15
+
16
def update_db(fuse: FusedMedia, medias: list[Media], user: str, result):
    """Persist the outcome of a fuse attempt.

    When *result* is 0 the fused file's metadata is read from disk and stored
    on *fuse*, which is marked FUSED along with every media in *medias*.
    Otherwise only ``fuse.fail_count`` is incremented. In both cases *fuse*
    and all *medias* are written back to the database.
    """
    if result != 0:
        fuse.fail_count += 1
    else:
        fused_path = os.path.join(Config.creator_folder(user), fuse.file_path)
        duration, file_size, checksum = get_media_metadata(fused_path)
        fuse.duration = duration
        fuse.status = FusedStatus.FUSED
        fuse.checksum = checksum
        fuse.created_at = get_date_now()
        fuse.file_size = file_size
        for part in medias:
            part.status = Status.FUSED

    with DB() as db:
        db.update_fuse(fuse)
        for part in medias:
            db.update_media(part)
34
+
35
+
36
def get_medias_and_post(
    post_id: str, total_parts: int
) -> tuple[None, None] | tuple[list[Media], Post]:
    """Load the post and the media parts that make up a fuse group.

    Returns ``(medias, post)`` with *medias* sorted by ``sequence``, or
    ``(None, None)`` when the post is unknown, when the number of medias in
    the database differs from *total_parts*, or when at least one media has a
    status other than DOWNLOADED / OPTIMIZED (FUSED is also accepted when
    ``Config.DEBUG`` is set).
    """
    with DB() as db:
        post = db.query_post_by_id(post_id)
    if post is None:
        UI.error(f"Could not match fuses post id {post_id} to a post in post tables")
        return None, None

    with DB() as db:
        medias = db.query_media_by_post_id(post_id)

    # The group is only usable when every expected part is present
    found = len(medias)
    if found != total_parts:
        UI.error(f"Found {found} videos part. Expected {total_parts}")
        return None, None

    allowed_status = [Status.DOWNLOADED, Status.OPTIMIZED]
    if Config.DEBUG:
        allowed_status.append(Status.FUSED)

    if any(part.status not in allowed_status for part in medias):
        return None, None

    return sorted(medias, key=lambda part: part.sequence), post
73
+
74
+
75
def _remove_part_files(medias: list[Media], user: str) -> None:
    """Delete the part files of a fused group (only logged when Config.DEBUG)."""
    for media in medias:
        media_full_path = os.path.join(Config.creator_folder(user), media.file_path)
        if Config.DEBUG:
            UI.info(f"Skipped '{media_full_path}' removal")
            continue
        try:
            os.remove(media_full_path)
            UI.info(f"Removed file '{media_full_path}'")
        except (FileNotFoundError, PermissionError) as e:
            UI.error(
                f"FileNotFound/PermissionError: Failed to "
                f"remove media '{media_full_path}' due to: {e}"
            )
        except OSError as e:
            UI.error(f"Failed to remove media '{media_full_path}' due to: {e}")


def fuse_medias():
    """Fuse all media parts of every fuse group with status PENDING in DB fuses.

    For each group the parts are concatenated with ffmpeg, the database is
    updated with the outcome, and, only when the concat succeeded, the part
    files are removed. Groups whose post or parts are not ready are skipped.
    When ``Config.DEBUG`` is set, groups already FUSED are processed again.
    """
    # get all fused media
    with DB() as db:
        fuses = db.query_fuses_by_status(FusedStatus.PENDING)
    if Config.DEBUG:
        with DB() as db:
            ok_fuses = db.query_fuses_by_status(FusedStatus.FUSED)
        fuses.extend(ok_fuses)

    progress = NestedProgress(UI.console)
    progress.start(
        total=len(fuses), total_label="Fusing videos", current_label="Current fuse"
    )

    try:
        for fm in fuses:
            medias, post = get_medias_and_post(fm.id, fm.total_parts)
            if medias is None or post is None:
                progress.advance_total()
                continue

            # concat medias; any handled failure leaves result non-zero
            result = 1
            try:
                result = dls.ffmpeg_concat(medias, post, progress)
            except (OSError, subprocess.SubprocessError, ValueError) as e:
                UI.error(f"Failed to concat video (id:{post.id}) due to: {e}")

            # update db
            update_db(fm, medias, post.user, result)

            progress.advance_total()

            # Remove the parts only when the fused file was produced:
            # deleting them after a failed concat would lose the source data
            # while the DB still lists them as available.
            if result == 0:
                _remove_part_files(medias, post.user)
    finally:
        # Always release the progress display, even on an unexpected error
        progress.close()
rcdl/core/models.py ADDED
@@ -0,0 +1,105 @@
1
+ # core/models.py
2
+
3
+ """Hold all dataclass models and enums"""
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+
8
+
9
class Status(Enum):
    """Status for media"""

    PENDING = "pending"  # to be downloaded
    DOWNLOADED = "downloaded"  # video has been downloaded
    FUSED = "fused"  # video has been fused, and implicitly removed
    # NOTE(review): the value is "to_be_delete" (no trailing "d"), unlike the
    # member name. Presumably this string is persisted, so confirm before
    # changing it.
    TO_BE_DELETED = "to_be_delete"  # video has been marked for deletion
    DELETED = "deleted"  # video has been deleted
    OPTIMIZED = "optimized"  # video has been optimized (reduce file size)
18
+
19
+
20
class FusedStatus(Enum):
    """Status for fused group"""

    PENDING = "pending"  # group still has to be fused
    FUSED = "fused"  # group has been fused into a single file
25
+
26
+
27
class CreatorStatus(Enum):
    """Status for a creator"""

    FAVORITED = "FAVORITED"
    NA = "NA"  # presumably "not applicable" / no particular status; TODO confirm
30
+
31
+
32
@dataclass
class Post:
    """Post model that mirrors the post dict returned by a request.
    Partially used in posts db (check db_queries.py)
    """

    id: str
    user: str  # used to resolve the creator folder (Config.creator_folder)
    service: str
    domain: str
    title: str
    substring: str
    published: str  # presumably a date string; format not visible here, TODO confirm
    file: dict
    attachments: list
    json_hash: str
    raw_json: str
    fetched_at: str  # presumably a date string; TODO confirm
50
+
51
+
52
@dataclass
class Media:
    """Media model: use in medias DB"""

    post_id: str  # id of the post this media belongs to
    service: str
    url: str
    duration: float  # unit not visible here; presumably seconds, TODO confirm
    sequence: int  # sort key that orders the parts of a post before fusing
    status: Status
    checksum: str
    file_path: str  # joined onto the creator folder to get the full path
    created_at: str
    updated_at: str
    file_size: int  # presumably bytes (displayed via bytes_to_str); TODO confirm
    fail_count: int = 0
68
+
69
+
70
@dataclass
class FusedMedia:
    """Fuses group models.
    Used in fuses db."""

    id: str  # used as the post id when looking up the group's post and medias
    duration: int
    total_parts: int  # number of media parts expected for the group
    status: FusedStatus
    checksum: str
    file_path: str  # joined onto the creator folder to get the full path
    created_at: str
    updated_at: str
    file_size: int
    fail_count: int = 0  # incremented each time fusing the group fails
85
+
86
+
87
@dataclass
class Creator:
    """Creator model"""

    id: str
    name: str
    service: str
    domain: str
    indexed: str  # presumably a date string; format not visible here, TODO confirm
    updated: str  # presumably a date string; TODO confirm
    favorited: int

    status: CreatorStatus

    # param
    # NOTE(review): these look like per-creator limits/filters; their units
    # and exact semantics are not visible in this file. Confirm against
    # the code that reads them.
    max_size: int
    max_posts: int
    min_date: str
    max_date: str