rcdl-2.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rcdl/core/downloader.py ADDED
@@ -0,0 +1,265 @@
+ # core/downloader.py
+
+ import logging
+ import os
+
+ import requests
+
+ import rcdl.core.parser as parser
+ from .api import URL
+ from .config import Config
+ from .models import Creator, Video, VideoStatus
+ from .db import DB
+ from .downloader_subprocess import ytdlp_subprocess
+ from .file_io import write_json, load_json
+ from rcdl.interface.ui import UI
+
+
+ class PostsFetcher:
+     """
+     Fetch posts from the API, save them as JSON, and handle multi-page requests.
+     """
+
+     def __init__(
+         self, url: str, json_path: str, max_page: int = Config.DEFAULT_MAX_PAGE
+     ):
+         self.url = url
+         self.json_path = json_path
+
+         self.page = 0
+         self.max_page = max_page
+
+         self.status = 200
+
+     def _request_page(self, url: str) -> requests.Response:
+         """Request a single page and return the raw response"""
+         logging.info(f"RequestEngine url {url}")
+         headers = URL.get_headers()
+         response = requests.get(url, headers=headers)
+         if response.status_code != 200:
+             logging.warning(f"Failed request {url}: {response.status_code}")
+         return response
+
+     def request(self, params: dict | None = None):
+         # avoid the mutable-default pitfall: copy before mutating below
+         params = dict(params or {})
+         with UI.progress_posts_fetcher(self.max_page) as progress:
+             task = progress.add_task("Fetching posts", total=self.max_page)
+
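+             # Offset-based pagination: each iteration advances the "o" query
+             # parameter by Config.POST_PER_PAGE posts.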
+             while self.status == 200 and self.page < self.max_page:
+                 o = self.page * Config.POST_PER_PAGE
+                 params["o"] = o
+                 url = URL.add_params(self.url, params)
+
+                 try:
+                     # Dry run: no request actually made
+                     if Config.DRY_RUN:
+                         logging.debug(
+                             f"DRY-RUN posts fetcher {url} -> {self.json_path}"
+                         )
+                         self.page += 1
+                         continue
+
+                     response = self._request_page(url)
+                     self.status = response.status_code
+
+                     # If the program crashes while doing requests, previously
+                     # fetched pages are still saved and not overwritten.
+                     if self.page > 0:
+                         json_data = list(load_json(self.json_path))
+                     else:
+                         json_data = []
+
+                     # For the discover command, the response JSON has a different
+                     # format and nests the posts under a 'posts' key.
+                     if self.status == 200:
+                         if "posts" in response.json():
+                             json_data.extend(response.json()["posts"])
+                         else:
+                             json_data.extend(response.json())
+
+                     write_json(self.json_path, json_data, mode="w")
+
+                     progress.update(
+                         task,
+                         advance=1,
+                         description=f"Fetched {len(json_data)} posts (page {self.page + 1}/{self.max_page})",
+                     )
+                 except Exception as e:
+                     logging.error(f"Error in request {url} p{self.page}: {e}")
+                 finally:
+                     self.page += 1
+
+
+ class VideoDownloader:
+     """Handle downloading a list of Videos and update DB status"""
+
+     def __init__(self):
+         pass
+
+     def _build_url(self, video: Video):
+         return URL.get_url_from_file(video.domain, video.url)
+
+     def _build_output_path(self, video: Video, discover: bool = False):
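+         """Build the video's output path; discover downloads go under Config.DISCOVER_DIR."""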
+         if discover:
+             return os.path.join(Config.DISCOVER_DIR, video.relative_path)
+
+         return os.path.join(
+             Config.creator_folder(video.creator_id), video.relative_path
+         )
+
+     def _update_db_status(self, result: int, video: Video):
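+         """Record a download result: success resets fail_count, failure increments it."""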
+         with DB() as d:
+             if result == 0:
+                 d.set_status(video, VideoStatus.DOWNLOADED, fail_count=0)
+             else:
+                 d.set_status(video, VideoStatus.FAILED, fail_count=video.fail_count + 1)
+
+     def downloads(
+         self, videos: list[Video], write_db: bool = True, discover: bool = False
+     ):
+         progress, task = UI.video_progress(total=len(videos))
+         try:
+             for video in videos:
+                 url = self._build_url(video)
+                 filepath = self._build_output_path(video, discover=discover)
+
+                 UI.set_current_video_progress(
+                     f"{video.creator_id}@({video.service})", video.relative_path
+                 )
+
+                 if Config.DRY_RUN:
+                     UI.debug(f"Dry run: dl {video.creator_id} @ {filepath}")
+                     progress.advance(task)
+                     continue
+
+                 if os.path.exists(filepath):
+                     UI.warning(
+                         f"Video {url} @ {filepath} already exists. Possible DB problem"
+                     )
+                     progress.advance(task)
+                     continue
+
+                 result = ytdlp_subprocess(url, filepath)
+                 if write_db:
+                     self._update_db_status(result, video)
+
+                 progress.advance(task)
+         finally:
+             UI.close_video_progress()
+
+
+ def fetch_posts_by_tag(tag: str, max_page: int = Config.DEFAULT_MAX_PAGE) -> list:
+     """Helper function to get all posts from a tag search"""
+     url = URL.get_posts_page_url_wo_param()
+     path = Config.cache_file(tag)
+     pf = PostsFetcher(url, str(path), max_page=max_page)
+     pf.request(params={"tag": tag})
+
+     return load_json(path)
+
+
+ def fetch_posts_by_creator(creator: Creator) -> list:
+     """Helper function to get all posts from a creator"""
+     url = URL.get_creator_post_wo_param(creator)
+     path = Config.cache_file(f"{creator.creator_id}_{creator.service}")
+     pf = PostsFetcher(url, str(path))
+     pf.request()
+
+     return load_json(path)
+
+
+ def refresh_creators_videos():
+     """
+     Command refresh
+     For each creator:
+         - fetch all posts into a .json
+         - from the .json, keep only the posts containing videos
+         - convert the post dicts to Videos
+         - update the DB
+     """
+     creators = parser.get_creators()
+     for creator in creators:
+         UI.info(f"Creator {creator.creator_id} from {creator.service}")
+
+         fetch_posts_by_creator(creator)
+         posts_with_videos = parser.filter_posts_with_videos_from_json(
+             str(Config.cache_file(f"{creator.creator_id}_{creator.service}"))
+         )
+         all_videos = parser.convert_posts_to_videos(posts_with_videos)
+
+         UI.info(
+             f"Found {len(all_videos)} videos in {len(posts_with_videos)} posts with video urls"
+         )
+
+         # put all videos in db
+         with DB() as db:
+             db.insert_videos(all_videos)
+
+
+ def download_videos_to_be_dl():
+     """
+     Command dlsf
+     Download videos in the DB with status TO_BE_DOWNLOADED or (FAILED & fail_count < Config.)
+     """
+     with DB() as db:
+         videos = db.query_videos(pending=True)
+
+     vd = VideoDownloader()
+     vd.downloads(videos, write_db=True, discover=False)
+
+
+ # --- --- --- --- --- DISCOVER --- --- --- --- ---
+ def discover(tag: str, max_page: int):
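+     """Discover creators publishing videos for a tag, then sample a few videos from each."""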
+     discover_creators(tag, max_page)
+     dl_video_from_discover_creators()
+
+
+ def discover_creators(tag: str, max_page: int):
+     # download posts matching the searched tag
+     posts = fetch_posts_by_tag(tag, max_page)
+     logging.info(f"Found {len(posts)} posts")
+
+     path = str(Config.cache_file(tag))
+     posts_with_videos = parser.filter_posts_with_videos_from_json(path)
+     logging.info(f"Found {len(posts_with_videos)} posts with videos")
+
+     creators = parser.get_creators_from_posts(posts_with_videos)
+
+     # save to csv
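+     # one line per creator: creator_id;service;domain;status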
+     file = os.path.join(Config.DISCOVER_DIR, "discover.csv")
+     with open(file, "w") as f:
+         for c in creators:
+             line = f"{c.creator_id};{c.service};{c.domain};to_be_treated\n"
+             f.write(line)
+
+
+ def dl_video_from_discover_creators():
+     # load csv
+     file = os.path.join(Config.DISCOVER_DIR, "discover.csv")
+     with open(file, "r") as f:
+         lines = f.readlines()
+
+     creators = []
+     for line in lines:
+         line = line.strip().split(";")
+         creators.append(
+             Creator(creator_id=line[0], service=line[1], domain=line[2], status=line[3])
+         )
+
+     # get posts
+     for creator in creators:
+         response = requests.get(
+             URL.get_creator_post_wo_param(creator), headers=URL.get_headers()
+         )
+         if response.status_code != 200:
+             print(f"ERROR - Request {URL.get_creator_post_wo_param(creator)}")
+             continue
+         response_posts = response.json()
+         posts = parser.filter_posts_with_videos_from_list(response_posts)
+         print(f"{len(posts)} posts found")
+         if len(posts) > 5:
+             posts = posts[:5]
+             print("Limited posts to 5")
+
+         for post in posts:
+             urls = parser.extract_video_urls(post)
+             url = URL.get_url_from_file(creator.domain, urls[0])
+             filename = f"{post['user']}_{post['id']}.mp4"
+             filepath = os.path.join(Config.DISCOVER_DIR, filename)
+             ytdlp_subprocess(url, filepath)
rcdl/core/downloader_subprocess.py ADDED
@@ -0,0 +1,190 @@
+ # core/downloader_subprocess.py
+
+ import subprocess
+ import logging
+ from pathlib import Path
+ import os
+
+ from rcdl.core.models import Video
+ from rcdl.core.config import Config
+ from rcdl.interface.ui import UI
+
+
+ def ytdlp_subprocess(
+     url: str,
+     filepath: Path | str,
+ ):
+     """Call yt-dlp in a subprocess"""
+     cmd = [
+         "yt-dlp",
+         "-q",
+         "--progress",
+         url,
+         "-o",
+         str(filepath),
+         "--external-downloader",
+         "aria2c",
+     ]
+
+     logging.info(f"CMD: {' '.join(cmd)}")
+
+     result = subprocess.run(cmd, capture_output=True, text=True)
+     if result.returncode != 0:
+         logging.error(f"yt-dlp failed to download video: {result.stderr}")
+
+     return result.returncode
+
+
+ def ffmpeg_concat_build_command(videos: list[Video]) -> dict:
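+     """Build a single ffmpeg command that scales/pads every part to a common
+     format and concatenates them into one tmp_*.mp4 in the creator's folder."""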
+     # parameters
+     width: int = 1920
+     height: int = 1080
+     fps: int = 30
+     preset: str = "veryfast"
+     threads: int = 0  # 0 for max
+
+     # output path
+     v = videos[0]
+     output_filename = f"tmp_{v.published}_{v.title}.mp4"
+     output_path = os.path.join(Config.creator_folder(v.creator_id), output_filename)
+
+     # build cmd
+     cmd = ["ffmpeg", "-y", "-progress", "pipe:2", "-nostats"]
+
+     # inputs
+     for v in videos:
+         input_path = os.path.join(Config.creator_folder(v.creator_id), v.relative_path)
+         cmd.extend(["-i", input_path])
+
+     # filter complex: scale each input to fit within width x height, pad to
+     # exactly width x height, then force a common fps and sample aspect ratio
+     # so the streams can be concatenated
+     filter_lines = []
+     for idx in range(len(videos)):
+         filter_lines.append(
+             f"[{idx}:v]"
+             f"scale={width}:{height}:force_original_aspect_ratio=decrease,"
+             f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2,"
+             f"fps={fps},setsar=1"
+             f"[v{idx}]"
+         )
+
+     # concat inputs
+     concat = []
+     for idx in range(len(videos)):
+         concat.append(f"[v{idx}][{idx}:a]")
+
+     filter_lines.append(f"{''.join(concat)}concat=n={len(videos)}:v=1:a=1[outv][outa]")
+     filter_complex = ";".join(filter_lines)
+
+     cmd.extend(
+         [
+             "-filter_complex",
+             filter_complex,
+             "-map",
+             "[outv]",
+             "-map",
+             "[outa]",
+             "-c:v",
+             "libx264",
+             "-preset",
+             preset,
+             "-threads",
+             str(threads),
+             "-c:a",
+             "aac",
+             "-f",
+             "mp4",
+             output_path,
+         ]
+     )
+
+     return {"cmd": cmd, "output_path": output_path}
+
+
+ def get_duration(path: str) -> int:
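+     """Return the container duration of the file at `path`, in milliseconds."""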
+     cmd = [
+         "ffprobe",
+         "-v",
+         "error",
+         "-select_streams",
+         "v:0",
+         "-show_entries",
+         "format=duration",
+         "-of",
+         "default=noprint_wrappers=1:nokey=1",
+         path,
+     ]
+
+     result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+     return int(float(result.stdout.strip()) * 1000)
+
+
+ def get_total_duration(videos: list[Video]) -> int:
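+     """Sum the durations (in ms) of all parts; used as the concat progress total."""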
+     duration = 0
+     for v in videos:
+         path = os.path.join(Config.creator_folder(v.creator_id), v.relative_path)
+         duration += get_duration(path)
+     return duration
+
+
+ def ffmpeg_concat(videos: list[Video]):
+     command_builder = ffmpeg_concat_build_command(videos)
+     cmd = command_builder["cmd"]
+     output_path = command_builder["output_path"]
+
+     logging.info(f"CMD: {' '.join(cmd)}")
+
+     ffmpeg_log = Config.CACHE_DIR / "ffmpeg.log"
+     with open(ffmpeg_log, "w", encoding="utf-8") as log_file:
+         print(cmd, file=log_file)
+         # run cmd
+         process = subprocess.Popen(
+             cmd,
+             stdout=subprocess.DEVNULL,
+             stderr=subprocess.PIPE,
+             text=True,
+             bufsize=1,
+         )
+
+         assert process.stderr is not None
+         total_duration = get_total_duration(videos)
+         progress, task = UI.concat_progress(total=total_duration)
+         last_progress = 0
+         UI.set_current_concat_progress(f"{videos[0].relative_path}", output_path)
+
+         for line in process.stderr:
+             line = line.strip()
+             if not line:
+                 continue
+
+             print(line, file=log_file)
+
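+             # ffmpeg's "-progress" reports out_time_ms in microseconds despite
+             # its name, hence the // 1000 conversion to milliseconds below.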
+             progress_key = "out_time_ms"
+             if line.startswith(progress_key):
+                 current_progress_str = line.replace(f"{progress_key}=", "").strip()
+                 try:
+                     current_progress_us = int(current_progress_str)
+                     current_progress_ms = current_progress_us // 1000
+                     delta = current_progress_ms - last_progress
+                     progress.advance(task, advance=delta)
+                     last_progress = current_progress_ms
+                 except Exception:
+                     pass
+
+     process.wait()
+     UI.close_concat_progress()
+
+     if process.returncode != 0:
+         UI.error(f"Failed to concat videos. See ffmpeg log file {ffmpeg_log}")
+         with open(ffmpeg_log, "r") as f:
+             lines = f.readlines()
+         logging.error("---FFMPEG LOG---")
+         for line in lines:
+             logging.error(line)
+         logging.error("---END FFMPEG LOG---")
+         return process.returncode
+
+     temp_output_path = output_path
+     new_output_path = temp_output_path.replace("tmp_", "")
+     os.replace(temp_output_path, new_output_path)
+     UI.info(f"Rename {temp_output_path} -> {new_output_path}")
+     return 0
rcdl/core/file_io.py ADDED
@@ -0,0 +1,34 @@
+ # core/file_io.py
+
+ import json
+
+
+ def write_json(path, data, mode="w"):
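+     # Note: only mode "w" produces a valid JSON file; appending would
+     # concatenate JSON documents.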
+     with open(path, mode) as f:
+         json.dump(data, f, indent=4)
+
+
+ def load_json(path) -> dict | list:
+     with open(path, "r") as f:
+         data = json.load(f)
+     return data
+
+
+ def load_txt(path) -> list[str]:
+     with open(path, "r") as f:
+         lines = f.readlines()
+     return [line.strip() for line in lines]
+
+
+ def write_txt(path, lines: list[str] | str, mode: str = "a"):
+     if isinstance(lines, str):
+         lines = [lines]
+
+     with open(path, mode) as f:
+         for line in lines:
+             if not line.endswith("\n"):
+                 f.write(line + "\n")
+             else:
+                 f.write(line)
rcdl/core/fuse.py ADDED
@@ -0,0 +1,118 @@
+ # core/fuse.py
+
+ import os
+
+ from rcdl.interface.ui import UI
+ from rcdl.core.db import DB
+ from rcdl.core.models import VideoStatus
+ from rcdl.core.config import Config
+ from rcdl.core.downloader_subprocess import ffmpeg_concat
+
+
+ def fuse_videos():
+     """Fuse multi-part videos: for each post whose parts are all downloaded,
+     concatenate them with ffmpeg and update the DB statuses."""
+
+     allowed_status = [
+         VideoStatus.DOWNLOADED,
+         VideoStatus.CONCAT_WIP,
+         VideoStatus.CONCAT_FAILED,
+     ]
+
+     if Config.DEBUG:
+         allowed_status.append(VideoStatus.DOWNLOADED)
+
+     # load db videos
+     with DB() as db:
+         videos = db.query_videos(status=allowed_status, min_part_number=1)
+
+     # get unique post ids
+     posts_ids = set()
+     for video in videos:
+         posts_ids.add(video.post_id)
+
+     with UI.progress_total_concat() as progress:
+         task = progress.add_task("Total concat", total=len(posts_ids))
+
+         for post_id in posts_ids:
+             UI.info(f"Looking at post_id: {post_id}")
+
+             # get all videos with the same post_id
+             with DB() as db:
+                 videos = db.query_videos(post_id=post_id)
+             if not videos:
+                 UI.error("SQL query failed.")
+                 progress.update(task, advance=1)
+                 continue
+
+             # check that every video of the post is fully downloaded
+             ok = True
+             for video in videos:
+                 if video.status not in allowed_status:
+                     ok = False
+                     break
+             if not ok:
+                 progress.update(task, advance=1)
+                 continue
+
+             # sort by part number
+             videos.sort(key=lambda v: int(v.part))
+
+             ok = True
+             for video in videos:
+                 # make sure the video file exists
+                 path = os.path.join(
+                     Config.creator_folder(video.creator_id), video.relative_path
+                 )
+                 if not os.path.exists(path):
+                     UI.error(f"Video @ {path} does not exist")
+                     ok = False
+
+                 # a lingering CONCAT_WIP status means a previous run likely crashed mid-concat
+                 if video.status == VideoStatus.CONCAT_WIP:
+                     UI.warning(
+                         f"Video '{video.relative_path}' has status 'CONCAT_WIP'. This is a bug and should not be possible."
+                     )
+
+             if not ok:
+                 progress.update(task, advance=1)
+                 continue
+
+             # update videos status in db to CONCAT_WIP so that,
+             # if the script crashes mid-concat, we will know
+             with DB() as db:
+                 for video in videos:
+                     db.set_status(video, VideoStatus.CONCAT_WIP)
+
+             result = 1
+             try:
+                 result = ffmpeg_concat(videos)
+             except Exception as e:
+                 UI.error(f"Failed concat due to: {e}")
+
+             # concat failed
+             if result != 0:
+                 with DB() as db:
+                     for video in videos:
+                         db.set_status(video, VideoStatus.CONCAT_FAILED)
+                 continue
+
+             # concat succeeded
+             with DB() as db:
+                 for video in videos:
+                     db.set_status(video, VideoStatus.CONCAT_DONE)
+
+             # update progress bar
+             progress.update(
+                 task,
+                 advance=1,
+                 description=f"Concatenated videos for post id {post_id}",
+             )
+
+             # remove part videos once the concat succeeded
+             with DB() as db:
+                 for video in videos:
+                     path = os.path.join(
+                         Config.creator_folder(video.creator_id), video.relative_path
+                     )
+                     try:
+                         os.remove(path)
+                         UI.info(f"Removed {path}")
+                         db.set_status(video, VideoStatus.REMOVED)
+                     except Exception as e:
+                         UI.error(f"Failed to remove {path} due to error: {e}")
rcdl/core/models.py ADDED
@@ -0,0 +1,56 @@
+ # core/models.py
+
+ from dataclasses import dataclass
+ from typing import Optional
+ from enum import Enum
+
+
+ class VideoStatus(Enum):
+     NOT_DOWNLOADED = "not_downloaded"
+     DOWNLOADED = "downloaded"
+     FAILED = "failed"
+     SKIPPED = "skipped"
+     IGNORED = "ignored"
+     REMOVED = "removed"
+     CONCAT_WIP = "concat_wip"  # concat in progress
+     CONCAT_DONE = "concat_done"
+     CONCAT_FAILED = "concat_failed"
+
+
+ class DiscoverStatus(Enum):
+     TO_BE_TREATED = "to_be_treated"
+     DOWNLOADED = "downloaded"
+     BLACKLISTED = "blacklisted"
+     WHITELISTED = "whitelisted"
+     DOWNLOAD_MORE = "download_more"
+
+
+ @dataclass
+ class Creator:
+     creator_id: str
+     service: str
+     domain: str
+     status: Optional[str]
+
+
+ @dataclass
+ class Video:
+     # important fields
+     post_id: str
+     creator_id: str
+     service: str
+     domain: str
+     relative_path: str
+     url: str
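+     # part index within the post: posts with several videos yield one
+     # Video per part, which the fuse command later concatenates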
+     part: int = 0
+
+     # metadata
+     published: Optional[str] = None
+     title: Optional[str] = None
+     substring: Optional[str] = None
+     downloaded_at: Optional[str] = None
+     file_size: Optional[float] = None
+
+     # status in rcdl
+     status: Optional[VideoStatus] = None
+     fail_count: int = 0