rcdl 2.2.2__py3-none-any.whl → 3.0.0b13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rcdl/core/downloader.py CHANGED
@@ -1,18 +1,32 @@
 # core/downloader.py
 
+"""
+Handle post and media downloads from the website.
+"""
+
 import logging
 import os
+import json
 
 import requests
 
-import rcdl.core.parser as parser
-from .api import URL
-from .config import Config
-from .models import Creator, Video, VideoStatus
-from .db import DB
-from .downloader_subprocess import ytdlp_subprocess
-from .file_io import write_json, load_json
-from rcdl.interface.ui import UI
+from rcdl.interface.ui import UI, NestedProgress
+from rcdl.core import parser
+from rcdl.core import adapters
+from rcdl.core.api import URL
+from rcdl.core.config import Config
+from rcdl.core.models import (
+    Creator,
+    Status,
+    Media,
+    Post,
+    FusedMedia,
+    FusedStatus,
+)
+from rcdl.core.db import DB
+from rcdl.core.downloader_subprocess import ytdlp_subprocess
+from rcdl.core.file_io import write_json, load_json
+from rcdl.utils import get_date_now, get_media_metadata
 
 
 class PostsFetcher:
@@ -33,14 +47,18 @@ class PostsFetcher:
 
     def _request_page(self, url: str) -> requests.Response:
         """Request a single page and return json dict"""
-        logging.info(f"RequestEngine url {url}")
+        logging.info("RequestEngine url %s", url)
         headers = URL.get_headers()
-        response = requests.get(url, headers=headers)
+        response = requests.get(url, headers=headers, timeout=Config.TIMEOUT)
         if response.status_code != 200:
-            logging.warning(f"Failed request {url}: {requests.status_codes}")
+            logging.warning("Failed request %s: %s", url, response.status_code)
         return response
 
-    def request(self, params: dict = {}):
+    def request(self, params: dict | None = None):
+        """Request multiple pages of a URL"""
+        if params is None:
+            params = {}
+
         with UI.progress_posts_fetcher(self.max_page) as progress:
             task = progress.add_task("Fetching posts", total=self.max_page)
 
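The new `request` signature also drops a mutable default argument: a `dict` default is created once at function definition time and shared by every call, so anything that mutates it leaks state into later calls. A minimal standalone sketch of the pitfall and the fix (hypothetical `buggy`/`fixed` names, not part of rcdl):

    def buggy(params: dict = {}):
        # the same dict object is reused on every call
        params["calls"] = params.get("calls", 0) + 1
        return params

    def fixed(params: dict | None = None):
        # a fresh dict is created per call when none is given
        if params is None:
            params = {}
        params["calls"] = params.get("calls", 0) + 1
        return params

    print(buggy())  # {'calls': 1}
    print(buggy())  # {'calls': 2}  <- state leaked from the first call
    print(fixed())  # {'calls': 1}
    print(fixed())  # {'calls': 1}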
@@ -53,7 +71,7 @@ class PostsFetcher:
                 # Dry run: no request actually made
                 if Config.DRY_RUN:
                     logging.debug(
-                        f"DRY-RUN posts fetcher {url} -> {self.json_path}"
+                        "DRY-RUN posts fetcher %s -> %s", url, self.json_path
                     )
                     self.page += 1
                     continue
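This hunk, like the earlier ones, swaps f-strings inside logging calls for %-style arguments: the template is only interpolated when a record is actually emitted, so suppressed levels cost almost nothing and the constant message template stays groupable by log tooling. A short sketch:

    import logging

    logging.basicConfig(level=logging.INFO)
    url = "https://example.com/posts"

    # f-string: formatted eagerly, even though DEBUG records are discarded here
    logging.debug(f"DRY-RUN posts fetcher {url}")

    # %-style: the template is stored as-is; interpolation is skipped
    # entirely for records that are never emitted
    logging.debug("DRY-RUN posts fetcher %s", url)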
@@ -61,13 +79,15 @@ class PostsFetcher:
                     response = self._request_page(url)
                     self.status = response.status_code
 
-                    # if the programm crash while doing requests, previous requests are still saved and not overwritten.
+                    # if the program crashes while doing requests,
+                    # previous requests are still saved and not overwritten.
                     if self.page > 0:
                         json_data = list(load_json(self.json_path))
                     else:
                         json_data = []
 
-                    # for discover command, response json is in a different format and contains 'posts'
+                    # for the discover command, the response json is in a
+                    # different format and contains 'posts'
                     if self.status == 200:
                         if "posts" in response.json():
                             json_data.extend(response.json()["posts"])
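The load-then-extend logic above persists every page to disk as it arrives, so a crash mid-run loses at most the current page rather than the whole fetch. A minimal sketch of the pattern outside the class (hypothetical `append_page` helper; the real code goes through the `load_json`/`write_json` helpers from `rcdl.core.file_io`):

    import json
    import os

    def append_page(path: str, page_items: list) -> None:
        """Persist each page as it arrives; a crash loses at most one page."""
        data = []
        if os.path.exists(path):
            # reload what previous pages already saved
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        data.extend(page_items)
        # rewrite the accumulated list so progress survives a crash
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f)

    append_page("posts.json", [{"id": 1}])
    append_page("posts.json", [{"id": 2}])  # posts.json now holds both pages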
@@ -79,70 +99,116 @@ class PostsFetcher:
                     progress.update(
                         task,
                         advance=1,
-                        description=f"Fetched {len(json_data)} posts (page {self.page + 1}/{self.max_page})",
+                        description=(
+                            f"Fetched {len(json_data)}"
+                            f" posts (page {self.page + 1}/{self.max_page})"
+                        ),
+                    )
+                except requests.RequestException as e:
+                    logging.error(
+                        "Failed to request %s (page: %s) due to: %s", url, self.page, e
+                    )
+                except json.JSONDecodeError as e:
+                    logging.error(
+                        "Failed to decode JSON response of request %s due to: %s",
+                        url,
+                        e,
                     )
-                except Exception as e:
-                    logging.error(f"Error in request {url} p{self.page}: {e}")
                 finally:
                     self.page += 1
 
 
-class VideoDownloader:
-    """Handle downloading a list of Videos and update DB status"""
+class MediaDownloader:
+    """Handle downloading a list of media and update DB status"""
 
     def __init__(self):
         pass
 
-    def _build_url(self, video: Video):
-        return URL.get_url_from_file(video.domain, video.url)
+    def _build_url(self, domain: str, url: str):
+        """Return the full url"""
+        return URL.get_url_from_file(domain, url)
 
-    def _build_output_path(self, video: Video, discover: bool = False):
-        if discover:
-            return os.path.join(Config.DISCOVER_DIR, video.relative_path)
+    def _build_full_path(self, user: str, media_path: str):
+        """Return the full path"""
+        return os.path.join(Config.creator_folder(user), media_path)
 
-        return os.path.join(
-            Config.creator_folder(video.creator_id), video.relative_path
-        )
+    def _media_exist(self, full_path: str):
+        """Check whether a file exists"""
+        return os.path.exists(full_path)
 
-    def _update_db_status(self, result: int, video: Video):
-        with DB() as d:
-            if result == 0:
-                d.set_status(video, VideoStatus.DOWNLOADED, fail_count=0)
-            else:
-                d.set_status(video, VideoStatus.FAILED, fail_count=video.fail_count + 1)
+    def _update_db(self, result: int, media: Media, full_path: str):
+        """Update db information"""
 
-    def downloads(
-        self, videos: list[Video], write_db: bool = True, discover: bool = False
-    ):
-        progress, task = UI.video_progress(total=len(videos))
-        try:
-            for video in videos:
-                url = self._build_url(video)
-                filepath = self._build_output_path(video, discover=discover)
-
-                UI.set_current_video_progress(
-                    f"{video.creator_id}@({video.service})", video.relative_path
-                )
+        # media failed to download
+        if result != 0:
+            media.fail_count += 1
+        else:
+            duration, file_size, checksum = get_media_metadata(full_path)
+            media.duration = duration
+            media.status = Status.DOWNLOADED
+            media.checksum = checksum
+            media.created_at = get_date_now()
+            media.file_size = file_size
 
-                if Config.DRY_RUN:
-                    UI.debug(f"Dry run: dl {video.creator_id} @ {filepath}")
-                    progress.advance(task)
-                    continue
+        with DB() as db:
+            db.update_media(media)
+
+    def download(self, medias: list[Media], max_fail_count: int | None = None):
+        """Download all media with PENDING status"""
+        # init progress bar
+        progress = NestedProgress(UI.console)
+        progress.start(
+            total=len(medias),
+            total_label="Downloading videos",
+            current_label="Current video",
+        )
 
-                if os.path.exists(filepath):
-                    UI.warning(
-                        f"Video {url} @ {filepath} already exists. Possible DB problem"
-                    )
-                    progress.advance(task)
-                    continue
+        max_try = Config.MAX_FAIL_COUNT
+        if max_fail_count is not None:
+            max_try = max_fail_count
+        for media in medias:
+            progress.start_current("Downloading", total=2)
+            if media.fail_count > max_try:
+                UI.warning(
+                    f"Video skipped due to too many failed download attempts ({media.fail_count})"
+                )
+                progress.advance_total()
+                continue
+
+            # match post info from db with post_id to get user/creator_id
+            with DB() as db:
+                post = db.query_post_by_id(media.post_id)
+            if post is None:
+                UI.error(f"Could not match media post_id {media.post_id} with a post")
+                progress.advance_total()
+                continue
+
+            # build the full url and full path
+            url = self._build_url(post.domain, media.url)
+            full_path = self._build_full_path(post.user, media.file_path)
+
+            # update progress bar info (video currently downloading)
+            progress.set_status(f"{post.user}@({post.service}) -> ", media.file_path)
+
+            # check the video does not already exist
+            if self._media_exist(full_path):
+                UI.warning(
+                    f"Video {url} @ {full_path} already exists. Possible DB problem"
+                )
+                self._update_db(0, media, full_path)
+                progress.advance_total()
+                continue
 
-                result = ytdlp_subprocess(url, filepath)
-                if write_db:
-                    self._update_db_status(result, video)
+            # dry run: no actual download, skip the rest
+            if Config.DRY_RUN:
+                UI.debug(f"(dry-run) dl {post.user}@{full_path} from {url}")
+                progress.advance_total()
+                continue
 
-                progress.advance(task)
-        finally:
-            UI.close_video_progress()
+            result = ytdlp_subprocess(url, full_path)
+            self._update_db(result, media, full_path)
+            progress.advance_total()
+        progress.close()
 
 
 def fetch_posts_by_tag(tag: str, max_page: int = Config.DEFAULT_MAX_PAGE) -> dict:
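The new `download` loop resolves each `Media` back to its parent `Post` for the user and domain, enforces the retry cap, and records duration/size/checksum on success. A hedged usage sketch built only from names in this diff (assumes media rows were already inserted by `refresh_creators_videos`):

    from rcdl.core.db import DB
    from rcdl.core.models import Status
    from rcdl.core.downloader import MediaDownloader

    with DB() as db:
        pending = db.query_media_by_status(Status.PENDING)

    downloader = MediaDownloader()
    # override Config.MAX_FAIL_COUNT with a per-run retry cap of 3
    downloader.download(pending, max_fail_count=3)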
@@ -158,108 +224,84 @@ def fetch_posts_by_tag(tag: str, max_page: int = Config.DEFAULT_MAX_PAGE) -> dic
 def fetch_posts_by_creator(creator: Creator) -> dict:
     """Helper function to get all posts from a creator"""
     url = URL.get_creator_post_wo_param(creator)
-    path = Config.cache_file(f"{creator.creator_id}_{creator.service}")
+    path = Config.cache_file(f"{creator.id}_{creator.service}")
     pf = PostsFetcher(url, str(path))
     pf.request()
 
     return load_json(path)
 
 
+def get_fuses_from_post(posts: list[Post]) -> list[FusedMedia]:
+    """Build fuse entries for posts whose videos come in multiple parts"""
+    fuses: list[FusedMedia] = []
+    for post in posts:
+        json_post = json.loads(post.raw_json)
+        total_parts = len(parser.extract_video_urls(json_post))
+        if total_parts > 1:
+            fuses.append(
+                FusedMedia(
+                    id=post.id,
+                    duration=0,
+                    total_parts=total_parts,
+                    status=FusedStatus.PENDING,
+                    checksum="",
+                    file_path=parser.get_filename_fuse(post),
+                    created_at="",
+                    updated_at="",
+                    file_size=0,
+                    fail_count=0,
+                )
+            )
+    return fuses
+
+
 def refresh_creators_videos():
     """
-    Command refresh
     For each creator:
-    - get all posts to a .json
-    - from the .json filter to keep only the posts with videos in it
-    - convert posts dict to Videos
-    - update the DB
+    - get posts with videos & update the posts DB
+    - extract all medias & update the medias DB
+    - extract fuse groups & update the fuses DB
     """
     creators = parser.get_creators()
     for creator in creators:
-        UI.info(f"Creator {creator.creator_id} from {creator.service}")
+        UI.info(f"Creator {creator.id} from {creator.service}")
 
+        # request all posts by creator
         fetch_posts_by_creator(creator)
+
+        # only keep posts with video urls (mp4, m4v, ...)
         posts_with_videos = parser.filter_posts_with_videos_from_json(
-            str(Config.cache_file(f"{creator.creator_id}_{creator.service}"))
+            str(Config.cache_file(f"{creator.id}_{creator.service}"))
         )
-        all_videos = parser.convert_posts_to_videos(posts_with_videos)
 
-        UI.info(
-            f"Found {len(all_videos)} videos from {len(posts_with_videos)} posts with videos url"
-        )
+        # convert all json dicts into the Post model
+        posts = adapters.json_posts_to_posts(posts_with_videos)
 
-        # put all videos in db
+        # insert posts in db
         with DB() as db:
-            db.insert_videos(all_videos)
-
-
-def download_videos_to_be_dl():
-    """
-    Command dlsf
-    Download videos in db with status TO_BE_DOWNLOADED OR (FAILED & fail_count < Config.)
-    """
-    with DB() as db:
-        videos = db.query_videos(pending=True)
-
-    vd = VideoDownloader()
-    vd.downloads(videos, write_db=True, discover=False)
-
-
-# --- --- --- --- --- DISCOVER --- --- --- --- ---
-def discover(tag: str, max_page: int):
-    discover_creators(tag, max_page)
-    dl_video_from_discover_creators()
+            db.insert_posts(posts)
 
+        # find all multi-part videos and update db
+        fuses = get_fuses_from_post(posts)
+        with DB() as db:
+            db.insert_fused_media(fuses)
 
-def discover_creators(tag: str, max_page: int):
-    # download posts with searched tags
-    posts = fetch_posts_by_tag(tag, max_page)
-    logging.info(f"Find {len(posts)} post")
-
-    path = str(Config.cache_file(tag))
-    posts_with_videos = parser.filter_posts_with_videos_from_json(path)
-    logging.info(f"Find {len(posts_with_videos)} posts with videos")
-
-    creators = parser.get_creators_from_posts(posts_with_videos)
-
-    # save to csv
-    file = os.path.join(Config.DISCOVER_DIR, "discover.csv")
-    with open(file, "w") as f:
-        for c in creators:
-            line = f"{c.creator_id};{c.service};{c.domain};{'to_be_treated'}\n"
-            f.write(line)
-
+        # convert all posts into videos
+        medias = []
+        for post in posts:
+            medias.extend(adapters.post_to_videos(post))
 
-def dl_video_from_discover_creators():
-    # load csv
-    file = os.path.join(Config.DISCOVER_DIR, "discover.csv")
-    with open(file, "r") as f:
-        lines = f.readlines()
+        # insert videos in db
+        with DB() as db:
+            db.insert_medias(medias)
 
-    creators = []
-    for line in lines:
-        line = line.replace("\n", "").strip().split(";")
-        creators.append(
-            Creator(creator_id=line[0], service=line[1], domain=line[2], status=line[3])
-        )
 
-    # get posts
-    for creator in creators:
-        response = requests.get(
-            URL.get_creator_post_wo_param(creator), headers=URL.get_headers()
-        )
-        if response.status_code != 200:
-            print(f"ERROR - Request {URL.get_creator_post_wo_param(creator)}")
-        response_posts = response.json()
-        posts = parser.filter_posts_with_videos_from_list(response_posts)
-        print(f"{len(posts)} found")
-        if len(posts) > 5:
-            posts = posts[0:5]
-            print("Limited posts to 5")
+def download_videos_to_be_dl(max_fail_count: int | None):
+    """
+    Download all media with PENDING status in the DB
+    """
+    with DB() as db:
+        medias = db.query_media_by_status(Status.PENDING)
 
-        for post in posts:
-            urls = parser.extract_video_urls(post)
-            url = URL.get_url_from_file(creator.domain, urls[0])
-            filename = f"{post['user']}_{post['id']}.mp4"
-            filepath = os.path.join(Config.DISCOVER_DIR, filename)
-            ytdlp_subprocess(url, filepath)
+    media_downloader = MediaDownloader()
+    media_downloader.download(medias, max_fail_count=max_fail_count)
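Taken together, 3.0.0 replaces the 2.x discover/CSV flow with two passes over the database. A sketch of the end-to-end pipeline using only this module's public functions:

    from rcdl.core import downloader

    # pass 1: fetch posts per creator, populate the posts/medias/fuses tables
    downloader.refresh_creators_videos()

    # pass 2: download everything still PENDING
    # (None keeps the Config.MAX_FAIL_COUNT retry cap)
    downloader.download_videos_to_be_dl(max_fail_count=None)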