rcdl 3.0.0b18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rcdl might be problematic.
- rcdl/__init__.py +10 -0
- rcdl/__main__.py +37 -0
- rcdl/core/__init__.py +0 -0
- rcdl/core/adapters.py +241 -0
- rcdl/core/api.py +76 -0
- rcdl/core/config.py +212 -0
- rcdl/core/db.py +283 -0
- rcdl/core/db_queries.py +97 -0
- rcdl/core/downloader.py +307 -0
- rcdl/core/downloader_subprocess.py +366 -0
- rcdl/core/file_io.py +41 -0
- rcdl/core/fuse.py +127 -0
- rcdl/core/models.py +105 -0
- rcdl/core/opti.py +90 -0
- rcdl/core/parser.py +282 -0
- rcdl/gui/__init__.py +0 -0
- rcdl/gui/__main__.py +5 -0
- rcdl/gui/db_viewer.py +41 -0
- rcdl/gui/gui.py +54 -0
- rcdl/gui/video_manager.py +170 -0
- rcdl/interface/__init__.py +0 -0
- rcdl/interface/cli.py +216 -0
- rcdl/interface/ui.py +194 -0
- rcdl/utils.py +180 -0
- rcdl-3.0.0b18.dist-info/METADATA +122 -0
- rcdl-3.0.0b18.dist-info/RECORD +28 -0
- rcdl-3.0.0b18.dist-info/WHEEL +4 -0
- rcdl-3.0.0b18.dist-info/entry_points.txt +3 -0
rcdl/core/db.py
ADDED
@@ -0,0 +1,283 @@
# core/db.py

"""
Handle the SQL database
"""

import sqlite3

from rcdl.core import adapters
from rcdl.core import db_queries as queries
from rcdl.core.config import Config
from rcdl.core.models import Post, Media, Status, FusedMedia, FusedStatus
from rcdl.utils import get_date_now

from rcdl.interface.ui import UI


class DB:
    """Handle all sqlite database commands"""

    def __init__(self):
        self.conn = sqlite3.connect(Config.DB_PATH)
        self.conn.row_factory = sqlite3.Row

    def __enter__(self):
        """Required to use DB as a context manager (`with DB() as db:`)"""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Required to use DB as a context manager (`with DB() as db:`)"""
        self.close()

    def close(self):
        """Properly close the database"""
        self.conn.close()

    def init_database(self):
        """Create tables (posts, fuses, medias) if they don't exist"""
        self.conn.execute(queries.CREATE_POSTS_TABLE)
        self.conn.execute(queries.CREATE_MEDIAS_TABLE)
        self.conn.execute(queries.CREATE_FUSE_TABLE)

        self.conn.commit()

    def get_nb_per_status(self):
        """Return an info dict per table with the number of entries per status
        info['table1']['status1'] = X
        ...
        """
        info = {}
        info["medias"] = {}
        info["fuses"] = {}
        info["posts"] = 0
        for status in Status:
            info["medias"][status] = len(self.query_media_by_status(status))
        for status in FusedStatus:
            info["fuses"][status] = len(self.query_fuses_by_status(status))

        cur = self.conn.execute("SELECT COUNT(*) AS count FROM posts")
        row = cur.fetchone()
        info["posts"] = row["count"] if row else 0
        return info

    def query_post_by_id(self, _id: str) -> Post | None:
        """Get a post from its post id"""
        row = self.conn.execute(queries.QUERY_POST_ID, (_id,)).fetchone()

        UI.debug(f"{queries.QUERY_POST_ID} {_id} returned {row}")

        if row is None:
            return None

        return adapters.row_to_post(row)

    def query_post_by_user(self, user: str) -> list[Post]:
        """Get all posts of a user"""
        cur = self.conn.cursor()
        cur.execute(queries.QUERY_POST_USER, (user,))
        rows = cur.fetchall()

        UI.debug(f"{queries.QUERY_POST_USER} {user} returned {len(rows)} results")

        return adapters.rows_to_posts(rows)

    def query_media_by_status(self, status: Status) -> list[Media]:
        """Get all medias with the specified status"""
        rows = self.conn.execute(queries.QUERY_MEDIA_STATUS, (status.value,)).fetchall()
        UI.debug(
            f"{queries.QUERY_MEDIA_STATUS} {status.value} returned {len(rows)} results"
        )

        return adapters.rows_to_medias(rows)

    def query_medias_by_status_sorted(
        self,
        status: Status | list[Status],
        sort_by: str | None = None,
        ascending: bool = True,
    ) -> list[Media]:
        """Get all medias with the specified status (one or multiple).
        Return them sorted by column, ascending or descending"""

        # validate sort column: only whitelisted names may be
        # interpolated into the ORDER BY clause
        valid_columns = {
            "id",
            "post_id",
            "service",
            "url",
            "duration",
            "sequence",
            "status",
            "checksum",
            "file_path",
            "created_at",
            "updated_at",
            "file_size",
            "fail_count",
        }
        order_clause = ""
        if sort_by:
            if sort_by not in valid_columns:
                UI.error(f"Invalid sort column: {sort_by}")
            else:
                order_clause = f"ORDER BY {sort_by} {'ASC' if ascending else 'DESC'}"

        # status filter
        if isinstance(status, Status):
            status = [status]

        status_values = [s.value if isinstance(s, Status) else s for s in status]
        placeholders = ", ".join("?" for _ in status_values)

        sql = f"SELECT * FROM medias WHERE status IN ({placeholders}) {order_clause}"
        rows = self.conn.execute(sql, status_values).fetchall()

        UI.debug(
            f"Queried medias with status={status_values}, sorted by {sort_by}, ascending={ascending}, {len(rows)} results"
        )

        return adapters.rows_to_medias(rows)

    def query_media_by_post_id(self, _id: str) -> list[Media]:
        """Get all medias from the same post by post id"""
        rows = self.conn.execute(queries.QUERY_MEDIA_ID, (_id,)).fetchall()
        UI.debug(f"{queries.QUERY_MEDIA_ID} {_id} returned {len(rows)} results")
        return adapters.rows_to_medias(rows)

    def query_fuses_by_status(self, status: FusedStatus) -> list[FusedMedia]:
        """Get all fused_media with the specified status"""
        rows = self.conn.execute(queries.QUERY_FUSES_STATUS, (status.value,)).fetchall()
        UI.debug(
            f"{queries.QUERY_FUSES_STATUS} {status.value} returned {len(rows)} results"
        )

        return adapters.rows_to_fuses(rows)

    def query_fuses_by_id(self, _id: str) -> FusedMedia | None:
        """Get a fuse group by its unique post id"""
        row = self.conn.execute(queries.QUERY_FUSES_ID, (_id,)).fetchone()
        UI.debug(f"{queries.QUERY_FUSES_ID} {_id} returned {row}")
        if row is None:
            return None
        return adapters.row_to_fused_media(row)

    def insert_posts(self, posts: list[Post] | Post):
        """Add post to DB if it does not already exist (UNIQUE post_id)"""
        if isinstance(posts, Post):
            posts = [posts]

        values = []
        for post in posts:
            values.append(
                (
                    post.id,
                    post.user,
                    post.service,
                    post.domain,
                    post.published,
                    post.json_hash,
                    post.raw_json,
                    post.fetched_at,
                )
            )

        with self.conn:
            self.conn.executemany(queries.INSERT_POST, values)

        # note: total_changes is cumulative over the connection's lifetime
        inserted = self.conn.total_changes
        UI.debug(f"Inserted {inserted} new posts out of {len(posts)} total posts")

    def insert_medias(self, medias: list[Media] | Media):
        """Insert media into the db if it does not already exist (UNIQUE post_id, url)"""
        if isinstance(medias, Media):
            medias = [medias]

        values = []
        for media in medias:
            values.append(
                (
                    media.post_id,
                    media.service,
                    media.url,
                    media.duration,
                    media.sequence,
                    media.status.value,
                    media.checksum,
                    media.file_path,
                    media.created_at,
                    get_date_now(),
                    media.file_size,
                    media.fail_count,
                )
            )

        with self.conn:
            self.conn.executemany(queries.INSERT_MEDIA, values)

        inserted = self.conn.total_changes
        UI.debug(f"Inserted {inserted} new media out of {len(medias)} total medias")

    def update_media(self, media: Media):
        """Update a media entry in the db. Find it by post_id & url, and update:
        - duration, file_size, checksum, status, created_at, updated_at, fail_count"""
        params = (
            media.duration,
            media.file_size,
            media.checksum,
            media.status.value,
            media.created_at,
            get_date_now(),
            media.fail_count,
            media.post_id,
            media.url,
        )
        with self.conn:
            self.conn.execute(queries.UPDATE_MEDIA, params)
        UI.debug(f"Updated media {media.post_id} / {media.url}")

    def insert_fused_media(self, fuses: list[FusedMedia] | FusedMedia):
        """Insert fused_media into the db if it does not already exist (UNIQUE post_id)"""
        if isinstance(fuses, FusedMedia):
            fuses = [fuses]

        values = []
        for fuse in fuses:
            values.append(
                (
                    fuse.id,
                    fuse.duration,
                    fuse.total_parts,
                    fuse.status.value,
                    fuse.checksum,
                    fuse.file_path,
                    fuse.created_at,
                    get_date_now(),
                    fuse.file_size,
                    fuse.fail_count,
                )
            )

        with self.conn:
            self.conn.executemany(queries.INSERT_FUSED_MEDIA, values)

        inserted = self.conn.total_changes
        UI.debug(
            f"Inserted {inserted} new fused_media out of {len(fuses)} total fused_media"
        )

    def update_fuse(self, fuse: FusedMedia):
        """Update fuse group: duration, status, checksum,
        created_at, updated_at, file_size, fail_count
        """
        params = (
            fuse.duration,
            fuse.status.value,
            fuse.checksum,
            fuse.created_at,
            get_date_now(),
            fuse.file_size,
            fuse.fail_count,
            fuse.id,
        )
        with self.conn:
            self.conn.execute(queries.UPDATE_FUSE, params)
        UI.debug(f"Updated fuse {fuse.id} / {fuse.file_path}")
rcdl/core/db_queries.py
ADDED
@@ -0,0 +1,97 @@
# core/db_queries.py
"""
Hold the SQL query strings
"""

CREATE_MEDIAS_TABLE = """
CREATE TABLE IF NOT EXISTS medias (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    post_id TEXT,
    service TEXT,
    url TEXT,
    duration REAL,
    sequence INTEGER,
    status TEXT,
    checksum TEXT,
    file_path TEXT,
    created_at DATETIME,
    updated_at DATETIME,
    file_size INTEGER,
    fail_count INTEGER,
    UNIQUE(post_id, url)
)
"""

CREATE_POSTS_TABLE = """
CREATE TABLE IF NOT EXISTS posts (
    id TEXT PRIMARY KEY,
    user TEXT,
    service TEXT,
    domain TEXT,
    published DATETIME,
    json_hash TEXT,
    raw_json JSON,
    fetched_at DATETIME
)
"""

CREATE_FUSE_TABLE = """
CREATE TABLE IF NOT EXISTS fuses (
    id TEXT PRIMARY KEY,
    duration INTEGER,
    total_parts INTEGER,
    status TEXT,
    checksum TEXT,
    file_path TEXT,
    created_at DATETIME,
    updated_at DATETIME,
    file_size INTEGER,
    fail_count INTEGER
)
"""

INSERT_POST = """
INSERT OR IGNORE INTO posts (
    id, user, service, domain, published,
    json_hash, raw_json, fetched_at
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
"""

INSERT_FUSED_MEDIA = """
INSERT OR IGNORE INTO fuses (
    id, duration, total_parts, status, checksum,
    file_path, created_at, updated_at, file_size, fail_count
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

INSERT_MEDIA = """
INSERT OR IGNORE INTO medias (
    post_id, service, url, duration, sequence, status,
    checksum, file_path, created_at, updated_at, file_size, fail_count
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

UPDATE_MEDIA = """
UPDATE medias
SET duration = ?, file_size = ?, checksum = ?, status = ?,
    created_at = ?, updated_at = ?, fail_count = ?
WHERE post_id = ? AND url = ?
"""

UPDATE_FUSE = """
UPDATE fuses
SET duration = ?, status = ?, checksum = ?,
    created_at = ?, updated_at = ?, file_size = ?,
    fail_count = ?
WHERE id = ?
"""

QUERY_POST_ID = "SELECT * FROM posts WHERE id = ?"
QUERY_POST_USER = "SELECT * FROM posts WHERE user = ?"
QUERY_MEDIA_STATUS = "SELECT * FROM medias WHERE status = ?"
QUERY_MEDIA_ID = "SELECT * FROM medias WHERE post_id = ?"
QUERY_FUSES_STATUS = "SELECT * FROM fuses WHERE status = ?"
QUERY_FUSES_ID = "SELECT * FROM fuses WHERE id = ?"
rcdl/core/downloader.py
ADDED
@@ -0,0 +1,307 @@
# core/downloader.py

"""
Handle post and media downloads from the website
"""

import logging
import os
import json

import requests

from rcdl.interface.ui import UI, NestedProgress
from rcdl.core import parser
from rcdl.core import adapters
from rcdl.core.api import URL
from rcdl.core.config import Config
from rcdl.core.models import (
    Creator,
    Status,
    Media,
    Post,
    FusedMedia,
    FusedStatus,
)
from rcdl.core.db import DB
from rcdl.core.downloader_subprocess import ytdlp_subprocess
from rcdl.core.file_io import write_json, load_json
from rcdl.utils import get_date_now, get_media_metadata


class PostsFetcher:
    """
    Fetch posts from the api. Save as JSON. Handle multi-page requests
    """

    def __init__(
        self, url: str, json_path: str, max_page: int = Config.DEFAULT_MAX_PAGE
    ):
        self.url = url
        self.json_path = json_path

        self.page = 0
        self.max_page = max_page

        self.status = 200

    def _request_page(self, url: str) -> requests.Response:
        """Request a single page and return the response"""
        logging.info("RequestEngine url %s", url)
        headers = URL.get_headers()
        response = requests.get(url, headers=headers, timeout=Config.TIMEOUT)
        if response.status_code != 200:
            logging.warning("Failed request %s: %s", url, response.status_code)
        return response

    def request(self, params: dict | None = None):
        """Request multiple pages of an url"""
        if params is None:
            params = {}

        with UI.progress_posts_fetcher(self.max_page) as progress:
            task = progress.add_task("Fetching posts", total=self.max_page)

            while self.status == 200 and self.page < self.max_page:
                o = self.page * Config.POST_PER_PAGE
                params["o"] = o
                url = URL.add_params(self.url, params)

                try:
                    # Dry run: no request actually made
                    if Config.DRY_RUN:
                        logging.debug(
                            "DRY-RUN posts fetcher %s -> %s", url, self.json_path
                        )
                        # the finally clause below advances the page
                        continue

                    response = self._request_page(url)
                    self.status = response.status_code

                    # if the program crashes while doing requests,
                    # previous requests are still saved and not overwritten.
                    if self.page > 0:
                        json_data = list(load_json(self.json_path))
                    else:
                        json_data = []

                    # for the discover command, the response json is in a
                    # different format and contains 'posts'
                    if self.status == 200:
                        if "posts" in response.json():
                            json_data.extend(response.json()["posts"])
                        else:
                            json_data.extend(response.json())

                    write_json(self.json_path, json_data, mode="w")

                    progress.update(
                        task,
                        advance=1,
                        description=(
                            f"Fetched {len(json_data)}"
                            f" posts (page {self.page + 1}/{self.max_page})"
                        ),
                    )
                except requests.RequestException as e:
                    logging.error(
                        "Failed to request %s (page: %s) due to: %s", url, self.page, e
                    )
                except json.JSONDecodeError as e:
                    logging.error(
                        "Failed to decode JSON response of request %s due to: %s",
                        url,
                        e,
                    )
                finally:
                    self.page += 1


class MediaDownloader:
    """Handle downloading a list of media and update DB status"""

    def __init__(self):
        pass

    def _build_url(self, domain: str, url: str):
        """Return full url"""
        return URL.get_url_from_file(domain, url)

    def _build_full_path(self, user: str, media_path: str):
        """Return full path"""
        return os.path.join(Config.creator_folder(user), media_path)

    def _media_exist(self, full_path: str):
        """Check a file exists"""
        return os.path.exists(full_path)

    def _update_db(self, result: int, media: Media, full_path: str):
        """Update db information"""

        # video failed to download
        if result != 0:
            media.fail_count += 1
        else:
            duration, file_size, checksum = get_media_metadata(full_path)
            media.duration = duration
            media.status = Status.DOWNLOADED
            media.checksum = checksum
            media.created_at = get_date_now()
            media.file_size = file_size

        with DB() as db:
            db.update_media(media)

    def download(self, medias: list[Media], max_fail_count: int | None = None):
        """Download all medias with PENDING status"""
        # init progress bar
        progress = NestedProgress(UI.console)
        progress.start(
            total=len(medias),
            total_label="Downloading videos",
            current_label="Current video",
        )

        max_try = Config.MAX_FAIL_COUNT
        if max_fail_count is not None:
            max_try = max_fail_count
        for media in medias:
            progress.start_current("Downloading", total=2)
            if media.fail_count > max_try:
                UI.warning(
                    f"Video skipped due to too many failed download attempts ({media.fail_count})"
                )
                progress.advance_total()
                continue

            # match post info from db with post_id to get user/creator_id
            with DB() as db:
                post = db.query_post_by_id(media.post_id)
                if post is None:
                    UI.error(f"Could not match media post_id {media.post_id} with a post")
                    progress.advance_total()
                    continue

            # build full url and full path
            url = self._build_url(post.domain, media.url)
            full_path = self._build_full_path(post.user, media.file_path)

            # update progress bar info (video in download info)
            progress.set_status(f"{post.user}@({post.service}) -> ", media.file_path)

            # check video does not already exist
            if self._media_exist(full_path):
                UI.warning(
                    f"Video {url} @ {full_path} already exists. Possible DB problem"
                )
                self._update_db(0, media, full_path)
                progress.advance_total()
                continue

            # dry run: no actual download, skip rest of the loop
            if Config.DRY_RUN:
                UI.debug(f"(dry-run) dl {post.user}@{full_path} from {url}")
                progress.advance_total()
                continue

            result = ytdlp_subprocess(url, full_path)
            self._update_db(result, media, full_path)
            progress.advance_total()
        progress.close()


def fetch_posts_by_tag(tag: str, max_page: int = Config.DEFAULT_MAX_PAGE) -> dict:
    """Helper function to get all posts from a search result"""
    url = URL.get_posts_page_url_wo_param()
    path = Config.cache_file(tag)
    pf = PostsFetcher(url, str(path), max_page=max_page)
    pf.request(params={"tag": tag})

    return load_json(path)


def fetch_posts_by_creator(creator: Creator) -> dict:
    """Helper function to get all posts from a creator"""
    url = URL.get_creator_post_wo_param(creator)
    path = Config.cache_file(f"{creator.id}_{creator.service}")
    pf = PostsFetcher(url, str(path))
    pf.request()

    return load_json(path)


def get_fuses_from_post(posts: list[Post]) -> list[FusedMedia]:
    """Update data on the fuses database table for videos to be fused"""
    fuses: list[FusedMedia] = []
    for post in posts:
        json_post = json.loads(post.raw_json)
        total_parts = len(parser.extract_video_urls(json_post))
        if total_parts > 1:
            fuses.append(
                FusedMedia(
                    id=post.id,
                    duration=0,
                    total_parts=total_parts,
                    status=FusedStatus.PENDING,
                    checksum="",
                    file_path=parser.get_filename_fuse(post),
                    created_at="",
                    updated_at="",
                    file_size=0,
                    fail_count=0,
                )
            )
    return fuses


def refresh_creators_videos():
    """
    For each creator:
    - get posts with videos & update posts DB
    - extract all medias & update medias DB
    - extract fuse groups & update fuses DB
    """
    creators = parser.get_creators()
    for creator in creators:
        UI.info(f"Creator {creator.id} from {creator.service}")

        # request all posts by creator
        fetch_posts_by_creator(creator)

        # only keep posts with video urls (mp4, m4v, ...)
        posts_with_videos = parser.filter_posts_with_videos_from_json(
            str(Config.cache_file(f"{creator.id}_{creator.service}"))
        )

        # convert all json dicts into Post models
        posts = adapters.json_posts_to_posts(posts_with_videos)

        # insert posts in db
        with DB() as db:
            db.insert_posts(posts)

        # find all multiple-part videos and update db
        fuses = get_fuses_from_post(posts)
        with DB() as db:
            db.insert_fused_media(fuses)

        # convert all posts into videos
        medias = []
        for post in posts:
            medias.extend(adapters.post_to_videos(post))

        # insert videos in db
        with DB() as db:
            db.insert_medias(medias)


def download_videos_to_be_dl(max_fail_count: int | None):
    """
    Download all media with PENDING status in DB
    """
    with DB() as db:
        medias = db.query_media_by_status(Status.PENDING)

    media_downloader = MediaDownloader()
    media_downloader.download(medias, max_fail_count=max_fail_count)