PyPI - rcdl - Versions diffs - 2.2.2__py3-none-any.whl - Mend

rcdl 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

rcdl/__init__.py +5 -0
rcdl/__main__.py +25 -0
rcdl/core/api.py +54 -0
rcdl/core/config.py +93 -0
rcdl/core/db.py +235 -0
rcdl/core/db_queries.py +66 -0
rcdl/core/downloader.py +265 -0
rcdl/core/downloader_subprocess.py +190 -0
rcdl/core/file_io.py +34 -0
rcdl/core/fuse.py +118 -0
rcdl/core/models.py +56 -0
rcdl/core/parser.py +280 -0
rcdl/interface/cli.py +136 -0
rcdl/interface/ui.py +193 -0
rcdl/scripts/migrate_creators_json_txt.py +37 -0
rcdl/scripts/migrate_old_format_to_db.py +188 -0
rcdl/scripts/upload_pypi.py +98 -0
rcdl/utils.py +11 -0
rcdl-2.2.2.dist-info/METADATA +89 -0
rcdl-2.2.2.dist-info/RECORD +22 -0
rcdl-2.2.2.dist-info/WHEEL +4 -0
rcdl-2.2.2.dist-info/entry_points.txt +3 -0

rcdl/core/parser.py ADDED Viewed

@@ -0,0 +1,280 @@
+# core/parser.py
+import logging
+from pathvalidate import sanitize_filename
+from .models import Video, VideoStatus, Creator
+from .file_io import load_json, load_txt, write_txt
+from .config import Config
+from rcdl.interface.ui import UI
+# Service names that get_domain() resolves to the "coomer" domain.
+COOMER_PAYSITES = [
+    "onlyfans",
+    "fansly",
+    "candfans",
+]
+# Service names that get_domain() resolves to the "kemono" domain.
+KEMONO_PAYSITES = [
+    "patreon", "fanbox", "fantia", "boosty",
+    "gumroad", "subscribestar", "dlsite",
+]
+def get_domain(arg: str | dict | Video) -> str:
+    """Resolve the domain ("coomer" or "kemono") a service belongs to
+    arg is one of: a service name (str), a post (dict, its "service" key
+    is read) or a models.Video (its .service attribute is read).
+    Logs an error and returns "" when the service is in neither list.
+    """
+    # Reduce the three accepted input shapes to a plain service name.
+    if isinstance(arg, dict):
+        service = arg["service"]
+    elif isinstance(arg, Video):
+        service = arg.service
+    else:
+        service = arg
+    known_domains = (("coomer", COOMER_PAYSITES), ("kemono", KEMONO_PAYSITES))
+    for domain, paysites in known_domains:
+        if service in paysites:
+            return domain
+    logging.error(f"Service {service} not associated to any domain")
+    return ""
+def get_title(post: dict) -> str:
+    """Return a filename-safe title for a post(dict)
+    post["title"] is used unless it is the empty string; in that case the
+    first of post["content"] / post["substring"] that exists is used.
+    The chosen text is passed through sanitize_filename.
+    """
+    title = post["title"]
+    if title != "":
+        return sanitize_filename(title)
+    for fallback_key in ("content", "substring"):
+        if fallback_key in post:
+            return sanitize_filename(post[fallback_key])
+    # no fallback available: sanitize the empty title itself
+    return sanitize_filename(title)
+def get_date(post: dict) -> str:
+    """Extract date from a post(dict)
+    Returns the first 10 characters of post["published"], or of
+    post["added"] when "published" is unusable. A key that is missing,
+    None or empty is skipped rather than sliced, so a null timestamp no
+    longer raises TypeError. When neither key is usable an error is
+    logged and "NA" is returned.
+    """
+    for key in ("published", "added"):
+        stamp = post.get(key)
+        if stamp:
+            return stamp[0:10]
+    # .get: the error path must not raise on a post without an id
+    logging.error(f"Could not extract date from {post.get('id')}")
+    return "NA"
+def get_part(post: dict, url: str) -> int:
+    """
+    Part number of url among the video urls of post.
+    A post can hold several video urls; each one is a part and the part
+    number is the position of url in extract_video_urls(post).
+    Returns 0 when the post has exactly one video url (without comparing
+    it to url), and -1 (after logging an error) when url is not found.
+    """
+    candidates = extract_video_urls(post)
+    if len(candidates) == 1:
+        return 0
+    for index, candidate in enumerate(candidates):
+        if candidate == url:
+            return index
+    logging.error(
+        f"Could not extract part number for post id {post['id']} with url {url}"
+    )
+    return -1
+def get_filename(post: dict, url: str) -> str:
+    """Build the filename '<date>_<title>_p<part>.mp4' for one video url of a post
+    In the '<date>_<title>' part, single quotes become spaces and double
+    quotes are removed.
+    """
+    title = get_title(post)
+    date = get_date(post)
+    part = get_part(post, url)
+    stem = f"{date}_{title}"
+    stem = stem.replace("'", " ")
+    stem = stem.replace('"', "")
+    return f"{stem}_p{part}.mp4"
+def convert_post_to_video(post: dict, url: str, discover=False) -> Video:
+    """Build a models.Video for one video url of a post
+    The video starts with status NOT_DOWNLOADED and fail_count 0.
+    With discover=True the relative path is '<user>_<id>.mp4' instead of
+    the get_filename() result.
+    """
+    part = get_part(post, url)
+    title = get_title(post)
+    date = get_date(post)
+    # get_filename is always called, even when its result is replaced below
+    relative_path = get_filename(post, url)
+    if discover:
+        relative_path = f"{post['user']}_{post['id']}.mp4"
+    fields = {
+        "post_id": post["id"],
+        "creator_id": post["user"],
+        "service": post["service"],
+        "relative_path": relative_path,
+        "url": url,
+        "domain": get_domain(post),
+        "part": part,
+        "published": date,
+        "title": title,
+        "status": VideoStatus.NOT_DOWNLOADED,
+        "fail_count": 0,
+    }
+    return Video(**fields)
+def convert_posts_to_videos(posts: list[dict], discover: bool = False) -> list[Video]:
+    """Convert posts into models.Video objects
+    Normal mode yields one Video per video url of each post.
+    Discover mode yields at most one Video per post, built from the first
+    url returned by extract_video_urls; posts without video url are skipped.
+    """
+    videos: list[Video] = []
+    for post in posts:
+        urls = extract_video_urls(post)
+        if discover:
+            if urls:
+                videos.append(convert_post_to_video(post, urls[0], discover=discover))
+            continue
+        videos.extend(convert_post_to_video(post, url) for url in urls)
+    return videos
+def extract_video_urls(post: dict) -> list:
+    """Return the distinct video paths of a post(dict), in a stable order
+    post["file"]["path"] is considered first, then every
+    post["attachments"][i]["path"]; a path is kept when it ends with one
+    of the video extensions below. Duplicates are dropped but first-seen
+    order is kept, so positions in the result (used by get_part as part
+    numbers) are the same on every run. Missing, None or empty
+    "file" / "attachments" / "path" entries are ignored instead of raising.
+    """
+    video_extensions = (".mp4", ".webm", ".mov", ".avi", ".mkv", ".flv", ".wmv", ".m4v")
+    # Check main file first, then attachments
+    main_file = post.get("file") or {}
+    candidates = [main_file.get("path")]
+    candidates.extend(
+        attachment.get("path") for attachment in post.get("attachments") or []
+    )
+    # dict keys are unique and keep insertion order: dedupe without the
+    # run-dependent iteration order of a set of strings
+    ordered = dict.fromkeys(
+        f"{path}" for path in candidates if path and path.endswith(video_extensions)
+    )
+    return list(ordered)
+def filter_posts_with_videos_from_list(data: list[dict]) -> list[dict]:
+    """Keep, in order, the posts of data for which extract_video_urls finds at least one url"""
+    return [post for post in data if extract_video_urls(post)]
+def filter_posts_with_videos_from_json(path: str) -> list:
+    """Load posts with load_json(path) and keep those with at least one video url"""
+    loaded_posts = load_json(path)
+    return [post for post in loaded_posts if extract_video_urls(post)]
+def valid_service(service: str) -> bool:
+    """True when service is listed in COOMER_PAYSITES or KEMONO_PAYSITES"""
+    return service in COOMER_PAYSITES or service in KEMONO_PAYSITES
+def get_creator_from_line(line: str) -> Creator | None:
+    """
+    Convert a line into a Creator model
+    arg: line -> 'service/creator'
+    This is the format of creators.txt; the reversed order
+    'creator/service' is accepted as well.
+    Returns None, without reporting anything, for a blank line, and None
+    after a UI.error for any other line that cannot be interpreted. A
+    line without '/' is reported that way instead of raising IndexError.
+    """
+    if not line.strip():
+        # blank line (e.g. an empty line in creators.txt): nothing to parse
+        return None
+    parts = [part.strip() for part in line.split("/")]
+    if len(parts) >= 2:
+        first, second = parts[0], parts[1]
+        # try the documented order first, then the reversed one
+        for service, creator_id in ((first, second), (second, first)):
+            if valid_service(service):
+                return Creator(
+                    creator_id=creator_id,
+                    service=service,
+                    domain=get_domain(service),
+                    status=None,
+                )
+    UI.error(
+        f"Creator file not valid: {line} can not be interpreted. Format is: 'service/creator_id'"
+    )
+    return None
+def get_creators() -> list[Creator]:
+    """
+    Read Config.CREATORS_FILE with load_txt and convert each line with
+    get_creator_from_line, dropping the lines it rejects (None).
+    Reports a UI.error when the resulting list is empty; the (possibly
+    empty) list is returned either way.
+    """
+    parsed = map(get_creator_from_line, load_txt(Config.CREATORS_FILE))
+    creators = [creator for creator in parsed if creator is not None]
+    if not creators:
+        UI.error(f"Could not find any creators. Check {Config.CREATORS_FILE}")
+    return creators
+def get_creators_from_posts(posts: list[dict]) -> list[Creator]:
+    """Return one Creator per distinct (user, service) found in posts, in first-seen order
+    Every Creator gets status "to_be_treated".
+    NOTE(review): the domain is always "coomer", whatever the service;
+    confirm this is intended for kemono services.
+    """
+    unique: dict[tuple, Creator] = {}
+    for post in posts:
+        key = (post["user"], post["service"], "coomer")
+        if key not in unique:
+            unique[key] = Creator(
+                creator_id=post["user"],
+                service=post["service"],
+                domain="coomer",
+                status="to_be_treated",
+            )
+    return list(unique.values())
+def parse_creator_input(value: str) -> tuple[str | None, str]:
+    """Split user input into (service, creator_id)
+    Accepted inputs:
+      - a URL whose path is /<service>/<anything>/<creator_id>[/...];
+        any scheme is accepted, query string and fragment are ignored
+      - a creators.txt style line, handled by get_creator_from_line
+      - anything else, returned unchanged as (None, value)
+    A URL with fewer than three path segments is returned as
+    (None, value) instead of raising IndexError.
+    """
+    from urllib.parse import urlsplit
+    value = value.strip()
+    # url
+    if "://" in value:
+        # urlsplit isolates the path, so the scheme ("http" as well as
+        # "https") and any "?query" never end up in the extracted fields
+        segments = [seg for seg in urlsplit(value).path.split("/") if seg]
+        if len(segments) >= 3:
+            service, creator_id = segments[0], segments[2]
+            logging.info(
+                f"From {value} extracte service {service} and creator {creator_id}"
+            )
+            return service, creator_id
+        logging.info(f"From {value} extracte service None and creator {value}")
+        return None, value
+    # creators.txt format
+    if "/" in value:
+        c = get_creator_from_line(value)
+        if c is not None:
+            logging.info(
+                f"From {value} extracte service {c.service} and creator {c.creator_id}"
+            )
+            return c.service, c.creator_id
+    logging.info(f"From {value} extracte service None and creator {value}")
+    return None, value
+def append_creator(creator: Creator):
+    """Append 'service/creator_id' to Config.CREATORS_FILE unless load_txt already returns that line"""
+    entry = f"{creator.service}/{creator.creator_id}"
+    existing = load_txt(Config.CREATORS_FILE)
+    if entry not in existing:
+        existing.append(entry)
+        write_txt(Config.CREATORS_FILE, entry, mode="a")

rcdl/interface/cli.py ADDED Viewed

@@ -0,0 +1,136 @@
+# interface/cli.py
+import logging
+import click
+from rcdl.core import downloader as dl
+from rcdl.core.config import Config
+from rcdl.core.parser import (
+    get_creators,
+    get_creator_from_line,
+    parse_creator_input,
+    append_creator,
+)
+from rcdl.core.db import DB
+from .ui import UI
+from rcdl.core.fuse import fuse_videos
+from rcdl import __version__
+@click.command(help="Refresh video to be downloaded")
+def refresh():
+    """Refresh database with creators videos
+    - dl.refresh_creators_videos() performs the refresh
+    - the per-status counts are then read from the DB and printed as a table
+    No download is started by this command itself
+    """
+    UI.info("Welcome to RCDL refresh")
+    dl.refresh_creators_videos()
+    with DB() as database:
+        status_counts = database.get_db_videos_info()
+    # printed once the DB context has been exited
+    UI.db_videos_status_table(status_counts)
+@click.command(help="Download all videos from all creator")
+def dlsf():
+    """Download video based on DB information
+    - read database
+    - for each video NOT_DOWNLOADED or FAILED & fail_count < settings, dl video
+    The selection and download are delegated to dl.download_videos_to_be_dl()
+    """
+    UI.info("Welcome to RCDL dlsf")
+    dl.download_videos_to_be_dl()
+@click.command("fuse", help="Fuse part video into one")
+def fuse():
+    """Fuse videos: print a marker message, then delegate to core.fuse.fuse_videos()"""
+    UI.info("fuse")
+    fuse_videos()
+@click.command(help="Discover videos/creators with tags")
+@click.option("--tag", required=True, type=str, help="Tag to search for")
+@click.option(
+    "--max-page", default=10, type=int, help="Maximum number of pages to fetch"
+)
+def discover(tag, max_page):
+    """Discover new creators/videos
+    currently WIP. Do not use in prod
+    Echoes and logs the chosen options, then delegates to dl.discover(tag, max_page)"""
+    banner = f"[cdl] discover with tag={tag} max_page={max_page}"
+    # same message to the terminal first, then to the log
+    for emit in (click.echo, logging.info):
+        emit(banner)
+    dl.discover(tag, max_page)
+@click.command("add", help="Add a creator")
+@click.argument("creator_input")
+def add_creator(creator_input):
+    """Add a creator (URL or 'service/creator_id') to creators.txt
+    The input is parsed with parse_creator_input. When no service can be
+    extracted nothing is written and a warning is shown: building the
+    line "None/<input>" would let get_creator_from_line's reversed-order
+    fallback store a creator named "None" whenever the input happens to
+    be a service name.
+    """
+    service, creator_id = parse_creator_input(creator_input)
+    creator = None
+    if service is not None:
+        line = f"{service}/{creator_id}"
+        creator = get_creator_from_line(line)
+    if creator is None:
+        UI.warning("Could not extract creator from input. Please check input is valid")
+        return
+    append_creator(creator)
+    UI.info(f"Added {line} to creators.txt")
+@click.command("remove", help="Remove a creator")
+@click.argument("creator_input")
+def remove_creator(creator_input):
+    """Remove a creator from creators.txt
+    creator_input is parsed with parse_creator_input. When it names a
+    service (URL or 'service/creator_id') only the entry with that
+    service and creator id is removed, so a creator with the same id on
+    another service is kept. A bare creator id removes every entry with
+    that id. Reports a UI.error and leaves the file untouched when
+    nothing matches.
+    """
+    service, creator_id = parse_creator_input(str(creator_input))
+    kept = []
+    removed = []
+    for creator in get_creators():
+        same_id = creator.creator_id == creator_id
+        same_service = service is None or creator.service == service
+        if same_id and same_service:
+            removed.append(creator)
+        else:
+            kept.append(creator)
+    if not removed:
+        UI.error(f"Could not find creator from {creator_input}")
+        return
+    # Rewrite the file from scratch: truncate it, then re-append what is kept
+    with open(Config.CREATORS_FILE, "w"):
+        pass
+    for c in kept:
+        append_creator(c)
+    for c in removed:
+        UI.info(f"Removed creator {c.creator_id}@({c.service})")
+@click.command("list", help="List all creators")
+def list_creators():
+    """Print the creators returned by get_creators() as a table (UI.table_creators)"""
+    creators = get_creators()
+    UI.table_creators(creators)
+# --- CLI GROUP ---
+# Root command group. --debug and --dry-run are plain flags whose values
+# are stored on Config before any sub-command runs.
+# NOTE: documentation is kept in comments here: click uses a group's
+# docstring as its --help text when no help= is given, so adding a
+# docstring would change the CLI output.
+@click.group()
+@click.option("--debug", is_flag=True)
+@click.option("--dry-run", is_flag=True)
+@click.version_option(version=__version__, prog_name=Config.APP_NAME)
+def cli(debug, dry_run):
+    Config.set_debug(debug)
+    Config.set_dry_run(dry_run)
+# Register every sub-command on the group
+cli.add_command(dlsf)
+cli.add_command(discover)
+cli.add_command(refresh)
+cli.add_command(add_creator)
+cli.add_command(remove_creator)
+cli.add_command(list_creators)
+cli.add_command(fuse)

rcdl/interface/ui.py ADDED Viewed

@@ -0,0 +1,193 @@
+# interface/ui.py
+import logging
+from rich.console import Console, Group
+from rich.table import Table
+from rich.progress import (
+    Progress,
+    SpinnerColumn,
+    BarColumn,
+    TextColumn,
+    TimeRemainingColumn,
+)
+from rich import box
+from rich.live import Live
+from rich.text import Text
+from rcdl.core.models import VideoStatus, Creator
+class UI:
+    """Console output helpers built on rich; all state lives on the class
+    info/debug/warning/error print to the shared Console and forward the
+    same message to the root logger at the matching level. The video and
+    concat progress displays share a single Live slot (_live).
+    """
+    console = Console()
+    logger = logging.getLogger()
+    # Text line shown under the download progress bar (None until video_progress())
+    _video_progress_text: Text | None = None
+    # Text line shown under the concat progress bar (None until concat_progress())
+    _concat_progress_text: Text | None = None
+    # Live display entered by video_progress()/concat_progress(), None when closed
+    _live: Live | None = None
+    @staticmethod
+    def _log_to_file(log_level, msg: str):
+        """Call log_level(msg); log_level is a logger method such as cls.logger.info"""
+        log_level(msg)
+    @classmethod
+    def success(cls, msg: str):
+        """Print success msg in green (console only, not logged)"""
+        cls.console.print(f"[green]{msg}[/]")
+    @classmethod
+    def info(cls, msg: str):
+        """Print & log info msg"""
+        cls.console.print(msg)
+        cls._log_to_file(cls.logger.info, msg)
+    @classmethod
+    def debug(cls, msg: str):
+        """Print (dimmed) & log debug msg"""
+        cls.console.print(f"[dim]{msg}[/]")
+        cls._log_to_file(cls.logger.debug, msg)
+    @classmethod
+    def warning(cls, msg: str):
+        """Print (yellow) & log warning msg"""
+        cls.console.print(f"[yellow]{msg}[/]")
+        # logged at WARNING so it is not dropped by a handler that filters out DEBUG
+        cls._log_to_file(cls.logger.warning, msg)
+    @classmethod
+    def error(cls, msg: str):
+        """Print (red) & log error msg"""
+        cls.console.print(f"[red]{msg}[/]")
+        # logged at ERROR so it is not dropped by a handler that filters out DEBUG
+        cls._log_to_file(cls.logger.error, msg)
+    @classmethod
+    def db_videos_status_table(cls, info: dict):
+        """
+        Print to cli a table with info of numbers of videos per status.
+        info must hold a count for every VideoStatus value, e.g. {
+            "not_downloaded": int, "downloaded": int,
+            "failed": int, "skipped": int, "ignored": int}
+        A missing status raises KeyError.
+        """
+        table = Table(title="DB Videos status")
+        table.add_column("Video status")
+        table.add_column("Number of videos")
+        for status in VideoStatus:
+            # "not_downloaded" -> "Not downloaded"
+            name = status.value.replace("_", " ").capitalize()
+            table.add_row(name, str(info[status.value]))
+        cls.console.print(table)
+    @classmethod
+    def table_creators(cls, creators: list[Creator]):
+        """Print to cli a table with the given creators. Format is Creator ID | Service"""
+        table = Table(title="Creators", box=box.MINIMAL, show_lines=True)
+        table.add_column("Creators ID")
+        table.add_column("Service")
+        for creator in creators:
+            table.add_row(creator.creator_id, creator.service)
+        cls.console.print(table)
+    @classmethod
+    def progress_posts_fetcher(cls, max_pages: int):
+        """Return a spinner/description/bar Progress; max_pages is currently unused"""
+        progress = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            console=cls.console,
+            transient=False,  # keep the progress bar on screen after finish
+        )
+        return progress
+    @classmethod
+    def _counted_progress(cls) -> Progress:
+        """Build the spinner/description/bar/count/ETA Progress used by video and concat"""
+        return Progress(
+            SpinnerColumn(),
+            TextColumn("[bold cyan]{task.description}"),
+            BarColumn(),
+            TextColumn("{task.completed}/{task.total}"),
+            TimeRemainingColumn(),
+            console=cls.console,
+            transient=False,  # keep the bar on screen after completion
+        )
+    @classmethod
+    def _start_live(cls, progress: Progress, status_text: Text):
+        """Enter a Live display showing progress with status_text underneath"""
+        cls._live = Live(Group(progress, status_text), console=cls.console)
+        cls._live.__enter__()
+    @classmethod
+    def _stop_live(cls):
+        """Exit and forget the current Live display, if there is one"""
+        if cls._live:
+            cls._live.__exit__(None, None, None)
+            cls._live = None
+    @classmethod
+    def video_progress(cls, total: int):
+        """Create video download progress output
+        Starts a Live display; call close_video_progress() when done.
+        Returns (progress, task) where task is the id of the
+        "Downloading videos" task with the given total.
+        """
+        progress = cls._counted_progress()
+        cls._video_progress_text = Text("Waiting...", style="Cyan")
+        cls._start_live(progress, cls._video_progress_text)
+        task = progress.add_task("Downloading videos", total=total)
+        return progress, task
+    @classmethod
+    def set_current_video_progress(cls, creator_info: str, filename: str):
+        """Update video download output
+        args:
+            creator_info: str =  'creator_id@(service)'
+            filename: str = video.relative_path
+        Does nothing before video_progress() has been called.
+        """
+        if cls._video_progress_text is None:
+            return
+        # reset the line, then rebuild it as '<creator_info> -> <filename>'
+        cls._video_progress_text.plain = ""
+        cls._video_progress_text.append(f"{creator_info} -> ", style="Cyan")
+        cls._video_progress_text.append(filename, style="green")
+    @classmethod
+    def close_video_progress(cls):
+        """Close video progress"""
+        cls._stop_live()
+    @classmethod
+    def concat_progress(cls, total: int):
+        """Create concat progress bar
+        Starts a Live display; call close_concat_progress() when done.
+        Returns (progress, task) where task is the id of the
+        "Concatenating videos" task with the given total.
+        """
+        progress = cls._counted_progress()
+        cls._concat_progress_text = Text("Waiting...", style="Cyan")
+        cls._start_live(progress, cls._concat_progress_text)
+        task = progress.add_task("Concatenating videos", total=total)
+        return progress, task
+    @classmethod
+    def set_current_concat_progress(cls, msg: str, filename: str):
+        """Update concat output
+        args:
+            msg: str = text shown before the arrow
+            filename: str = text shown after the arrow, in green
+        Does nothing before concat_progress() has been called.
+        """
+        if cls._concat_progress_text is None:
+            return
+        # reset the line, then rebuild it as '<msg> -> <filename>'
+        cls._concat_progress_text.plain = ""
+        cls._concat_progress_text.append(f"{msg} -> ", style="Cyan")
+        cls._concat_progress_text.append(filename, style="green")
+    @classmethod
+    def close_concat_progress(cls):
+        """Close concat progress"""
+        cls._stop_live()
+    @classmethod
+    def progress_total_concat(cls):
+        """Return a spinner/description/bar/count Progress (not started, no task added)"""
+        progress = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("{task.completed}/{task.total}"),
+            console=cls.console,
+            transient=False,  # keep the progress bar on screen after finish
+        )
+        return progress

rcdl/scripts/migrate_creators_json_txt.py ADDED Viewed

@@ -0,0 +1,37 @@
+# scripts/migrate_creators_json_txt.py
+import os
+import json
+from rcdl.core.models import Creator
+from rcdl.core.config import Config
+from rcdl.core.parser import get_domain, append_creator
+# One-shot migration: copy the creators stored in <cache dir>/creators.json
+# into creators.txt through append_creator.
+JSON_PATH = Config.CACHE_DIR / "creators.json"
+# check file exist: without it there is nothing to migrate, so stop here
+# instead of failing on open() below
+if not os.path.exists(JSON_PATH):
+    print("creators.json deoes not exist. Check")
+    raise SystemExit(1)
+# load file
+with open(JSON_PATH, "r") as f:
+    json_creators = json.load(f)
+# convert to Creator (each entry must provide "creator_id" and "service")
+creators = [
+    Creator(
+        creator_id=json_creator["creator_id"],
+        service=json_creator["service"],
+        domain=get_domain(json_creator["service"]),
+        status=None,
+    )
+    for json_creator in json_creators
+]
+# save creator
+for c in creators:
+    append_creator(c)
+    print(f"Saved new creator: {c.service}/{c.creator_id}")
+print(f"You can now delete {JSON_PATH}")