rcdl 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rcdl/core/parser.py ADDED
@@ -0,0 +1,280 @@
1
+ # core/parser.py
2
+
3
+ import logging
4
+ from pathvalidate import sanitize_filename
5
+
6
+ from .models import Video, VideoStatus, Creator
7
+ from .file_io import load_json, load_txt, write_txt
8
+ from .config import Config
9
+ from rcdl.interface.ui import UI
10
+
11
+
12
# Services looked up by get_domain()/valid_service() and mapped to "coomer".
COOMER_PAYSITES = [
    "onlyfans",
    "fansly",
    "candfans",
]

# Services looked up by get_domain()/valid_service() and mapped to "kemono".
KEMONO_PAYSITES = [
    "patreon", "fanbox", "fantia", "boosty",
    "gumroad", "subscribestar", "dlsite",
]
22
+
23
+
24
def get_domain(arg: str | dict | Video) -> str:
    """Return the domain ("coomer" or "kemono") hosting a service.

    ``arg`` may be the service name itself, a post dict (its ``"service"``
    key is used) or a ``Video`` (its ``service`` attribute is used).
    An unknown service is logged as an error and yields ``""``.
    """
    # Normalise the three accepted input shapes to a plain service name.
    if isinstance(arg, dict):
        service = arg["service"]
    elif isinstance(arg, Video):
        service = arg.service
    else:
        service = arg

    # Coomer is checked first, exactly like the original lookup order.
    for domain, paysites in (
        ("coomer", COOMER_PAYSITES),
        ("kemono", KEMONO_PAYSITES),
    ):
        if service in paysites:
            return domain

    logging.error(f"Service {service} not associated to any domain")
    return ""
43
+
44
+
45
def get_title(post: dict) -> str:
    """Return a filesystem-safe title for a post.

    The first non-empty value among ``post["title"]``, ``post["content"]``
    and ``post["substring"]`` is used. Missing keys and ``None`` values are
    treated as empty, so a post without a usable title no longer raises
    before reaching ``sanitize_filename``; in that case the empty string is
    sanitized, as the original did for an empty title.
    """
    title = ""
    for key in ("title", "content", "substring"):
        candidate = post.get(key)
        if candidate:
            title = candidate
            break
    return sanitize_filename(title)
54
+
55
+
56
def get_date(post: dict) -> str:
    """Return the date of a post as its first 10 characters.

    ``post["published"]`` is preferred and ``post["added"]`` is the
    fallback. A key that is present but empty or ``None`` is skipped
    instead of being sliced, which previously raised ``TypeError`` for
    ``None``. When neither key holds a value, an error is logged and
    ``"NA"`` is returned.
    """
    for key in ("published", "added"):
        value = post.get(key)
        if value:
            # Keep only the leading 10 characters of the timestamp string.
            return value[0:10]

    logging.error(f"Could not extract date from {post.get('id')}")
    return "NA"
66
+
67
+
68
def get_part(post: dict, url: str) -> int:
    """Return the part number of ``url`` within ``post``.

    Every video url of a post is one "part"; the part number is the url's
    position in ``extract_video_urls(post)``. A post with exactly one video
    url always yields 0. If ``url`` is not found, an error is logged and
    -1 is returned.
    """
    candidates = extract_video_urls(post)
    if len(candidates) == 1:
        return 0

    for index, candidate in enumerate(candidates):
        if candidate == url:
            return index

    logging.error(
        f"Could not extract part number for post id {post['id']} with url {url}"
    )
    return -1
87
+
88
+
89
def get_filename(post: dict, url: str) -> str:
    """Build the ``<date>_<title>_p<part>.mp4`` filename for one video url.

    Single quotes in the date/title stem are replaced by a space and double
    quotes are removed.
    """
    # Same call order as before: title, then date, then part.
    title, date, part = get_title(post), get_date(post), get_part(post, url)
    stem = f"{date}_{title}".replace("'", " ").replace('"', "")
    return f"{stem}_p{part}.mp4"
96
+
97
+
98
def convert_post_to_video(post: dict, url: str, discover=False) -> Video:
    """Create a not-yet-downloaded ``Video`` from a post and one of its urls.

    With ``discover`` set, the relative path is ``<user>_<id>.mp4`` instead
    of the name produced by ``get_filename``.
    """
    part = get_part(post, url)
    title = get_title(post)
    date = get_date(post)
    # get_filename is still evaluated in discover mode, as before.
    default_name = get_filename(post, url)
    relative_path = f"{post['user']}_{post['id']}.mp4" if discover else default_name

    return Video(
        post_id=post["id"],
        creator_id=post["user"],
        service=post["service"],
        relative_path=relative_path,
        url=url,
        domain=get_domain(post),
        part=part,
        published=date,
        title=title,
        status=VideoStatus.NOT_DOWNLOADED,
        fail_count=0,
    )
120
+
121
+
122
def convert_posts_to_videos(posts: list[dict], discover: bool = False) -> list[Video]:
    """Convert posts into ``Video`` objects.

    Normal mode yields one video per video url of every post. In discover
    mode only the first url of each post is converted, and posts without
    any video url are skipped.
    """
    collected = []
    for post in posts:
        urls = extract_video_urls(post)
        if discover:
            if urls:
                collected.append(
                    convert_post_to_video(post, urls[0], discover=discover)
                )
            continue
        for url in urls:
            collected.append(convert_post_to_video(post, url))
    return collected
134
+
135
+
136
def extract_video_urls(post: dict) -> list:
    """Return the unique video paths of a post, in a stable order.

    The main file (``post["file"]["path"]``) comes first, followed by the
    attachments (``post["attachments"][i]["path"]``) in their listed order.
    Only paths ending with a known video extension are kept.

    Duplicates are removed while preserving first-seen order. The previous
    ``set`` based implementation returned the urls in an arbitrary order
    that could change between interpreter runs, which made the part number
    derived from the position in this list unstable.

    Missing or empty ``file`` / ``attachments`` / ``path`` entries are
    ignored.
    """
    video_extensions = (".mp4", ".webm", ".mov", ".avi", ".mkv", ".flv", ".wmv", ".m4v")

    candidates = []

    # Main file first so it keeps the lowest position.
    main_file = post.get("file") or {}
    candidates.append(main_file.get("path"))

    for attachment in post.get("attachments") or []:
        candidates.append(attachment.get("path"))

    # dict keys are unique and keep insertion order: ordered de-duplication.
    ordered_unique = dict.fromkeys(
        path for path in candidates if path and path.endswith(video_extensions)
    )
    return list(ordered_unique)
155
+
156
+
157
def filter_posts_with_videos_from_list(data: list[dict]) -> list[dict]:
    """Keep only the posts of ``data`` that contain at least one video url."""
    return [post for post in data if extract_video_urls(post)]
165
+
166
+
167
def filter_posts_with_videos_from_json(path: str) -> list:
    """Load the posts stored in the json file at ``path`` and keep only
    those that contain at least one video url."""
    return [post for post in load_json(path) if extract_video_urls(post)]
176
+
177
+
178
def valid_service(service: str) -> bool:
    """Tell whether ``service`` is one of the known coomer or kemono paysites."""
    return service in COOMER_PAYSITES or service in KEMONO_PAYSITES
184
+
185
+
186
def get_creator_from_line(line: str) -> Creator | None:
    """Convert one line of creators.txt into a ``Creator``.

    The expected format is ``'service/creator'``; the reversed form
    ``'creator/service'`` is accepted as well. Surrounding whitespace of
    both fields is ignored.

    Returns ``None`` when the line cannot be interpreted:
    - blank lines are skipped silently;
    - any other malformed line (no ``/``, unknown service, empty creator
      id) is reported through ``UI.error``.

    Lines without a ``/`` used to raise ``IndexError``; they are now
    reported like any other invalid line.
    """
    if not line.strip():
        # Nothing to parse, and not worth an error message.
        return None

    parts = [part.strip() for part in line.split("/")]
    if len(parts) >= 2:
        first, second = parts[0], parts[1]
        # Try 'service/creator' first, then the reversed order.
        for service, creator_id in ((first, second), (second, first)):
            if valid_service(service) and creator_id:
                return Creator(
                    creator_id=creator_id,
                    service=service,
                    domain=get_domain(service),
                    status=None,
                )

    UI.error(
        f"Creator file not valid: {line} can not be interpreted. Format is: 'service/creator_id'"
    )
    return None
212
+
213
+
214
def get_creators() -> list[Creator]:
    """Read ``Config.CREATORS_FILE`` and return every line that parses into
    a ``Creator``.

    Lines rejected by ``get_creator_from_line`` are dropped. If nothing is
    left, an error is shown through ``UI.error`` and the empty list is
    returned.
    """
    creators = []
    for line in load_txt(Config.CREATORS_FILE):
        parsed = get_creator_from_line(line)
        if parsed is not None:
            creators.append(parsed)

    if not creators:
        UI.error(f"Could not find any creators. Check {Config.CREATORS_FILE}")
    return creators
228
+
229
+
230
def get_creators_from_posts(posts: list[dict]) -> list[Creator]:
    """Return one ``Creator`` per distinct (user, service) pair found in
    ``posts``, in order of first appearance, with status "to_be_treated".
    """
    # NOTE(review): the domain is hard-coded to "coomer" even for services
    # listed in KEMONO_PAYSITES -- confirm this is intended.
    unique: dict = {}
    for post in posts:
        key = (post["user"], post["service"], "coomer")
        if key in unique:
            continue
        # dict insertion order gives the same ordering as a list + seen set.
        unique[key] = Creator(
            creator_id=post["user"],
            service=post["service"],
            domain="coomer",
            status="to_be_treated",
        )
    return list(unique.values())
249
+
250
+
251
+ def parse_creator_input(value: str) -> tuple[str | None, str]:
252
+ value = value.strip()
253
+
254
+ # url
255
+ if "://" in value:
256
+ parts = value.replace("https://", "").strip().split("/")
257
+ logging.info(f"From {value} extracte service {parts[1]} and creator {parts[3]}")
258
+ return parts[1], parts[3] # service, creator_id
259
+
260
+ # creators.txt format
261
+ if "/" in value:
262
+ c = get_creator_from_line(value)
263
+ if c is not None:
264
+ logging.info(
265
+ f"From {value} extracte service {c.service} and creator {c.creator_id}"
266
+ )
267
+ return c.service, c.creator_id
268
+
269
+ logging.info(f"From {value} extracte service None and creator {value}")
270
+ return None, value
271
+
272
+
273
def append_creator(creator: Creator):
    """Append ``service/creator_id`` to ``Config.CREATORS_FILE``.

    Nothing is written when an identical line is already among the lines
    returned by ``load_txt``.
    """
    line = f"{creator.service}/{creator.creator_id}"

    if line in load_txt(Config.CREATORS_FILE):
        return
    # Only the new line is appended; the in-memory list of existing lines
    # is not needed afterwards, so it is no longer mutated.
    write_txt(Config.CREATORS_FILE, line, mode="a")
rcdl/interface/cli.py ADDED
@@ -0,0 +1,136 @@
1
+ # interface/cli.py
2
+
3
+ import logging
4
+
5
+ import click
6
+
7
+ from rcdl.core import downloader as dl
8
+ from rcdl.core.config import Config
9
+ from rcdl.core.parser import (
10
+ get_creators,
11
+ get_creator_from_line,
12
+ parse_creator_input,
13
+ append_creator,
14
+ )
15
+ from rcdl.core.db import DB
16
+ from .ui import UI
17
+ from rcdl.core.fuse import fuse_videos
18
+
19
+
20
+ from rcdl import __version__
21
+
22
+
23
@click.command(help="Refresh video to be downloaded")
def refresh():
    """Refresh the database with the creators' videos.

    Calls ``dl.refresh_creators_videos()``, then reads the per-status video
    counts from the database and prints them as a table.
    No download is done in this command.
    """
    UI.info("Welcome to RCDL refresh")
    dl.refresh_creators_videos()

    # The DB context is closed before the table is rendered.
    with DB() as database:
        status_counts = database.get_db_videos_info()

    UI.db_videos_status_table(status_counts)
39
+
40
+
41
@click.command(help="Download all videos from all creator")
def dlsf():
    """Download videos based on the database content.

    Prints a welcome line and delegates to ``dl.download_videos_to_be_dl()``,
    which selects and downloads the pending videos.
    """
    UI.info("Welcome to RCDL dlsf")
    dl.download_videos_to_be_dl()
50
+
51
+
52
@click.command("fuse", help="Fuse part video into one")
def fuse():
    """Print a short banner, then run ``fuse_videos()``."""
    UI.info("fuse")
    fuse_videos()
57
+
58
+
59
@click.command(help="Discover videos/creators with tags")
@click.option("--tag", required=True, type=str, help="Tag to search for")
@click.option(
    "--max-page", default=10, type=int, help="Maximum number of pages to fetch"
)
def discover(tag, max_page):
    """Discover new creators/videos for a tag.

    Work in progress: do not use in production.
    """
    banner = f"[cdl] discover with tag={tag} max_page={max_page}"
    # The same line goes to the terminal and to the log.
    click.echo(banner)
    logging.info(banner)
    dl.discover(tag, max_page)
71
+
72
+
73
@click.command("add", help="Add a creator")
@click.argument("creator_input")
def add_creator(creator_input):
    """Add a creator, given as a URL or a string, to creators.txt."""
    service, creator_id = parse_creator_input(creator_input)
    line = f"{service}/{creator_id}"

    creator = get_creator_from_line(line)
    if creator is None:
        UI.warning("Could not extract creator from input. Please check input is valid")
        return

    append_creator(creator)
    UI.info(f"Added {line} to creators.txt")
85
+
86
+
87
@click.command("remove", help="Remove a creator")
@click.argument("creator_input")
def remove_creator(creator_input):
    """Remove a creator from creators.txt.

    ``creator_input`` is parsed with ``parse_creator_input``. When a service
    can be extracted from it, only the creator with that id on that service
    is removed; previously the service was ignored and every creator sharing
    the id was dropped, while only the last one was reported. When no
    service is known, all creators with the given id are removed and each
    removal is reported.

    The file is rewritten from the remaining creators.
    """
    service, creator_id = parse_creator_input(str(creator_input))

    kept = []
    removed = []
    for creator in get_creators():
        same_id = creator.creator_id == creator_id
        same_service = service is None or creator.service == service
        if same_id and same_service:
            removed.append(creator)
        else:
            kept.append(creator)

    if not removed:
        UI.error(f"Could not find creator from {creator_input}")
        return

    # Truncate the file, then write back every creator that is kept.
    with open(Config.CREATORS_FILE, "w"):
        pass
    for c in kept:
        append_creator(c)

    for matched_creator in removed:
        UI.info(
            f"Removed creator {matched_creator.creator_id}@({matched_creator.service})"
        )
112
+
113
+
114
@click.command("list", help="List all creators")
def list_creators():
    """Print a table of the creators loaded by ``get_creators()``."""
    UI.table_creators(get_creators())
118
+
119
+
120
+ # --- CLI GROUP ---
121
@click.group()
@click.option("--debug", is_flag=True)
@click.option("--dry-run", is_flag=True)
@click.version_option(version=__version__, prog_name=Config.APP_NAME)
def cli(debug, dry_run):
    # Root command group: forwards the global --debug / --dry-run flags to
    # Config before a sub-command runs. Deliberately documented with
    # comments only, since a docstring would show up in the --help output.
    Config.set_debug(debug)
    Config.set_dry_run(dry_run)
128
+
129
+
130
# Register every sub-command on the root group, in the original order.
for _command in (
    dlsf,
    discover,
    refresh,
    add_creator,
    remove_creator,
    list_creators,
    fuse,
):
    cli.add_command(_command)
rcdl/interface/ui.py ADDED
@@ -0,0 +1,193 @@
1
+ # interface/ui.py
2
+
3
+ import logging
4
+ from rich.console import Console, Group
5
+ from rich.table import Table
6
+ from rich.progress import (
7
+ Progress,
8
+ SpinnerColumn,
9
+ BarColumn,
10
+ TextColumn,
11
+ TimeRemainingColumn,
12
+ )
13
+ from rich import box
14
+ from rich.live import Live
15
+ from rich.text import Text
16
+ from rcdl.core.models import VideoStatus, Creator
17
+
18
+
19
class UI:
    """Terminal front-end: coloured messages, tables and progress bars.

    All helpers are class/static methods sharing one rich ``Console`` and
    the root logger, so the class is used without being instantiated.
    """

    console = Console()
    logger = logging.getLogger()

    # Status line shown under the download progress bar; set by video_progress().
    _video_progress_text: Text | None = None
    # Status line shown under the concat progress bar; set by concat_progress().
    _concat_progress_text: Text | None = None
    # Live display currently entered by video_progress()/concat_progress().
    _live: Live | None = None

    # ------------------------------------------------------------------
    # messages
    # ------------------------------------------------------------------
    @staticmethod
    def _log_to_file(log_level, msg: str):
        """Send ``msg`` to ``log_level``, a logger method such as ``logger.info``."""
        log_level(msg)

    @classmethod
    def success(cls, msg: str):
        """Print a success msg in green (not logged)."""
        cls.console.print(f"[green]{msg}[/]")

    @classmethod
    def info(cls, msg: str):
        """Print & log info msg"""
        cls.console.print(msg)
        cls._log_to_file(cls.logger.info, msg)

    @classmethod
    def debug(cls, msg: str):
        """Print (dimmed) & log debug msg"""
        cls.console.print(f"[dim]{msg}[/]")
        cls._log_to_file(cls.logger.debug, msg)

    @classmethod
    def warning(cls, msg: str):
        """Print (yellow) & log warning msg"""
        cls.console.print(f"[yellow]{msg}[/]")
        # Logged at WARNING level; it used to be recorded as DEBUG.
        cls._log_to_file(cls.logger.warning, msg)

    @classmethod
    def error(cls, msg: str):
        """Print (red) & log error msg"""
        cls.console.print(f"[red]{msg}[/]")
        # Logged at ERROR level; it used to be recorded as DEBUG.
        cls._log_to_file(cls.logger.error, msg)

    # ------------------------------------------------------------------
    # tables
    # ------------------------------------------------------------------
    @classmethod
    def db_videos_status_table(cls, info: dict):
        """
        Print to cli a table with the number of videos per status.

        ``info`` maps each ``VideoStatus`` value to a count, e.g.
        {"not_downloaded": int, "downloaded": int, "failed": int, ...}.
        A status missing from ``info`` is displayed as 0.
        """
        table = Table(title="DB Videos status")

        table.add_column("Video status")
        table.add_column("Number of videos")

        for status in VideoStatus:
            # "not_downloaded" -> "Not downloaded"
            name = status.value.replace("_", " ").capitalize()
            table.add_row(name, str(info.get(status.value, 0)))

        cls.console.print(table)

    @classmethod
    def table_creators(cls, creators: list[Creator]):
        """Print to cli a table of creators. Format is Creator ID | Service"""
        table = Table(title="Creators", box=box.MINIMAL, show_lines=True)
        table.add_column("Creators ID")
        table.add_column("Service")
        for creator in creators:
            table.add_row(creator.creator_id, creator.service)
        cls.console.print(table)

    # ------------------------------------------------------------------
    # progress helpers (private)
    # ------------------------------------------------------------------
    @classmethod
    def _make_progress(
        cls, description_style: str, *, counter: bool = False, eta: bool = False
    ) -> Progress:
        """Build a Progress: spinner, styled description, bar, then the
        optional completed/total counter and remaining-time columns."""
        columns = [
            SpinnerColumn(),
            TextColumn(f"[{description_style}]{{task.description}}"),
            BarColumn(),
        ]
        if counter:
            columns.append(TextColumn("{task.completed}/{task.total}"))
        if eta:
            columns.append(TimeRemainingColumn())
        return Progress(
            *columns,
            console=cls.console,
            transient=False,  # keep the bar on screen after completion
        )

    @classmethod
    def _start_live(cls, progress: Progress, status_text: Text) -> None:
        """Enter a Live display showing ``progress`` above ``status_text``."""
        cls._live = Live(Group(progress, status_text), console=cls.console)
        cls._live.__enter__()

    @classmethod
    def _stop_live(cls) -> None:
        """Exit the current Live display, if any."""
        if cls._live:
            cls._live.__exit__(None, None, None)
            cls._live = None

    @staticmethod
    def _set_status(text: Text, prefix: str, filename: str) -> None:
        """Replace the content of ``text`` with ``'<prefix> -> <filename>'``."""
        text.plain = ""
        text.append(f"{prefix} -> ", style="Cyan")
        text.append(filename, style="green")

    # ------------------------------------------------------------------
    # progress bars (public)
    # ------------------------------------------------------------------
    @classmethod
    def progress_posts_fetcher(cls, max_pages: int):
        """Return a Progress (spinner, description, bar) for fetching posts.

        ``max_pages`` is accepted for API compatibility but not used here;
        the caller adds its own task to the returned Progress.
        """
        return cls._make_progress("progress.description")

    @classmethod
    def video_progress(cls, total: int):
        """Create video download progress output.

        Starts a Live display made of a progress bar and a status line, and
        returns ``(progress, task)`` where ``task`` counts up to ``total``.
        Call ``close_video_progress()`` when done.
        """
        progress = cls._make_progress("bold cyan", counter=True, eta=True)

        cls._video_progress_text = Text("Waiting...", style="Cyan")
        cls._start_live(progress, cls._video_progress_text)

        task = progress.add_task("Downloading videos", total=total)
        return progress, task

    @classmethod
    def set_current_video_progress(cls, creator_info: str, filename: str):
        """Update video download output
        args:
            creator_info: str = 'creator_id@(service)'
            filename: str = video.relative_path
        Does nothing until ``video_progress()`` has been called.
        """
        if cls._video_progress_text is None:
            return
        cls._set_status(cls._video_progress_text, creator_info, filename)

    @classmethod
    def close_video_progress(cls):
        """Close video progress"""
        cls._stop_live()

    @classmethod
    def concat_progress(cls, total: int):
        """Create concat progress bar.

        Starts a Live display made of a progress bar and a status line, and
        returns ``(progress, task)`` where ``task`` counts up to ``total``.
        Call ``close_concat_progress()`` when done.
        """
        progress = cls._make_progress("bold cyan", counter=True, eta=True)

        cls._concat_progress_text = Text("Waiting...", style="Cyan")
        cls._start_live(progress, cls._concat_progress_text)

        task = progress.add_task("Concatenating videos", total=total)
        return progress, task

    @classmethod
    def set_current_concat_progress(cls, msg: str, filename: str):
        """Update concat output
        args:
            msg: str = text shown before the arrow
            filename: str = text shown after the arrow, in green
        Does nothing until ``concat_progress()`` has been called.
        """
        if cls._concat_progress_text is None:
            return
        cls._set_status(cls._concat_progress_text, msg, filename)

    @classmethod
    def close_concat_progress(cls):
        """Close concat progress"""
        cls._stop_live()

    @classmethod
    def progress_total_concat(cls):
        """Return a Progress (spinner, description, bar, completed/total)
        for the overall concat run; the caller adds its own task."""
        return cls._make_progress("progress.description", counter=True)
@@ -0,0 +1,37 @@
1
+ # scripts/migrage_creators_json_txt.py
2
+
3
+ import os
4
+ import json
5
+
6
+ from rcdl.core.models import Creator
7
+ from rcdl.core.config import Config
8
+ from rcdl.core.parser import get_domain, append_creator
9
+
10
+ JSON_PATH = Config.CACHE_DIR / "creators.json"
11
+
12
# check file exist
# Stop here when there is nothing to migrate: continuing would only fail
# with FileNotFoundError on the open() below.
if not os.path.exists(JSON_PATH):
    print("creators.json deoes not exist. Check")
    raise SystemExit(1)

# load file
with open(JSON_PATH, "r", encoding="utf-8") as f:
    json_creators = json.load(f)

# convert to Creator
creators = [
    Creator(
        creator_id=json_creator["creator_id"],
        service=json_creator["service"],
        domain=get_domain(json_creator["service"]),
        status=None,
    )
    for json_creator in json_creators
]

# save creator
for c in creators:
    append_creator(c)
    print(f"Saved new creator: {c.service}/{c.creator_id}")

print(f"You can now delete {JSON_PATH}")