rcdl 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rcdl/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ # __init__.py
2
+
3
from importlib.metadata import PackageNotFoundError, version

try:
    # Resolved from the installed distribution's metadata.
    __version__ = version("rcdl")
except PackageNotFoundError:
    # No installed distribution named "rcdl" (e.g. running straight from a
    # source checkout): keep the package importable with a placeholder.
    __version__ = "0.0.0+unknown"
rcdl/__main__.py ADDED
@@ -0,0 +1,25 @@
1
+ # __main__.py
2
+
3
import logging

from rcdl.core.config import Config, setup_logging

# setup file structure
# Directories and files are created before logging is configured and before
# the database is opened.
Config.ensure_dirs()
Config.ensure_files()

# setup logging
setup_logging(Config.LOG_FILE, level=0)

logging.info("--- INIT ---")
logging.info("Logger initialized")

# init database
# Imported only after logging has been configured.
from rcdl.core.db import DB  # noqa: E402

# DB is a context manager: the connection is closed even when table creation
# or the version lookup raises.
with DB() as db:
    db.init_table()
    logging.info("DB version: %s", db.get_schema_version())

# Imported last and re-exported from this module (hence the F401 waiver).
from rcdl.interface.cli import cli  # noqa: E402, F401
rcdl/core/api.py ADDED
@@ -0,0 +1,54 @@
1
+ # core/api.py
2
+
3
+ from .models import Creator
4
+
5
+
6
class URL:
    """Namespace of static helpers that build API and file URLs."""

    # API root per supported domain. The trailing slash is part of the value,
    # so endpoint paths are appended directly.
    DOMAINS_BASE_URL = {
        "coomer": "https://coomer.st/api/v1/",
        "kemono": "https://kemono.cr/api/v1/",
    }

    @staticmethod
    def get_base_url(domain: str) -> str:
        """Return the API root for *domain*.

        Raises:
            KeyError: if *domain* is not a key of ``DOMAINS_BASE_URL``.
        """
        try:
            return URL.DOMAINS_BASE_URL[domain]
        except KeyError:
            raise KeyError(f"{domain} not in known domains urls") from None

    @staticmethod
    def get_post_revision(creator: "Creator", post_id) -> str:
        """Return the revisions endpoint URL for one post of *creator*."""
        base = URL.get_base_url(creator.domain)
        return (
            f"{base}{creator.service}/user/{creator.creator_id}"
            f"/post/{post_id}/revisions"
        )

    @staticmethod
    def get_headers() -> dict:
        """Return the HTTP headers sent with requests (a new dict per call)."""
        return {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0 Safari/537.36",
            # NOTE(review): "text/css" looks deliberate for these APIs - confirm
            # before changing it.
            "Accept": "text/css",
        }

    @staticmethod
    def get_url_from_file(domain: str, path_url: str):
        """Return the absolute URL of a file given its site-relative path.

        *path_url* is appended verbatim to the site root, so it is expected
        to start with ``/``.

        Raises:
            ValueError: if *domain* is neither ``"coomer"`` nor ``"kemono"``.
        """
        site_roots = {
            "coomer": "https://coomer.st",
            "kemono": "https://kemono.cr",
        }
        if domain not in site_roots:
            raise ValueError(
                f"Domain {domain} is not an accepted value/does not exist. Please check your creators file"
            )
        return f"{site_roots[domain]}{path_url}"

    @staticmethod
    def add_params(url: str, params: dict):
        """Return *url* with *params* appended as a query string.

        Keys and values are percent-encoded, so characters such as ``&``,
        ``=`` or spaces inside a value cannot corrupt the query string.
        Values must therefore be passed raw (not already encoded).
        An empty *params* returns *url* unchanged.
        """
        from urllib.parse import urlencode

        if not params:
            return url
        return f"{url}?{urlencode(params)}"

    @staticmethod
    def get_creator_post_wo_param(creator: "Creator") -> str:
        """Return the posts endpoint URL of *creator*, without query string."""
        base = URL.get_base_url(creator.domain)
        return f"{base}{creator.service}/user/{creator.creator_id}/posts"

    @staticmethod
    def get_posts_page_url_wo_param():
        """Return the global posts endpoint URL (always on the coomer domain)."""
        api_root = URL.DOMAINS_BASE_URL["coomer"]
        return f"{api_root}posts"
rcdl/core/config.py ADDED
@@ -0,0 +1,93 @@
1
+ # core/config.py
2
+
3
+ from pathlib import Path
4
+ import logging
5
+ import os
6
+
7
+ from .file_io import write_txt
8
+
9
+
10
class Config:
    """Static application settings: filesystem layout, run flags, API limits.

    Everything is stored on the class itself and accessed through class
    attributes and classmethods.
    """

    # paths
    APP_NAME = "rcdl"

    # Root folder; the RCDL_BASE_DIR environment variable overrides the
    # default. Read once, when the class body executes.
    BASE_DIR = Path(os.environ.get("RCDL_BASE_DIR", Path.home() / "Videos/rcdl"))

    CACHE_DIR = BASE_DIR / ".cache"
    DB_PATH = CACHE_DIR / "cdl.db"
    LOG_FILE = CACHE_DIR / "cdl.log"
    FUSE_CSV_FILE = CACHE_DIR / "cdl_fuse.csv"
    CREATORS_FILE = CACHE_DIR / "creators.txt"
    DISCOVER_DIR = CACHE_DIR / "discover"

    # default creators (written to CREATORS_FILE when that file is first created)
    DEFAULT_CREATORS = ["boixd/onlyfans"]

    DEBUG = False
    DRY_RUN = False

    # api settings
    POST_PER_PAGE = 50
    DEFAULT_MAX_PAGE = 10
    MAX_FAIL_COUNT = 7

    @classmethod
    def ensure_dirs(cls):
        """Create the cache and discover directories if they are missing."""
        cls.CACHE_DIR.mkdir(parents=True, exist_ok=True)
        # parents=True keeps this call valid on its own, whatever the order.
        cls.DISCOVER_DIR.mkdir(parents=True, exist_ok=True)

    @classmethod
    def ensure_files(cls):
        """Create the database, fuse CSV and creators files if they are missing.

        A newly created creators file is filled with ``DEFAULT_CREATORS``.
        Files that already exist are left untouched.
        """
        for path in (cls.DB_PATH, cls.FUSE_CSV_FILE, cls.CREATORS_FILE):
            if path.exists():
                continue
            path.touch()
            logging.info("Created file %s", path)
            if path == cls.CREATORS_FILE:
                write_txt(cls.CREATORS_FILE, cls.DEFAULT_CREATORS, mode="w")

    @classmethod
    def creator_folder(cls, creator_id: str) -> Path:
        """Return the folder ``BASE_DIR / creator_id``, creating it if needed.

        Missing parent directories are created too, so the call works even
        when ``ensure_dirs()`` has not run yet.
        """
        folder = cls.BASE_DIR / creator_id
        folder.mkdir(parents=True, exist_ok=True)
        return folder

    @classmethod
    def cache_file(cls, filename: str, ext: str = ".json") -> Path:
        """Return the path ``CACHE_DIR / (filename + ext)``; nothing is created."""
        return cls.CACHE_DIR / (filename + ext)

    @classmethod
    def set_debug(cls, debug: bool):
        """Set the class-wide ``DEBUG`` flag."""
        cls.DEBUG = debug

    @classmethod
    def set_dry_run(cls, dry_run: bool):
        """Set the class-wide ``DRY_RUN`` flag."""
        cls.DRY_RUN = dry_run
72
+
73
+
74
def setup_logging(log_file: Path, level: int = 0):
    """Configure the root logger: everything to *log_file*, errors to the console.

    Safe to call more than once: handlers installed earlier are removed and
    closed first, so records are not duplicated and the previous log file
    handle is released.

    Args:
        log_file: File that receives the log records (opened in append mode).
        level: Level set on the root logger.
    """
    logger = logging.getLogger()
    logger.setLevel(level)

    # Remove AND close previous handlers. Only clearing the list would leave
    # the old FileHandler's file open.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    # logging format & file handler
    file_handler = logging.FileHandler(log_file, encoding="utf-8", mode="a")
    file_handler.setFormatter(
        logging.Formatter(
            "{asctime} - {levelname} - {message}",
            style="{",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    logger.addHandler(file_handler)

    # console handler: only records at ERROR level or above are shown
    stream = logging.StreamHandler()
    stream.setLevel(logging.ERROR)
    logger.addHandler(stream)
rcdl/core/db.py ADDED
@@ -0,0 +1,235 @@
1
+ # core/db.py
2
+
3
+ """Handle SQL DB and DB Parsing"""
4
+
5
+ import sqlite3
6
+ import logging
7
+ from typing import Iterable
8
+
9
+
10
+ import rcdl.core.db_queries as queries
11
+ from .config import Config
12
+ from .models import Video, VideoStatus
13
+
14
+
15
class DB:
    """Wrapper around the SQLite connection that stores the videos table.

    Usable as a context manager: leaving the ``with`` block closes the
    connection.
    """

    def __init__(self):
        """Open the database at ``Config.DB_PATH`` and apply connection pragmas."""
        self.conn = sqlite3.connect(Config.DB_PATH)
        self.conn.row_factory = sqlite3.Row  # rows are addressable by column name
        self.conn.execute("PRAGMA foreign_keys = ON")  # check input into db
        self.conn.execute("PRAGMA journal_mode = WAL")
        self.conn.execute("PRAGMA synchronous = NORMAL")  # faster write speed

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def init_table(self):
        """Create the tables and indexes if missing and seed the schema version."""
        # table for videos to download, plus its indexes
        self.conn.execute(queries.CREATE_VIDEOS_TABLE)
        self.conn.execute(queries.CREATE_IDX_VIDEOS_STATUS)
        self.conn.execute(queries.CREATE_IDX_VIDEOS_CREATOR)
        self.conn.execute(queries.CREATE_IDX_VIDEOS_FAIL_COUNT)

        # table for the schema version, to ease migrations if necessary
        self.conn.execute(queries.CREATE_SCHEMA_VERSION_TABLE)

        # initialize the version to 1 if the table is empty
        row = self.conn.execute(
            "SELECT COUNT(*) as cnt FROM schema_version"
        ).fetchone()
        if row["cnt"] == 0:
            self.conn.execute(
                "INSERT INTO schema_version (version) VALUES (?)", (1,)
            )

        self.conn.commit()

    def get_schema_version(self) -> int:
        """Return the stored schema version, or 0 when no version row exists."""
        row = self.conn.execute("SELECT version FROM schema_version").fetchone()
        return row["version"] if row else 0

    def set_schema_version(self, version: int):
        """Overwrite the stored schema version and commit."""
        self.conn.execute("UPDATE schema_version SET version = ?", (version,))
        self.conn.commit()

    def _row_to_video(self, row: sqlite3.Row) -> Video:
        """Build a ``Video`` model from one row of the videos table."""
        return Video(
            post_id=row["post_id"],
            creator_id=row["creator_id"],
            service=row["service"],
            domain=row["domain"],
            relative_path=row["relative_path"],
            url=row["url"],
            part=row["part"],
            status=VideoStatus(row["status"]),
            fail_count=row["fail_count"],
            published=row["published"],
            title=row["title"],
            substring=row["substring"],
            downloaded_at=row["downloaded_at"],
            file_size=row["file_size"],
        )

    def query_videos(
        self,
        *,
        status: VideoStatus | Iterable[VideoStatus] | None = None,
        creator_id: str | None = None,
        post_id: str | None = None,
        max_fail_count: int | None = None,
        min_part_number: int | None = None,
        pending: bool = False,
    ) -> list[Video]:
        """Return the videos matching the given filters.

        Filters are combined with AND: when both ``status`` and
        ``creator_id`` are given, only rows matching both are returned.
        With no filter at all, every video is returned.

        Args:
            status: A single status, or an iterable of accepted statuses.
            creator_id: Exact creator id to match.
            post_id: Exact post id to match.
            max_fail_count: Keep rows with ``fail_count`` strictly below it.
            min_part_number: Keep rows whose ``part`` is strictly greater.
            pending: When True, every other filter is ignored and the query
                selects NOT_DOWNLOADED videos plus FAILED videos whose
                ``fail_count`` is below the threshold. The threshold is
                ``max_fail_count`` when given (0 included), otherwise
                ``Config.MAX_FAIL_COUNT``.
        """
        sql = "SELECT * FROM videos"
        params: list = []

        if pending:
            # "is None" rather than "or": an explicit 0 must not fall back
            # to the configured default.
            threshold = (
                Config.MAX_FAIL_COUNT if max_fail_count is None else max_fail_count
            )
            sql += " WHERE status = ? OR (status = ? AND fail_count < ?)"
            params += [
                VideoStatus.NOT_DOWNLOADED.value,
                VideoStatus.FAILED.value,
                threshold,
            ]
        else:
            conditions = []

            if status is not None:
                if isinstance(status, VideoStatus):
                    conditions.append("status = ?")
                    params.append(status.value)
                else:
                    statuses = list(status)
                    placeholders = ",".join("?" for _ in statuses)
                    conditions.append(f"status IN ({placeholders})")
                    params.extend(s.value for s in statuses)

            if creator_id is not None:
                conditions.append("creator_id = ?")
                params.append(creator_id)

            if post_id is not None:
                conditions.append("post_id = ?")
                params.append(post_id)

            if max_fail_count is not None:
                conditions.append("fail_count < ?")
                params.append(max_fail_count)

            if min_part_number is not None:
                # "part" is a TEXT column: without the cast SQLite compares
                # it to the bound integer as text ('10' < '2').
                conditions.append("CAST(part AS INTEGER) > ?")
                params.append(min_part_number)

            if conditions:
                sql += " WHERE " + " AND ".join(conditions)

        logging.debug("SQL CMD: %s with params: %s", sql, params)
        rows = self.conn.execute(sql, params).fetchall()
        if Config.DEBUG:
            logging.debug("DB query returned %s result", len(rows))

        return [self._row_to_video(r) for r in rows]

    def get_db_videos_info(self):
        """Return the number of videos per status.

        The result maps every ``VideoStatus`` value to its row count, with 0
        for statuses that have no row, e.g.
        ``{"not_downloaded": 3, "failed": 0, ...}``.
        """
        info = {status.value: 0 for status in VideoStatus}
        # One aggregate query instead of loading every row just to count it.
        cur = self.conn.execute(
            "SELECT status, COUNT(*) AS cnt FROM videos GROUP BY status"
        )
        for row in cur:
            # Rows whose status is not a known VideoStatus value are ignored.
            if row["status"] in info:
                info[row["status"]] = row["cnt"]
        return info

    def set_status(
        self, video: Video, status: VideoStatus, *, fail_count: int | None = None
    ):
        """Set the status (and optionally fail_count) of *video* and persist it.

        *video* is modified in place before being upserted.
        """
        video.status = status
        if fail_count is not None:
            video.fail_count = fail_count
        self._upsert_video(video)

    def insert_videos(self, videos: list[Video]):
        """Insert the videos that are not already present; existing rows are kept.

        New rows are always stored as NOT_DOWNLOADED with a fail_count of 0
        and no download date or file size, whatever the model holds.
        """
        if not videos:
            return

        rows = [
            (
                video.post_id,
                video.creator_id,
                video.service,
                video.domain,
                video.relative_path,
                video.url,
                video.part,
                VideoStatus.NOT_DOWNLOADED.value,
                0,
                video.published,
                video.title,
                video.substring,
                None,
                None,
            )
            for video in videos
        ]

        self.conn.executemany(queries.INSERT_IGNORE_VIDEO_UPSERT, rows)
        self.conn.commit()

    def _upsert_video(self, video: Video):
        """Insert *video*, or update its row if it is already in the DB.

        On conflict only these fields are updated: status, fail_count,
        relative_path, downloaded_at and file_size. A missing status is
        replaced by NOT_DOWNLOADED on the model itself.
        """
        if video.status is None:
            video.status = VideoStatus.NOT_DOWNLOADED

        self.conn.execute(
            queries.INSERT_VIDEO_UPSERT,
            (
                video.post_id,
                video.creator_id,
                video.service,
                video.domain,
                video.relative_path,
                video.url,
                video.part,
                video.status.value,
                video.fail_count,
                video.published,
                video.title,
                video.substring,
                video.downloaded_at,
                video.file_size,
            ),
        )
        self.conn.commit()

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()
rcdl/core/db_queries.py ADDED
@@ -0,0 +1,66 @@
1
+ # core/db_queries.py
2
+
3
+ """
4
+ Hold all the SQL commands strings
5
+ """
6
+
7
# --- schema -----------------------------------------------------------------

# One row per video file; (service, url) identifies a video uniquely.
CREATE_VIDEOS_TABLE = """
CREATE TABLE IF NOT EXISTS videos (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    post_id TEXT,
    creator_id TEXT,
    service TEXT,
    domain TEXT,
    relative_path TEXT,
    url TEXT,
    part TEXT,
    status TEXT DEFAULT 'not_downloaded',
    fail_count INTEGER DEFAULT 0,
    published TEXT,
    title TEXT,
    substring TEXT,
    downloaded_at TEXT,
    file_size REAL,
    UNIQUE (service, url)
)
"""

# Holds the schema version number.
CREATE_SCHEMA_VERSION_TABLE = """
CREATE TABLE IF NOT EXISTS schema_version (
    version INTEGER NOT NULL
)
"""

# --- writes -----------------------------------------------------------------

# Insert a video; when (service, url) already exists, refresh only the
# download-state columns listed after DO UPDATE SET.
INSERT_VIDEO_UPSERT = """
INSERT INTO videos (
    post_id, creator_id, service, domain, relative_path, url, part,
    status, fail_count, published, title, substring,
    downloaded_at, file_size
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(service, url) DO UPDATE SET
    status = excluded.status,
    fail_count = excluded.fail_count,
    relative_path = excluded.relative_path,
    downloaded_at = excluded.downloaded_at,
    file_size = excluded.file_size
"""

# Insert a video; a row that conflicts with an existing one is skipped.
INSERT_IGNORE_VIDEO_UPSERT = """
INSERT OR IGNORE INTO videos (
    post_id, creator_id, service, domain, relative_path, url, part,
    status, fail_count, published, title, substring,
    downloaded_at, file_size
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

# --- indexes ----------------------------------------------------------------

CREATE_IDX_VIDEOS_STATUS = "CREATE INDEX IF NOT EXISTS idx_videos_status ON videos(status)"
CREATE_IDX_VIDEOS_CREATOR = "CREATE INDEX IF NOT EXISTS idx_videos_creator ON videos(creator_id)"
CREATE_IDX_VIDEOS_FAIL_COUNT = "CREATE INDEX IF NOT EXISTS idx_videos_fail_count ON videos(fail_count)"