rcdl 2.2.2__py3-none-any.whl → 3.0.0b13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rcdl/__init__.py +5 -0
- rcdl/__main__.py +15 -3
- rcdl/core/__init__.py +0 -0
- rcdl/core/adapters.py +241 -0
- rcdl/core/api.py +31 -9
- rcdl/core/config.py +133 -14
- rcdl/core/db.py +239 -191
- rcdl/core/db_queries.py +75 -44
- rcdl/core/downloader.py +184 -142
- rcdl/core/downloader_subprocess.py +257 -85
- rcdl/core/file_io.py +13 -6
- rcdl/core/fuse.py +115 -106
- rcdl/core/models.py +83 -34
- rcdl/core/opti.py +90 -0
- rcdl/core/parser.py +80 -78
- rcdl/gui/__init__.py +0 -0
- rcdl/gui/__main__.py +5 -0
- rcdl/gui/db_viewer.py +41 -0
- rcdl/gui/gui.py +54 -0
- rcdl/gui/video_manager.py +170 -0
- rcdl/interface/__init__.py +0 -0
- rcdl/interface/cli.py +100 -20
- rcdl/interface/ui.py +105 -116
- rcdl/utils.py +163 -5
- {rcdl-2.2.2.dist-info → rcdl-3.0.0b13.dist-info}/METADATA +48 -15
- rcdl-3.0.0b13.dist-info/RECORD +28 -0
- rcdl/scripts/migrate_creators_json_txt.py +0 -37
- rcdl/scripts/migrate_old_format_to_db.py +0 -188
- rcdl/scripts/upload_pypi.py +0 -98
- rcdl-2.2.2.dist-info/RECORD +0 -22
- {rcdl-2.2.2.dist-info → rcdl-3.0.0b13.dist-info}/WHEEL +0 -0
- {rcdl-2.2.2.dist-info → rcdl-3.0.0b13.dist-info}/entry_points.txt +0 -0
rcdl/core/downloader.py
CHANGED
@@ -1,18 +1,32 @@
 # core/downloader.py
 
+"""
+Handle post, media download to website
+"""
+
 import logging
 import os
+import json
 
 import requests
 
-
-from .
-from .
-from .
-from .
-from .
-
-
+from rcdl.interface.ui import UI, NestedProgress
+from rcdl.core import parser
+from rcdl.core import adapters
+from rcdl.core.api import URL
+from rcdl.core.config import Config
+from rcdl.core.models import (
+    Creator,
+    Status,
+    Media,
+    Post,
+    FusedMedia,
+    FusedStatus,
+)
+from rcdl.core.db import DB
+from rcdl.core.downloader_subprocess import ytdlp_subprocess
+from rcdl.core.file_io import write_json, load_json
+from rcdl.utils import get_date_now, get_media_metadata
 
 
 class PostsFetcher:
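The import block is the clearest signature of the 2.x → 3.0 restructuring: the relative `from .` imports (truncated in this diff view) are replaced by absolute `rcdl.*` paths, and the new `adapters` module, the `Media`/`Post`/`FusedMedia` models, and the `NestedProgress` UI all land here. A minimal sketch of why the absolute style is the safer choice (the 2.x line shown is illustrative, since the diff truncates the originals):

```python
# Relative imports resolve against the importing module's package, so
#     from .config import Config          # 2.x style (illustrative)
# only works when the file is loaded as rcdl.core.downloader; running it
# directly ("python rcdl/core/downloader.py") raises
# "ImportError: attempted relative import with no known parent package".
# Absolute imports resolve identically from any entry point once the
# package is importable:
from rcdl.core.config import Config
```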
@@ -33,14 +47,18 @@ class PostsFetcher:
 
     def _request_page(self, url: str) -> requests.Response:
         """Request a single page and return json dict"""
-        logging.info(
+        logging.info("RequestEngine url %s", url)
         headers = URL.get_headers()
-        response = requests.get(url, headers=headers)
+        response = requests.get(url, headers=headers, timeout=Config.TIMEOUT)
         if response.status_code != 200:
-            logging.warning(
+            logging.warning("Failed request %s: %s", url, response.status_code)
         return response
 
-    def request(self, params: dict =
+    def request(self, params: dict | None = None):
+        """Request multiple page of an url"""
+        if params is None:
+            params = {}
+
         with UI.progress_posts_fetcher(self.max_page) as progress:
             task = progress.add_task("Fetching posts", total=self.max_page)
 
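Two small but real fixes land in this hunk: `_request_page` gains `timeout=Config.TIMEOUT` (without a timeout, `requests` can block indefinitely on a stalled connection), and `request` swaps its truncated `params: dict = ...` default for the `None`-sentinel idiom. A standalone sketch of both patterns, with names of our own choosing rather than rcdl's:

```python
import requests

def fetch(url: str, params: dict | None = None, timeout: float = 10.0) -> requests.Response:
    # A mutable default like `params: dict = {}` would be created once and
    # shared by every call; the None sentinel gives each call a fresh dict.
    if params is None:
        params = {}
    # With a timeout, a stalled connect/read raises requests.Timeout
    # (a subclass of RequestException) instead of hanging forever.
    return requests.get(url, params=params, timeout=timeout)

try:
    fetch("https://example.com", timeout=5.0)
except requests.RequestException as e:
    print(f"request failed: {e}")
```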
@@ -53,7 +71,7 @@ class PostsFetcher:
                # Dry run: not request acutally made
                if Config.DRY_RUN:
                    logging.debug(
-
+                        "DRY-RUN posts fetcher %s -> %s", url, self.json_path
                    )
                    self.page += 1
                    continue
@@ -61,13 +79,15 @@
                response = self._request_page(url)
                self.status = response.status_code
 
-                # if the programm crash while doing requests,
+                # if the programm crash while doing requests,
+                # previous requests are still saved and not overwritten.
                if self.page > 0:
                    json_data = list(load_json(self.json_path))
                else:
                    json_data = []
 
-                # for discover command, response json is in a
+                # for discover command, response json is in a
+                # different format and contains 'posts'
                if self.status == 200:
                    if "posts" in response.json():
                        json_data.extend(response.json()["posts"])
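The completed comment spells out PostsFetcher's crash-safety scheme: each page reloads the JSON already written to disk before extending it, so pages fetched before a crash survive. A self-contained sketch of that accumulate-and-rewrite pattern (the helper name and raw `json` calls are ours; rcdl goes through its `load_json`/`write_json` wrappers):

```python
import json
import os

def append_page(json_path: str, new_items: list) -> list:
    """Re-read what is already on disk, extend it, and write it back,
    so a crash between pages never loses previously fetched pages."""
    data = []
    if os.path.exists(json_path):
        with open(json_path, encoding="utf-8") as f:
            data = json.load(f)
    data.extend(new_items)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(data, f)
    return data
```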
@@ -79,70 +99,116 @@ class PostsFetcher:
                    progress.update(
                        task,
                        advance=1,
-                        description=
+                        description=(
+                            f"Fetched {len(json_data)}"
+                            f" posts (page {self.page + 1}/{self.max_page})"
+                        ),
+                    )
+                except requests.RequestException as e:
+                    logging.error(
+                        "Failed to request %s (page: %s) deu to: %s", url, self.page, e
+                    )
+                except json.JSONDecodeError as e:
+                    logging.error(
+                        "Failed to decode JSON response of request %s due to: %s",
+                        url,
+                        e,
                    )
-                except Exception as e:
-                    logging.error(f"Error in request {url} p{self.page}: {e}")
                finally:
                    self.page += 1
 
 
-class
-    """Handle downloading a list of
+class MediaDownloader:
+    """Handle downloading a list of media and update DB status"""
 
    def __init__(self):
        pass
 
-    def _build_url(self,
-
+    def _build_url(self, domain: str, url: str):
+        """Return full url"""
+        return URL.get_url_from_file(domain, url)
 
-    def
-
-
+    def _build_full_path(self, user: str, media_path: str):
+        """Return full path"""
+        return os.path.join(Config.creator_folder(user), media_path)
 
-
-
-        )
+    def _media_exist(self, full_path: str):
+        """Check a file exist"""
+        return os.path.exists(full_path)
 
-    def
-
-        if result == 0:
-            d.set_status(video, VideoStatus.DOWNLOADED, fail_count=0)
-        else:
-            d.set_status(video, VideoStatus.FAILED, fail_count=video.fail_count + 1)
+    def _update_db(self, result: int, media: Media, full_path: str):
+        """Update db information"""
 
-
-
-
-
-
-
-
-
-
-
-                f"{video.creator_id}@({video.service})", video.relative_path
-            )
+        # video failed to download
+        if result != 0:
+            media.fail_count += 1
+        else:
+            duration, file_size, checksum = get_media_metadata(full_path)
+            media.duration = duration
+            media.status = Status.DOWNLOADED
+            media.checksum = checksum
+            media.created_at = get_date_now()
+            media.file_size = file_size
 
-
-
-
-
+        with DB() as db:
+            db.update_media(media)
+
+    def download(self, medias: list[Media], max_fail_count: int | None = None):
+        """Download all medias in media with PENDING stats"""
+        # init progress bar
+        progress = NestedProgress(UI.console)
+        progress.start(
+            total=len(medias),
+            total_label="Downloading videos",
+            current_label="Current video",
+        )
 
-
-
-
-
-
-
+        max_try = Config.MAX_FAIL_COUNT
+        if max_fail_count is not None:
+            max_try = max_fail_count
+        for media in medias:
+            progress.start_current("Downloading", total=2)
+            if media.fail_count > max_try:
+                UI.warning(
+                    f"Video skipped due to too many failed download attempt ({media.fail_count})"
+                )
+                progress.advance_total()
+                continue
+
+            # match post info from db with post_id to get user/creator_id
+            with DB() as db:
+                post = db.query_post_by_id(media.post_id)
+            if post is None:
+                UI.error(f"Could not match media post_id {media.post_id} with a post")
+                progress.advance_total()
+                continue
+
+            # build full url and full path
+            url = self._build_url(post.domain, media.url)
+            full_path = self._build_full_path(post.user, media.file_path)
+
+            # update progress bar info (video in download info)
+            progress.set_status(f"{post.user}@({post.service}) -> ", media.file_path)
+
+            # check video does not already exist
+            if self._media_exist(full_path):
+                UI.warning(
+                    f"Video {url} @ {full_path} already exists. Possible DB problem"
+                )
+                self._update_db(0, media, full_path)
+                progress.advance_total()
+                continue
 
-
-
-
+            # dry run: no actual download, skippe rest of fn
+            if Config.DRY_RUN:
+                UI.debug(f"(dry-run) dl {post.user}@{full_path} from {url}")
+                progress.advance_total()
+                continue
 
-
-
-
+            result = ytdlp_subprocess(url, full_path)
+            self._update_db(result, media, full_path)
+            progress.advance_total()
+        progress.close()
 
 
 def fetch_posts_by_tag(tag: str, max_page: int = Config.DEFAULT_MAX_PAGE) -> dict:
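Besides introducing `MediaDownloader`, this hunk replaces the 2.x blanket `except Exception` with separate handlers for network failures and malformed JSON, and moves from f-string logging to lazy `%`-style arguments (formatted only if the record is actually emitted). A minimal standalone version of that structure (names are ours; `response.json()` in requests ≥ 2.27 raises a subclass of `json.JSONDecodeError`, so the second handler does catch it):

```python
import json
import logging
import requests

def fetch_posts(url: str) -> list:
    try:
        response = requests.get(url, timeout=10)
        return response.json().get("posts", [])
    except requests.RequestException as e:
        # connection errors, timeouts, too many redirects, ...
        logging.error("Failed to request %s due to: %s", url, e)
    except json.JSONDecodeError as e:
        # body was not valid JSON (e.g. an HTML error page)
        logging.error("Failed to decode JSON response of %s due to: %s", url, e)
    return []
```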
@@ -158,108 +224,84 @@ def fetch_posts_by_tag(tag: str, max_page: int = Config.DEFAULT_MAX_PAGE) -> dict:
 def fetch_posts_by_creator(creator: Creator) -> dict:
     """Helper function to get all posts from a creator"""
     url = URL.get_creator_post_wo_param(creator)
-    path = Config.cache_file(f"{creator.
+    path = Config.cache_file(f"{creator.id}_{creator.service}")
     pf = PostsFetcher(url, str(path))
     pf.request()
 
     return load_json(path)
 
 
+def get_fuses_from_post(posts: list[Post]) -> list[FusedMedia]:
+    """Update data on fuses database table for video to be fused"""
+    fuses: list[FusedMedia] = []
+    for post in posts:
+        json_post = json.loads(post.raw_json)
+        total_parts = len(parser.extract_video_urls(json_post))
+        if total_parts > 1:
+            fuses.append(
+                FusedMedia(
+                    id=post.id,
+                    duration=0,
+                    total_parts=total_parts,
+                    status=FusedStatus.PENDING,
+                    checksum="",
+                    file_path=parser.get_filename_fuse(post),
+                    created_at="",
+                    updated_at="",
+                    file_size=0,
+                    fail_count=0,
+                )
+            )
+    return fuses
+
+
 def refresh_creators_videos():
     """
-    Command refresh
     For each creator:
-        - get
-        -
-        -
-        - update the DB
+        - get posts with videos & update posts DB
+        - extract all medias & update medias DB
+        - extract fuses group & update fuses DB
     """
     creators = parser.get_creators()
     for creator in creators:
-        UI.info(f"Creator {creator.
+        UI.info(f"Creator {creator.id} from {creator.service}")
 
+        # request all posts by creator
         fetch_posts_by_creator(creator)
+
+        # only keep posts with video url (mp4, m4v, ...)
        posts_with_videos = parser.filter_posts_with_videos_from_json(
-            str(Config.cache_file(f"{creator.
+            str(Config.cache_file(f"{creator.id}_{creator.service}"))
        )
-        all_videos = parser.convert_posts_to_videos(posts_with_videos)
 
-
-
-        )
+        # convert all json dict into Post model
+        posts = adapters.json_posts_to_posts(posts_with_videos)
 
-        #
+        # insert posts in db
        with DB() as db:
-            db.
-
-
-def download_videos_to_be_dl():
-    """
-    Command dlsf
-    Download videos in db with status TO_BE_DOWNLOADED OR (FAILED & fail_count < Config.)
-    """
-    with DB() as db:
-        videos = db.query_videos(pending=True)
-
-    vd = VideoDownloader()
-    vd.downloads(videos, write_db=True, discover=False)
-
-
-# --- --- --- --- --- DISCOVER --- --- --- --- ---
-def discover(tag: str, max_page: int):
-    discover_creators(tag, max_page)
-    dl_video_from_discover_creators()
+            db.insert_posts(posts)
 
+        # find all multiple part videos and update db
+        fuses = get_fuses_from_post(posts)
+        with DB() as db:
+            db.insert_fused_media(fuses)
 
-
-
-
-
-        path = str(Config.cache_file(tag))
-        posts_with_videos = parser.filter_posts_with_videos_from_json(path)
-        logging.info(f"Find {len(posts_with_videos)} posts with videos")
-
-        creators = parser.get_creators_from_posts(posts_with_videos)
-
-        # save to csv
-        file = os.path.join(Config.DISCOVER_DIR, "discover.csv")
-        with open(file, "w") as f:
-            for c in creators:
-                line = f"{c.creator_id};{c.service};{c.domain};{'to_be_treated'}\n"
-                f.write(line)
-
+        # convert all posts into videos
+        medias = []
+        for post in posts:
+            medias.extend(adapters.post_to_videos(post))
 
-
-
-        with open(file, "r") as f:
-            lines = f.readlines()
+        # insert videos in db
+        with DB() as db:
+            db.insert_medias(medias)
 
-        creators = []
-        for line in lines:
-            line = line.replace("\n", "").strip().split(";")
-            creators.append(
-                Creator(creator_id=line[0], service=line[1], domain=line[2], status=line[3])
-            )
 
-
-
-
-
-
-
-            print(f"ERROR - Request {URL.get_creator_post_wo_param(creator)}")
-        response_posts = response.json()
-        posts = parser.filter_posts_with_videos_from_list(response_posts)
-        print(f"{len(posts)} found")
-        if len(posts) > 5:
-            posts = posts[0:5]
-            print("Limited posts to 5")
+def download_videos_to_be_dl(max_fail_count: int | None):
+    """
+    Download all media with PENDING status in DB
+    """
+    with DB() as db:
+        medias = db.query_media_by_status(Status.PENDING)
 
-
-
-        url = URL.get_url_from_file(creator.domain, urls[0])
-        filename = f"{post['user']}_{post['id']}.mp4"
-        filepath = os.path.join(Config.DISCOVER_DIR, filename)
-        ytdlp_subprocess(url, filepath)
+    media_downloader = MediaDownloader()
+    media_downloader.download(medias, max_fail_count=max_fail_count)