misato 0.7.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
misato-0.7.9/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 MiyukiQAQ
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+
misato-0.7.9/PKG-INFO ADDED
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: misato
3
+ Version: 0.7.9
4
+ Requires-Python: >=3.9
5
+ License-File: LICENSE
6
+ Requires-Dist: curl_cffi
7
+ Dynamic: license-file
8
+ Dynamic: requires-dist
9
+ Dynamic: requires-python
misato-0.7.9/README.md ADDED
@@ -0,0 +1,126 @@
1
+ ## ⭐️ misato
2
+
3
+ A tool for downloading videos from the "MissAV" website.
4
+
5
+ ## ⚙️ Installation
6
+
7
+ To install misato from the Python Package Index (PyPI) run:
8
+
9
+ ```
10
+ pip install misato
11
+ ```
12
+
13
+ To upgrade misato from the Python Package Index (PyPI) run:
14
+
15
+ ```
16
+ pip install --upgrade misato
17
+ ```
18
+
19
+ ## 📷 Snapshot
20
+
21
+ ![snapshot.png](resources/readme_pics/snapshot.png)
22
+
23
+ ## 📖 Instructions
24
+
25
+ ```
26
+ [root@misato ~]# misato -h
27
+ usage: main.py [-h] [-auto [...]] [-auth [...]] [-limit] [-search] [-file] [-proxy] [-ffmpeg] [-cover] [-ffcover] [-noban] [-title] [-quality] [-retry] [-delay] [-timeout]
28
+
29
+ A tool for downloading videos from the "MissAV" website.
30
+
31
+ Main Options:
32
+ Use the -auto option to specify the video or playlist URLs to download. can be mixed.
33
+ Use the -auth option to specify the username and password to download the videos collected by the account.
34
+ Use the -search option to search for movie by serial number and download it.
35
+ Use the -file option to download video or playlist URLs in the file. ( Each line is a URL )
36
+
37
+ Additional Options:
38
+ Use the -limit option to limit the number of downloads.
39
+ Use the -proxy option to configure http proxy server ip and port.
40
+ Use the -ffmpeg option to get the best video quality. ( Recommend! )
41
+ Use the -cover option to save the cover when downloading the video
42
+ Use the -ffcover option to set the cover as the video preview (ffmpeg required)
43
+ Use the -noban option to turn off the misato banner when downloading the video
44
+ Use the -title option to use the full title as the movie file name
45
+ Use the -quality option to specify the movie resolution (360, 480, 720, 1080...)
46
+ Use the -retry option to specify the number of retries for downloading segments
47
+ Use the -delay option to specify the delay before retry ( seconds )
48
+ Use the -timeout option to specify the timeout for segment download ( seconds )
49
+
50
+ options:
51
+ -h, --help show this help message and exit
52
+ -auto [ ...] Multiple movie and playlist URLs can be mixed. separate with spaces
53
+ -auth [ ...] Username and password, separate with space
54
+ -limit Limit the number of downloads
55
+ -search Movie serial number
56
+ -file File path
57
+ -proxy HTTP(S) proxy
58
+ -ffmpeg Enable ffmpeg processing
59
+ -cover Download video cover
60
+ -ffcover Set cover as preview (ffmpeg required)
61
+ -noban Do not display the banner
62
+ -title Full title as file name
63
+ -quality Specify the movie resolution
64
+ -retry Number of retries for downloading segments
65
+ -delay Delay in seconds before retry
66
+ -timeout Timeout in seconds for segment download
67
+
68
+ Examples:
69
+ misato -auto "https://missav.ai/sw-950" "https://missav.ai/dm132/actresses/JULIA"
70
+ misato -auto "https://missav.ai/dm132/actresses/JULIA" -limit 20 -ffcover
71
+ misato -auto "https://missav.ai/sw-950" "https://missav.ai/dandy-917"
72
+ misato -auto "https://missav.ai/sw-950" -proxy localhost:7890
73
+ misato -auth misato@gmail.com miyukiQAQ -ffmpeg -noban -limit 20
74
+ misato -file /home/misato/url.txt -ffmpeg -title -limit 20
75
+ misato -search sw-950 -ffcover -quality 720
76
+ ```
77
+
78
+ ## 💬 The ```-auto``` option
79
+
80
+ - Use the -auto option to download movies from a playlist.
81
+ - This playlist can be a public playlist created by your own account, or any playlist displayed based on search results or tag filters.
82
+ - **Wrap each playlist URL in double quotes (`"..."`) when you use the -auto option**, so that the shell does not interpret characters such as `?` and `&`.
83
+
84
+ Command Examples:
85
+ - ```misato -auto "https://missav.ai/search/JULIA?filters=uncensored-leak&sort=saved" -limit 50 -ffmpeg```
86
+ - ```misato -auto "https://missav.ai/search/JULIA?filters=individual&sort=views" -limit 20 -ffmpeg```
87
+ - ```misato -auto "https://missav.ai/dm132/actresses/JULIA" -limit 20 -ffmpeg```
88
+ - ```misato -auto "https://missav.ai/playlists/ewzoukev" -limit 20 -ffmpeg```
89
+ - ```misato -auto "https://missav.ai/dm444/en/labels/WANZ" -limit 20 -ffmpeg```
90
+ - ```misato -auto "https://missav.ai/dm21/en/makers/Takara%20Visual" -limit 20 -ffmpeg```
91
+ - ```misato -auto "https://missav.ai/dm1/en/genres/4K" -limit 20 -ffmpeg```
92
+
93
+ ## 💡 Precautions
94
+
95
+ - If you are from an ancient oriental country, you will most likely need a proxy.
96
+ - Use the -ffmpeg option so that ffmpeg merges the downloaded segments into the final video; this gives the best results.
97
+
98
+ ## 👀 About FFmpeg
99
+
100
+ 1. If you want misato to use ffmpeg to process the video, use the -ffmpeg option.
101
+ 2. Please check whether the ffmpeg command is valid before using the -ffmpeg option. (e.g. ```ffmpeg -version```)
102
+ 3. To install FFmpeg, please refer to https://ffmpeg.org/
103
+
104
+ ## 📄 Disclaimer
105
+
106
+ This project is licensed under the [MIT License](LICENSE). The following additional disclaimers and notices apply:
107
+
108
+ ### 1. Legal Compliance
109
+ - This software is provided solely for **communication, research, learning, and personal use**.
110
+ - Users are responsible for ensuring that their use of this software complies with all applicable laws and regulations in their jurisdiction.
111
+ - The software must not be used for any unlawful, unethical, or unauthorized purposes, including but not limited to violating third-party rights or legal restrictions.
112
+
113
+ ### 2. No Warranty
114
+ As stated in the MIT License:
115
+ > "THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT."
116
+
117
+ ### 3. Limitation of Liability
118
+ - The author(s) shall not be held liable for any claims, damages, or other liabilities arising from or in connection with the use or performance of this software.
119
+ - Users bear all risks and responsibilities for the use of this software, including but not limited to data loss, system damage, or legal consequences.
120
+
121
+ ### 4. Third-Party Dependencies
122
+ - This project may include or depend on third-party libraries or tools. Users are responsible for reviewing and complying with the licenses and terms of these dependencies.
123
+
124
+ ### 5. Security and Privacy
125
+ - This software may interact with user systems, networks, or data. Users should implement appropriate security measures to protect sensitive information and infrastructure.
126
+ - The authors are not responsible for any security vulnerabilities or data breaches resulting from the use of this software.
File without changes
@@ -0,0 +1,20 @@
1
# Text file listing the URLs that were already downloaded (one URL per line).
RECORD_FILE = 'downloaded_urls_misato.txt'
# Segment list that is written for ffmpeg's concat demuxer.
FFMPEG_INPUT_FILE = 'ffmpeg_input_misato.txt'
# The HTML of the most recently fetched movie page is dumped to this file.
TMP_HTML_FILE = 'tmp_movie_misato.html'
# Root folder; each movie gets its own sub-folder below it.
MOVIE_SAVE_PATH_ROOT = 'movies_folder_misato'
# Host prefixes / suffix used to build cover and m3u8 playlist URLs
# (presumably combined with the movie uuid -- confirm against the downloader).
COVER_URL_PREFIX = 'https://fourhoi.com/'
VIDEO_M3U8_PREFIX = 'https://surrit.com/'
VIDEO_PLAYLIST_SUFFIX = '/playlist.m3u8'
# Regular expressions used to scrape links out of listing pages.
HREF_REGEX_MOVIE_COLLECTION = r'<a class="text-secondary group-hover:text-primary" href="([^"]+)" alt="'
HREF_REGEX_PUBLIC_PLAYLIST = r'<a href="([^"]+)" alt="'
HREF_REGEX_NEXT_PAGE = r'<a href="([^"]+)" rel="next"'
# Captures the '|'-separated uuid fragments embedded in a movie page.
MATCH_UUID_PATTERN = r'm3u8\|([a-f0-9\|]+)\|com\|surrit\|https\|video'
MATCH_TITLE_PATTERN = r'<title>([^"]+)</title>'
# Captures width and height from a RESOLUTION=<w>x<h> attribute.
RESOLUTION_PATTERN = r'RESOLUTION=(\d+)x(\d+)'
# Process exit code used when the tool aborts.
MAGIC_NUMBER = 114514
# Default number of attempts, delay between attempts (seconds) and request
# timeout (seconds) for HTTP requests.
RETRY = 5
DELAY = 2
TIMEOUT = 10
# Headers sent with every HTTP request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}
@@ -0,0 +1,24 @@
1
+ import subprocess
2
+ from typing import Optional
3
+ from misato.config import FFMPEG_INPUT_FILE
4
+ from misato.logger import logger
5
+
6
+
7
class FFmpegProcessor:
    """Helpers that shell out to the ``ffmpeg`` command-line tool."""

    @staticmethod
    def create_video_from_segments(segment_files: list[str], output_file: str, cover_file: Optional[str] = None) -> None:
        """Concatenate downloaded segments into one video file without re-encoding.

        Args:
            segment_files: Paths of the segment files, in playback order.
            output_file: Path of the video to create (overwritten if present).
            cover_file: Optional image that is attached to the output as its
                cover picture.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        """
        # ffmpeg treats the list file as UTF-8, so do not rely on the locale encoding.
        with open(FFMPEG_INPUT_FILE, 'w', encoding='utf-8') as f:
            for file in segment_files:
                # Inside a single-quoted concat entry a quote must be written
                # as '\'' (close the quote, escaped quote, reopen the quote).
                escaped = file.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        ffmpeg_command = ['ffmpeg', '-y', '-loglevel', 'error', '-f', 'concat', '-safe', '0', '-i', FFMPEG_INPUT_FILE]
        if cover_file:
            # Second input is the cover; mark its video stream as attached picture.
            ffmpeg_command.extend(['-i', cover_file, '-map', '0', '-map', '1', '-c', 'copy', '-disposition:v:1', 'attached_pic'])
        else:
            ffmpeg_command.extend(['-c', 'copy'])
        ffmpeg_command.append(output_file)

        try:
            subprocess.run(ffmpeg_command, check=True, stdout=subprocess.DEVNULL)
            logger.info("FFmpeg execution completed.")
        except subprocess.CalledProcessError as e:
            logger.error(f"FFmpeg execution failed: {e}")
            raise
@@ -0,0 +1,29 @@
1
+ from typing import Optional
2
+ import time
3
+ from curl_cffi import requests
4
+ from misato.config import HEADERS, RETRY, DELAY, TIMEOUT
5
+ from misato.logger import logger
6
+
7
+
8
class HttpClient:
    """Small HTTP helper that retries failed requests and never raises."""

    @staticmethod
    def _request_with_retries(send, verb: str, url: str, retries: int, delay: int, **kwargs):
        """Call ``send`` up to ``retries`` times and return its response, or None if every attempt raised."""
        for attempt in range(retries):
            try:
                # NOTE(review): verify=False disables TLS certificate checks;
                # kept to preserve existing behaviour.
                return send(url=url, headers=HEADERS, verify=False, **kwargs)
            except Exception as e:
                logger.error(f"Failed to {verb} data (attempt {attempt + 1}/{retries}): {e} url is: {url}")
                # Sleeping after the final attempt would only delay the failure.
                if attempt + 1 < retries:
                    time.sleep(delay)
        logger.error(f"Max retries reached. Failed to {verb} data. url is: {url}")
        return None

    def get(self, url: str, cookies: Optional[dict] = None, retries: int = RETRY, delay: int = DELAY, timeout: int = TIMEOUT) -> Optional[bytes]:
        """Fetch ``url`` and return the response body.

        Args:
            url: Address to request.
            cookies: Optional cookies to send.
            retries: Maximum number of attempts.
            delay: Seconds to wait between attempts.
            timeout: Per-request timeout in seconds.

        Returns:
            The raw response body, or None when every attempt raised.
            The HTTP status code is not inspected.
        """
        response = self._request_with_retries(requests.get, "fetch", url, retries, delay, cookies=cookies, timeout=timeout)
        return None if response is None else response.content

    def post(self, url: str, data: dict, cookies: Optional[dict] = None, retries: int = RETRY, delay: int = DELAY, timeout: int = TIMEOUT) -> Optional[requests.Response]:
        """Send ``data`` as a POST to ``url``.

        Returns:
            The response object (the caller checks the status code), or None
            when every attempt raised.
        """
        return self._request_with_retries(requests.post, "post", url, retries, delay, data=data, cookies=cookies, timeout=timeout)
@@ -0,0 +1,17 @@
1
+ import logging
2
+
3
# Shared application logger: every record (DEBUG and above) is written both
# to ``misato.log`` in the working directory and to the console.
formatter = logging.Formatter('misato - %(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

file_handler = logging.FileHandler('misato.log')
console_handler = logging.StreamHandler()

logger = logging.getLogger('misato-logger')
logger.setLevel(logging.DEBUG)

# Both handlers share the level and the format; the file handler is attached first.
for _handler in (file_handler, console_handler):
    _handler.setLevel(logging.DEBUG)
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)
@@ -0,0 +1,188 @@
1
+ import argparse
2
+ import os
3
+ import subprocess
4
+ from misato.logger import logger
5
+ from misato.config import MOVIE_SAVE_PATH_ROOT, RECORD_FILE, MAGIC_NUMBER
6
+ from misato.http_client import HttpClient
7
+ from misato.url_sources import SingleUrlSource, PlaylistSource, AuthSource, SearchSource, FileSource, AutoUrlSource
8
+ from misato.video_downloader import VideoDownloader
9
+ from misato.utils import delete_all_subfolders, ThreadSafeCounter
10
+
11
+ banner = """
12
+ ██████ ██████ ███ █████ ███
13
+ ░░██████ ██████ ░░░ ░░███ ░░░
14
+ ░███░█████░███ ████ █████ ████ █████ ████ ░███ █████ ████
15
+ ░███░░███ ░███ ░░███ ░░███ ░███ ░░███ ░███ ░███░░███ ░░███
16
+ ░███ ░░░ ░███ ░███ ░███ ░███ ░███ ░███ ░██████░ ░███
17
+ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ░███░░███ ░███
18
+ █████ █████ █████ ░░███████ ░░████████ ████ █████ █████
19
+ ░░░░░ ░░░░░ ░░░░░ ░░░░░███ ░░░░░░░░ ░░░░ ░░░░░ ░░░░░
20
+ ███ ░███
21
+ ░░██████
22
+ ░░░░░░
23
+ """
24
+
25
+
26
class DownloadTracker:
    """Remembers finished downloads in a text file holding one URL per line."""

    def __init__(self, record_file: str):
        """Load the already recorded URLs from ``record_file`` if it exists."""
        self.record_file = record_file
        self.downloaded_urls = set()
        if not os.path.exists(record_file):
            return
        with open(record_file, 'r', encoding='utf-8') as f:
            for line in f:
                self.downloaded_urls.add(line.strip())

    def is_downloaded(self, url: str) -> bool:
        """Return True when ``url`` has been recorded before."""
        return url in self.downloaded_urls

    def record_download(self, url: str) -> None:
        """Mark ``url`` as downloaded, in memory and by appending it to the record file."""
        self.downloaded_urls.add(url)
        with open(self.record_file, 'a', encoding='utf-8') as f:
            f.write(f"{url}\n")
41
+
42
+
43
def check_ffmpeg_command(ffmpeg: bool) -> bool:
    """Tell whether ffmpeg is usable, but only when it is actually needed.

    Returns True immediately when ``ffmpeg`` is falsy. Otherwise runs
    ``ffmpeg -version`` and returns False if that fails for any reason.
    """
    if ffmpeg:
        try:
            subprocess.run(['ffmpeg', '-version'], check=True, stdout=subprocess.DEVNULL)
        except Exception:
            return False
    return True
51
+
52
+
53
def validate_args(args):
    """Validate the parsed command-line arguments and abort on the first problem.

    Args:
        args: Namespace produced by the argument parser in ``main``.

    Raises:
        SystemExit: With code ``MAGIC_NUMBER`` when a check fails; the reason
            is logged first.
    """
    main_options = (args.auth, args.search, args.file, args.auto)
    if sum(option is not None for option in main_options) != 1:
        logger.error("Exactly one of -auto, -auth, -search, -file must be specified.")
        raise SystemExit(MAGIC_NUMBER)
    if args.auth and len(args.auth) != 2:
        logger.error("Auth requires username and password.")
        raise SystemExit(MAGIC_NUMBER)
    # One probe is enough: ffmpeg is required if either option asks for it.
    if not check_ffmpeg_command(args.ffmpeg or args.ffcover):
        logger.error("FFmpeg command status error.")
        raise SystemExit(MAGIC_NUMBER)
    for opt in ['limit', 'quality', 'retry', 'delay', 'timeout']:
        value = getattr(args, opt)
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() cannot parse.
        if value and (not value.isdecimal() or int(value) <= 0):
            logger.error(f"The -{opt} option must be a positive integer.")
            raise SystemExit(MAGIC_NUMBER)
    if args.file and (not os.path.isfile(args.file) or os.path.getsize(args.file) == 0):
        logger.error("The -file option must be a valid non-empty file.")
        raise SystemExit(MAGIC_NUMBER)
72
+
73
+
74
def main():
    """Command-line entry point: parse options, collect movie URLs and download them.

    URLs already listed in the record file are skipped. A failure while
    downloading one URL is logged and does not stop the remaining downloads.

    Raises:
        SystemExit: With code ``MAGIC_NUMBER`` when the arguments are invalid
            or no URL could be collected.
    """
    parser = argparse.ArgumentParser(
        description='A tool for downloading videos from the "MissAV" website.\n'
                    '\n'
                    'Main Options:\n'
                    'Use the -auto option to specify the video or playlist URLs to download. can be mixed.\n'
                    'Use the -auth option to specify the username and password to download the videos collected by the account.\n'
                    'Use the -search option to search for movie by serial number and download it.\n'
                    'Use the -file option to download video or playlist URLs in the file. ( Each line is a URL )\n'
                    '\n'
                    'Additional Options:\n'
                    'Use the -limit option to limit the number of downloads. \n'
                    'Use the -proxy option to configure http proxy server ip and port.\n'
                    'Use the -ffmpeg option to get the best video quality. ( Recommend! )\n'
                    'Use the -cover option to save the cover when downloading the video\n'
                    'Use the -ffcover option to set the cover as the video preview (ffmpeg required)\n'
                    'Use the -noban option to turn off the misato banner when downloading the video\n'
                    'Use the -title option to use the full title as the movie file name\n'
                    'Use the -quality option to specify the movie resolution (360, 480, 720, 1080...)\n'
                    'Use the -retry option to specify the number of retries for downloading segments\n'
                    'Use the -delay option to specify the delay before retry ( seconds )\n'
                    'Use the -timeout option to specify the timeout for segment download ( seconds )\n',
        epilog='Examples:\n'
               ' misato -auto "https://missav.ai/sw-950" "https://missav.ai/dm132/actresses/JULIA"\n'
               ' misato -auto "https://missav.ai/dm132/actresses/JULIA" -limit 20 -ffcover\n'
               ' misato -auto "https://missav.ai/sw-950" "https://missav.ai/dandy-917"\n'
               ' misato -auto "https://missav.ai/sw-950" -proxy localhost:7890\n'
               ' misato -auth misato@gmail.com misatoQAQ -ffmpeg -noban -limit 20\n'
               ' misato -file /home/misato/url.txt -ffmpeg -title -limit 20\n'
               ' misato -search sw-950 -ffcover -quality 720\n',
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('-auto', nargs='+', metavar='', help='Multiple movie and playlist URLs can be mixed. separate with spaces')
    parser.add_argument('-auth', nargs='+', metavar='', help='Username and password, separate with space')
    parser.add_argument('-limit', type=str, metavar='', help='Limit the number of downloads')
    parser.add_argument('-search', type=str, metavar='', help='Movie serial number')
    parser.add_argument('-file', type=str, metavar='', help='File path')
    parser.add_argument('-proxy', type=str, metavar='', help='HTTP(S) proxy')
    parser.add_argument('-ffmpeg', action='store_true', help='Enable ffmpeg processing')
    parser.add_argument('-cover', action='store_true', help='Download video cover')
    parser.add_argument('-ffcover', action='store_true', help='Set cover as preview (ffmpeg required)')
    parser.add_argument('-noban', action='store_true', help='Do not display the banner')
    parser.add_argument('-title', action='store_true', help='Full title as file name')
    parser.add_argument('-quality', type=str, metavar='', help='Specify the movie resolution')
    parser.add_argument('-retry', type=str, metavar='', help='Number of retries for downloading segments')
    parser.add_argument('-delay', type=str, metavar='', help='Delay in seconds before retry')
    parser.add_argument('-timeout', type=str, metavar='', help='Timeout in seconds for segment download')

    args = parser.parse_args()

    # The logger writes to the console and to misato.log, so everything after
    # the user name in -auth (i.e. the password) is masked before logging.
    loggable_args = dict(vars(args))
    if loggable_args.get('auth'):
        auth_values = loggable_args['auth']
        loggable_args['auth'] = [auth_values[0]] + ['***'] * (len(auth_values) - 1)
    logger.info(str(argparse.Namespace(**loggable_args)))

    validate_args(args)

    if not args.noban:
        print(banner)

    # Using the cover as preview needs both the cover file and ffmpeg.
    if args.ffcover:
        args.ffmpeg = True
        args.cover = True

    if args.proxy:
        logger.info("Network proxy enabled.")
        os.environ["http_proxy"] = f"http://{args.proxy}"
        os.environ["https_proxy"] = f"http://{args.proxy}"

    http_client = HttpClient()
    movie_counter = ThreadSafeCounter()
    if args.auto:
        source = AutoUrlSource(movie_counter=movie_counter, auto_urls=args.auto, limit=args.limit)
    elif args.auth:
        source = AuthSource(movie_counter=movie_counter, username=args.auth[0], password=args.auth[1], limit=args.limit)
    elif args.search:
        source = SearchSource(movie_counter=movie_counter, key=args.search)
    elif args.file:
        source = FileSource(movie_counter=movie_counter, file_path=args.file, limit=args.limit)
    else:
        source = None
    if not source:
        logger.error("No source specified.")
        raise SystemExit(MAGIC_NUMBER)

    movie_urls = source.get_urls()
    if not movie_urls:
        logger.error("No URLs to download.")
        raise SystemExit(MAGIC_NUMBER)

    download_tracker = DownloadTracker(RECORD_FILE)
    options = {
        'download_action': True,
        'write_action': True,
        'ffmpeg_action': args.ffmpeg,
        'num_threads': os.cpu_count(),
        'cover_action': args.cover,
        'title_action': args.title,
        'cover_as_preview': args.ffcover,
        'quality': int(args.quality) if args.quality else None,
        'retry': int(args.retry) if args.retry else 5,
        'delay': int(args.delay) if args.delay else 2,
        'timeout': int(args.timeout) if args.timeout else 10
    }

    for url in movie_urls:
        if download_tracker.is_downloaded(url):
            logger.info(f"{url} already downloaded, skipping.")
            continue
        # Start from a clean working area: drop leftover per-movie folders.
        delete_all_subfolders(MOVIE_SAVE_PATH_ROOT)
        try:
            logger.info(f"Processing URL: {url}")
            downloader = VideoDownloader(url, http_client, options)
            downloader.download()
            download_tracker.record_download(url)
            logger.info(f"Processing URL Complete: {url}")
            print()
        except Exception as e:
            # One broken movie must not abort the rest of the batch.
            logger.error(f"Failed to download {url}: {e}")
        delete_all_subfolders(MOVIE_SAVE_PATH_ROOT)


if __name__ == "__main__":
    main()
@@ -0,0 +1,159 @@
1
+ from abc import ABC, abstractmethod
2
+ import re
3
+ from typing import Optional
4
+ from misato.http_client import HttpClient
5
+ from misato.config import HREF_REGEX_PUBLIC_PLAYLIST, HREF_REGEX_NEXT_PAGE, MATCH_UUID_PATTERN
6
+ from misato.logger import logger
7
+ from misato.utils import ThreadSafeCounter
8
+ from enum import Enum
9
+
10
class UrlType(Enum):
    """Kind of page a user-supplied URL points to."""

    # A page for one movie.
    SINGLE = 1
    # Any other page; it is handled as a listing of movie links.
    PLAYLIST = 2
13
+
14
class UrlSource(ABC):
    """Base class for objects that produce a list of movie page URLs."""

    @abstractmethod
    def get_urls(self) -> list[str]:
        """Return the movie page URLs provided by this source."""

    @staticmethod
    def movie_count_log(movie_counter: ThreadSafeCounter, movie_url: str):
        """Increment the shared movie counter and log the URL with its running number."""
        logger.info(f"Movie {movie_counter.increment_and_get()} url: {movie_url}")

    @staticmethod
    def get_urls_from_list(movie_counter: ThreadSafeCounter, play_list_url: str, limit: Optional[int], cookie=None, http_client: HttpClient = None) -> list[str]:
        """Collect movie URLs from a listing page and the pages it links to via rel="next".

        Args:
            movie_counter: Shared counter of movies found so far; it is
                incremented once per collected URL.
            play_list_url: URL of the first listing page.
            limit: Stop once the counter reaches this value; None means no limit.
            cookie: Optional cookies sent with every page request.
            http_client: Client used for the requests; a new ``HttpClient`` is
                created when omitted.

        Returns:
            The collected URLs. Within a page they keep the order in which they
            appear and duplicates are dropped.
        """
        if http_client is None:
            http_client = HttpClient()
        movie_url_list = []
        visited_pages = set()
        url = play_list_url
        # The visited check stops a "next" link that points back to a page already seen.
        while url and url not in visited_pages and (limit is None or movie_counter.get() < limit):
            visited_pages.add(url)
            html_source = http_client.get(url, cookies=cookie)
            if html_source is None:
                break
            html_source = html_source.decode('utf-8')
            movie_url_matches = re.findall(HREF_REGEX_PUBLIC_PLAYLIST, html_source)
            # dict.fromkeys removes duplicates but, unlike set(), keeps page order.
            for movie_url in dict.fromkeys(movie_url_matches):
                movie_url_list.append(movie_url)
                UrlSource.movie_count_log(movie_counter, movie_url)
                if limit and movie_counter.get() >= limit:
                    return movie_url_list
            next_page_matches = re.findall(HREF_REGEX_NEXT_PAGE, html_source)
            # The href comes straight from HTML, so '&' arrives as '&amp;'.
            url = next_page_matches[0].replace('&amp;', '&') if next_page_matches else None
        return movie_url_list
42
+
43
class SingleUrlSource(UrlSource):
    """Source made of exactly one movie URL."""

    def __init__(self, movie_counter: ThreadSafeCounter, url: str, limit: Optional[str]):
        """Store the URL, the shared counter and the optional download limit."""
        self.movie_counter = movie_counter
        self.url = url
        self.limit = None if not limit else int(limit)

    def get_urls(self) -> list[str]:
        """Return the URL in a list, or an empty list when the limit is already reached."""
        if self.limit and self.movie_counter.get() == self.limit:
            return []
        UrlSource.movie_count_log(self.movie_counter, self.url)
        return [self.url]
55
+
56
class PlaylistSource(UrlSource):
    """Source that scrapes movie URLs from a listing page and its follow-up pages."""

    def __init__(self, movie_counter: ThreadSafeCounter, playlist_url: str, limit: Optional[str]):
        """Store the playlist URL, the shared counter and the optional limit."""
        self.movie_counter = movie_counter
        self.playlist_url = playlist_url
        self.limit = None if not limit else int(limit)
        self.http_client = HttpClient()

    def get_urls(self) -> list[str]:
        """Return the movie URLs found in the playlist; no cookies are sent."""
        return UrlSource.get_urls_from_list(
            movie_counter=self.movie_counter,
            play_list_url=self.playlist_url,
            limit=self.limit,
            cookie=None,
            http_client=self.http_client,
        )
66
+
67
class AutoUrlSource(UrlSource):
    """Source for a mixed list of movie and playlist URLs; each URL's type is detected."""

    def __init__(self, movie_counter: ThreadSafeCounter, auto_urls: list[str], limit: Optional[str]):
        """Store the URLs, the shared counter and the optional download limit."""
        self.movie_counter = movie_counter
        self.auto_urls = auto_urls
        self.limit = int(limit) if limit else None
        self.http_client = HttpClient()

    def get_urls(self) -> list[str]:
        """Expand every configured URL into movie URLs, honouring the limit."""
        movie_url_list = []
        for url in self.auto_urls:
            # Once the limit is reached no further URL can contribute, so stop
            # here instead of fetching each remaining page to classify it.
            if self.limit and self.movie_counter.get() >= self.limit:
                break
            if self._determine_url_type(url) == UrlType.SINGLE:
                source = SingleUrlSource(movie_counter=self.movie_counter, url=url, limit=self.limit)
            else:
                source = PlaylistSource(movie_counter=self.movie_counter, playlist_url=url, limit=self.limit)
            movie_url_list.extend(source.get_urls())
        return movie_url_list

    def _determine_url_type(self, url: str) -> Optional[UrlType]:
        """Classify ``url`` as a single movie page or, failing that, a playlist."""
        return UrlType.SINGLE if self._is_movie_url(url) else UrlType.PLAYLIST

    def _is_movie_url(self, url: str) -> bool:
        """Return True when the page at ``url`` contains the movie uuid marker.

        A page that cannot be fetched counts as "not a movie".
        """
        html = self.http_client.get(url)
        if not html:
            return False
        return re.search(MATCH_UUID_PATTERN, html.decode('utf-8')) is not None
104
+
105
class AuthSource(UrlSource):
    """Source that logs in to an account and lists the movies on its "saved" page."""

    def __init__(self, movie_counter: ThreadSafeCounter, username: str, password: str, limit: Optional[str]):
        """Log in immediately; the process exits if the login fails."""
        self.movie_counter = movie_counter
        self.http_client = HttpClient()
        self.cookie = self._login(username, password)
        self.limit = int(limit) if limit else None

    def _login(self, username: str, password: str) -> dict:
        """Post the credentials to the login API and return the session cookies.

        Raises:
            SystemExit: With code 114514 when the request fails or the server
                does not answer with HTTP 200.
        """
        response = self.http_client.post('https://missav.ai/api/login', data={'email': username, 'password': password})
        if response and response.status_code == 200:
            cookie_info = response.cookies.get_dict()
            if "user_uuid" in cookie_info:
                logger.info(f"User uuid: {cookie_info['user_uuid']}")
            return cookie_info
        logger.error("Login failed, check your network connection or account information.")
        # Raise SystemExit directly: the exit() builtin is only installed by
        # the site module and is not guaranteed to exist.
        raise SystemExit(114514)

    def get_urls(self) -> list[str]:
        """Return the movie URLs listed on the account's saved page."""
        url = 'https://missav.ai/saved'
        return UrlSource.get_urls_from_list(movie_counter=self.movie_counter, play_list_url=url, limit=self.limit, cookie=self.cookie, http_client=self.http_client)
121
+
122
+ def get_urls(self) -> list[str]:
123
+ url = 'https://missav.ai/saved'
124
+ return UrlSource.get_urls_from_list(movie_counter=self.movie_counter, play_list_url=url, limit=self.limit, cookie=self.cookie, http_client=self.http_client)
125
+
126
class SearchSource(UrlSource):
    """Source that looks a movie up by its serial number through the site search."""

    def __init__(self, movie_counter: ThreadSafeCounter, key: str):
        """Store the shared counter and the serial number to search for."""
        self.movie_counter = movie_counter
        self.key = key
        self.http_client = HttpClient()

    def get_urls(self) -> list[str]:
        """Return a one-element list with the first matching movie URL, or an empty list.

        A result matches when its link's ``alt`` attribute equals the key exactly.
        """
        search_url = f"https://missav.ai/search/{self.key}"
        # Escape the key so characters such as '+' or '(' are matched
        # literally instead of being interpreted as regex syntax.
        search_regex = r'<a href="([^"]+)" alt="' + re.escape(self.key) + '" >'
        html_source = self.http_client.get(search_url)
        if html_source is None:
            logger.error(f"Search failed, key: {self.key}")
            return []
        html_source = html_source.decode('utf-8')
        # Take the first hit in page order so the result is deterministic.
        first_match = re.search(search_regex, html_source)
        if first_match:
            movie_url = first_match.group(1)
            logger.info(f"Search {self.key} successfully: {movie_url}")
            UrlSource.movie_count_log(self.movie_counter, movie_url)
            return [movie_url]
        logger.error(f"Search failed, key: {self.key}")
        return []
148
+
149
class FileSource(UrlSource):
    """Source that reads movie/playlist URLs from a text file, one per line."""

    def __init__(self, movie_counter: ThreadSafeCounter, file_path: str, limit: Optional[str]):
        """Store the file path, the shared counter and the optional limit."""
        self.movie_counter = movie_counter
        self.file_path = file_path
        self.limit = None if not limit else int(limit)

    def get_urls(self) -> list[str]:
        """Read the non-blank lines of the file and resolve them like ``-auto`` URLs."""
        with open(self.file_path, 'r', encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            urls = [entry for entry in stripped_lines if entry]
        delegate = AutoUrlSource(movie_counter=self.movie_counter, auto_urls=urls, limit=self.limit)
        return delegate.get_urls()
@@ -0,0 +1,71 @@
1
+ import threading
2
+ import sys
3
+ import os
4
+ import shutil
5
+ from typing import Tuple
6
+
7
+
8
class ThreadSafeCounter:
    """Integer counter whose every operation is performed while holding a lock."""

    def __init__(self) -> None:
        """Create a counter that starts at zero."""
        self._lock = threading.Lock()
        self._count = 0

    def increment_and_get(self) -> int:
        """Add one to the counter and return the new value."""
        with self._lock:
            new_value = self._count + 1
            self._count = new_value
            return new_value

    def get(self) -> int:
        """Return the current value."""
        with self._lock:
            current = self._count
        return current

    def reset(self) -> None:
        """Set the counter back to zero."""
        with self._lock:
            self._count = 0
25
+
26
+
27
def display_progress_bar(max_value: int, file_counter: ThreadSafeCounter) -> None:
    """Advance ``file_counter`` by one and redraw a 50-character progress bar on stdout.

    The line starts with a carriage return so each call overwrites the
    previous bar.
    """
    bar_length = 50
    done = file_counter.increment_and_get()
    filled = int(round(bar_length * (done / max_value)))
    bar = '#' * filled + '-' * (bar_length - filled)
    sys.stdout.write(f"\rProgress: [{bar}] {done}/{max_value}")
    sys.stdout.flush()
35
+
36
+
37
def split_integer_into_intervals(integer: int, n: int) -> list[Tuple[int, int]]:
    """Split the range ``[0, integer)`` into ``n`` consecutive ``(start, end)`` intervals.

    All intervals have the size ``integer // n``; the last one is widened by
    the remainder.
    """
    interval_size, remainder = divmod(integer, n)
    intervals = []
    for index in range(n):
        start = index * interval_size
        intervals.append((start, start + interval_size))
    if remainder:
        last_start, last_end = intervals[-1]
        intervals[-1] = (last_start, last_end + remainder)
    return intervals
44
+
45
+
46
def find_last_non_empty_line(text: str) -> str:
    """Return the last line of ``text`` that is not blank, exactly as written.

    Raises:
        Exception: If ``text`` has no line with non-whitespace content.
    """
    candidate = next((line for line in reversed(text.splitlines()) if line.strip()), None)
    if candidate is None:
        raise Exception("Failed to find the last non-empty line in m3u8 playlist.")
    return candidate
52
+
53
+
54
def find_closest(arr: list[int], target: int) -> int:
    """Return the element of ``arr`` nearest to ``target``; on a tie the earliest one wins.

    Raises:
        IndexError: If ``arr`` is empty.
    """
    best = arr[0]
    for candidate in arr[1:]:
        # Strict comparison keeps the earlier element when distances are equal.
        if abs(candidate - target) < abs(best - target):
            best = candidate
    return best
63
+
64
+
65
def delete_all_subfolders(folder_path: str) -> None:
    """Remove every directory directly inside ``folder_path``, keeping plain files.

    Does nothing when ``folder_path`` does not exist.
    """
    if not os.path.exists(folder_path):
        return
    for name in os.listdir(folder_path):
        candidate = os.path.join(folder_path, name)
        if not os.path.isdir(candidate):
            continue
        shutil.rmtree(candidate)
@@ -0,0 +1,160 @@
1
+ import os
2
+ import re
3
+ from typing import Optional, Tuple
4
+ import threading
5
+ from misato.config import MOVIE_SAVE_PATH_ROOT, MATCH_UUID_PATTERN, MATCH_TITLE_PATTERN, COVER_URL_PREFIX, TMP_HTML_FILE, RESOLUTION_PATTERN, VIDEO_M3U8_PREFIX, VIDEO_PLAYLIST_SUFFIX
6
+ from misato.http_client import HttpClient
7
+ from misato.logger import logger
8
+ from misato.utils import ThreadSafeCounter, display_progress_bar, split_integer_into_intervals, find_last_non_empty_line, find_closest
9
+ from misato.ffmpeg_processor import FFmpegProcessor
10
+
11
+
12
class VideoDownloader:
    """Download one movie: page metadata, cover, segments and final file.

    ``http_client.get`` is used throughout as returning the response body as
    bytes, or a falsy value on failure. ``options`` is a plain dict; the keys
    read by this class are ``cover_action``, ``quality``, ``retry``,
    ``delay``, ``timeout``, ``download_action``, ``num_threads``,
    ``write_action``, ``ffmpeg_action``, ``cover_as_preview`` and
    ``title_action``.
    """

    # Characters replaced with '_' when turning the page title into a file
    # name. Raw string: the backslash is one of the characters, not an escape.
    _ILLEGAL_TITLE_CHARS = r'<>:"/\|?* '

    def __init__(self, url: str, http_client: HttpClient, options: dict):
        self.url = url
        self.http_client = http_client
        # The last path component of the URL doubles as the movie identifier.
        self.movie_name = url.split('/')[-1]
        self.movie_folder = os.path.join(MOVIE_SAVE_PATH_ROOT, self.movie_name)
        self.options = options
        self.uuid = None
        self.title = None
        # Set by download(); initialised here so the attribute always exists.
        self.final_quality = None
        self.final_file_name = None
        self.counter = ThreadSafeCounter()

    def _fetch_metadata(self) -> bool:
        """Fetch the movie page and extract ``self.uuid`` and ``self.title``.

        The decoded HTML is also written to ``TMP_HTML_FILE``.

        Returns:
            True when the uuid was found (the title is optional), False when
            the page could not be fetched or the uuid pattern did not match.
        """
        html = self.http_client.get(self.url)
        if not html:
            logger.error(f"Failed to fetch HTML for {self.url}")
            return False
        html = html.decode('utf-8')
        with open(TMP_HTML_FILE, 'w', encoding='utf-8') as file:
            file.write(html)

        match = re.search(MATCH_UUID_PATTERN, html)
        if not match:
            logger.error("Failed to match uuid.")
            return False
        # The captured group is '|'-separated; the uuid is those parts in
        # reverse order joined with '-'.
        self.uuid = "-".join(match.group(1).split("|")[::-1])
        logger.info(f"Matching uuid successfully: {self.uuid}")

        title_match = re.search(MATCH_TITLE_PATTERN, html)
        if title_match:
            safe_title = title_match.group(1)
            for char in self._ILLEGAL_TITLE_CHARS:
                safe_title = safe_title.replace(char, '_')
            if "uncensored" in self.url:
                safe_title += "_uncensored"
            self.title = safe_title
        return True

    def _download_cover(self) -> None:
        """Save the cover image next to the movie when ``cover_action`` is set."""
        if not self.options.get('cover_action'):
            return
        cover_url = f"{COVER_URL_PREFIX}{self.movie_name}/cover-n.jpg"
        cover_content = self.http_client.get(cover_url)
        if cover_content:
            cover_path = os.path.join(MOVIE_SAVE_PATH_ROOT, f"{self.movie_name}-cover.jpg")
            with open(cover_path, 'wb') as f:
                f.write(cover_content)
        else:
            logger.error(f"Failed to download cover for {self.movie_name}")

    def _get_final_quality_and_resolution(self, playlist: str) -> Tuple[Optional[str], Optional[str]]:
        """Pick the quality label and the media-playlist path to download.

        Without a ``quality`` option, the last resolution matched in
        ``playlist`` is paired with the playlist's last non-empty line.
        Otherwise the available height closest to the requested one is used.

        Returns:
            ``(quality_label, resolution_url)`` such as ``("720p", ...)``, or
            ``(None, None)`` when no resolution is found in ``playlist``.
        """
        matches = re.findall(RESOLUTION_PATTERN, playlist)
        quality_map = {height: width for width, height in matches}
        quality_list = list(quality_map.keys())
        if not quality_list:
            logger.error("No resolutions found in playlist.")
            return None, None

        quality = self.options.get('quality')
        if quality is None:
            return quality_list[-1] + 'p', find_last_non_empty_line(playlist)

        closest_height = find_closest([int(h) for h in quality_list], int(quality))
        final_quality = str(closest_height) + 'p'
        # The playlist may reference a variant either as "<w>x<h>/video.m3u8"
        # or as "<h>p/video.m3u8"; fall back to the last line if neither is
        # present.
        url_type_x = f"{quality_map[str(closest_height)]}x{closest_height}/video.m3u8"
        url_type_p = f"{closest_height}p/video.m3u8"
        if url_type_x in playlist:
            resolution_url = url_type_x
        elif url_type_p in playlist:
            resolution_url = url_type_p
        else:
            resolution_url = find_last_non_empty_line(playlist)
        return final_quality, resolution_url

    def _thread_task(self, start: int, end: int, uuid: str, resolution: str, video_offset_max: int) -> None:
        """Download segments ``start`` (inclusive) to ``end`` (exclusive).

        Each segment is stored as ``video<i>.jpeg`` in ``self.movie_folder``
        and the shared progress bar is advanced after every successful write.
        Failed segments are logged and skipped.
        """
        retries = self.options.get('retry', 5)
        delay = self.options.get('delay', 2)
        timeout = self.options.get('timeout', 10)
        for i in range(start, end):
            url = f"https://surrit.com/{uuid}/{resolution}/video{i}.jpeg"
            content = self.http_client.get(url, retries=retries, delay=delay, timeout=timeout)
            if content:
                file_path = os.path.join(self.movie_folder, f"video{i}.jpeg")
                with open(file_path, 'wb') as f:
                    f.write(content)
                display_progress_bar(video_offset_max + 1, self.counter)
            else:
                logger.error(f"Failed to download segment {i} for {self.movie_name}")

    def _download_segments(self, uuid: str, resolution: str, video_offset_max: int) -> None:
        """Download segments ``0..video_offset_max`` using worker threads.

        Does nothing unless ``download_action`` is set. The thread count is
        the ``num_threads`` option, falling back to the CPU count.
        """
        if not self.options.get('download_action'):
            return
        # os.cpu_count() may return None, and the option may be absent or
        # None; always end up with a usable positive thread count.
        num_threads = self.options.get('num_threads') or os.cpu_count() or 1
        intervals = split_integer_into_intervals(video_offset_max + 1, num_threads)
        self.counter.reset()
        threads = []
        for start, end in intervals:
            thread = threading.Thread(target=self._thread_task, args=(start, end, uuid, resolution, video_offset_max))
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()
        self.counter.reset()

    def _check_integrity(self, video_offset_max: int) -> None:
        """Log how many of the expected segment files exist on disk."""
        downloaded_files = len([f for f in os.listdir(self.movie_folder) if f.endswith('.jpeg')])
        total_files = video_offset_max + 1
        integrity = downloaded_files / total_files
        # Terminate the progress-bar line, which is written without a newline.
        print()
        logger.info(f"File integrity for {self.movie_name}: {integrity:.2%} ({downloaded_files}/{total_files} files)")

    def _assemble_video(self, video_offset_max: int) -> None:
        """Combine the downloaded segments into a single ``.mp4`` file.

        Does nothing unless ``write_action`` is set. With ``ffmpeg_action``
        the existing segment files are handed to ``FFmpegProcessor``;
        otherwise they are concatenated byte-for-byte in index order. With
        ``title_action`` and a known title the result is renamed afterwards.
        """
        if not self.options.get('write_action'):
            return
        self.final_file_name = f"{self.movie_name}_{self.final_quality}"
        output_file = os.path.join(MOVIE_SAVE_PATH_ROOT, f"{self.final_file_name}.mp4")
        segment_paths = [os.path.join(self.movie_folder, f"video{i}.jpeg") for i in range(video_offset_max + 1)]
        if self.options.get('ffmpeg_action'):
            segment_files = [path for path in segment_paths if os.path.exists(path)]
            cover_path = os.path.join(MOVIE_SAVE_PATH_ROOT, f"{self.movie_name}-cover.jpg")
            use_cover = self.options.get('cover_as_preview') and os.path.exists(cover_path)
            cover_file = cover_path if use_cover else None
            FFmpegProcessor.create_video_from_segments(segment_files, output_file, cover_file)
        else:
            with open(output_file, 'wb') as outfile:
                for file_path in segment_paths:
                    # Missing segments are skipped rather than aborting.
                    if os.path.exists(file_path):
                        with open(file_path, 'rb') as infile:
                            outfile.write(infile.read())
        if self.options.get('title_action') and self.title:
            os.rename(output_file, os.path.join(MOVIE_SAVE_PATH_ROOT, f"{self.title}.mp4"))

    @staticmethod
    def _parse_video_offset_max(video_m3u8: str) -> Optional[int]:
        """Return the index of the last segment named in a media playlist.

        The first run of digits on the second-to-last line is used
        (presumably the final segment entry, followed by an end-of-list
        tag; confirm against real playlists). Returns None when the playlist
        has fewer than two lines or that line contains no digits.
        """
        lines = video_m3u8.splitlines()
        if len(lines) < 2:
            return None
        match = re.search(r'\d+', lines[-2])
        return int(match.group(0)) if match else None

    def download(self) -> None:
        """Run the whole pipeline for this movie.

        Steps: fetch metadata, fetch the master playlist, choose a quality,
        fetch the media playlist, then download the cover and segments,
        report integrity and assemble the video. Each failure is logged and
        ends the run early.
        """
        if not self._fetch_metadata():
            return
        playlist_url = f"{VIDEO_M3U8_PREFIX}{self.uuid}{VIDEO_PLAYLIST_SUFFIX}"
        playlist = self.http_client.get(playlist_url)
        if not playlist:
            logger.error("Failed to fetch playlist.")
            return
        playlist = playlist.decode('utf-8')
        self.final_quality, resolution_url = self._get_final_quality_and_resolution(playlist)
        if not self.final_quality:
            return
        video_m3u8_url = f"{VIDEO_M3U8_PREFIX}{self.uuid}/{resolution_url}"
        video_m3u8 = self.http_client.get(video_m3u8_url)
        if not video_m3u8:
            logger.error("Failed to fetch video m3u8.")
            return
        video_offset_max = self._parse_video_offset_max(video_m3u8.decode('utf-8'))
        if video_offset_max is None:
            logger.error("Failed to parse the last segment index from video m3u8.")
            return
        os.makedirs(self.movie_folder, exist_ok=True)
        self._download_cover()
        # The first path component of resolution_url is the variant folder.
        self._download_segments(self.uuid, resolution_url.split('/')[0], video_offset_max)
        self._check_integrity(video_offset_max)
        self._assemble_video(video_offset_max)
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: misato
3
+ Version: 0.7.9
4
+ Requires-Python: >=3.9
5
+ License-File: LICENSE
6
+ Requires-Dist: curl_cffi
7
+ Dynamic: license-file
8
+ Dynamic: requires-dist
9
+ Dynamic: requires-python
@@ -0,0 +1,21 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ setup.py
5
+ misato/__init__.py
6
+ misato/config.py
7
+ misato/ffmpeg_processor.py
8
+ misato/http_client.py
9
+ misato/logger.py
10
+ misato/main.py
11
+ misato/url_sources.py
12
+ misato/utils.py
13
+ misato/video_downloader.py
14
+ misato.egg-info/PKG-INFO
15
+ misato.egg-info/SOURCES.txt
16
+ misato.egg-info/dependency_links.txt
17
+ misato.egg-info/entry_points.txt
18
+ misato.egg-info/requires.txt
19
+ misato.egg-info/top_level.txt
20
+ test/__init__.py
21
+ test/test.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ misato = misato.main:main
@@ -0,0 +1 @@
1
+ curl_cffi
@@ -0,0 +1,2 @@
1
+ misato
2
+ test
misato-0.7.9/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
misato-0.7.9/setup.py ADDED
@@ -0,0 +1,16 @@
1
+ from setuptools import setup, find_packages
2
+
3
setup(
    name='misato',
    version='0.7.9',
    # Ship only the application package. A bare find_packages() also picks up
    # the repository's top-level `test` package, which would be installed
    # into site-packages under a name that collides with CPython's own `test`
    # package.
    packages=find_packages(exclude=['test', 'test.*']),
    install_requires=[
        'curl_cffi',
    ],
    entry_points={
        'console_scripts': [
            'misato=misato.main:main',
        ],
    },
    python_requires='>=3.9',
)
File without changes
@@ -0,0 +1,10 @@
1
+ import os
2
+
3
if __name__ == '__main__':
    # Manual smoke run: route HTTP and HTTPS traffic through a local proxy,
    # then request a single download.
    proxy = "localhost:7890"
    for proxy_var in ("http_proxy", "https_proxy"):
        os.environ[proxy_var] = f"http://{proxy}"

    # NOTE(review): `resources` is neither imported nor defined in this file,
    # so this call raises NameError as written. Confirm the intended entry
    # point before relying on this script.
    resources.miyuki.download(
        movie_url="https://missav.com/ja/fc2-ppv-4597386",
        quality="700",
        download_action=False,
        ffmpeg_action=True,
        retry=10,
        delay=20,
        timeout=30,
        title_action=True,
    )