musicdl 2.1.11__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- musicdl/__init__.py +5 -5
- musicdl/modules/__init__.py +10 -3
- musicdl/modules/common/__init__.py +2 -0
- musicdl/modules/common/gdstudio.py +204 -0
- musicdl/modules/js/__init__.py +1 -0
- musicdl/modules/js/youtube/__init__.py +2 -0
- musicdl/modules/js/youtube/botguard.js +1 -0
- musicdl/modules/js/youtube/jsinterp.py +902 -0
- musicdl/modules/js/youtube/runner.js +2 -0
- musicdl/modules/sources/__init__.py +41 -10
- musicdl/modules/sources/apple.py +207 -0
- musicdl/modules/sources/base.py +256 -28
- musicdl/modules/sources/bilibili.py +118 -0
- musicdl/modules/sources/buguyy.py +148 -0
- musicdl/modules/sources/fangpi.py +153 -0
- musicdl/modules/sources/fivesing.py +108 -0
- musicdl/modules/sources/gequbao.py +148 -0
- musicdl/modules/sources/jamendo.py +108 -0
- musicdl/modules/sources/joox.py +104 -68
- musicdl/modules/sources/kugou.py +129 -76
- musicdl/modules/sources/kuwo.py +188 -68
- musicdl/modules/sources/lizhi.py +107 -0
- musicdl/modules/sources/migu.py +172 -66
- musicdl/modules/sources/mitu.py +140 -0
- musicdl/modules/sources/mp3juice.py +264 -0
- musicdl/modules/sources/netease.py +163 -115
- musicdl/modules/sources/qianqian.py +125 -77
- musicdl/modules/sources/qq.py +232 -94
- musicdl/modules/sources/tidal.py +342 -0
- musicdl/modules/sources/ximalaya.py +256 -0
- musicdl/modules/sources/yinyuedao.py +144 -0
- musicdl/modules/sources/youtube.py +238 -0
- musicdl/modules/utils/__init__.py +12 -4
- musicdl/modules/utils/appleutils.py +563 -0
- musicdl/modules/utils/data.py +107 -0
- musicdl/modules/utils/logger.py +211 -58
- musicdl/modules/utils/lyric.py +73 -0
- musicdl/modules/utils/misc.py +335 -23
- musicdl/modules/utils/modulebuilder.py +75 -0
- musicdl/modules/utils/neteaseutils.py +81 -0
- musicdl/modules/utils/qqutils.py +184 -0
- musicdl/modules/utils/quarkparser.py +105 -0
- musicdl/modules/utils/songinfoutils.py +54 -0
- musicdl/modules/utils/tidalutils.py +738 -0
- musicdl/modules/utils/youtubeutils.py +3606 -0
- musicdl/musicdl.py +184 -86
- musicdl-2.7.3.dist-info/LICENSE +203 -0
- musicdl-2.7.3.dist-info/METADATA +704 -0
- musicdl-2.7.3.dist-info/RECORD +53 -0
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/WHEEL +5 -5
- musicdl-2.7.3.dist-info/entry_points.txt +2 -0
- musicdl/modules/sources/baiduFlac.py +0 -69
- musicdl/modules/sources/xiami.py +0 -104
- musicdl/modules/utils/downloader.py +0 -80
- musicdl-2.1.11.dist-info/LICENSE +0 -22
- musicdl-2.1.11.dist-info/METADATA +0 -82
- musicdl-2.1.11.dist-info/RECORD +0 -24
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/top_level.txt +0 -0
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Function:
|
|
3
|
+
Implementation of SongInfo
|
|
4
|
+
Author:
|
|
5
|
+
Zhenchao Jin
|
|
6
|
+
WeChat Official Account (微信公众号):
|
|
7
|
+
Charles的皮卡丘
|
|
8
|
+
'''
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
import os
|
|
11
|
+
from .misc import sanitize_filepath
|
|
12
|
+
from typing import Any, Dict, Optional
|
|
13
|
+
from dataclasses import dataclass, field, fields
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
'''SongInfo'''
|
|
17
|
+
@dataclass
class SongInfo:
    """Record describing one song: metadata, download state and save location.

    Besides plain attribute access, instances offer a small dict-like
    interface (``obj[key]``, ``key in obj``, ``get``, ``update``, ``todict``,
    ``fromdict``) that is restricted to the declared dataclass fields.
    """
    # raw data replied by requested APIs
    raw_data: Dict[str, Any] = field(default_factory=dict)
    # from which music client
    source: Optional[str] = None
    root_source: Optional[str] = None
    # song information
    song_name: Optional[str] = None
    singers: Optional[str] = None
    album: Optional[str] = None
    ext: Optional[str] = None
    file_size_bytes: Optional[int] = None
    file_size: Optional[str] = None
    duration_s: Optional[int] = None
    duration: Optional[str] = None
    bitrate: Optional[int] = None
    codec: Optional[str] = None
    samplerate: Optional[int] = None
    channels: Optional[int] = None
    # lyric
    lyric: Optional[str] = None
    # cover
    cover_url: Optional[str] = None
    # download url related variables
    download_url: Optional[Any] = None
    download_url_status: Optional[Any] = None
    default_download_headers: Dict[str, Any] = field(default_factory=dict)
    downloaded_contents: Optional[Any] = None
    chunk_size: Optional[int] = 1024 * 1024
    # save info
    work_dir: Optional[str] = './'
    _save_path: Optional[str] = None
    # identifier
    identifier: Optional[str] = None

    @property
    def with_valid_download_url(self) -> bool:
        """Return True when a usable download URL and an ``ok`` status are both present.

        A string URL must be non-empty and start with ``'http'``; any other
        URL object only has to be truthy. The status must be a dict whose
        ``'ok'`` entry is truthy.
        """
        url = self.download_url
        if isinstance(url, str):
            url_ok = bool(url) and url.startswith('http')
        else:
            url_ok = bool(url)
        status = self.download_url_status
        status_ok = isinstance(status, dict) and bool(status.get('ok'))
        return url_ok and status_ok

    @property
    def save_path(self) -> str:
        """Return the file path this song is saved to, computing it once.

        The path is ``<work_dir>/<song_name> - <identifier>.<ext>``; when that
        file already exists, `` (1)``, `` (2)``, ... is appended to the stem
        until a free name is found. The result is cached in ``_save_path``.

        Raises:
            AttributeError: if ``ext`` is ``None`` (it must be a string).
        """
        if self._save_path is not None:
            return self._save_path
        ext = self.ext.removeprefix('.')
        stem = f"{self.song_name} - {self.identifier}"
        # Sanitize BEFORE probing the filesystem: the sanitized path is the one
        # that is actually written, so that is the one that must be unused.
        # NOTE(review): assumes sanitize_filepath is deterministic — confirm in misc.py.
        candidate = sanitize_filepath(os.path.join(self.work_dir, f"{stem}.{ext}"))
        same_name_file_idx = 1
        while os.path.exists(candidate):
            candidate = sanitize_filepath(os.path.join(self.work_dir, f"{stem} ({same_name_file_idx}).{ext}"))
            same_name_file_idx += 1
        self._save_path = candidate
        return candidate

    @classmethod
    def fieldnames(cls) -> set[str]:
        """Return the names of all dataclass fields (including ``_save_path``)."""
        return {f.name for f in fields(cls)}

    @classmethod
    def fromdict(cls, data: Dict[str, Any]) -> "SongInfo":
        """Build an instance from ``data``, silently ignoring keys that are not fields."""
        field_names = cls.fieldnames()
        return cls(**{k: v for k, v in data.items() if k in field_names})

    def todict(self) -> Dict[str, Any]:
        """Return a shallow ``{field name: current value}`` mapping."""
        return {f.name: getattr(self, f.name) for f in fields(self)}

    def update(self, data: Optional[Dict[str, Any]] = None, **kwargs: Any) -> "SongInfo":
        """Set fields from ``data`` and ``kwargs`` (kwargs win); unknown keys are ignored.

        Returns:
            ``self``, so calls can be chained.
        """
        merged: Dict[str, Any] = {**(data or {}), **kwargs}
        field_names = self.fieldnames()
        for key, value in merged.items():
            if key in field_names:
                setattr(self, key, value)
        return self

    def __getitem__(self, key: str) -> Any:
        """Return the value of field ``key``; raise ``KeyError`` for non-fields."""
        if key not in self.fieldnames():
            raise KeyError(key)
        return getattr(self, key)

    def __setitem__(self, key: str, value: Any) -> None:
        """Assign field ``key``; raise ``KeyError`` for non-fields."""
        if key not in self.fieldnames():
            raise KeyError(key)
        setattr(self, key, value)

    def __contains__(self, key: object) -> bool:
        """Return True when ``key`` is a string naming a declared field."""
        return isinstance(key, str) and key in self.fieldnames()

    def get(self, key: str, default: Any = None) -> Any:
        """Return field ``key`` if it is a declared field, otherwise ``default``."""
        if key in self.fieldnames():
            return getattr(self, key)
        return default
|
musicdl/modules/utils/logger.py
CHANGED
|
@@ -1,58 +1,211 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Function:
|
|
3
|
-
|
|
4
|
-
Author:
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Charles的皮卡丘
|
|
8
|
-
'''
|
|
9
|
-
import
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
1
|
+
'''
|
|
2
|
+
Function:
|
|
3
|
+
Implementation of logging related utils
|
|
4
|
+
Author:
|
|
5
|
+
Zhenchao Jin
|
|
6
|
+
WeChat Official Account (微信公众号):
|
|
7
|
+
Charles的皮卡丘
|
|
8
|
+
'''
|
|
9
|
+
import os
|
|
10
|
+
import shutil
|
|
11
|
+
import logging
|
|
12
|
+
import collections.abc
|
|
13
|
+
from wcwidth import wcswidth
|
|
14
|
+
from tabulate import tabulate
|
|
15
|
+
from prettytable import PrettyTable
|
|
16
|
+
from platformdirs import user_log_dir
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
'''predefined colors in terminal'''
|
|
20
|
+
# ANSI escape sequences used for terminal output, keyed by color or role name
COLORS = dict(
    red='\033[31m',
    green='\033[32m',
    yellow='\033[33m',
    blue='\033[34m',
    pink='\033[35m',
    cyan='\033[36m',
    highlight='\033[93m',
    number='\033[96m',
    singer='\033[93m',
    flac='\033[95m',
    songname='\033[91m',
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
'''LoggerHandle'''
|
|
36
|
+
class LoggerHandle():
    """Thin wrapper around ``logging`` that writes to a per-user log file and the console.

    Each level method takes ``disable_print``: when true the message is
    appended verbatim to the log file (no timestamp/level prefix, nothing on
    the console); otherwise it goes through the ``logging`` machinery
    configured in ``__init__``.
    """
    appname = 'musicdl'
    appauthor = 'zcjin'

    def __init__(self):
        """Create the user log directory and configure root logging (file + stream, INFO)."""
        # set up log dir
        log_dir = user_log_dir(appname=self.appname, appauthor=self.appauthor)
        os.makedirs(log_dir, exist_ok=True)
        self.log_file_path = os.path.join(log_dir, "musicdl.log")
        # config logging
        logging.basicConfig(
            level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            handlers=[logging.FileHandler(self.log_file_path, encoding="utf-8"), logging.StreamHandler()]
        )

    @staticmethod
    def log(level, message):
        """Send ``str(message)`` at ``level`` to the logger named after ``appname``."""
        logging.getLogger(LoggerHandle.appname).log(level, str(message))

    def _emit(self, level, message, disable_print, force_red=False):
        """Shared implementation of the level methods.

        Args:
            level: ``logging`` level constant used when going through ``logging``.
            message: anything; converted with ``str``.
            disable_print: append the raw message to the log file instead of logging it.
            force_red: wrap the message in red unless it already contains the red escape code.
        """
        message = str(message)
        if disable_print:
            # ``with`` guarantees the handle is closed even if the write fails.
            with open(self.log_file_path, 'a', encoding='utf-8') as fp:
                fp.write(message + '\n')
            return
        if force_red and '\033[31m' not in message:
            message = colorize(message, 'red')
        LoggerHandle.log(level, message)

    def debug(self, message, disable_print=False):
        """Log ``message`` at DEBUG level (or append it to the log file when ``disable_print``)."""
        self._emit(logging.DEBUG, message, disable_print)

    def info(self, message, disable_print=False):
        """Log ``message`` at INFO level (or append it to the log file when ``disable_print``)."""
        self._emit(logging.INFO, message, disable_print)

    def warning(self, message, disable_print=False):
        """Log ``message`` in red at WARNING level (or append it uncolored to the log file)."""
        self._emit(logging.WARNING, message, disable_print, force_red=True)

    def error(self, message, disable_print=False):
        """Log ``message`` in red at ERROR level (or append it uncolored to the log file)."""
        self._emit(logging.ERROR, message, disable_print, force_red=True)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
'''printtable'''
|
|
95
|
+
def printtable(titles, items, terminal_right_space_len=4):
    """Print ``items`` as a PrettyTable with ``titles`` as header and return the table.

    The table width is capped at the terminal width minus
    ``terminal_right_space_len`` columns.

    Raises:
        AssertionError: if ``titles``/``items`` are not sequences, or the
            remaining width is not positive.
    """
    sequence_type = collections.abc.Sequence
    inputs_are_sequences = isinstance(titles, sequence_type) and isinstance(items, sequence_type)
    assert inputs_are_sequences, 'title and items should be iterable'
    table = PrettyTable(titles)
    for row in items:
        table.add_row(row)
    available = shutil.get_terminal_size().columns - terminal_right_space_len
    assert available > 0, f'"terminal_right_space_len" should smaller than {shutil.get_terminal_size()}'
    table.max_table_width = available
    print(table)
    return table
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
'''displen'''
|
|
107
|
+
def displen(s: str) -> int:
    """Return the terminal display width of ``s`` as reported by ``wcswidth``.

    ``None`` counts as width 0, and a negative ``wcswidth`` result is clamped to 0.
    """
    if s is not None:
        width = wcswidth(str(s))
        return width if width > 0 else 0
    return 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
'''tablewidth'''
|
|
114
|
+
def tablewidth(table_str: str) -> int:
    """Return the display width of the widest line in ``table_str`` (0 if it has no lines)."""
    return max((displen(row) for row in table_str.splitlines()), default=0)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
'''truncatebydispwidth'''
|
|
122
|
+
def truncatebydispwidth(text: str, max_width: int) -> str:
    """Shorten ``text`` so its display width does not exceed ``max_width``.

    Text that already fits is returned unchanged (as ``str``). Otherwise:
    a non-positive ``max_width`` yields ``""``; widths up to 3 yield a bare
    prefix; larger widths yield a prefix of width ``max_width - 3`` followed
    by ``"..."``.
    """
    # The width is measured on the argument as passed, before str() conversion.
    full_width = displen(text)
    text = str(text)
    if full_width <= max_width:
        return text
    if max_width <= 0:
        return ""
    with_ellipsis = max_width > 3
    budget = max_width - 3 if with_ellipsis else max_width
    kept, used = [], 0
    for char in text:
        used += displen(char)
        if used > budget:
            break
        kept.append(char)
    prefix = "".join(kept)
    if with_ellipsis:
        return prefix + "..."
    return prefix
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
'''smarttrunctable'''
|
|
144
|
+
def smarttrunctable(headers, rows, max_col_width=40, terminal_right_space_len=10, no_trunc_cols=None, min_col_width=4, max_iterations=2000):
    """Render a ``fancy_grid`` table, shrinking columns until it fits the terminal.

    Args:
        headers: column titles (converted with ``str``).
        rows: row sequences, each as long as ``headers`` (cells converted with ``str``).
        max_col_width: initial cap on a column's width; a falsy value disables the cap.
        terminal_right_space_len: columns to leave free at the right edge.
        no_trunc_cols: column indexes (ints) or header names never truncated.
        min_col_width: columns are not shrunk below this width.
        max_iterations: upper bound on shrink steps.

    Returns:
        The first rendering that fits, or the last rendering produced when no
        column can shrink further or the iteration budget runs out.

    Raises:
        AssertionError: if a row's length differs from the number of headers.
    """
    headers = list(map(str, headers))
    rows = [list(map(str, row)) for row in rows]
    ncols = len(headers)
    assert all(len(r) == ncols for r in rows), "all rows must have same length as headers"

    term_width = shutil.get_terminal_size().columns
    target_width = term_width - terminal_right_space_len
    if target_width <= 0:
        # Margin larger than the terminal: fall back to the full width.
        target_width = term_width

    # Resolve protected columns from indexes and/or header names.
    protected = set()
    for spec in (no_trunc_cols or ()):
        if isinstance(spec, int):
            if 0 <= spec < ncols:
                protected.add(spec)
        else:
            protected.update(j for j, h in enumerate(headers) if h == str(spec))

    # Natural (untruncated) width of every column.
    natural = [
        max([displen(headers[j])] + [displen(row[j]) for row in rows])
        for j in range(ncols)
    ]

    # Starting limit per column; None marks a protected column.
    limits = []
    for j, width in enumerate(natural):
        if j in protected:
            limits.append(None)
            continue
        capped = min(width, max_col_width) if max_col_width else width
        limits.append(max(capped, min_col_width))

    def clip(cells):
        return [
            cell if limits[j] is None else truncatebydispwidth(cell, limits[j])
            for j, cell in enumerate(cells)
        ]

    last_table = ""
    for _ in range(max_iterations):
        shown_headers = clip(headers)
        shown_rows = [clip(row) for row in rows]
        last_table = tabulate(shown_rows, headers=shown_headers, tablefmt="fancy_grid")
        if tablewidth(last_table) <= target_width:
            return last_table
        shrinkable = [j for j in range(ncols) if limits[j] is not None and limits[j] > min_col_width]
        if not shrinkable:
            return last_table
        # Shrink the currently widest shrinkable column by one cell.
        current = [displen(h) for h in shown_headers]
        for row in shown_rows:
            for j, cell in enumerate(row):
                current[j] = max(current[j], displen(cell))
        widest = max(shrinkable, key=lambda k: current[k])
        limits[widest] -= 1
    return last_table
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
'''colorize'''
|
|
201
|
+
def colorize(string, color):
    """Wrap ``str(string)`` in the ANSI code registered for ``color`` in ``COLORS``.

    Unknown color names leave the text uncolored.
    """
    text = str(string)
    if color in COLORS:
        return COLORS[color] + text + '\033[0m'
    return text
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
'''printfullline'''
|
|
208
|
+
def printfullline(ch: str = "*", end: str = '\n', terminal_right_space_len: int = 1):
    """Print ``ch`` repeated across the terminal, leaving ``terminal_right_space_len`` columns free.

    Raises:
        AssertionError: if the remaining width is not positive.
    """
    width = shutil.get_terminal_size().columns - terminal_right_space_len
    assert width > 0, f'"terminal_right_space_len" should smaller than {shutil.get_terminal_size()}'
    line = ch * width
    print(line, end=end)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Function:
|
|
3
|
+
Implementation of WhisperLRC
|
|
4
|
+
Author:
|
|
5
|
+
Zhenchao Jin
|
|
6
|
+
WeChat Official Account (微信公众号):
|
|
7
|
+
Charles的皮卡丘
|
|
8
|
+
'''
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import tempfile
|
|
12
|
+
import requests
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
'''WhisperLRC'''
|
|
16
|
+
class WhisperLRC:
    """Generate LRC-style lyrics from audio with a ``faster_whisper`` model.

    ``faster_whisper`` is optional: when it cannot be imported or the model
    cannot be created, ``whisper_model`` is ``None`` and the transcription
    methods fail their assertion.
    """

    def __init__(self, model_size_or_path="small", device="auto", compute_type="int8", cpu_threads=4, num_workers=1, **kwargs):
        """Try to load the Whisper model; extra ``kwargs`` are forwarded to ``WhisperModel``."""
        try:
            from faster_whisper import WhisperModel
            self.whisper_model = WhisperModel(model_size_or_path, device=device, compute_type=compute_type, cpu_threads=cpu_threads, num_workers=num_workers, **kwargs)
        except Exception:
            # Best effort: keep the object usable as a "not available" marker,
            # but let KeyboardInterrupt/SystemExit propagate.
            self.whisper_model = None

    @staticmethod
    def downloadtotmpdir(url: str, headers: dict = None, timeout: int = 300, cookies: dict = None, request_overrides: dict = None):
        """Stream ``url`` into a new temporary file and return its path.

        ``headers``, ``timeout`` and ``cookies`` are only used when
        ``request_overrides`` does not already supply that key. The file
        suffix is taken from the URL's extension (2-5 alphanumerics before
        ``?`` or the end), falling back to ``.bin``. The caller must delete
        the returned file.

        Raises:
            requests.HTTPError: if the response status is an error.
        """
        # Work on a copy so the caller's dict is not modified.
        request_kwargs = dict(request_overrides or {})
        request_kwargs.setdefault('headers', headers or {})
        request_kwargs.setdefault('timeout', timeout)
        request_kwargs.setdefault('cookies', cookies or {})
        m = re.search(r"\.([a-z0-9]{2,5})(?:\?|$)", url, re.I)
        suffix = "." + (m.group(1).lower() if m else "bin")
        with requests.get(url, stream=True, **request_kwargs) as resp:
            resp.raise_for_status()
            fd, path = tempfile.mkstemp(suffix=suffix)
            try:
                with os.fdopen(fd, "wb") as fp:
                    for chunk in resp.iter_content(32768):
                        if chunk:
                            fp.write(chunk)
            except BaseException:
                # Do not leave a partial download behind; the caller never
                # receives the path and so could not clean it up.
                try:
                    os.remove(path)
                except OSError:
                    pass
                raise
        return path

    @staticmethod
    def timestamp(t):
        """Format ``t`` seconds as an LRC tag ``[mm:ss.xx]`` (negative values clamp to 0)."""
        # Round to centiseconds BEFORE splitting into minutes/seconds so that
        # e.g. 59.999 becomes [01:00.00] rather than [00:60.00].
        centis = int(round(max(0.0, float(t)) * 100))
        mm, centis = divmod(centis, 6000)
        ss, cs = divmod(centis, 100)
        return f"[{mm:02d}:{ss:02d}.{cs:02d}]"

    def _transcribe(self, file_path: str, transcribe_overrides: dict = None):
        """Run the model on ``file_path`` and build the result dict shared by the public methods."""
        settings = {
            'language': None, 'vad_filter': True, 'vad_parameters': dict(min_silence_duration_ms=300), 'chunk_length': 30, 'beam_size': 5
        }
        settings.update(transcribe_overrides or {})
        segs, info = self.whisper_model.transcribe(file_path, **settings)
        lrc = "\n".join(f"{self.timestamp(s.start)}{s.text.strip()}" for s in segs)
        return {"language": info.language, "prob": info.language_probability, "duration": getattr(info, "duration", None), 'lyric': lrc}

    def fromurl(self, url: str, transcribe_overrides: dict = None, headers: dict = None, timeout: int = 300, cookies: dict = None, request_overrides: dict = None):
        """Download ``url`` to a temporary file, transcribe it, and delete the file.

        Returns:
            dict with keys ``language``, ``prob``, ``duration`` and ``lyric``.

        Raises:
            AssertionError: if no Whisper model is available.
        """
        assert self.whisper_model is not None, 'faster_whisper should be installed via "pip install faster_whisper"'
        tmp_file_path = ''
        try:
            tmp_file_path = self.downloadtotmpdir(url, headers=headers or {}, timeout=timeout, cookies=cookies or {}, request_overrides=request_overrides or {})
            return self._transcribe(tmp_file_path, transcribe_overrides)
        finally:
            if tmp_file_path and os.path.exists(tmp_file_path):
                try:
                    os.remove(tmp_file_path)
                except OSError:
                    # Cleanup is best effort; a stale temp file must not mask the result.
                    pass

    def fromfilepath(self, file_path: str, transcribe_overrides: dict = None):
        """Transcribe a local audio file.

        Returns:
            dict with keys ``language``, ``prob``, ``duration`` and ``lyric``.

        Raises:
            AssertionError: if no Whisper model is available.
        """
        assert self.whisper_model is not None, 'faster_whisper should be installed via "pip install faster_whisper"'
        return self._transcribe(file_path, transcribe_overrides)
|