jmcomic 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jmcomic/__init__.py +29 -0
- jmcomic/api.py +131 -0
- jmcomic/cl.py +121 -0
- jmcomic/jm_client_impl.py +1217 -0
- jmcomic/jm_client_interface.py +609 -0
- jmcomic/jm_config.py +505 -0
- jmcomic/jm_downloader.py +350 -0
- jmcomic/jm_entity.py +695 -0
- jmcomic/jm_exception.py +191 -0
- jmcomic/jm_option.py +647 -0
- jmcomic/jm_plugin.py +1203 -0
- jmcomic/jm_toolkit.py +937 -0
- jmcomic-0.0.2.dist-info/METADATA +229 -0
- jmcomic-0.0.2.dist-info/RECORD +18 -0
- jmcomic-0.0.2.dist-info/WHEEL +5 -0
- jmcomic-0.0.2.dist-info/entry_points.txt +2 -0
- jmcomic-0.0.2.dist-info/licenses/LICENSE +21 -0
- jmcomic-0.0.2.dist-info/top_level.txt +1 -0
jmcomic/jm_toolkit.py
ADDED
|
@@ -0,0 +1,937 @@
|
|
|
1
|
+
from PIL import Image
|
|
2
|
+
|
|
3
|
+
from .jm_exception import *
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class JmcomicText:
    """
    Text helpers: id/domain parsing, regex-driven HTML scraping into entity
    objects, URL formatting, DSL text expansion and album-title tokenizing.

    Naming convention used by ``reflect_new_instance``: every attribute defined
    on this class whose name starts with ``pattern_html_photo_`` or
    ``pattern_html_album_`` is applied to the page, and the result is passed to
    the entity constructor as the keyword named by the rest of the attribute
    name. A value may be:

    * a compiled pattern -> group 1 of the first match
      (``findall`` when the field name ends with ``_list``);
    * a list of patterns -> all but the last narrow the text, the last one is
      applied with ``findall``;
    * a ``(pattern, default)`` tuple -> ``default`` is used when nothing matches.
    """
    pattern_jm_domain = compile(r'https://([\w.-]+)')
    # (pattern, index of the group holding the id)
    pattern_jm_pa_id = [
        (compile(r'(photos?|albums?)/(\d+)'), 2),
        (compile(r'id=(\d+)'), 1),
    ]
    pattern_html_jm_pub_domain = compile(r'[\w-]+\.\w+/?\w+')

    pattern_html_photo_photo_id = compile(r'<meta property="og:url" content=".*?/photo/(\d+)/?.*?">')
    pattern_html_photo_scramble_id = compile(r'var scramble_id = (\d+);')
    pattern_html_photo_name = compile(r'<title>([\s\S]*?)\|.*</title>')
    # pattern_html_photo_data_original_list = compile(r'data-original="(.*?)" id="album_photo_.+?"')
    pattern_html_photo_data_original_domain = compile(r'src="https://(.*?)/media/albums/blank')
    pattern_html_photo_data_original_0 = compile(r'data-original="(.*?)"[^>]*?id="album_photo[^>]*?data-page="0"')
    pattern_html_photo_tags = compile(r'<meta name="keywords"[\s\S]*?content="(.*?)"')
    pattern_html_photo_series_id = compile(r'var series_id = (\d+);')
    pattern_html_photo_sort = compile(r'var sort = (\d+);')
    pattern_html_photo_page_arr = compile(r'var page_arr = (.*?);')

    pattern_html_b64_decode_content = compile(r'const html = base64DecodeUtf8\("(.*?)"\)')
    pattern_html_album_album_id = compile(r'<span class="number">.*?:JM(\d+)</span>')
    pattern_html_album_scramble_id = compile(r'var scramble_id = (\d+);')
    pattern_html_album_name = compile(r'id="book-name"[^>]*?>([\s\S]*?)<')
    pattern_html_album_episode_list = compile(r'data-album="(\d+)"[^>]*>[\s\S]*?第(\d+)[话話]([\s\S]*?)<[\s\S]*?>')
    pattern_html_album_page_count = compile(r'<span class="pagecount">.*?:(\d+)</span>')
    pattern_html_album_pub_date = compile(r'>上架日期 : (.*?)</span>')
    pattern_html_album_update_date = compile(r'>更新日期 : (.*?)</span>')
    pattern_html_tag_a = compile(r'<a[^>]*?>\s*(\S*)\s*</a>')
    # works
    pattern_html_album_works = [
        compile(r'<span itemprop="author" data-type="works">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # characters appearing in the album
    pattern_html_album_actors = [
        compile(r'<span itemprop="author" data-type="actor">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # tags
    pattern_html_album_tags = [
        compile(r'<span itemprop="genre" data-type="tags">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # authors
    pattern_html_album_authors = [
        compile(r'<span itemprop="author" data-type="author">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # likes
    pattern_html_album_likes = compile(r'<span id="albim_likes_\d+">(.*?)</span>')
    # views
    pattern_html_album_views = compile(r'<span>(.*?)</span>\n *<span>(次觀看|观看次数|次观看次数)</span>')
    # comment count (div); falls back to 0 when the badge is absent
    pattern_html_album_comment_count = compile(r'<div class="badge"[^>]*?id="total_video_comments">(\d+)</div>'), 0

    # extracts the message from an ajax endpoint response
    pattern_ajax_favorite_msg = compile(r'</button>(.*?)</div>')

    @classmethod
    def parse_to_jm_domain(cls, text: str):
        """
        Return the host part of ``text`` when it starts with
        ``JmModuleConfig.PROT``; otherwise return ``text`` unchanged.
        """
        if text.startswith(JmModuleConfig.PROT):
            return cls.pattern_jm_domain.search(text)[1]

        return text

    @classmethod
    def parse_to_jm_id(cls, text) -> str:
        """
        Extract a JM id from an int, a digit string, a ``JM``-prefixed id or a
        URL containing ``photo(s)/<id>``, ``album(s)/<id>`` or ``id=<id>``.

        :param text: int or str to parse
        :return: the id as a string
        :raises: via ``ExceptionTool`` when the input cannot be parsed
        """
        if isinstance(text, int):
            return str(text)

        ExceptionTool.require_true(isinstance(text, str), f"无法解析jm车号, 参数类型为: {type(text)}")

        # 43210
        if text.isdigit():
            return text

        # a prefixed id needs at least the two prefix characters
        ExceptionTool.require_true(len(text) >= 2, f"无法解析jm车号, 文本太短: {text}")

        # JM123456 / jm123456
        # The remainder must be numeric: text such as 'jmcomic.xx/album/123'
        # also starts with 'jm' and has to fall through to the URL patterns.
        if text[0] in 'Jj' and text[1] in 'Mm' and text[2:].isdigit():
            return text[2:]

        # https://xxx/photo/412038
        # https://xxx/album/?id=412038
        for p, i in cls.pattern_jm_pa_id:
            match = p.search(text)
            if match is not None:
                return match[i]

        ExceptionTool.raises(f"无法解析jm车号, 文本为: {text}")

    @classmethod
    def analyse_jm_pub_html(cls, html: str, domain_keyword=('jm', 'comic')) -> List[str]:
        """
        Collect domain-like substrings of ``html`` that contain at least one of
        ``domain_keyword``.
        """
        domain_ls = cls.pattern_html_jm_pub_domain.findall(html)

        return [
            domain for domain in domain_ls
            if any(kw in domain for kw in domain_keyword)
        ]

    @classmethod
    def parse_jm_base64_html(cls, resp_text: str) -> str:
        """
        If ``resp_text`` embeds its html as ``base64DecodeUtf8("...")``, return
        the decoded html; otherwise return ``resp_text`` unchanged.
        """
        from base64 import b64decode
        html_b64 = PatternTool.match_or_default(resp_text, cls.pattern_html_b64_decode_content, None)
        if html_b64 is None:
            return resp_text
        html = b64decode(html_b64).decode()
        return html

    @classmethod
    def analyse_jm_photo_html(cls, html: str) -> JmPhotoDetail:
        """Build a photo entity from a photo page using the ``pattern_html_photo_*`` patterns."""
        return cls.reflect_new_instance(
            html,
            "pattern_html_photo_",
            JmModuleConfig.photo_class()
        )

    @classmethod
    def analyse_jm_album_html(cls, html: str) -> JmAlbumDetail:
        """
        Build an album entity from an album page using the
        ``pattern_html_album_*`` patterns. The page is base64-unwrapped first.
        """
        return cls.reflect_new_instance(
            cls.parse_jm_base64_html(html),
            "pattern_html_album_",
            JmModuleConfig.album_class()
        )

    @classmethod
    def reflect_new_instance(cls, html: str, cls_field_prefix: str, clazz: type):
        """
        Apply every pattern attribute whose name starts with
        ``cls_field_prefix`` to ``html`` and instantiate ``clazz`` with the
        extracted values as keyword arguments.

        Only attributes defined directly on this class (``cls.__dict__``) are
        scanned.

        :param html: text to scan
        :param cls_field_prefix: attribute-name prefix; the remainder of the
            attribute name is the keyword passed to ``clazz``
        :param clazz: callable invoked as ``clazz(**fields)``
        :raises: via ``ExceptionTool.raises_regex`` when a field without a
            default does not match
        """

        def match_field(field_name: str, pattern: Union[Pattern, List[Pattern]], text):
            if isinstance(pattern, list):
                # For a list of patterns the last one extracts the field;
                # the preceding ones successively narrow the text
                # (equivalent to several chained regex matches).
                last_pattern = pattern[-1]
                for narrowing in pattern[:-1]:
                    match: Match = narrowing.search(text)
                    if match is None:
                        return None
                    text = match[0]

                return last_pattern.findall(text)

            if field_name.endswith("_list"):
                return pattern.findall(text)

            match = pattern.search(text)
            return None if match is None else match[1]

        field_dict = {}
        pattern_name: str
        for pattern_name, pattern in cls.__dict__.items():
            if not pattern_name.startswith(cls_field_prefix):
                continue

            # a (pattern, default) tuple supplies a fallback for a missing match
            if isinstance(pattern, tuple):
                pattern, default = pattern
            else:
                default = None

            # the field name is whatever follows the prefix
            field_name = pattern_name[len(cls_field_prefix):]
            field_value = match_field(field_name, pattern, html)

            if field_value is None:
                if default is None:
                    ExceptionTool.raises_regex(
                        f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern}]"
                        + (f"\n响应文本=[{html}]" if len(html) < 200 else
                           f'响应文本过长(len={len(html)}),不打印'
                           ),
                        html=html,
                        pattern=pattern,
                    )
                else:
                    field_value = default

            field_dict[field_name] = field_value

        return clazz(**field_dict)

    @classmethod
    def format_url(cls, path, domain):
        """
        Join ``domain`` and ``path``, prepending ``JmModuleConfig.PROT`` unless
        ``domain`` already starts with it.

        :raises: via ``ExceptionTool`` when ``domain`` is not a non-empty str
        """
        ExceptionTool.require_true(isinstance(domain, str) and len(domain) != 0, '域名为空')

        if domain.startswith(JmModuleConfig.PROT):
            return f'{domain}{path}'

        return f'{JmModuleConfig.PROT}{domain}{path}'

    @classmethod
    def format_album_url(cls, aid, domain='18comic.vip'):
        """
        Turn an album id into a browsable URL, handy for printing and then
        opening in a browser.
        """
        return cls.format_url(f'/album/{aid}/', domain)

    class DSLReplacer:
        """Ordered collection of (regex, replacer) rules applied with ``Pattern.sub``."""

        def __init__(self):
            self.dsl_dict: Dict[Pattern, Callable[[Match], str]] = {}

        def parse_dsl_text(self, text) -> str:
            """Apply every registered rule to ``text``, in registration order."""
            for pattern, replacer in self.dsl_dict.items():
                text = pattern.sub(replacer, text)
            return text

        def add_dsl_and_replacer(self, dsl: str, replacer: Callable[[Match], str]):
            """Register ``replacer`` for the regex source ``dsl``."""
            pattern = compile(dsl)
            self.dsl_dict[pattern] = replacer

    @classmethod
    def match_os_env(cls, match: Match) -> str:
        """
        Replacer resolving group 1 of ``match`` as an environment variable name.

        :raises: via ``ExceptionTool`` when the variable is not set
        """
        name = match[1]
        value = os.getenv(name, None)
        ExceptionTool.require_true(value is not None, f'未配置环境变量: {name}')
        return value

    dsl_replacer = DSLReplacer()

    @classmethod
    def parse_to_abspath(cls, dsl_text: str) -> str:
        """Expand DSL expressions in ``dsl_text`` and return the absolute path."""
        return os.path.abspath(cls.parse_dsl_text(dsl_text))

    @classmethod
    def parse_dsl_text(cls, dsl_text: str) -> str:
        """Expand DSL expressions in ``dsl_text`` with the shared ``dsl_replacer``."""
        return cls.dsl_replacer.parse_dsl_text(dsl_text)

    # opening bracket -> closing bracket
    # The last entry is the full-width pair; it used to repeat the ASCII '('
    # key, which made it a dead duplicate.
    bracket_map = {'(': ')',
                   '[': ']',
                   '【': '】',
                   '（': '）',
                   }

    @classmethod
    def parse_orig_album_name(cls, name: str, default=None):
        """
        Return the first token of ``name`` that is not a bracketed group, or
        ``default`` when every token is bracketed.
        """
        word_list = cls.tokenize(name)

        for word in word_list:
            if word[0] in cls.bracket_map:
                continue

            return word

        return default

    @classmethod
    def tokenize(cls, title: str) -> List[str]:
        """
        Split a title into bracketed groups and the plain text between them.

        繞道#2 [暴碧漢化組] [えーすけ(123)] よりみち#2 (COMIC 快樂天 2024年1月號) [中國翻譯] [DL版]
        :return: ['繞道#2', '[暴碧漢化組]', '[えーすけ(123)]', 'よりみち#2', '(COMIC 快樂天 2024年1月號)', '[中國翻譯]', '[DL版]']

        An opening bracket with no matching closer is kept as ordinary text.
        """
        title = title.strip()
        ret = []
        bracket_map = cls.bracket_map

        char_list = []
        i = 0
        length = len(title)

        def add(w=None):
            # flush the pending plain text (or the given word) into the result
            if w is None:
                w = ''.join(char_list).strip()

            if w == '':
                return

            ret.append(w)
            char_list.clear()

        def find_right_pair(left_pair, i):
            # Returns the index just past the bracket closing `left_pair`
            # (nested brackets are tracked on a stack), or -1 if unclosed.
            stack = [left_pair]
            j = i + 1

            while j < length and len(stack) != 0:
                c = title[j]
                if c in bracket_map:
                    stack.append(c)
                elif c == bracket_map[stack[-1]]:
                    stack.pop()

                j += 1

            if len(stack) == 0:
                return j
            else:
                return -1

        while i < length:
            c = title[i]

            if c in bracket_map:
                # the previous plain word ends here
                add()
                # locate the closing bracket
                j = find_right_pair(c, i)
                if j == -1:
                    # unclosed bracket: treat it as a normal character
                    char_list.append(c)
                    i += 1
                    continue
                # the whole bracketed group is one word
                add(title[i:j])
                # move the cursor past the group
                i = j
            else:
                char_list.append(c)
                i += 1

        add()
        return ret

    @classmethod
    def to_zh_cn(cls, s):
        """Convert ``s`` with ``zhconv.convert(s, 'zh-cn')``."""
        import zhconv
        return zhconv.convert(s, 'zh-cn')

    @classmethod
    def try_mkdir(cls, save_dir: str):
        """
        Create ``save_dir``; when the OS rejects it as too long, truncate the
        path to ``JmModuleConfig.VAR_FILE_NAME_LENGTH_LIMIT`` characters and
        retry.

        :param save_dir: directory path to create
        :return: the path that was actually created (possibly truncated)
        :raises OSError: any other OS error, or a too-long error that
            truncation cannot resolve
        """
        # errno.ENAMETOOLONG is 36 on Linux but differs on other platforms,
        # so compare against the symbolic constant instead of a literal.
        import errno

        try:
            mkdir_if_not_exists(save_dir)
        except OSError as e:
            if e.errno != errno.ENAMETOOLONG:
                raise

            # directory name too long
            limit = JmModuleConfig.VAR_FILE_NAME_LENGTH_LIMIT
            shortened = save_dir[0:limit]
            if shortened == save_dir:
                # truncating changes nothing, so retrying would recurse forever
                raise

            jm_log('error', f'目录名过长,无法创建目录,强制缩短到{limit}个字符并重试')
            return cls.try_mkdir(shortened)

        return save_dir
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# DSL support: ${NAME} is replaced with os.getenv(NAME)
|
|
348
|
+
JmcomicText.dsl_replacer.add_dsl_and_replacer(r'\$\{(.*?)\}', JmcomicText.match_os_env)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class PatternTool:
    """Thin helpers around ``Pattern.search`` that add defaults or raise on (mis)match."""

    @classmethod
    def match_or_default(cls, html: str, pattern: Pattern, default):
        """Return group 1 of the first match of ``pattern`` in ``html``, else ``default``."""
        found = pattern.search(html)
        if found is None:
            return default
        return found[1]

    @classmethod
    def require_match(cls, html: str, pattern: Pattern, msg, rindex=1):
        """
        Search ``html`` with ``pattern`` and return group ``rindex`` of the
        match, or the match object itself when ``rindex`` is None.

        When nothing matches, ``ExceptionTool.raises_regex`` is called with
        ``msg``.
        """
        found = pattern.search(html)
        if found is None:
            ExceptionTool.raises_regex(
                msg,
                html=html,
                pattern=pattern,
            )
            return None

        if rindex is None:
            return found
        return found[rindex]

    @classmethod
    def require_not_match(cls, html: str, pattern: Pattern, *, msg_func):
        """
        Assert that ``pattern`` does NOT occur in ``html``.

        When it does, ``ExceptionTool.raises_regex`` is called with the message
        produced by ``msg_func(match)``.
        """
        found = pattern.search(html)
        if found is not None:
            ExceptionTool.raises_regex(
                msg_func(found),
                html=html,
                pattern=pattern,
            )
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
class JmPageTool:
    """
    Parsers that turn search / category / favorite pages, either html or
    decoded api data, into ``JmSearchPage`` / ``JmFavoritePage`` objects.

    Page content is always a list of ``(album_id, info_dict)`` pairs.
    """
    # narrows the html down to the region that holds the results
    pattern_html_search_shorten_for = compile(r'<div class="well well-sm">([\s\S]*)<div class="row">')

    # extracts album info from a search page
    pattern_html_search_album_info_list = compile(
        r'<a href="/album/(\d+)/[\s\S]*?title="(.*?)"([\s\S]*?)<div class="title-truncate tags .*>([\s\S]*?)</div>'
    )

    # extracts album info from a category page
    pattern_html_category_album_info_list = compile(
        r'<a href="/album/(\d+)/[^>]*>[^>]*?'
        r'title="(.*?)"[^>]*>[ \n]*</a>[ \n]*'
        r'<div class="label-loveicon">([\s\S]*?)'
        r'<div class="clearfix">'
    )

    # finds the tag list
    pattern_html_search_tags = compile(r'<a[^>]*?>(.*?)</a>')

    # finds an error box (for example: keyword too short)
    pattern_html_search_error = compile(r'<fieldset>\n<legend>(.*?)</legend>\n<div class=.*?>\n(.*?)\n</div>\n</fieldset>')

    # (pattern, default): total number of results, 0 when absent
    pattern_html_search_total = compile(r'class="text-white">(\d+)</span> A漫.'), 0

    # one album entry on the favorites page
    pattern_html_favorite_content = compile(
        r'<div id="favorites_album_[^>]*?>[\s\S]*?'
        r'<a href="/album/(\d+)/[^"]*">[\s\S]*?'
        r'<div class="video-title title-truncate">([^<]*?)'
        r'</div>'
    )

    # total number of favorites in the folder
    pattern_html_favorite_total = compile(r' : (\d+)[^/]*/\D*(\d+)')

    # all favorite folders: [narrowing pattern, per-folder pattern]
    pattern_html_favorite_folder_list = [
        compile(r'<select class="user-select" name="movefolder-fid">([\s\S]*)</select>'),
        compile(r'<option value="(\d+)">([^<]*?)</option>')
    ]

    @classmethod
    def parse_html_to_search_page(cls, html: str) -> JmSearchPage:
        """
        Parse a search-result html page.

        :raises: via ``ExceptionTool.raises_regex`` when the page shows an
            error box or the result region cannot be found
        """
        # 1. fail if the page contains an error box
        PatternTool.require_not_match(
            html,
            cls.pattern_html_search_error,
            msg_func=lambda match: '{}: {}'.format(match[1], match[2])
        )

        # 2. narrow the text
        html = PatternTool.require_match(
            html,
            cls.pattern_html_search_shorten_for,
            msg='未匹配到搜索结果',
        )

        # 3. extract the results
        total = int(PatternTool.match_or_default(html, *cls.pattern_html_search_total))  # total number of results

        album_info_list = cls.pattern_html_search_album_info_list.findall(html)

        # "content" mirrors the name used by the api search response.
        # _label_category_text could yield label-category / label-sub,
        # but it is deliberately not parsed.
        content = [
            (album_id, {
                'name': title,  # "name" keeps this compatible with the api-based pages
                'tags': cls.pattern_html_search_tags.findall(tag_text),
            })
            for album_id, title, _label_category_text, tag_text in album_info_list
        ]

        return JmSearchPage(content, total)

    @classmethod
    def parse_html_to_category_page(cls, html: str) -> JmSearchPage:
        """Parse a category html page."""
        total = int(PatternTool.match_or_default(html, *cls.pattern_html_search_total))

        album_info_list = cls.pattern_html_category_album_info_list.findall(html)

        content = [
            (album_id, {
                'name': title,  # "name" keeps this compatible with the api-based pages
                'tags': cls.pattern_html_search_tags.findall(tag_text),
            })
            for album_id, title, tag_text in album_info_list
        ]

        return JmSearchPage(content, total)

    @classmethod
    def parse_html_to_favorite_page(cls, html: str) -> JmFavoritePage:
        """
        Parse a favorites html page.

        :raises: via ``ExceptionTool.raises_regex`` when the total or the
            folder list cannot be found
        """
        total = int(PatternTool.require_match(
            html,
            cls.pattern_html_favorite_total,
            '未匹配到收藏夹的本子总数',
        ))

        # albums in the folder
        content = [
            (aid, {'name': atitle})
            for aid, atitle in cls.pattern_html_favorite_content.findall(html)
        ]

        # folder list
        p1, p2 = cls.pattern_html_favorite_folder_list
        folder_list_text = PatternTool.require_match(html, p1, '未匹配到收藏夹列表')
        folder_list_raw = p2.findall(folder_list_text)
        folder_list = [{'name': fname, 'FID': fid} for fid, fname in folder_list_raw]

        return JmFavoritePage(content, folder_list, total)

    @classmethod
    def parse_api_to_search_page(cls, data: AdvancedDict) -> JmSearchPage:
        """
        Parse api search data. Example shape::

            {
                "search_query": "MANA",
                "total": "177",
                "content": [
                    {
                        "id": "441923",
                        "author": "MANA",
                        "description": "",
                        "name": "[MANA] 神里绫华5",
                        "image": "",
                        "category": {"id": "1", "title": "同人"},
                        "category_sub": {"id": "1", "title": "同人"}
                    }
                ]
            }
        """
        total: int = int(data.total or 0)  # 2024.1.5: data.total may be None
        content = cls.adapt_content(data.content)
        return JmSearchPage(content, total)

    @classmethod
    def parse_api_to_favorite_page(cls, data: AdvancedDict) -> JmFavoritePage:
        """
        Parse api favorites data. Example shape::

            {
                "list": [
                    {
                        "id": "363859",
                        "author": "紺菓",
                        "description": "",
                        "name": "...",
                        "latest_ep": null,
                        "latest_ep_aid": null,
                        "image": "",
                        "category": {"id": "1", "title": "同人"},
                        "category_sub": {"id": "1", "title": "同人"}
                    }
                ],
                "folder_list": [
                    {"0": "123", "FID": "123", "1": "456", "UID": "456",
                     "2": "收藏夹名", "name": "收藏夹名"}
                ],
                "total": "87",
                "count": 20
            }
        """
        # same guard as parse_api_to_search_page: a missing total counts as 0
        total: int = int(data.total or 0)
        # count: int = int(data.count)
        content = cls.adapt_content(data.list)
        folder_list = data.get('folder_list', [])

        return JmFavoritePage(content, folder_list, total)

    @classmethod
    def adapt_content(cls, content):
        """
        Convert api items into ``(item.id, item.src_dict)`` pairs, making sure
        each dict has a ``'tags'`` key (empty list when absent).

        Note: the underlying ``src_dict`` is modified in place.
        """

        def adapt_item(item: AdvancedDict):
            item: dict = item.src_dict
            item.setdefault('tags', [])
            return item

        return [
            (item.id, adapt_item(item)) for item in content
        ]
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
class JmApiAdaptTool:
    """
    Adapts mobile-api response dicts to the standard entity classes.

    Example album payload::

        {
            "id": 123,
            "name": "[狗野叉漢化]",
            "author": ["AREA188"],
            "images": ["00004.webp"],
            "description": null,
            "total_views": "41314",
            "likes": "918",
            "series": [],
            "series_id": "0",
            "comment_total": "5",
            "tags": ["全彩", "中文"],
            "works": [],
            "actors": [],
            "related_list": [
                {"id": "333718", "author": "been", "description": "",
                 "name": "[been]The illusion of lies(1)[中國語][無修正][全彩]", "image": ""}
            ],
            "liked": false,
            "is_favorite": false
        }

    Example photo payload::

        {
            "id": 413446,
            "series": [
                {"id": "487043", "name": "第48話", "sort": "48"}
            ],
            "tags": "慾望 調教 NTL 地鐵 戲劇",
            "name": "癡漢成癮-第2話",
            "images": ["00047.webp"],
            "series_id": "400222",
            "is_favorite": false,
            "liked": false
        }
    """
    # Per entity class: which api keys feed the constructor.
    # A plain string is copied under the same name; a tuple is
    # (api key, constructor keyword).
    field_adapter = {
        JmAlbumDetail: [
            'likes',
            'tags',
            'works',
            'actors',
            'related_list',
            'name',
            ('id', 'album_id'),
            ('author', 'authors'),
            ('total_views', 'views'),
            ('comment_total', 'comment_count'),
        ],
        JmPhotoDetail: [
            'name',
            'series_id',
            'tags',
            ('id', 'photo_id'),
            ('images', 'page_arr'),

        ]
    }

    @classmethod
    def parse_entity(cls, data: dict, clazz: type):
        """
        Build ``clazz`` from api ``data``: copy/rename the keys listed in the
        adapter for ``clazz``, run the album or photo post-processing step,
        then call ``clazz(**fields)``.
        """
        fields = {}
        for entry in cls.get_adapter(clazz):
            if isinstance(entry, str):
                fields[entry] = data[entry]
            elif isinstance(entry, tuple):
                source_key, target_key = entry
                fields[target_key] = data[source_key]

        if issubclass(clazz, JmAlbumDetail):
            post_adapt = cls.post_adapt_album
        else:
            post_adapt = cls.post_adapt_photo
        post_adapt(data, clazz, fields)

        return clazz(**fields)

    @classmethod
    def get_adapter(cls, clazz: type):
        """
        Return the adapter list of the first ``field_adapter`` key that
        ``clazz`` is a subclass of; otherwise ``ExceptionTool.raises`` is called.
        """
        adapter = next(
            (spec for base, spec in cls.field_adapter.items() if issubclass(clazz, base)),
            None,
        )
        if adapter is not None:
            return adapter

        ExceptionTool.raises(f'不支持的类型: {clazz}')

    @classmethod
    def post_adapt_album(cls, data: dict, _clazz: type, fields: dict):
        """
        Fill album-only fields: ``episode_list`` from ``data['series']`` as
        ``(id, sort, name)`` tuples, and the placeholder ``'0'`` for the fields
        this step cannot derive from ``data``.
        """
        fields['episode_list'] = [
            (chapter.id, chapter.sort, chapter.name)
            for chapter in map(AdvancedDict, data['series'])
        ]
        fields.update(
            dict.fromkeys(('scramble_id', 'page_count', 'pub_date', 'update_date'), '0')
        )

    @classmethod
    def post_adapt_photo(cls, data: dict, _clazz: type, fields: dict):
        """
        Fill photo-only fields: ``sort`` (taken from the series entry whose id
        equals this photo's id, defaulting to 1) and a randomly chosen
        ``data_original_domain`` from ``JmModuleConfig.DOMAIN_IMAGE_LIST``.
        """
        import random

        # sort values inside data['series'] start from 1
        fields['sort'] = next(
            (
                chapter.sort
                for chapter in map(AdvancedDict, data['series'])
                if int(chapter.id) == int(data['id'])
            ),
            1,
        )
        fields['data_original_domain'] = random.choice(JmModuleConfig.DOMAIN_IMAGE_LIST)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
class JmImageTool:
    """Saving, opening and de-scrambling of downloaded images."""

    @classmethod
    def save_resp_img(cls, resp: Any, filepath: str, need_convert=True):
        """
        Save the image carried by an HTTP response to a file.

        Pass ``need_convert=True`` when the file format has to change (for
        example .jpg -> .png). With ``need_convert=False`` the response is
        written as-is, skipping PIL decoding, which is cheaper.

        :param resp: JmImageResp
        :param filepath: destination image path
        :param need_convert: whether to re-encode the image through PIL
        """
        if need_convert is False:
            cls.save_directly(resp, filepath)
            return

        cls.save_image(cls.open_image(resp.content), filepath)

    @classmethod
    def save_image(cls, image: Image, filepath: str):
        """
        Save an image.

        :param image: PIL.Image object
        :param filepath: destination path
        """
        image.save(filepath)

    @classmethod
    def save_directly(cls, resp, filepath):
        """Write the response to ``filepath`` via ``common.save_resp_content``."""
        from common import save_resp_content
        save_resp_content(resp, filepath)

    @classmethod
    def decode_and_save(cls,
                        num: int,
                        img_src: Image,
                        decoded_save_path: str
                        ) -> None:
        """
        De-scramble an image and save it.

        The source is cut into ``num`` horizontal strips which are pasted into
        a new RGB image in reverse vertical order. The ``h % num`` leftover
        rows belong to the strip taken from the bottom of the source, which
        lands at the top of the output.

        :param num: number of strips (see ``get_num``); 0 means the image is
            not scrambled and is saved unchanged
        :param img_src: source image
        :param decoded_save_path: where the de-scrambled image is saved
        """
        # nothing to undo: save as-is
        if num == 0:
            cls.save_image(img_src, decoded_save_path)
            return

        import math
        w, h = img_src.size

        # target canvas for the de-scrambled picture
        img_decode = Image.new("RGB", (w, h))
        remainder = h % num
        strip_height = math.floor(h / num)

        for index in range(num):
            src_top = h - (strip_height * (index + 1)) - remainder

            if index == 0:
                # the first strip absorbs the leftover rows
                height = strip_height + remainder
                dst_top = 0
            else:
                height = strip_height
                dst_top = strip_height * index + remainder

            strip = img_src.crop((0, src_top, w, src_top + height))
            img_decode.paste(strip, (0, dst_top, w, dst_top + height))

        # write the result
        cls.save_image(img_decode, decoded_save_path)

    @classmethod
    def open_image(cls, fp: Union[str, bytes]):
        """Open an image from a file path (str) or from raw bytes."""
        from io import BytesIO
        if not isinstance(fp, str):
            fp = BytesIO(fp)
        return Image.open(fp)

    @classmethod
    def get_num(cls, scramble_id, aid, filename: str) -> int:
        """
        Compute the number of strips an image was split into.

        :return: 0 when ``aid < scramble_id``; 10 when ``aid`` is below
            ``JmMagicConstants.SCRAMBLE_268850``; otherwise a value derived
            from the last hex character of ``md5(f"{aid}{filename}")``
        """
        scramble_id = int(scramble_id)
        aid = int(aid)

        if aid < scramble_id:
            return 0

        if aid < JmMagicConstants.SCRAMBLE_268850:
            return 10

        import hashlib
        modulus = 10 if aid < JmMagicConstants.SCRAMBLE_421926 else 8
        digest = hashlib.md5(f"{aid}{filename}".encode()).hexdigest()
        return (ord(digest[-1]) % modulus) * 2 + 2

    @classmethod
    def get_num_by_url(cls, scramble_id, url) -> int:
        """Compute the number of strips from an image URL."""
        aid = JmcomicText.parse_to_jm_id(url)
        filename = of_file_name(url, True)
        return cls.get_num(scramble_id, aid=aid, filename=filename)

    @classmethod
    def get_num_by_detail(cls, detail: JmImageDetail) -> int:
        """Compute the number of strips from an image detail object."""
        return cls.get_num(detail.scramble_id, detail.aid, detail.img_file_name)
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
class JmCryptoTool:
    """
    JM encryption / decryption helpers.
    """

    @classmethod
    def token_and_tokenparam(cls,
                             ts,
                             ver=None,
                             secret=None,
                             ):
        """
        Compute the ``token`` and ``tokenparam`` request headers of the JM api.

        :param ts: timestamp
        :param ver: app version; defaults to ``JmMagicConstants.APP_VERSION``
        :param secret: secret; defaults to ``JmMagicConstants.APP_TOKEN_SECRET``
        :return: (token, tokenparam)
        """
        ver = JmMagicConstants.APP_VERSION if ver is None else ver
        secret = JmMagicConstants.APP_TOKEN_SECRET if secret is None else secret

        # tokenparam looks like: 1700566805,1.6.3
        tokenparam = f'{ts},{ver}'

        # token looks like: 81498a20feea7fbb7149c637e49702e3
        token = cls.md5hex(f'{ts}{secret}')

        return token, tokenparam

    @classmethod
    def decode_resp_data(cls,
                         data: str,
                         ts,
                         secret=None,
                         ) -> str:
        """
        Decrypt an api response payload.

        :param data: resp.json()['data']
        :param ts: timestamp
        :param secret: secret; defaults to ``JmMagicConstants.APP_DATA_SECRET``
        :return: the decrypted text (a json string)
        """
        if secret is None:
            secret = JmMagicConstants.APP_DATA_SECRET

        # 1. base64-decode
        import base64
        encrypted = base64.b64decode(data)

        # 2. AES-ECB decrypt; the key is the md5 hex digest of ts + secret
        aes_key = cls.md5hex(f'{ts}{secret}').encode('utf-8')
        from Crypto.Cipher import AES
        decrypted = AES.new(aes_key, AES.MODE_ECB).decrypt(encrypted)

        # 3. drop the trailing padding: the last byte is the padding length
        padding_length = decrypted[-1]
        unpadded = decrypted[:-padding_length]

        # 4. decode to text (json)
        return unpadded.decode('utf-8')

    @classmethod
    def md5hex(cls, key: str):
        """
        Return the md5 hex digest of ``key`` encoded as UTF-8.

        :raises: via ``ExceptionTool`` when ``key`` is not a str
        """
        ExceptionTool.require_true(isinstance(key, str), 'key参数需为字符串')

        import hashlib
        return hashlib.md5(key.encode("utf-8")).hexdigest()
|