jmcomic 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jmcomic/jm_toolkit.py ADDED
@@ -0,0 +1,937 @@
1
+ from PIL import Image
2
+
3
+ from .jm_exception import *
4
+
5
+
6
class JmcomicText:
    """
    Text-level helpers: regex patterns for the site's HTML, id/domain parsing,
    HTML-to-entity reflection, URL formatting, DSL expansion and title tokenizing.

    Class attributes whose names start with ``pattern_html_photo_`` or
    ``pattern_html_album_`` are discovered by name in ``reflect_new_instance``;
    the part of the name after the prefix becomes the keyword argument handed to
    the entity constructor. Such an attribute may be:

    * a compiled pattern (group 1 is the value, or ``findall`` when the field
      name ends with ``_list``),
    * a list of patterns (all but the last narrow the text, the last one is
      applied with ``findall``),
    * a ``(pattern, default)`` tuple (``default`` is used when nothing matches).
    """
    pattern_jm_domain = compile(r'https://([\w.-]+)')
    # Each entry is (pattern, index of the group that holds the id).
    pattern_jm_pa_id = [
        (compile(r'(photos?|albums?)/(\d+)'), 2),
        (compile(r'id=(\d+)'), 1),
    ]
    pattern_html_jm_pub_domain = compile(r'[\w-]+\.\w+/?\w+')

    pattern_html_photo_photo_id = compile(r'<meta property="og:url" content=".*?/photo/(\d+)/?.*?">')
    pattern_html_photo_scramble_id = compile(r'var scramble_id = (\d+);')
    pattern_html_photo_name = compile(r'<title>([\s\S]*?)\|.*</title>')
    # pattern_html_photo_data_original_list = compile(r'data-original="(.*?)" id="album_photo_.+?"')
    pattern_html_photo_data_original_domain = compile(r'src="https://(.*?)/media/albums/blank')
    pattern_html_photo_data_original_0 = compile(r'data-original="(.*?)"[^>]*?id="album_photo[^>]*?data-page="0"')
    pattern_html_photo_tags = compile(r'<meta name="keywords"[\s\S]*?content="(.*?)"')
    pattern_html_photo_series_id = compile(r'var series_id = (\d+);')
    pattern_html_photo_sort = compile(r'var sort = (\d+);')
    pattern_html_photo_page_arr = compile(r'var page_arr = (.*?);')

    pattern_html_b64_decode_content = compile(r'const html = base64DecodeUtf8\("(.*?)"\)')
    pattern_html_album_album_id = compile(r'<span class="number">.*?:JM(\d+)</span>')
    pattern_html_album_scramble_id = compile(r'var scramble_id = (\d+);')
    pattern_html_album_name = compile(r'id="book-name"[^>]*?>([\s\S]*?)<')
    pattern_html_album_episode_list = compile(r'data-album="(\d+)"[^>]*>[\s\S]*?第(\d+)[话話]([\s\S]*?)<[\s\S]*?>')
    pattern_html_album_page_count = compile(r'<span class="pagecount">.*?:(\d+)</span>')
    pattern_html_album_pub_date = compile(r'>上架日期 : (.*?)</span>')
    pattern_html_album_update_date = compile(r'>更新日期 : (.*?)</span>')
    pattern_html_tag_a = compile(r'<a[^>]*?>\s*(\S*)\s*</a>')
    # works
    pattern_html_album_works = [
        compile(r'<span itemprop="author" data-type="works">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # characters
    pattern_html_album_actors = [
        compile(r'<span itemprop="author" data-type="actor">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # tags
    pattern_html_album_tags = [
        compile(r'<span itemprop="genre" data-type="tags">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # authors
    pattern_html_album_authors = [
        compile(r'<span itemprop="author" data-type="author">([\s\S]*?)</span>'),
        pattern_html_tag_a,
    ]
    # likes
    pattern_html_album_likes = compile(r'<span id="albim_likes_\d+">(.*?)</span>')
    # views
    pattern_html_album_views = compile(r'<span>(.*?)</span>\n *<span>(次觀看|观看次数|次观看次数)</span>')
    # comment count (div); falls back to 0 when the badge is absent
    pattern_html_album_comment_count = compile(r'<div class="badge"[^>]*?id="total_video_comments">(\d+)</div>'), 0

    # extracts the message from an ajax response
    pattern_ajax_favorite_msg = compile(r'</button>(.*?)</div>')

    @classmethod
    def parse_to_jm_domain(cls, text: str):
        """
        Return the host part of ``text`` when it starts with ``JmModuleConfig.PROT``;
        otherwise return ``text`` unchanged.
        """
        if text.startswith(JmModuleConfig.PROT):
            return cls.pattern_jm_domain.search(text)[1]

        return text

    @classmethod
    def parse_to_jm_id(cls, text) -> str:
        """
        Extract the numeric id (as a string) from an int, a digit string,
        a ``JM``-prefixed string, or a photo/album URL.

        The failure paths go through ``ExceptionTool``.
        """
        if isinstance(text, int):
            return str(text)

        ExceptionTool.require_true(isinstance(text, str), f"无法解析jm车号, 参数类型为: {type(text)}")

        # 43210
        if text.isdigit():
            return text

        # Anything shorter than two characters cannot carry a "JM" prefix.
        ExceptionTool.require_true(len(text) >= 2, f"无法解析jm车号, 文本太短: {text}")

        # JM123456 (prefix is case-insensitive)
        if text[0] in 'Jj' and text[1] in 'Mm':
            return text[2:]

        # https://xxx/photo/412038
        # https://xxx/album/?id=412038
        for p, i in cls.pattern_jm_pa_id:
            match = p.search(text)
            if match is not None:
                return match[i]

        ExceptionTool.raises(f"无法解析jm车号, 文本为: {text}")

    @classmethod
    def analyse_jm_pub_html(cls, html: str, domain_keyword=('jm', 'comic')) -> List[str]:
        """
        Return every domain-looking token in ``html`` that contains at least one
        of the ``domain_keyword`` substrings.
        """
        return [
            domain
            for domain in cls.pattern_html_jm_pub_domain.findall(html)
            if any(kw in domain for kw in domain_keyword)
        ]

    @classmethod
    def parse_jm_base64_html(cls, resp_text: str) -> str:
        """
        If ``resp_text`` embeds a ``base64DecodeUtf8("...")`` payload, return the
        decoded payload; otherwise return ``resp_text`` as is.
        """
        from base64 import b64decode
        html_b64 = PatternTool.match_or_default(resp_text, cls.pattern_html_b64_decode_content, None)
        if html_b64 is None:
            return resp_text
        return b64decode(html_b64).decode()

    @classmethod
    def analyse_jm_photo_html(cls, html: str) -> JmPhotoDetail:
        """Build a photo entity from the ``pattern_html_photo_*`` fields found in ``html``."""
        return cls.reflect_new_instance(
            html,
            "pattern_html_photo_",
            JmModuleConfig.photo_class()
        )

    @classmethod
    def analyse_jm_album_html(cls, html: str) -> JmAlbumDetail:
        """
        Build an album entity from the ``pattern_html_album_*`` fields; the html is
        first passed through ``parse_jm_base64_html``.
        """
        return cls.reflect_new_instance(
            cls.parse_jm_base64_html(html),
            "pattern_html_album_",
            JmModuleConfig.album_class()
        )

    @classmethod
    def reflect_new_instance(cls, html: str, cls_field_prefix: str, clazz: type):
        """
        Match every class attribute whose name starts with ``cls_field_prefix``
        against ``html`` and call ``clazz(**fields)`` with the results.

        :param html: text to search
        :param cls_field_prefix: attribute-name prefix selecting the patterns
        :param clazz: callable receiving the extracted fields as keyword arguments
        """

        def match_field(field_name: str, pattern: Union[Pattern, List[Pattern]], text):
            if isinstance(pattern, list):
                # For a list of patterns, every pattern except the last narrows
                # the text (using the whole match); the last one extracts values.
                last_pattern = pattern[-1]
                for narrowing in pattern[:-1]:
                    match = narrowing.search(text)
                    if match is None:
                        return None
                    text = match[0]

                return last_pattern.findall(text)

            if field_name.endswith("_list"):
                return pattern.findall(text)

            match = pattern.search(text)
            return None if match is None else match[1]

        field_dict = {}
        # Only attributes defined directly on this class are inspected.
        for pattern_name, pattern in cls.__dict__.items():
            if not pattern_name.startswith(cls_field_prefix):
                continue

            # A (pattern, default) tuple supplies a fallback for a missing match.
            if isinstance(pattern, tuple):
                pattern, default = pattern
            else:
                default = None

            # The field name is whatever follows the prefix.
            field_name = pattern_name[len(cls_field_prefix):]
            field_value = match_field(field_name, pattern, html)

            if field_value is None:
                if default is None:
                    ExceptionTool.raises_regex(
                        f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern}]"
                        + (f"\n响应文本=[{html}]" if len(html) < 200 else
                           f'响应文本过长(len={len(html)}),不打印'
                           ),
                        html=html,
                        pattern=pattern,
                    )
                else:
                    field_value = default

            field_dict[field_name] = field_value

        return clazz(**field_dict)

    @classmethod
    def format_url(cls, path, domain):
        """
        Join ``domain`` and ``path``, prepending ``JmModuleConfig.PROT`` when the
        domain does not already start with it. ``domain`` must be a non-empty str.
        """
        ExceptionTool.require_true(isinstance(domain, str) and len(domain) != 0, '域名为空')

        if domain.startswith(JmModuleConfig.PROT):
            return f'{domain}{path}'

        return f'{JmModuleConfig.PROT}{domain}{path}'

    @classmethod
    def format_album_url(cls, aid, domain='18comic.vip'):
        """
        Turn an album id into a browsable URL (handy for printing and opening in a browser).
        """
        return cls.format_url(f'/album/{aid}/', domain)

    class DSLReplacer:
        """Ordered registry of ``pattern -> replacer`` rules applied with ``Pattern.sub``."""

        def __init__(self):
            self.dsl_dict: Dict[Pattern, Callable[[Match], str]] = {}

        def parse_dsl_text(self, text) -> str:
            """Apply every registered rule to ``text``, in registration order."""
            for pattern, replacer in self.dsl_dict.items():
                text = pattern.sub(replacer, text)
            return text

        def add_dsl_and_replacer(self, dsl: str, replacer: Callable[[Match], str]):
            """Compile ``dsl`` and register ``replacer`` as its substitution callback."""
            pattern = compile(dsl)
            self.dsl_dict[pattern] = replacer

    @classmethod
    def match_os_env(cls, match: Match) -> str:
        """
        Replacer callback: look up the environment variable named by group 1.
        An unset variable is reported through ``ExceptionTool.require_true``.
        """
        name = match[1]
        value = os.getenv(name, None)
        ExceptionTool.require_true(value is not None, f'未配置环境变量: {name}')
        return value

    dsl_replacer = DSLReplacer()

    @classmethod
    def parse_to_abspath(cls, dsl_text: str) -> str:
        """Expand DSL expressions in ``dsl_text`` and return the absolute path."""
        return os.path.abspath(cls.parse_dsl_text(dsl_text))

    @classmethod
    def parse_dsl_text(cls, dsl_text: str) -> str:
        """Expand DSL expressions in ``dsl_text`` using the class-level ``dsl_replacer``."""
        return cls.dsl_replacer.parse_dsl_text(dsl_text)

    # Opening bracket -> closing bracket.
    # The last entry is the full-width pair; a second ASCII '(' key would just
    # overwrite the first one and add nothing.
    bracket_map = {'(': ')',
                   '[': ']',
                   '【': '】',
                   '（': '）',
                   }

    @classmethod
    def parse_orig_album_name(cls, name: str, default=None):
        """
        Return the first token of ``name`` that does not start with an opening
        bracket, or ``default`` when every token is bracketed.
        """
        for word in cls.tokenize(name):
            if word[0] in cls.bracket_map:
                continue

            return word

        return default

    @classmethod
    def tokenize(cls, title: str) -> List[str]:
        """
        Split a title into bracketed groups and the plain text between them.

        繞道#2 [暴碧漢化組] [えーすけ(123)] よりみち#2 (COMIC 快樂天 2024年1月號) [中國翻譯] [DL版]
        :return: ['繞道#2', '[暴碧漢化組]', '[えーすけ(123)]', 'よりみち#2', '(COMIC 快樂天 2024年1月號)', '[中國翻譯]', '[DL版]']

        An opening bracket that is never closed is kept as ordinary text.
        """
        title = title.strip()
        bracket_map = cls.bracket_map
        length = len(title)

        ret = []
        char_list = []

        def add(w=None):
            # With no argument, flush the pending plain-text characters.
            if w is None:
                w = ''.join(char_list).strip()

            if w == '':
                return

            ret.append(w)
            char_list.clear()

        def find_right_pair(left_pair, start):
            # Returns the index just past the bracket closing ``left_pair``,
            # or -1 when the bracket is never closed. Nested brackets are tracked.
            stack = [left_pair]
            j = start + 1

            while j < length and stack:
                c = title[j]
                if c in bracket_map:
                    stack.append(c)
                elif c == bracket_map[stack[-1]]:
                    stack.pop()

                j += 1

            return -1 if stack else j

        i = 0
        while i < length:
            c = title[i]

            if c not in bracket_map:
                char_list.append(c)
                i += 1
                continue

            # The previous plain-text word ends here.
            add()
            j = find_right_pair(c, i)
            if j == -1:
                # Unclosed bracket: treat it as a normal character.
                char_list.append(c)
                i += 1
                continue

            # The whole bracketed span is one word.
            add(title[i:j])
            i = j

        add()
        return ret

    @classmethod
    def to_zh_cn(cls, s):
        """Convert ``s`` to Simplified Chinese via ``zhconv.convert(s, 'zh-cn')``."""
        import zhconv
        return zhconv.convert(s, 'zh-cn')

    @classmethod
    def try_mkdir(cls, save_dir: str):
        """
        Create ``save_dir``; when the OS reports the name as too long, truncate
        the path to ``JmModuleConfig.VAR_FILE_NAME_LENGTH_LIMIT`` characters and retry.

        :param save_dir: directory path to create
        :return: the path that was actually created (possibly shortened)
        :raises OSError: for any other error, or when truncating cannot shorten the path
        """
        # errno.ENAMETOOLONG is 36 on Linux but differs elsewhere (e.g. 63 on
        # macOS/BSD), so compare against the symbolic constant.
        import errno

        try:
            mkdir_if_not_exists(save_dir)
        except OSError as e:
            if e.errno != errno.ENAMETOOLONG:
                raise

            limit = JmModuleConfig.VAR_FILE_NAME_LENGTH_LIMIT
            shortened = save_dir[0:limit]
            if shortened == save_dir:
                # Truncation changes nothing, so retrying would recurse forever.
                raise

            jm_log('error', f'目录名过长,无法创建目录,强制缩短到{limit}个字符并重试')
            return cls.try_mkdir(shortened)
        return save_dir
345
+
346
+
347
+ # Register the environment-variable DSL: ${NAME} -> os.getenv(NAME)
348
+ JmcomicText.dsl_replacer.add_dsl_and_replacer(r'\$\{(.*?)\}', JmcomicText.match_os_env)
349
+
350
+
351
class PatternTool:
    """Thin wrappers around ``Pattern.search`` with default / required-match handling."""

    @classmethod
    def match_or_default(cls, html: str, pattern: Pattern, default):
        """Return group 1 of the first match of ``pattern`` in ``html``, or ``default``."""
        found = pattern.search(html)
        if found is None:
            return default
        return found[1]

    @classmethod
    def require_match(cls, html: str, pattern: Pattern, msg, rindex=1):
        """
        Return group ``rindex`` of the first match (the match object itself when
        ``rindex`` is None). A miss is reported through ``ExceptionTool.raises_regex``.
        """
        found = pattern.search(html)
        if found is None:
            ExceptionTool.raises_regex(
                msg,
                html=html,
                pattern=pattern,
            )
            return None

        if rindex is None:
            return found
        return found[rindex]

    @classmethod
    def require_not_match(cls, html: str, pattern: Pattern, *, msg_func):
        """
        Do nothing when ``pattern`` is absent from ``html``; otherwise report
        ``msg_func(match)`` through ``ExceptionTool.raises_regex``.
        """
        found = pattern.search(html)
        if found is not None:
            ExceptionTool.raises_regex(
                msg_func(found),
                html=html,
                pattern=pattern,
            )
381
+
382
+
383
class JmPageTool:
    """
    Converts search / category / favorite results, from either the HTML pages or
    the API payloads, into ``JmSearchPage`` / ``JmFavoritePage`` objects.

    Page content is a list of ``(album_id, info_dict)`` pairs.
    """
    # Cuts the search html down to the result area.
    pattern_html_search_shorten_for = compile(r'<div class="well well-sm">([\s\S]*)<div class="row">')

    # Album info on a search page: (id, title, category text, tag text).
    pattern_html_search_album_info_list = compile(
        r'<a href="/album/(\d+)/[\s\S]*?title="(.*?)"([\s\S]*?)<div class="title-truncate tags .*>([\s\S]*?)</div>'
    )

    # Album info on a category page: (id, title, tag text).
    pattern_html_category_album_info_list = compile(
        r'<a href="/album/(\d+)/[^>]*>[^>]*?'
        r'title="(.*?)"[^>]*>[ \n]*</a>[ \n]*'
        r'<div class="label-loveicon">([\s\S]*?)'
        r'<div class="clearfix">'
    )

    # Finds the tag list inside a tag-text fragment.
    pattern_html_search_tags = compile(r'<a[^>]*?>(.*?)</a>')

    # Error block on the search page (legend + message).
    pattern_html_search_error = compile(r'<fieldset>\n<legend>(.*?)</legend>\n<div class=.*?>\n(.*?)\n</div>\n</fieldset>')

    # (pattern, default): total result count, 0 when not present.
    pattern_html_search_total = compile(r'class="text-white">(\d+)</span> A漫.'), 0

    # One album entry on the favorites page: (id, title).
    pattern_html_favorite_content = compile(
        r'<div id="favorites_album_[^>]*?>[\s\S]*?'
        r'<a href="/album/(\d+)/[^"]*">[\s\S]*?'
        r'<div class="video-title title-truncate">([^<]*?)'
        r'</div>'
    )

    # Favorites total; group 1 is the value used.
    pattern_html_favorite_total = compile(r' : (\d+)[^/]*/\D*(\d+)')

    # All favorite folders: [narrowing pattern, (fid, name) pattern].
    pattern_html_favorite_folder_list = [
        compile(r'<select class="user-select" name="movefolder-fid">([\s\S]*)</select>'),
        compile(r'<option value="(\d+)">([^<]*?)</option>')
    ]

    @classmethod
    def parse_html_to_search_page(cls, html: str) -> JmSearchPage:
        """
        Parse a search-result html page.

        Reports the site's own error block if present, narrows the html to the
        result area, then extracts the total and the album entries.
        """
        # 1. Fail if the page carries an error block.
        PatternTool.require_not_match(
            html,
            cls.pattern_html_search_error,
            msg_func=lambda match: f'{match[1]}: {match[2]}'
        )

        # 2. Narrow the text.
        html = PatternTool.require_match(
            html,
            cls.pattern_html_search_shorten_for,
            msg='未匹配到搜索结果',
        )

        # 3. Extract results.
        total_pattern, total_default = cls.pattern_html_search_total
        total = int(PatternTool.match_or_default(html, total_pattern, total_default))

        find_tags = cls.pattern_html_search_tags.findall
        # The third group (category text) is captured but intentionally unused.
        # The key is 'name' rather than 'title' to line up with the API-based parser.
        content = [
            (album_id, {'name': title, 'tags': find_tags(tag_text)})
            for album_id, title, _category_text, tag_text
            in cls.pattern_html_search_album_info_list.findall(html)
        ]

        return JmSearchPage(content, total)

    @classmethod
    def parse_html_to_category_page(cls, html: str) -> JmSearchPage:
        """Parse a category html page into a ``JmSearchPage``."""
        total_pattern, total_default = cls.pattern_html_search_total
        total = int(PatternTool.match_or_default(html, total_pattern, total_default))

        find_tags = cls.pattern_html_search_tags.findall
        content = [
            (album_id, {'name': title, 'tags': find_tags(tag_text)})
            for album_id, title, tag_text
            in cls.pattern_html_category_album_info_list.findall(html)
        ]

        return JmSearchPage(content, total)

    @classmethod
    def parse_html_to_favorite_page(cls, html: str) -> JmFavoritePage:
        """Parse a favorites html page: total, album entries and the folder list."""
        total = int(PatternTool.require_match(
            html,
            cls.pattern_html_favorite_total,
            '未匹配到收藏夹的本子总数',
        ))

        # Album entries.
        content = [
            (aid, {'name': atitle})
            for aid, atitle in cls.pattern_html_favorite_content.findall(html)
        ]

        # Folder list: narrow to the <select>, then read every <option>.
        select_pattern, option_pattern = cls.pattern_html_favorite_folder_list
        select_html = PatternTool.require_match(html, select_pattern, '未匹配到收藏夹列表')
        folder_list = [
            {'name': fname, 'FID': fid}
            for fid, fname in option_pattern.findall(select_html)
        ]

        return JmFavoritePage(content, folder_list, total)

    @classmethod
    def parse_api_to_search_page(cls, data: AdvancedDict) -> JmSearchPage:
        """
        Build a ``JmSearchPage`` from an API search payload.

        The payload exposes ``total`` (e.g. "177") and ``content``, a list of
        items with keys such as ``id``, ``author``, ``description``, ``name``,
        ``image``, ``category`` and ``category_sub``.
        """
        # data.total may be None, hence the ``or 0``.
        total: int = int(data.total or 0)
        return JmSearchPage(cls.adapt_content(data.content), total)

    @classmethod
    def parse_api_to_favorite_page(cls, data: AdvancedDict) -> JmFavoritePage:
        """
        Build a ``JmFavoritePage`` from an API favorites payload.

        The payload exposes ``list`` (album items shaped like the search items,
        plus ``latest_ep`` / ``latest_ep_aid``), ``folder_list`` (entries with
        ``FID``, ``UID`` and ``name``), ``total`` (e.g. "87") and ``count``.
        """
        total: int = int(data.total)
        content = cls.adapt_content(data.list)
        folders = data.get('folder_list', [])

        return JmFavoritePage(content, folders, total)

    @classmethod
    def adapt_content(cls, content):
        """
        Turn API items into ``(item.id, item.src_dict)`` pairs, making sure every
        dict has a ``'tags'`` key (defaulting to an empty list). The underlying
        dicts are updated in place.
        """
        adapted = []
        for item in content:
            item_id = item.id
            raw = item.src_dict
            raw.setdefault('tags', [])
            adapted.append((item_id, raw))
        return adapted
585
+
586
+
587
class JmApiAdaptTool:
    """
    Adapts the mobile API's response dicts to the standard entity classes.

    Album payload keys used here: ``id``, ``name``, ``author`` (list),
    ``total_views``, ``likes``, ``series`` (list of chapters), ``comment_total``,
    ``tags`` (list), ``works``, ``actors``, ``related_list``.

    Photo payload keys used here: ``id``, ``name``, ``series`` (list of
    ``{"id", "name", "sort"}``), ``tags`` (space-separated string in the sample
    payload), ``images`` (list of file names), ``series_id``.

    ``field_adapter`` maps an entity base class to its field list; a plain string
    copies the key as is, a ``(source_key, target_key)`` tuple renames it.
    """
    field_adapter = {
        JmAlbumDetail: [
            'likes',
            'tags',
            'works',
            'actors',
            'related_list',
            'name',
            ('id', 'album_id'),
            ('author', 'authors'),
            ('total_views', 'views'),
            ('comment_total', 'comment_count'),
        ],
        JmPhotoDetail: [
            'name',
            'series_id',
            'tags',
            ('id', 'photo_id'),
            ('images', 'page_arr'),

        ]
    }

    @classmethod
    def parse_entity(cls, data: dict, clazz: type):
        """
        Build ``clazz(**fields)`` from an API dict: copy/rename the adapter's
        fields, then run the album- or photo-specific post-processing.
        """
        fields = {}
        for entry in cls.get_adapter(clazz):
            if isinstance(entry, str):
                source_key = target_key = entry
            elif isinstance(entry, tuple):
                source_key, target_key = entry
            else:
                continue
            fields[target_key] = data[source_key]

        post_adapt = cls.post_adapt_album if issubclass(clazz, JmAlbumDetail) else cls.post_adapt_photo
        post_adapt(data, clazz, fields)

        return clazz(**fields)

    @classmethod
    def get_adapter(cls, clazz: type):
        """
        Return the field list of the first ``field_adapter`` key that ``clazz``
        subclasses; an unsupported class is reported through ``ExceptionTool.raises``.
        """
        adapter = next(
            (fields for base, fields in cls.field_adapter.items() if issubclass(clazz, base)),
            None,
        )
        if adapter is None:
            ExceptionTool.raises(f'不支持的类型: {clazz}')
        return adapter

    @classmethod
    def post_adapt_album(cls, data: dict, _clazz: type, fields: dict):
        """
        Add ``episode_list`` (one ``(id, sort, name)`` tuple per chapter in
        ``data['series']``) and fill the fields the API does not provide with ``'0'``.
        """
        fields['episode_list'] = [
            (chapter.id, chapter.sort, chapter.name)
            for chapter in map(AdvancedDict, data['series'])
        ]
        # These have no counterpart in the API payload; use a '0' placeholder.
        fields.update(dict.fromkeys(('scramble_id', 'page_count', 'pub_date', 'update_date'), '0'))

    @classmethod
    def post_adapt_photo(cls, data: dict, _clazz: type, fields: dict):
        """
        Add ``sort`` (taken from the chapter in ``data['series']`` whose id equals
        ``data['id']``, default 1) and a randomly chosen ``data_original_domain``.
        """
        # Chapter sort values start at 1; fall back to 1 when no chapter matches.
        fields['sort'] = next(
            (
                chapter.sort
                for chapter in map(AdvancedDict, data['series'])
                if int(chapter.id) == int(data['id'])
            ),
            1,
        )

        import random
        fields['data_original_domain'] = random.choice(JmModuleConfig.DOMAIN_IMAGE_LIST)
726
+
727
+
728
class JmImageTool:
    """Saving, opening and de-scrambling of downloaded images."""

    @classmethod
    def save_resp_img(cls, resp: Any, filepath: str, need_convert=True):
        """
        Save an HTTP image response to ``filepath``.

        With ``need_convert=False`` the response is written as is, skipping PIL
        entirely. With any other value the content is opened with PIL and
        re-saved, which is what allows a format change such as .jpg -> .png.

        :param resp: image response object (its ``content`` is read when converting)
        :param filepath: destination file path
        :param need_convert: whether to go through PIL
        """
        if need_convert is not False:
            cls.save_image(cls.open_image(resp.content), filepath)
            return

        cls.save_directly(resp, filepath)

    @classmethod
    def save_image(cls, image: Image, filepath: str):
        """
        Save a PIL image.

        :param image: PIL image object
        :param filepath: destination file path
        """
        image.save(filepath)

    @classmethod
    def save_directly(cls, resp, filepath):
        """Write the response to ``filepath`` via ``common.save_resp_content``."""
        from common import save_resp_content
        save_resp_content(resp, filepath)

    @classmethod
    def decode_and_save(cls,
                        num: int,
                        img_src: Image,
                        decoded_save_path: str
                        ) -> None:
        """
        De-scramble an image and save it.

        The source is treated as ``num`` horizontal strips stacked in reverse
        order; the strips are pasted back top-to-bottom into a new RGB image.
        When the height is not divisible by ``num``, the remainder rows belong
        to the first destination strip.

        :param num: number of strips (0 means the image is saved unchanged); see ``get_num``
        :param img_src: source image
        :param decoded_save_path: where to save the decoded image
        """
        # Nothing to decode.
        if num == 0:
            cls.save_image(img_src, decoded_save_path)
            return

        width, height = img_src.size
        strip, remainder = divmod(height, num)

        decoded = Image.new("RGB", (width, height))
        for index in range(num):
            src_top = height - strip * (index + 1) - remainder
            if index == 0:
                dst_top = 0
                rows = strip + remainder
            else:
                dst_top = strip * index + remainder
                rows = strip

            piece = img_src.crop((0, src_top, width, src_top + rows))
            decoded.paste(piece, (0, dst_top, width, dst_top + rows))

        cls.save_image(decoded, decoded_save_path)

    @classmethod
    def open_image(cls, fp: Union[str, bytes]):
        """Open an image from a file path (str) or from raw bytes."""
        if isinstance(fp, str):
            return Image.open(fp)

        from io import BytesIO
        return Image.open(BytesIO(fp))

    @classmethod
    def get_num(cls, scramble_id, aid, filename: str) -> int:
        """
        Return the number of strips the image was split into.

        0 when ``aid`` is below ``scramble_id``; 10 when below
        ``JmMagicConstants.SCRAMBLE_268850``; otherwise derived from the last hex
        digit of ``md5(f"{aid}{filename}")``.
        """
        scramble = int(scramble_id)
        album = int(aid)

        if album < scramble:
            return 0
        if album < JmMagicConstants.SCRAMBLE_268850:
            return 10

        import hashlib
        modulus = 8 if album >= JmMagicConstants.SCRAMBLE_421926 else 10
        digest = hashlib.md5(f"{album}{filename}".encode()).hexdigest()
        return (ord(digest[-1]) % modulus) * 2 + 2

    @classmethod
    def get_num_by_url(cls, scramble_id, url) -> int:
        """Return the strip count, reading the id and file name from ``url``."""
        aid = JmcomicText.parse_to_jm_id(url)
        filename = of_file_name(url, True)
        return cls.get_num(scramble_id, aid=aid, filename=filename)

    @classmethod
    def get_num_by_detail(cls, detail: JmImageDetail) -> int:
        """Return the strip count for an image detail object."""
        return cls.get_num(detail.scramble_id, detail.aid, detail.img_file_name)
862
+
863
+
864
class JmCryptoTool:
    """
    Signing and decryption logic for the API.
    """

    @classmethod
    def token_and_tokenparam(cls,
                             ts,
                             ver=None,
                             secret=None,
                             ):
        """
        Compute the ``token`` and ``tokenparam`` request headers.

        :param ts: timestamp
        :param ver: app version; defaults to ``JmMagicConstants.APP_VERSION``
        :param secret: secret; defaults to ``JmMagicConstants.APP_TOKEN_SECRET``
        :return: ``(token, tokenparam)``
        """
        app_version = JmMagicConstants.APP_VERSION if ver is None else ver
        token_secret = JmMagicConstants.APP_TOKEN_SECRET if secret is None else secret

        # tokenparam looks like: 1700566805,1.6.3
        tokenparam = f'{ts},{app_version}'

        # token is the md5 hex digest of timestamp + secret
        token = cls.md5hex(f'{ts}{token_secret}')

        return token, tokenparam

    @classmethod
    def decode_resp_data(cls,
                         data: str,
                         ts,
                         secret=None,
                         ) -> str:
        """
        Decrypt an API response body.

        :param data: ``resp.json()['data']``
        :param ts: timestamp
        :param secret: secret; defaults to ``JmMagicConstants.APP_DATA_SECRET``
        :return: the decrypted text (a JSON string)
        """
        data_secret = JmMagicConstants.APP_DATA_SECRET if secret is None else secret

        # 1. base64-decode
        import base64
        encrypted = base64.b64decode(data)

        # 2. AES-ECB decrypt; the key is the md5 hex digest of timestamp + secret
        aes_key = cls.md5hex(f'{ts}{data_secret}').encode('utf-8')
        from Crypto.Cipher import AES
        decrypted = AES.new(aes_key, AES.MODE_ECB).decrypt(encrypted)

        # 3. strip the trailing padding; the last byte gives the padding length
        padding_length = decrypted[-1]
        plain = decrypted[:-padding_length]

        # 4. decode to str (json)
        return plain.decode('utf-8')

    @classmethod
    def md5hex(cls, key: str):
        """Return the md5 hex digest of ``key`` (which must be a str) encoded as UTF-8."""
        ExceptionTool.require_true(isinstance(key, str), 'key参数需为字符串')

        import hashlib
        return hashlib.md5(key.encode("utf-8")).hexdigest()