jmcomic 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jmcomic/jm_entity.py ADDED
@@ -0,0 +1,695 @@
1
+ from functools import lru_cache
2
+
3
+ from common import *
4
+
5
+ from .jm_config import *
6
+
7
+
8
class Downloadable:
    """Mixin carrying the download bookkeeping fields of an entity."""

    def __init__(self):
        # Initial state: empty save path, and both flags switched off.
        # The assignment order is kept so that ``vars(self)`` lists the
        # fields in the same order as before.
        self.save_path: str = ''
        self.exists: bool = False
        self.skip: bool = False
14
+
15
+
16
class JmBaseEntity:
    """
    Root class of the entities in this module.

    It offers ``to_file`` for persisting the entity and a set of ``is_*``
    class-level probes. Every probe answers ``False`` here; subclasses
    override the one that describes them.
    """

    def to_file(self, filepath):
        """Hand this entity and ``filepath`` to ``PackerUtil.pack``."""
        # Imported at call time, as in the original implementation.
        from common import PackerUtil as packer
        packer.pack(self, filepath)

    @classmethod
    def is_image(cls):
        """Whether this class represents an image. ``False`` by default."""
        return False

    @classmethod
    def is_photo(cls):
        """Whether this class represents a photo. ``False`` by default."""
        return False

    @classmethod
    def is_album(cls):
        """Whether this class represents an album. ``False`` by default."""
        return False

    @classmethod
    def is_page(cls):
        """Whether this class represents a page. ``False`` by default."""
        return False
37
+
38
+
39
class IndexedEntity:
    """
    Mixin that derives sequence-style access from two primitives.

    Subclasses implement ``getindex(index)`` and ``__len__``; this class
    builds ``obj[i]``, ``obj[a:b:c]`` and iteration on top of them.
    """

    def getindex(self, index: int):
        """Return the element at position ``index``. Must be overridden."""
        raise NotImplementedError

    def __len__(self):
        """Return the number of elements. Must be overridden."""
        raise NotImplementedError

    def __getitem__(self, item) -> Any:
        """
        Return one element (int index) or a list of elements (slice).

        Slices follow the rules of built-in sequences: ``None``, negative and
        out-of-range bounds are resolved against ``len(self)``, a negative
        step walks backwards, and a zero step raises ``ValueError``.
        Negative int indexes count from the end.

        :raises IndexError: a negative index reaches before the first element
        :raises TypeError: ``item`` is neither an int nor a slice
        """
        if isinstance(item, slice):
            # slice.indices() handles the cases `x or default` got wrong:
            # a stop of 0 (``obj[:0]`` must be empty), negative bounds,
            # and negative steps.
            positions = range(*item.indices(len(self)))
            return [self.getindex(index) for index in positions]

        if isinstance(item, int):
            if item < 0:
                # Normalize so that getindex() always sees the real position.
                item += len(self)
                if item < 0:
                    raise IndexError(f"index out of range for {self.__class__}")
            return self.getindex(item)

        raise TypeError(f"Invalid item type for {self.__class__}")

    def __iter__(self):
        """Yield every element in order, from position 0 to ``len(self) - 1``."""
        for index in range(len(self)):
            yield self.getindex(index)
62
+
63
+
64
class DetailEntity(JmBaseEntity, IndexedEntity):
    """
    Behaviour shared by album and photo (chapter) detail entities.

    Subclasses provide ``id`` and ``author``, and carry a ``name`` attribute
    that backs the ``title`` property.
    """

    @property
    def id(self) -> str:
        """Identifier of the entity. Must be provided by subclasses."""
        raise NotImplementedError

    @property
    def title(self) -> str:
        """Title of the entity, read from the instance attribute ``name``."""
        return getattr(self, 'name')

    @property
    def author(self):
        """Author of the entity. Must be provided by subclasses."""
        raise NotImplementedError

    @property
    def oname(self) -> str:
        """
        oname = original name

        Example:

            title: "喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]"

            oname: "喂我吃吧 老師!"

        :return: the original name parsed out of the title by
            ``JmcomicText.parse_orig_album_name``; when that returns ``None``
            the fact is logged and the full title is returned instead
        """
        from .jm_toolkit import JmcomicText
        oname = JmcomicText.parse_orig_album_name(self.title)
        if oname is not None:
            return oname

        jm_log('entity', f'无法提取出原album名字: {self.title}')
        return self.title

    @property
    def authoroname(self):
        """
        authoroname = author + oname

        A name that keeps the key information recognizable at a glance.

        Format: '【author】oname'

        Example:

            Pname: 喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]

            Pauthoroname: 【BLVEFO9】喂我吃吧 老師!

        :return: author name followed by the original name, formatted as
            '【author】oname'
        """
        return f'【{self.author}】{self.oname}'

    @property
    def idoname(self):
        """
        Same idea as ``authoroname``, but built from the id.

        :return: '[id] oname'
        """
        return f'[{self.id}] {self.oname}'

    def __str__(self):
        return f'''{self.__class__.__name__}({self.__alias__()}-{self.id}: "{self.title}")'''

    __repr__ = __str__

    @classmethod
    def __alias__(cls):
        """
        Return the short kind name used in ``__str__``.

        "JmAlbumDetail" -> "album", "JmPhotoDetail" -> "photo".
        """
        # Ask the type probes first: user-defined subclasses may have any
        # name, and slicing the name by character position either raises
        # (no "m" in it) or yields a meaningless alias.
        if cls.is_album():
            return 'album'
        if cls.is_photo():
            return 'photo'

        # Fallback for other subclasses: take the part between the first "m"
        # and the trailing "Detail", tolerating the absence of either.
        cls_name = cls.__name__
        start = cls_name.find("m") + 1
        end = cls_name.rfind("Detail")
        if end == -1:
            end = len(cls_name)
        return cls_name[start:end].lower()

    @classmethod
    def get_dirname(cls, detail: 'DetailEntity', ref: str) -> str:
        """
        Produce the folder name for one level of the directory layout.

        According to the original note this is called by ``DirDule``
        (presumably ``DirRule`` - not visible in this file). Typical flow:

            Atitle -> ref = 'title' -> DetailEntity.get_dirname(album, 'title')

        If an advice function is registered for ``ref`` it is called with
        ``detail`` and its result is returned. The lookup uses
        ``JmModuleConfig.AFIELD_ADVICE`` for albums and
        ``JmModuleConfig.PFIELD_ADVICE`` otherwise. Without one, the result
        is ``getattr(detail, ref)``.

        Users may override this method to customize folder names.

        :param detail: album or photo instance
        :param ref: field name
        :returns: folder name
        """

        advice_func = (JmModuleConfig.AFIELD_ADVICE
                       if isinstance(detail, JmAlbumDetail)
                       else JmModuleConfig.PFIELD_ADVICE
                       ).get(ref, None)

        if advice_func is not None:
            return advice_func(detail)

        return getattr(detail, ref)

    def get_properties_dict(self):
        """
        Collect fields, properties and advice values into one dict.

        Every key is prefixed with a single letter: 'A' for albums and 'P'
        for photos. For any other subclass the third character of the class
        name is used, as before.

        :return: dict mapping prefixed names to values. Instance fields come
            first, then properties found along the MRO (the most-derived
            definition wins), then advice functions, which override any
            earlier value stored under the same key
        """
        import inspect

        # Use the type probes so that subclasses with arbitrary names still
        # get the conventional prefix.
        if self.is_album():
            prefix = 'A'
        elif self.is_photo():
            prefix = 'P'
        else:
            prefix = self.__class__.__name__[2]
        result = {}

        # field
        for k, v in self.__dict__.items():
            result[prefix + k] = v

        # property
        for cls in inspect.getmro(type(self)):
            for name, attr in cls.__dict__.items():
                k = prefix + name
                # `k not in result` keeps the first (most-derived) definition,
                # so base-class placeholders that raise are never evaluated
                # once a subclass overrides them.
                if k not in result and isinstance(attr, property):
                    v = attr.__get__(self, cls)
                    result[k] = v

        # advice
        advice_dict = JmModuleConfig.AFIELD_ADVICE if self.is_album() else JmModuleConfig.PFIELD_ADVICE
        for name, func in advice_dict.items():
            k = prefix + name
            result[k] = func(self)

        return result
192
+
193
+
194
class JmImageDetail(JmBaseEntity, Downloadable):
    """One image of a photo (chapter), with the data needed to download it."""

    def __init__(self,
                 aid,
                 scramble_id,
                 img_url,
                 img_file_name,
                 img_file_suffix,
                 from_photo=None,
                 query_params=None,
                 index=-1,
                 ):
        """
        :param aid: id stored as ``self.aid`` (``of`` passes the photo id)
        :param scramble_id: must be neither ``None`` nor an empty string,
            otherwise ``ExceptionTool.raises`` is called
        :param img_url: URL of the image, without query string
        :param img_file_name: file name without suffix
        :param img_file_suffix: file suffix including the dot, e.g. '.gif'
        :param from_photo: photo this image belongs to, if known
        :param query_params: query string appended by ``download_url``
        :param index: position of the image, starting from 1
        """
        super().__init__()
        if scramble_id is None or (isinstance(scramble_id, str) and scramble_id == ''):
            from .jm_toolkit import ExceptionTool
            ExceptionTool.raises('图片的scramble_id不能为空')

        self.aid: str = str(aid)
        self.scramble_id: str = str(scramble_id)
        self.img_url: str = img_url
        self.img_file_name: str = img_file_name  # without suffix
        self.img_file_suffix: str = img_file_suffix

        self.from_photo: Optional[JmPhotoDetail] = from_photo
        self.query_params: Optional[str] = query_params
        self.index = index  # starts from 1

    @property
    def filename_without_suffix(self):
        """File name without its suffix."""
        return self.img_file_name

    @property
    def filename(self):
        """File name including its suffix."""
        return self.img_file_name + self.img_file_suffix

    @property
    def is_gif(self):
        """``True`` when the suffix is exactly '.gif'."""
        return self.img_file_suffix == '.gif'

    @property
    def download_url(self) -> str:
        """
        URL used to download the image.

        The only difference from ``self.img_url`` is the trailing
        ``?{self.query_params}``, added when ``query_params`` is not None.

        :returns: download URL of the image
        """
        if self.query_params is None:
            return self.img_url

        return f'{self.img_url}?{self.query_params}'

    @classmethod
    def of(cls,
           photo_id: str,
           scramble_id: str,
           data_original: str,
           from_photo=None,
           query_params=None,
           index=-1,
           ) -> 'JmImageDetail':
        """
        Create an image entity from its full URL.

        The file name and suffix are cut out of the last path segment of
        ``data_original``. The instance is built with ``cls``, so calling
        ``of`` on a subclass yields that subclass.

        :param photo_id: id of the owning photo, stored as ``aid``
        :param scramble_id: scramble id forwarded to the constructor
        :param data_original: full image URL
        :param from_photo: owning photo, if known
        :param query_params: query string for ``download_url``
        :param index: position of the image, starting from 1
        """

        # /xxx.yyy
        # ↑   ↑
        # x   y
        x = data_original.rfind('/')
        y = data_original.rfind('.')
        if y <= x:
            # The last '.' is before the last '/' (or absent): the final
            # segment has no suffix, so keep the whole segment as the name.
            y = len(data_original)

        return cls(
            aid=photo_id,
            scramble_id=scramble_id,
            img_url=data_original,
            img_file_name=data_original[x + 1:y],
            img_file_suffix=data_original[y:],
            from_photo=from_photo,
            query_params=query_params,
            index=index,
        )

    @property
    def tag(self) -> str:
        """
        this tag is used to print pretty info when logging

        Format: 'aid/filename [index/total]'. ``total`` is shown as '?' when
        the image is not attached to a photo.
        """
        # from_photo defaults to None, so len() must not be called blindly.
        total = '?' if self.from_photo is None else len(self.from_photo)
        return f'{self.aid}/{self.img_file_name}{self.img_file_suffix} [{self.index}/{total}]'

    @classmethod
    def is_image(cls):
        return True

    def __str__(self):
        return f'''{self.__class__.__name__}(image-[{self.download_url}])'''

    __repr__ = __str__
290
+
291
+
292
class JmPhotoDetail(DetailEntity, Downloadable):
    """A photo (chapter) of an album; indexing it yields its images."""

    def __init__(self,
                 photo_id,
                 name,
                 series_id,
                 sort,
                 tags='',
                 scramble_id='',
                 page_arr=None,
                 data_original_domain=None,
                 data_original_0=None,
                 author=None,
                 from_album=None,
                 ):
        """
        :param photo_id: id of the photo, stored as str
        :param name: title of the photo, stored stripped
        :param series_id: converted to int; 0 marks a single-chapter album
        :param sort: converted to int; ordering number of the photo
        :param tags: raw tag string, comma- or whitespace-separated
        :param scramble_id: stored as str
        :param page_arr: image file names, as a list or a JSON string
        :param data_original_domain: CDN domain of the images
        :param data_original_0: URL of the first image
        :param author: author name, used when no album is attached
        :param from_album: owning album, if known
        """
        super().__init__()
        self.photo_id: str = str(photo_id)
        self.scramble_id: str = str(scramble_id)
        self.name: str = str(name).strip()
        self.sort: int = int(sort)
        self._tags: str = tags
        self._series_id: int = int(series_id)

        self._author: Optional[str] = author
        self.from_album: Optional[JmAlbumDetail] = from_album
        self.index = self.album_index

        # Everything below relates to the image URLs.
        if isinstance(page_arr, str):
            from json import loads as parse_json
            page_arr = parse_json(page_arr)

        # File names (img_name) of all images of this photo.
        self.page_arr: List[str] = page_arr
        # CDN domain of the images.
        self.data_original_domain: Optional[str] = data_original_domain
        # URL of the first image.
        self.data_original_0 = data_original_0

        # 2023-07-14 (translated from the original note)
        # The site added a parameter `v` to image URLs; without it the image
        # is returned empty. About `v`:
        #   1. it appears to be the photo's update timestamp, so every image
        #      uses the same value
        #   2. on the web pages it currently shows up only in the
        #      data-original attribute of image tags on the photo page
        # The idea was to take the query of the first image tag's
        # data-original via get_data_original_query_params(data_original_0)
        # and pass it on to JmImageDetail. That call is disabled; the value
        # is simply None.
        self.data_original_query_params = None

    @property
    def is_single_album(self) -> bool:
        """``True`` when the stored series id is 0."""
        return self._series_id == 0

    @property
    def tags(self) -> List[str]:
        """Tags of the owning album if attached, else the parsed tag string."""
        album = self.from_album
        if album is not None:
            return album.tags

        raw = self._tags
        # A comma marks the html form; otherwise (api form) split on
        # whitespace, which is what a None separator means for str.split.
        separator = ',' if ',' in raw else None
        return raw.split(separator)

    @property
    def indextitle(self):
        """Title prefixed with the chapter number."""
        return f'第{self.album_index}話 {self.name}'

    @property
    def album_id(self) -> str:
        """Own id for a single-chapter album, else the series id as str."""
        if self.is_single_album:
            return self.photo_id
        return str(self._series_id)

    @property
    def album_index(self) -> int:
        """
        Number of this chapter within its album, starting from 1.
        """
        # For a single-chapter album JM reports sort == 2; returning 1
        # matches the meaning of this property better.
        reported_as_second = self.is_single_album and self.sort == 2
        return 1 if reported_as_second else self.sort

    @property
    def author(self) -> str:
        """Author of the album if attached, else own author, else the default."""
        # The owning album takes precedence.
        if self.from_album is not None:
            return self.from_album.author

        own = self._author
        if own is None or own == '':
            # Nothing usable: fall back to the configured default.
            return JmModuleConfig.DEFAULT_AUTHOR

        return own.strip()

    def create_image_detail(self, index) -> JmImageDetail:
        """
        Build the image entity for the 0-based position ``index``.

        :raises IndexError: ``index`` is not below the number of images
        """
        # Validate the argument.
        total = len(self.page_arr)
        if index >= total:
            raise IndexError(f'image index out of range for photo-{self.photo_id}: {index} >= {total}')

        url = self.get_img_data_original(self.page_arr[index])
        image_cls = JmModuleConfig.image_class()

        return image_cls.of(
            self.photo_id,
            self.scramble_id,
            url,
            from_photo=self,
            query_params=self.data_original_query_params,
            # Image numbering is 1-based.
            index=index + 1,
        )

    def get_img_data_original(self, img_name: str) -> str:
        """
        Build the full request URL of an image from its file name.

        Example: img_name = 01111.webp
        Result: https://cdn-msp2.18comic.org/media/photos/147643/01111.webp
        """
        domain = self.data_original_domain

        from .jm_toolkit import ExceptionTool
        ExceptionTool.require_true(domain is not None, f'图片域名为空: {domain}')

        return f'{JmModuleConfig.PROT}{domain}/media/photos/{self.photo_id}/{img_name}'

    # noinspection PyMethodMayBeStatic
    def get_data_original_query_params(self, data_original_0: Optional[str]) -> str:
        """
        Return the query string of ``data_original_0``.

        When the URL is ``None`` or has no '?', ``v=<time_stamp()>`` is
        returned instead.
        """
        if data_original_0 is not None:
            mark = data_original_0.rfind('?')
            if mark != -1:
                return data_original_0[mark + 1:]

        return f'v={time_stamp()}'

    @property
    def id(self):
        return self.photo_id

    # NOTE(review): lru_cache on a method keys on `self` and keeps instances
    # referenced by the cache; left untouched to preserve behaviour.
    @lru_cache(None)
    def getindex(self, index) -> JmImageDetail:
        return self.create_image_detail(index)

    def __getitem__(self, item) -> Union[JmImageDetail, List[JmImageDetail]]:
        return super().__getitem__(item)

    def __len__(self):
        return len(self.page_arr)

    def __iter__(self) -> Generator[JmImageDetail, None, None]:
        return super().__iter__()

    @classmethod
    def is_photo(cls):
        return True
452
+
453
+
454
class JmAlbumDetail(DetailEntity, Downloadable):
    """An album; indexing it yields its photos (chapters)."""

    def __init__(self,
                 album_id,
                 scramble_id,
                 name,
                 episode_list,
                 page_count,
                 pub_date,
                 update_date,
                 likes,
                 views,
                 comment_count,
                 works,
                 actors,
                 authors,
                 tags,
                 related_list=None,
                 ):
        """
        :param album_id: id of the album, stored as str
        :param scramble_id: stored as str
        :param name: title of the album, stored stripped
        :param episode_list: chapter tuples, read as
            (photo_id, photo_index, photo_title, ...); an empty list means
            the album is its own single chapter
        :param page_count: total number of pages, converted to int
        :param pub_date: publication date
        :param update_date: update date
        :param likes: like count as displayed, e.g. '1K'
        :param views: view count as displayed, e.g. '40K'
        :param comment_count: number of comments, converted to int
        :param works: works
        :param actors: characters
        :param authors: authors
        :param tags: tags
        :param related_list: stored as given
        """
        super().__init__()
        self.album_id: str = str(album_id)
        self.scramble_id: str = str(scramble_id)
        self.name: str = str(name).strip()
        self.page_count: int = int(page_count)  # total pages
        self.pub_date: str = pub_date  # publication date
        self.update_date: str = update_date  # update date

        self.likes: str = likes  # e.g. '1K' likes
        self.views: str = views  # e.g. '40K' views
        self.comment_count: int = int(comment_count)  # number of comments
        self.works: List[str] = works  # works
        self.actors: List[str] = actors  # characters
        self.tags: List[str] = tags  # tags
        self.authors: List[str] = authors  # authors

        # Some albums have no chapters; such an album is its own chapter.
        if not episode_list:
            # photo_id, photo_index, photo_title
            episode_list = [(album_id, "1", name)]
        else:
            episode_list = self.distinct_episode(episode_list)

        self.episode_list = episode_list
        self.related_list = related_list

    @property
    def author(self):
        """
        Author of the album.

        An album may carry several author tags; this returns the first one,
        or ``JmModuleConfig.DEFAULT_AUTHOR`` when there is none. The full
        list is available as ``self.authors``.
        """
        if len(self.authors) >= 1:
            return self.authors[0]

        return JmModuleConfig.DEFAULT_AUTHOR

    @property
    def id(self):
        return self.album_id

    @staticmethod
    def distinct_episode(episode_list: list):
        """
        Return the chapters ordered by index, with repeated indexes removed.

        Each episode is read as (photo_id, photo_index, photo_title, ...).
        The input is not modified; an empty input yields an empty list.

        :param episode_list: chapter tuples
        :return: new list sorted by ``int(photo_index)``, keeping the first
            chapter of every run of equal ``photo_index`` values
        """
        # sorted() rather than list.sort(), so the caller's list is untouched.
        ordered = sorted(episode_list, key=lambda e: int(e[1]))
        ret = []

        for episode in ordered:
            # Compare the raw index values, exactly as before.
            if not ret or ret[-1][1] != episode[1]:
                ret.append(episode)

        return ret

    def create_photo_detail(self, index) -> JmPhotoDetail:
        """
        Build the photo entity for the 0-based position ``index``.

        :raises IndexError: ``index`` is not below the number of chapters
        """
        # Validate the argument.
        length = len(self.episode_list)

        if index >= length:
            raise IndexError(f'photo index out of range for album-{self.album_id}: {index} >= {length}')

        # e.g. ('212214', '81', '94 突然打來', '2020-08-29')
        # Only the first three fields are used; slicing tolerates episodes
        # that carry extra fields such as the publication date.
        pid, pindex, pname = self.episode_list[index][:3]

        photo = JmModuleConfig.photo_class()(
            photo_id=pid,
            scramble_id=self.scramble_id,
            name=pname,
            series_id=self.album_id,
            sort=pindex,
            from_album=self,
        )

        return photo

    # NOTE(review): lru_cache on a method keys on `self` and keeps instances
    # referenced by the cache; left as-is to keep the existing cache API.
    @lru_cache(None)
    def getindex(self, item) -> JmPhotoDetail:
        return self.create_photo_detail(item)

    def __getitem__(self, item) -> Union[JmPhotoDetail, List[JmPhotoDetail]]:
        return super().__getitem__(item)

    def __len__(self):
        return len(self.episode_list)

    def __iter__(self) -> Generator[JmPhotoDetail, None, None]:
        return super().__iter__()

    @classmethod
    def is_album(cls):
        return True
567
+
568
+
569
class JmPageContent(JmBaseEntity, IndexedEntity):
    """One page of results: a list of ``(album_id, info)`` pairs plus a total."""

    ContentItem = Tuple[str, Dict[str, Any]]

    def __init__(self, content: List[ContentItem], total: int):
        """
        content:
        [
          album_id, {title, tags, ...}
        ]

        :param content: data of this page
        :param total: total number of results
        """
        self.content = content
        self.total = total

    @property
    def page_count(self) -> int:
        """
        Number of pages: the total divided by the page size, rounded up.
        """
        size = self.page_size
        from math import ceil
        return ceil(int(self.total) / size)

    @property
    def page_size(self) -> int:
        """
        Size of one page. Must be provided by subclasses.
        """
        raise NotImplementedError

    def iter_id(self) -> Generator[str, None, None]:
        """
        Iterate over the album ids.
        """
        for album_id, _info in self.content:
            yield album_id

    def iter_id_title(self) -> Generator[Tuple[str, str], None, None]:
        """
        Iterate over (album_id, album_title) pairs.
        """
        for album_id, info in self.content:
            yield album_id, info['name']

    def iter_id_title_tag(self) -> Generator[Tuple[str, str, List[str]], None, None]:
        """
        Iterate over (album_id, album_title, album_tags) triples.

        An entry without a 'tags' key gets an empty list stored under it.
        """
        for album_id, info in self.content:
            tag_list = info.setdefault('tags', [])
            yield album_id, info['name'], tag_list

    # The methods below provide convenient element access.

    def __len__(self):
        return len(self.content)

    def __iter__(self):
        # Iteration yields (id, title) pairs, whereas indexing returns the
        # raw content entry.
        return self.iter_id_title()

    def __getitem__(self, item) -> Union[ContentItem, List[ContentItem]]:
        return super().__getitem__(item)

    def getindex(self, index: int):
        return self.content[index]

    @classmethod
    def is_page(cls):
        return True
640
+
641
+
642
class JmSearchPage(JmPageContent):
    """A page of search results; can also wrap a single album."""

    @property
    def page_size(self) -> int:
        """Page size for search, taken from ``JmModuleConfig.PAGE_SIZE_SEARCH``."""
        return JmModuleConfig.PAGE_SIZE_SEARCH

    # The members below wrap a single album as a page.

    @property
    def is_single_album(self):
        """``True`` when an ``album`` attribute has been attached to this page."""
        return hasattr(self, 'album')

    @property
    def single_album(self) -> JmAlbumDetail:
        """The attached album; raises ``AttributeError`` when there is none."""
        return getattr(self, 'album')

    @classmethod
    def wrap_single_album(cls, album: JmAlbumDetail) -> 'JmSearchPage':
        """
        Build a one-entry page around ``album`` and attach the album to it.

        The page is created with ``cls``, so calling this on a subclass
        returns an instance of that subclass.

        :param album: album to wrap
        :return: page with total 1 whose only entry holds the album's id,
            name and tags
        """
        page = cls([(
            album.album_id, {
                'name': album.name,
                'tags': album.tags,
            }
        )], 1)
        setattr(page, 'album', album)
        return page
668
+
669
+
670
# Alias: JmCategoryPage is the very same class object as JmSearchPage.
JmCategoryPage = JmSearchPage
671
+
672
+
673
class JmFavoritePage(JmPageContent):
    """One page of a favorites folder, plus the list of all folders."""

    def __init__(self, content, folder_list, total):
        """
        :param content: data of one page of the favorites
        :param folder_list: information about all favorites folders
        :param total: total number of favorites
        """
        super().__init__(content, total)
        self.folder_list = folder_list

    @property
    def page_size(self) -> int:
        """Page size for favorites, from ``JmModuleConfig.PAGE_SIZE_FAVORITE``."""
        return JmModuleConfig.PAGE_SIZE_FAVORITE

    def iter_folder_id_name(self) -> Generator[Tuple[str, str], None, None]:
        """
        Iterate over the user's folders as (folder_id, folder_name) pairs.
        """
        for folder in self.folder_list:
            yield folder['FID'], folder['name']