jmcomic 2.2.3__tar.gz → 2.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: jmcomic
3
- Version: 2.2.3
3
+ Version: 2.2.4
4
4
  Summary: Python API For JMComic (禁漫天堂)
5
5
  Home-page: https://github.com/hect0x7/JMComic-Crawler-Python
6
6
  Author: hect0x7
@@ -2,7 +2,7 @@
2
2
  # 被依赖方 <--- 使用方
3
3
  # config <--- entity <--- toolkit <--- client <--- option <--- downloader
4
4
 
5
- __version__ = '2.2.3'
5
+ __version__ = '2.2.4'
6
6
 
7
7
  from .api import *
8
8
  from .jm_plugin import *
@@ -57,7 +57,7 @@ class AbstractJmClient(
57
57
  api_path=url,
58
58
  domain=self.domain_list[domain_index],
59
59
  )
60
- jm_debug(self.debug_topic_request(), url)
60
+ jm_debug(self.debug_topic_request(), self.decode(url))
61
61
  else:
62
62
  # 图片url
63
63
  pass
@@ -124,7 +124,7 @@ class AbstractJmClient(
124
124
  for func in {
125
125
  'get_photo_detail',
126
126
  'get_album_detail',
127
- 'search_album',
127
+ 'search',
128
128
  }:
129
129
  wrap_func_cache(func, func + '.cache.dict')
130
130
 
@@ -151,6 +151,23 @@ class AbstractJmClient(
151
151
  jm_debug('req.fallback', msg)
152
152
  raise JmModuleConfig.exception(msg)
153
153
 
154
+ # noinspection PyMethodMayBeStatic
155
+ def append_params_to_url(self, url, params):
156
+ from urllib.parse import urlencode
157
+
158
+ # 将参数字典编码为查询字符串
159
+ query_string = urlencode(params)
160
+ url = f"{url}?{query_string}"
161
+ return url
162
+
163
+ # noinspection PyMethodMayBeStatic
164
+ def decode(self, url: str):
165
+ if not JmModuleConfig.decode_url_when_debug or '/search/' not in url:
166
+ return url
167
+
168
+ from urllib.parse import unquote
169
+ return unquote(url.replace('+', ' '))
170
+
154
171
 
155
172
  # 基于网页实现的JmClient
156
173
  class JmHtmlClient(AbstractJmClient):
@@ -182,14 +199,17 @@ class JmHtmlClient(AbstractJmClient):
182
199
 
183
200
  return photo
184
201
 
185
- def search_album(self, search_query, main_tag=0, page=1) -> JmSearchPage:
202
+ def search(self, search_query, page, main_tag) -> JmSearchPage:
186
203
  params = {
187
204
  'main_tag': main_tag,
188
205
  'search_query': search_query,
189
206
  'page': page,
190
207
  }
191
208
 
192
- resp = self.get_jm_html('/search/photos', params=params, allow_redirects=True)
209
+ resp = self.get_jm_html(
210
+ self.append_params_to_url('/search/photos', params),
211
+ allow_redirects=True,
212
+ )
193
213
 
194
214
  # 检查是否发生了重定向
195
215
  # 因为如果搜索的是禁漫车号,会直接跳转到本子详情页面
@@ -371,7 +391,7 @@ class JmApiClient(AbstractJmClient):
371
391
  client_key = 'api'
372
392
  API_SEARCH = '/search'
373
393
 
374
- def search_album(self, search_query, main_tag=0, page=1) -> JmApiResp:
394
+ def search(self, search_query, main_tag=0, page=1) -> JmApiResp:
375
395
  """
376
396
  model_data: {
377
397
  "search_query": "MANA",
@@ -151,9 +151,6 @@ class JmDetailClient:
151
151
  def get_photo_detail(self, photo_id, fetch_album=True) -> JmPhotoDetail:
152
152
  raise NotImplementedError
153
153
 
154
- def search_album(self, search_query: str, main_tag: int = 0, page: int = 1) -> JmSearchPage:
155
- raise NotImplementedError
156
-
157
154
  def of_api_url(self, api_path, domain):
158
155
  raise NotImplementedError
159
156
 
@@ -272,11 +269,65 @@ class JmImageClient:
272
269
  return data_original.endswith('.gif')
273
270
 
274
271
 
272
+ class JmSearchAlbumClient:
273
+ """
274
+ 搜尋的最佳姿勢?
275
+ 【包含搜尋】
276
+ 搜尋[+]全彩[空格][+]人妻,僅顯示全彩且是人妻的本本
277
+ 範例:+全彩 +人妻
278
+
279
+ 【排除搜尋】
280
+ 搜尋全彩[空格][-]人妻,顯示全彩並排除人妻的本本
281
+ 範例:全彩 -人妻
282
+
283
+ 【我都要搜尋】
284
+ 搜尋全彩[空格]人妻,會顯示所有包含全彩及人妻的本本
285
+ 範例:全彩 人妻
286
+ """
287
+
288
+ def search(self, search_query: str, page: int, main_tag: int) -> JmSearchPage:
289
+ """
290
+ 搜索【成人A漫】
291
+ """
292
+ raise NotImplementedError
293
+
294
+ def search_site(self, search_query: str, page: int = 1) -> JmSearchPage:
295
+ """
296
+ 对应禁漫的站内搜索
297
+ """
298
+ return self.search(search_query, page, 0)
299
+
300
+ def search_work(self, search_query: str, page: int = 1) -> JmSearchPage:
301
+ """
302
+ 搜索album的作品 work
303
+ """
304
+ return self.search(search_query, page, 1)
305
+
306
+ def search_author(self, search_query: str, page: int = 1) -> JmSearchPage:
307
+ """
308
+ 搜索album的作者 author
309
+ """
310
+ return self.search(search_query, page, 2)
311
+
312
+ def search_tag(self, search_query: str, page: int = 1) -> JmSearchPage:
313
+ """
314
+ 搜索album的标签 tag
315
+ """
316
+ return self.search(search_query, page, 3)
317
+
318
+ def search_actor(self, search_query: str, page: int = 1) -> JmSearchPage:
319
+ """
320
+ 搜索album的登场角色 actor
321
+ """
322
+ return self.search(search_query, page, 4)
323
+
324
+
275
325
  # noinspection PyAbstractClass
276
326
  class JmcomicClient(
277
327
  JmImageClient,
278
328
  JmDetailClient,
279
329
  JmUserClient,
330
+ JmSearchAlbumClient,
280
331
  Postman,
281
332
  ):
282
333
  def get_jmcomic_url(self):
@@ -77,6 +77,8 @@ class JmModuleConfig:
77
77
 
78
78
  # debug开关标记
79
79
  enable_jm_debug = True
80
+ # debug时解码url
81
+ decode_url_when_debug = True
80
82
 
81
83
  # 插件注册表
82
84
  plugin_registry = {}
@@ -23,7 +23,7 @@ class DownloadCallback:
23
23
  f'章节数: [{len(album)}], '
24
24
  f'总页数: [{album.page_count}], '
25
25
  f'标题: [{album.title}], '
26
- f'关键词: [{album.keywords}]'
26
+ f'关键词: [{album.tag_list}]'
27
27
  )
28
28
 
29
29
  def after_album(self, album: JmAlbumDetail):
@@ -198,9 +198,9 @@ class JmPhotoDetail(DetailEntity):
198
198
  return self._series_id == 0
199
199
 
200
200
  @property
201
- def keywords(self) -> List[str]:
201
+ def tags(self) -> List[str]:
202
202
  if self.from_album is not None:
203
- return self.from_album.keywords
203
+ return self.from_album.tag_list
204
204
 
205
205
  return self._keywords.split(',')
206
206
 
@@ -303,19 +303,30 @@ class JmAlbumDetail(DetailEntity):
303
303
  episode_list,
304
304
  page_count,
305
305
  author_list,
306
- keywords_list,
306
+ tag_list,
307
307
  pub_date,
308
308
  update_date,
309
+ likes,
310
+ views,
311
+ comment_count,
312
+ work_list,
313
+ actor_list,
309
314
  ):
310
315
  self.album_id: str = album_id
311
316
  self.scramble_id: str = scramble_id
312
317
  self.title: str = title
313
- self.page_count = int(page_count)
314
- self._author_list: List[str] = author_list
315
- self._keywords_list: List[str] = keywords_list
318
+ self.page_count = int(page_count) # 总页数
316
319
  self.pub_date: str = pub_date # 发布日期
317
320
  self.update_date: str = update_date # 更新日期
318
321
 
322
+ self.likes: str = likes # [1K] 點擊喜歡
323
+ self.views: str = views # [40K] 次觀看
324
+ self.comment_count = int(comment_count)
325
+ self.work_list: List[str] = work_list # 作品
326
+ self.actor_list: List[str] = actor_list # 登場人物
327
+ self.tag_list: List[str] = tag_list # 標籤
328
+ self.author_list: List[str] = author_list # 作者
329
+
319
330
  # 有的 album 没有章节,则自成一章。
320
331
  if len(episode_list) == 0:
321
332
  # photo_id, photo_index, photo_title, photo_pub_date
@@ -325,6 +336,38 @@ class JmAlbumDetail(DetailEntity):
325
336
 
326
337
  self.episode_list: List[Tuple] = episode_list
327
338
 
339
+ @property
340
+ def author(self):
341
+ """
342
+ 作者
343
+ 禁漫本子的作者标签可能有多个,全部作者请使用字段 self.author_list
344
+ """
345
+ if len(self.author_list) >= 1:
346
+ return self.author_list[0]
347
+
348
+ return JmModuleConfig.default_author
349
+
350
+ @property
351
+ def id(self):
352
+ return self.album_id
353
+
354
+ @staticmethod
355
+ def distinct_episode(episode_list):
356
+ ret = []
357
+
358
+ def not_exist(episode):
359
+ photo_id = episode[0]
360
+ for each in ret:
361
+ if each[0] == photo_id:
362
+ return False
363
+ return True
364
+
365
+ for episode in episode_list:
366
+ if not_exist(episode):
367
+ ret.append(episode)
368
+
369
+ return ret
370
+
328
371
  def create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]:
329
372
  # 校验参数
330
373
  length = len(self.episode_list)
@@ -351,37 +394,6 @@ class JmAlbumDetail(DetailEntity):
351
394
 
352
395
  return photo, episode_info
353
396
 
354
- @property
355
- def author(self):
356
- if len(self._author_list) >= 1:
357
- return self._author_list[0]
358
- return JmModuleConfig.default_author
359
-
360
- @property
361
- def keywords(self) -> List[str]:
362
- return self._keywords_list
363
-
364
- @property
365
- def id(self):
366
- return self.album_id
367
-
368
- @staticmethod
369
- def distinct_episode(episode_list):
370
- ret = []
371
-
372
- def not_exist(episode):
373
- photo_id = episode[0]
374
- for each in ret:
375
- if each[0] == photo_id:
376
- return False
377
- return True
378
-
379
- for episode in episode_list:
380
- if not_exist(episode):
381
- ret.append(episode)
382
-
383
- return ret
384
-
385
397
  def getindex(self, item) -> JmPhotoDetail:
386
398
  return self.create_photo_detail(item)[0]
387
399
 
@@ -421,7 +433,7 @@ class JmSearchPage(JmBaseEntity, IterableEntity):
421
433
  album.title,
422
434
  None,
423
435
  None,
424
- album.keywords,
436
+ album.tag_list,
425
437
  )
426
438
  obj = JmSearchPage([album_info])
427
439
 
@@ -28,16 +28,32 @@ class JmcomicText:
28
28
  pattern_html_album_page_count = compile('<span class="pagecount">.*?:(\d+)</span>')
29
29
  pattern_html_album_pub_date = compile('>上架日期 : (.*?)</span>')
30
30
  pattern_html_album_update_date = compile('>更新日期 : (.*?)</span>')
31
- pattern_html_album_keywords_list = [
31
+ # 作品
32
+ pattern_html_album_work_list = [
33
+ compile('<span itemprop="author" data-type="works">([\s\S]*?)</span>'),
34
+ compile('<a[\s\S]*?>(.*?)</a>')
35
+ ]
36
+ # 登場人物
37
+ pattern_html_album_actor_list = [
38
+ compile('<span itemprop="author" data-type="actor">([\s\S]*?)</span>'),
39
+ compile('<a[\s\S]*?>(.*?)</a>')
40
+ ]
41
+ # 标签
42
+ pattern_html_album_tag_list = [
32
43
  compile('<span itemprop="genre" data-type="tags">([\s\S]*?)</span>'),
33
44
  compile('<a[\s\S]*?>(.*?)</a>')
34
45
  ]
35
-
36
- # album 作者
46
+ # 作者
37
47
  pattern_html_album_author_list = [
38
48
  compile('作者: *<span itemprop="author" data-type="author">([\s\S]*?)</span>'),
39
49
  compile("<a[\s\S]*?>(.*?)</a>"),
40
50
  ]
51
+ # 點擊喜歡
52
+ pattern_html_album_likes = compile('<span id="albim_likes_\d+">(.*?)</span>')
53
+ # 觀看
54
+ pattern_html_album_views = compile('<span>(.*?)</span> 次觀看')
55
+ # 評論
56
+ pattern_html_album_comment_count = compile('<div class="badge" id="total_video_comments">(\d+)</div></a></li>')
41
57
 
42
58
  @classmethod
43
59
  def parse_to_jm_domain(cls, text: str):
@@ -142,7 +158,7 @@ class JmcomicText:
142
158
 
143
159
  if field_value is None:
144
160
  JmModuleConfig.raise_regex_error_executor(
145
- f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern_value.pattern}]",
161
+ f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern_value}]",
146
162
  html,
147
163
  field_name,
148
164
  pattern_value
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: jmcomic
3
- Version: 2.2.3
3
+ Version: 2.2.4
4
4
  Summary: Python API For JMComic (禁漫天堂)
5
5
  Home-page: https://github.com/hect0x7/JMComic-Crawler-Python
6
6
  Author: hect0x7
File without changes
File without changes
File without changes
File without changes
File without changes