novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +14 -11
  3. novel_downloader/cli/export.py +19 -19
  4. novel_downloader/cli/ui.py +35 -8
  5. novel_downloader/config/adapter.py +216 -153
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  8. novel_downloader/core/downloaders/__init__.py +2 -0
  9. novel_downloader/core/downloaders/base.py +34 -85
  10. novel_downloader/core/downloaders/common.py +147 -171
  11. novel_downloader/core/downloaders/qianbi.py +30 -64
  12. novel_downloader/core/downloaders/qidian.py +157 -184
  13. novel_downloader/core/downloaders/qqbook.py +292 -0
  14. novel_downloader/core/downloaders/registry.py +2 -2
  15. novel_downloader/core/exporters/__init__.py +2 -0
  16. novel_downloader/core/exporters/base.py +37 -59
  17. novel_downloader/core/exporters/common.py +620 -0
  18. novel_downloader/core/exporters/linovelib.py +47 -0
  19. novel_downloader/core/exporters/qidian.py +41 -12
  20. novel_downloader/core/exporters/qqbook.py +28 -0
  21. novel_downloader/core/exporters/registry.py +2 -2
  22. novel_downloader/core/fetchers/__init__.py +4 -2
  23. novel_downloader/core/fetchers/aaatxt.py +2 -22
  24. novel_downloader/core/fetchers/b520.py +3 -23
  25. novel_downloader/core/fetchers/base.py +80 -105
  26. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  27. novel_downloader/core/fetchers/dxmwx.py +10 -22
  28. novel_downloader/core/fetchers/esjzone.py +6 -29
  29. novel_downloader/core/fetchers/guidaye.py +2 -22
  30. novel_downloader/core/fetchers/hetushu.py +9 -29
  31. novel_downloader/core/fetchers/i25zw.py +2 -16
  32. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  33. novel_downloader/core/fetchers/jpxs123.py +2 -16
  34. novel_downloader/core/fetchers/lewenn.py +2 -22
  35. novel_downloader/core/fetchers/linovelib.py +4 -20
  36. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  37. novel_downloader/core/fetchers/piaotia.py +2 -16
  38. novel_downloader/core/fetchers/qbtr.py +2 -16
  39. novel_downloader/core/fetchers/qianbi.py +1 -20
  40. novel_downloader/core/fetchers/qidian.py +27 -68
  41. novel_downloader/core/fetchers/qqbook.py +177 -0
  42. novel_downloader/core/fetchers/quanben5.py +9 -29
  43. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  44. novel_downloader/core/fetchers/sfacg.py +3 -16
  45. novel_downloader/core/fetchers/shencou.py +2 -16
  46. novel_downloader/core/fetchers/shuhaige.py +2 -22
  47. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  48. novel_downloader/core/fetchers/ttkan.py +3 -14
  49. novel_downloader/core/fetchers/wanbengo.py +2 -22
  50. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  51. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  52. novel_downloader/core/fetchers/xs63b.py +3 -15
  53. novel_downloader/core/fetchers/xshbook.py +2 -22
  54. novel_downloader/core/fetchers/yamibo.py +4 -28
  55. novel_downloader/core/fetchers/yibige.py +13 -26
  56. novel_downloader/core/interfaces/exporter.py +19 -7
  57. novel_downloader/core/interfaces/fetcher.py +23 -49
  58. novel_downloader/core/interfaces/parser.py +2 -2
  59. novel_downloader/core/parsers/__init__.py +4 -2
  60. novel_downloader/core/parsers/b520.py +2 -2
  61. novel_downloader/core/parsers/base.py +5 -39
  62. novel_downloader/core/parsers/esjzone.py +3 -3
  63. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
  64. novel_downloader/core/parsers/qidian.py +717 -0
  65. novel_downloader/core/parsers/qqbook.py +709 -0
  66. novel_downloader/core/parsers/xiguashuwu.py +8 -15
  67. novel_downloader/core/searchers/__init__.py +2 -2
  68. novel_downloader/core/searchers/b520.py +1 -1
  69. novel_downloader/core/searchers/base.py +2 -2
  70. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  71. novel_downloader/locales/en.json +3 -3
  72. novel_downloader/locales/zh.json +3 -3
  73. novel_downloader/models/__init__.py +2 -0
  74. novel_downloader/models/book.py +1 -0
  75. novel_downloader/models/config.py +12 -0
  76. novel_downloader/resources/config/settings.toml +23 -5
  77. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  78. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  79. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  80. novel_downloader/utils/__init__.py +0 -2
  81. novel_downloader/utils/chapter_storage.py +2 -3
  82. novel_downloader/utils/constants.py +7 -3
  83. novel_downloader/utils/cookies.py +32 -17
  84. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  85. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  86. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  87. novel_downloader/utils/epub/__init__.py +2 -3
  88. novel_downloader/utils/epub/builder.py +6 -6
  89. novel_downloader/utils/epub/constants.py +1 -6
  90. novel_downloader/utils/epub/documents.py +7 -7
  91. novel_downloader/utils/epub/models.py +8 -8
  92. novel_downloader/utils/epub/utils.py +10 -10
  93. novel_downloader/utils/file_utils/io.py +48 -73
  94. novel_downloader/utils/file_utils/normalize.py +1 -7
  95. novel_downloader/utils/file_utils/sanitize.py +4 -11
  96. novel_downloader/utils/fontocr/__init__.py +13 -0
  97. novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
  98. novel_downloader/utils/fontocr/loader.py +52 -0
  99. novel_downloader/utils/logger.py +80 -56
  100. novel_downloader/utils/network.py +16 -40
  101. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  102. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  103. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  104. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  105. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  106. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  107. novel_downloader/web/main.py +1 -1
  108. novel_downloader/web/pages/download.py +1 -1
  109. novel_downloader/web/pages/search.py +4 -4
  110. novel_downloader/web/services/task_manager.py +2 -0
  111. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
  112. novel_downloader-2.0.2.dist-info/RECORD +203 -0
  113. novel_downloader/core/exporters/common/__init__.py +0 -11
  114. novel_downloader/core/exporters/common/epub.py +0 -198
  115. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  116. novel_downloader/core/exporters/common/txt.py +0 -146
  117. novel_downloader/core/exporters/epub_util.py +0 -215
  118. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  119. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  120. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  121. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  122. novel_downloader/core/exporters/txt_util.py +0 -67
  123. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  124. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  125. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  126. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  127. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  128. novel_downloader/core/parsers/qidian/main_parser.py +0 -101
  129. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
  130. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  131. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  132. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  133. novel_downloader-2.0.0.dist-info/RECORD +0 -210
  134. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  135. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  136. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  137. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -11,8 +11,7 @@ from typing import Any
11
11
 
12
12
  from novel_downloader.core.fetchers.base import BaseSession
13
13
  from novel_downloader.core.fetchers.registry import register_fetcher
14
- from novel_downloader.models import FetcherConfig, LoginField
15
- from novel_downloader.utils import async_jitter_sleep
14
+ from novel_downloader.models import LoginField
16
15
 
17
16
 
18
17
  @register_fetcher(
@@ -23,6 +22,8 @@ class EsjzoneSession(BaseSession):
23
22
  A session class for interacting with the ESJ Zone (www.esjzone.cc) novel website.
24
23
  """
25
24
 
25
+ site_name: str = "esjzone"
26
+
26
27
  BOOKCASE_URL = "https://www.esjzone.cc/my/favorite"
27
28
  BOOK_INFO_URL = "https://www.esjzone.cc/detail/{book_id}.html"
28
29
  CHAPTER_URL = "https://www.esjzone.cc/forum/{book_id}/{chapter_id}.html"
@@ -30,13 +31,7 @@ class EsjzoneSession(BaseSession):
30
31
  API_LOGIN_URL_1 = "https://www.esjzone.cc/my/login"
31
32
  API_LOGIN_URL_2 = "https://www.esjzone.cc/inc/mem_login.php"
32
33
 
33
- def __init__(
34
- self,
35
- config: FetcherConfig,
36
- cookies: dict[str, str] | None = None,
37
- **kwargs: Any,
38
- ) -> None:
39
- super().__init__("esjzone", config, cookies, **kwargs)
34
+ _TOKEN_RE = re.compile(r"<JinJing>(.*?)</JinJing>")
40
35
 
41
36
  async def login(
42
37
  self,
@@ -68,11 +63,7 @@ class EsjzoneSession(BaseSession):
68
63
  ):
69
64
  self._is_logged_in = True
70
65
  return True
71
- await async_jitter_sleep(
72
- self.backoff_factor,
73
- mul_spread=1.1,
74
- max_sleep=self.backoff_factor + 2,
75
- )
66
+ await self._sleep()
76
67
 
77
68
  self._is_logged_in = False
78
69
  return False
@@ -82,12 +73,6 @@ class EsjzoneSession(BaseSession):
82
73
  book_id: str,
83
74
  **kwargs: Any,
84
75
  ) -> list[str]:
85
- """
86
- Fetch the raw HTML of the book info page asynchronously.
87
-
88
- :param book_id: The book identifier.
89
- :return: The page content as string list.
90
- """
91
76
  url = self.book_info_url(book_id=book_id)
92
77
  return [await self.fetch(url, **kwargs)]
93
78
 
@@ -97,13 +82,6 @@ class EsjzoneSession(BaseSession):
97
82
  chapter_id: str,
98
83
  **kwargs: Any,
99
84
  ) -> list[str]:
100
- """
101
- Fetch the raw HTML of a single chapter asynchronously.
102
-
103
- :param book_id: The book identifier.
104
- :param chapter_id: The chapter identifier.
105
- :return: The page content as string list.
106
- """
107
85
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
108
86
  return [await self.fetch(url, **kwargs)]
109
87
 
@@ -228,8 +206,7 @@ class EsjzoneSession(BaseSession):
228
206
  return not any(kw in resp_text[0] for kw in keywords)
229
207
 
230
208
  def _extract_token(self, text: str) -> str:
231
- match = re.search(r"<JinJing>(.+?)</JinJing>", text)
232
- return match.group(1) if match else ""
209
+ return m.group(1) if (m := self._TOKEN_RE.search(text)) else ""
233
210
 
234
211
  @staticmethod
235
212
  def _filter_cookies(
@@ -9,7 +9,6 @@ from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
- from novel_downloader.models import FetcherConfig
13
12
 
14
13
 
15
14
  @register_fetcher(
@@ -20,28 +19,16 @@ class GuidayeSession(BaseSession):
20
19
  A session class for interacting with the 名著阅读 (b.guidaye.com) novel website.
21
20
  """
22
21
 
22
+ site_name: str = "guidaye"
23
+
23
24
  BOOK_INFO_URL = "https://b.guidaye.com/{book_id}/"
24
25
  CHAPTER_URL = "https://b.guidaye.com/{book_id}/{chapter_id}.html"
25
26
 
26
- def __init__(
27
- self,
28
- config: FetcherConfig,
29
- cookies: dict[str, str] | None = None,
30
- **kwargs: Any,
31
- ) -> None:
32
- super().__init__("guidaye", config, cookies, **kwargs)
33
-
34
27
  async def get_book_info(
35
28
  self,
36
29
  book_id: str,
37
30
  **kwargs: Any,
38
31
  ) -> list[str]:
39
- """
40
- Fetch the raw HTML of the book info page asynchronously.
41
-
42
- :param book_id: The book identifier.
43
- :return: The page content as string list.
44
- """
45
32
  book_id = book_id.replace("-", "/")
46
33
  url = self.book_info_url(book_id=book_id)
47
34
  return [await self.fetch(url, **kwargs)]
@@ -52,13 +39,6 @@ class GuidayeSession(BaseSession):
52
39
  chapter_id: str,
53
40
  **kwargs: Any,
54
41
  ) -> list[str]:
55
- """
56
- Fetch the raw HTML of a single chapter asynchronously.
57
-
58
- :param book_id: The book identifier.
59
- :param chapter_id: The chapter identifier.
60
- :return: The page content as string list.
61
- """
62
42
  book_id = book_id.replace("-", "/")
63
43
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
64
44
  return [await self.fetch(url, **kwargs)]
@@ -9,7 +9,6 @@ from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
- from novel_downloader.models import FetcherConfig
13
12
 
14
13
 
15
14
  @register_fetcher(
@@ -20,34 +19,22 @@ class HetushuSession(BaseSession):
20
19
  A session class for interacting with the 和图书 (www.hetushu.com) novel website.
21
20
  """
22
21
 
22
+ site_name: str = "hetushu"
23
+ BASE_URL_MAP: dict[str, str] = {
24
+ "simplified": "www.hetushu.com",
25
+ "traditional": "www.hetubook.com",
26
+ }
27
+ DEFAULT_BASE_URL: str = "www.hetushu.com"
28
+
23
29
  BOOK_INFO_URL = "https://{base_url}/book/{book_id}/index.html"
24
30
  CHAPTER_URL = "https://{base_url}/book/{book_id}/{chapter_id}.html"
25
31
 
26
- def __init__(
27
- self,
28
- config: FetcherConfig,
29
- cookies: dict[str, str] | None = None,
30
- **kwargs: Any,
31
- ) -> None:
32
- super().__init__("hetushu", config, cookies, **kwargs)
33
- self.base_url = (
34
- "www.hetushu.com"
35
- if config.locale_style == "simplified"
36
- else "www.hetubook.com"
37
- )
38
-
39
32
  async def get_book_info(
40
33
  self,
41
34
  book_id: str,
42
35
  **kwargs: Any,
43
36
  ) -> list[str]:
44
- """
45
- Fetch the raw HTML of the book info page asynchronously.
46
-
47
- :param book_id: The book identifier.
48
- :return: The page content as string list.
49
- """
50
- url = self.book_info_url(base_url=self.base_url, book_id=book_id)
37
+ url = self.book_info_url(base_url=self._base_url, book_id=book_id)
51
38
  return [await self.fetch(url, **kwargs)]
52
39
 
53
40
  async def get_book_chapter(
@@ -56,15 +43,8 @@ class HetushuSession(BaseSession):
56
43
  chapter_id: str,
57
44
  **kwargs: Any,
58
45
  ) -> list[str]:
59
- """
60
- Fetch the raw HTML of a single chapter asynchronously.
61
-
62
- :param book_id: The book identifier.
63
- :param chapter_id: The chapter identifier.
64
- :return: The page content as string list.
65
- """
66
46
  url = self.chapter_url(
67
- base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
47
+ base_url=self._base_url, book_id=book_id, chapter_id=chapter_id
68
48
  )
69
49
  return [await self.fetch(url, **kwargs)]
70
50
 
@@ -10,7 +10,6 @@ from typing import Any
10
10
 
11
11
  from novel_downloader.core.fetchers.base import BaseSession
12
12
  from novel_downloader.core.fetchers.registry import register_fetcher
13
- from novel_downloader.models import FetcherConfig
14
13
 
15
14
 
16
15
  @register_fetcher(
@@ -21,18 +20,12 @@ class I25zwSession(BaseSession):
21
20
  A session class for interacting with the 25中文网 (www.i25zw.com) novel website.
22
21
  """
23
22
 
23
+ site_name: str = "i25zw"
24
+
24
25
  BOOK_INFO_URL = "https://www.i25zw.com/book/{book_id}.html"
25
26
  BOOK_CATALOG_URL = "https://www.i25zw.com/{book_id}/"
26
27
  CHAPTER_URL = "https://www.i25zw.com/{book_id}/{chapter_id}.html"
27
28
 
28
- def __init__(
29
- self,
30
- config: FetcherConfig,
31
- cookies: dict[str, str] | None = None,
32
- **kwargs: Any,
33
- ) -> None:
34
- super().__init__("i25zw", config, cookies, **kwargs)
35
-
36
29
  async def get_book_info(
37
30
  self,
38
31
  book_id: str,
@@ -61,13 +54,6 @@ class I25zwSession(BaseSession):
61
54
  chapter_id: str,
62
55
  **kwargs: Any,
63
56
  ) -> list[str]:
64
- """
65
- Fetch the raw HTML of a single chapter asynchronously.
66
-
67
- :param book_id: The book identifier.
68
- :param chapter_id: The chapter identifier.
69
- :return: The page content as string list.
70
- """
71
57
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
72
58
  return [await self.fetch(url, **kwargs)]
73
59
 
@@ -11,7 +11,6 @@ from typing import Any
11
11
 
12
12
  from novel_downloader.core.fetchers.base import BaseSession
13
13
  from novel_downloader.core.fetchers.registry import register_fetcher
14
- from novel_downloader.models import FetcherConfig
15
14
 
16
15
 
17
16
  @register_fetcher(
@@ -22,19 +21,13 @@ class Ixdzs8Session(BaseSession):
22
21
  A session class for interacting with the 爱下电子书 (ixdzs8.com) novel website.
23
22
  """
24
23
 
24
+ site_name: str = "ixdzs8"
25
+
25
26
  BOOK_INFO_URL = "https://ixdzs8.com/read/{book_id}/"
26
27
  BOOK_CATALOG_URL = "https://ixdzs8.com/novel/clist/"
27
28
  CHAPTER_URL = "https://ixdzs8.com/read/{book_id}/{chapter_id}.html"
28
29
  _TOKEN_PATTERN = re.compile(r'let\s+token\s*=\s*"([^"]+)"')
29
30
 
30
- def __init__(
31
- self,
32
- config: FetcherConfig,
33
- cookies: dict[str, str] | None = None,
34
- **kwargs: Any,
35
- ) -> None:
36
- super().__init__("ixdzs8", config, cookies, **kwargs)
37
-
38
31
  async def get_book_info(
39
32
  self,
40
33
  book_id: str,
@@ -63,13 +56,6 @@ class Ixdzs8Session(BaseSession):
63
56
  chapter_id: str,
64
57
  **kwargs: Any,
65
58
  ) -> list[str]:
66
- """
67
- Fetch the raw HTML of a single chapter asynchronously.
68
-
69
- :param book_id: The book identifier.
70
- :param chapter_id: The chapter identifier.
71
- :return: The page content as string list.
72
- """
73
59
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
74
60
  return [await self.fetch_verified_html(url, **kwargs)]
75
61
 
@@ -11,7 +11,6 @@ from lxml import html
11
11
 
12
12
  from novel_downloader.core.fetchers.base import BaseSession
13
13
  from novel_downloader.core.fetchers.registry import register_fetcher
14
- from novel_downloader.models import FetcherConfig
15
14
 
16
15
 
17
16
  @register_fetcher(
@@ -22,18 +21,12 @@ class Jpxs123Session(BaseSession):
22
21
  A session class for interacting with the 精品小说网 (www.jpxs123.com) novel website.
23
22
  """
24
23
 
24
+ site_name: str = "jpxs123"
25
+
25
26
  BASE_URL = "https://www.jpxs123.com"
26
27
  BOOK_INFO_URL = "https://www.jpxs123.com/{book_id}.html"
27
28
  CHAPTER_URL = "https://www.jpxs123.com/{book_id}/{chapter_id}.html"
28
29
 
29
- def __init__(
30
- self,
31
- config: FetcherConfig,
32
- cookies: dict[str, str] | None = None,
33
- **kwargs: Any,
34
- ) -> None:
35
- super().__init__("jpxs123", config, cookies, **kwargs)
36
-
37
30
  async def get_book_info(
38
31
  self,
39
32
  book_id: str,
@@ -68,13 +61,6 @@ class Jpxs123Session(BaseSession):
68
61
  chapter_id: str,
69
62
  **kwargs: Any,
70
63
  ) -> list[str]:
71
- """
72
- Fetch the raw HTML of a single chapter asynchronously.
73
-
74
- :param book_id: The book identifier.
75
- :param chapter_id: The chapter identifier.
76
- :return: The page content as string list.
77
- """
78
64
  book_id = book_id.replace("-", "/")
79
65
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
80
66
  return [await self.fetch(url, **kwargs)]
@@ -9,7 +9,6 @@ from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
11
  from novel_downloader.core.fetchers.registry import register_fetcher
12
- from novel_downloader.models import FetcherConfig
13
12
 
14
13
 
15
14
  @register_fetcher(
@@ -20,28 +19,16 @@ class LewennSession(BaseSession):
20
19
  A session class for interacting with the 乐文小说网 (www.lewenn.net) novel website.
21
20
  """
22
21
 
22
+ site_name: str = "lewenn"
23
+
23
24
  BOOK_INFO_URL = "https://www.lewenn.net/{book_id}/"
24
25
  CHAPTER_URL = "https://www.lewenn.net/{book_id}/{chapter_id}.html"
25
26
 
26
- def __init__(
27
- self,
28
- config: FetcherConfig,
29
- cookies: dict[str, str] | None = None,
30
- **kwargs: Any,
31
- ) -> None:
32
- super().__init__("lewenn", config, cookies, **kwargs)
33
-
34
27
  async def get_book_info(
35
28
  self,
36
29
  book_id: str,
37
30
  **kwargs: Any,
38
31
  ) -> list[str]:
39
- """
40
- Fetch the raw HTML of the book info page asynchronously.
41
-
42
- :param book_id: The book identifier.
43
- :return: The page content as string list.
44
- """
45
32
  url = self.book_info_url(book_id=book_id)
46
33
  return [await self.fetch(url, **kwargs)]
47
34
 
@@ -51,13 +38,6 @@ class LewennSession(BaseSession):
51
38
  chapter_id: str,
52
39
  **kwargs: Any,
53
40
  ) -> list[str]:
54
- """
55
- Fetch the raw HTML of a single chapter asynchronously.
56
-
57
- :param book_id: The book identifier.
58
- :param chapter_id: The chapter identifier.
59
- :return: The page content as string list.
60
- """
61
41
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
62
42
  return [await self.fetch(url, **kwargs)]
63
43
 
@@ -10,8 +10,6 @@ from typing import Any
10
10
 
11
11
  from novel_downloader.core.fetchers.base import BaseSession
12
12
  from novel_downloader.core.fetchers.registry import register_fetcher
13
- from novel_downloader.models import FetcherConfig
14
- from novel_downloader.utils import async_jitter_sleep
15
13
 
16
14
 
17
15
  @register_fetcher(
@@ -22,6 +20,8 @@ class LinovelibSession(BaseSession):
22
20
  A session class for interacting with 哔哩轻小说 (www.linovelib.com) novel website.
23
21
  """
24
22
 
23
+ site_name: str = "linovelib"
24
+
25
25
  BASE_URL = "https://www.linovelib.com"
26
26
  BOOK_INFO_URL = "https://www.linovelib.com/novel/{book_id}.html"
27
27
  BOOK_CATALOG_UTL = "https://www.linovelib.com/novel/{book_id}/catalog"
@@ -30,14 +30,6 @@ class LinovelibSession(BaseSession):
30
30
 
31
31
  _VOL_ID_PATTERN: re.Pattern[str] = re.compile(r"/novel/\d+/(vol_\d+)\.html")
32
32
 
33
- def __init__(
34
- self,
35
- config: FetcherConfig,
36
- cookies: dict[str, str] | None = None,
37
- **kwargs: Any,
38
- ) -> None:
39
- super().__init__("linovelib", config, cookies, **kwargs)
40
-
41
33
  async def get_book_info(
42
34
  self,
43
35
  book_id: str,
@@ -63,11 +55,7 @@ class LinovelibSession(BaseSession):
63
55
 
64
56
  vol_htmls = []
65
57
  for vol_id in vol_ids:
66
- await async_jitter_sleep(
67
- self.request_interval,
68
- mul_spread=1.1,
69
- max_sleep=self.request_interval + 2,
70
- )
58
+ await self._sleep()
71
59
  html = await self.get_book_volume(book_id, vol_id, **kwargs)
72
60
  if html:
73
61
  vol_htmls.append(html)
@@ -129,11 +117,7 @@ class LinovelibSession(BaseSession):
129
117
 
130
118
  html_pages.append(html)
131
119
  idx += 1
132
- await async_jitter_sleep(
133
- self.request_interval,
134
- mul_spread=1.1,
135
- max_sleep=self.request_interval + 2,
136
- )
120
+ await self._sleep()
137
121
 
138
122
  return html_pages
139
123
 
@@ -1,57 +1,42 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.fetchers.eightnovel
4
- -----------------------------------------
3
+ novel_downloader.core.fetchers.n8novel
4
+ --------------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  import re
9
- from re import Pattern
10
9
  from typing import Any
11
10
 
12
11
  from novel_downloader.core.fetchers.base import BaseSession
13
12
  from novel_downloader.core.fetchers.registry import register_fetcher
14
- from novel_downloader.models import FetcherConfig
15
13
 
16
14
 
17
15
  @register_fetcher(
18
- site_keys=["8novel", "eightnovel"],
16
+ site_keys=["8novel", "n8novel"],
19
17
  )
20
- class EightnovelSession(BaseSession):
18
+ class N8novelSession(BaseSession):
21
19
  """
22
20
  A session class for interacting with the 无限轻小说 (www.8novel.com) novel website.
23
21
  """
24
22
 
23
+ site_name: str = "n8novel"
24
+
25
25
  BOOK_INFO_URL = "https://www.8novel.com/novelbooks/{book_id}/"
26
26
  CHAPTER_URL = "https://article.8novel.com/read/{book_id}/?{chapter_id}"
27
27
  CHAPTER_CONTENT_URL = (
28
28
  "https://article.8novel.com/txt/1/{book_id}/{chapter_id}{seed_segment}.html"
29
29
  )
30
30
 
31
- _SPLIT_STR_PATTERN = re.compile(
32
- r'["\']([^"\']+)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
31
+ _SPLIT_DIGITS_PATTERN = re.compile(
32
+ r'["\'](\d+(?:,\d+)*)["\']\s*\.split\s*\(\s*["\']\s*,\s*["\']\s*\)', re.DOTALL
33
33
  )
34
- _DIGIT_LIST_PATTERN: Pattern[str] = re.compile(r"^\d+(?:,\d+)*$")
35
-
36
- def __init__(
37
- self,
38
- config: FetcherConfig,
39
- cookies: dict[str, str] | None = None,
40
- **kwargs: Any,
41
- ) -> None:
42
- super().__init__("eightnovel", config, cookies, **kwargs)
43
34
 
44
35
  async def get_book_info(
45
36
  self,
46
37
  book_id: str,
47
38
  **kwargs: Any,
48
39
  ) -> list[str]:
49
- """
50
- Fetch the raw HTML of the book info page asynchronously.
51
-
52
- :param book_id: The book identifier.
53
- :return: The page content as string list.
54
- """
55
40
  url = self.book_info_url(book_id=book_id)
56
41
  return [await self.fetch(url, **kwargs)]
57
42
 
@@ -84,12 +69,6 @@ class EightnovelSession(BaseSession):
84
69
 
85
70
  @classmethod
86
71
  def book_info_url(cls, book_id: str) -> str:
87
- """
88
- Construct the URL for fetching a book's info page.
89
-
90
- :param book_id: The identifier of the book.
91
- :return: Fully qualified URL for the book info page.
92
- """
93
72
  return cls.BOOK_INFO_URL.format(book_id=book_id)
94
73
 
95
74
  @classmethod
@@ -110,17 +89,10 @@ class EightnovelSession(BaseSession):
110
89
  of the form "...".split(","), pick the ones that may contain seed,
111
90
  and return the last value.
112
91
  """
113
- split_literals: list[str] = cls._SPLIT_STR_PATTERN.findall(html_str)
114
-
115
- numeric_lists = [
116
- lit for lit in split_literals if cls._DIGIT_LIST_PATTERN.fullmatch(lit)
117
- ]
118
-
119
- if not numeric_lists:
120
- return ""
121
-
122
- last_list = numeric_lists[-1]
123
- return last_list.split(",")[-1]
92
+ matches: list[str] = cls._SPLIT_DIGITS_PATTERN.findall(html_str)
93
+ if not matches:
94
+ raise ValueError("No digit lists found in HTML.")
95
+ return matches[-1].split(",")[-1]
124
96
 
125
97
  @classmethod
126
98
  def _build_chapter_content_url(
@@ -10,7 +10,6 @@ from typing import Any
10
10
 
11
11
  from novel_downloader.core.fetchers.base import BaseSession
12
12
  from novel_downloader.core.fetchers.registry import register_fetcher
13
- from novel_downloader.models import FetcherConfig
14
13
 
15
14
 
16
15
  @register_fetcher(
@@ -21,18 +20,12 @@ class PiaotiaSession(BaseSession):
21
20
  A session class for interacting with the 飘天文学网 (www.piaotia.com) novel website.
22
21
  """
23
22
 
23
+ site_name: str = "piaotia"
24
+
24
25
  BOOK_INFO_URL = "https://www.piaotia.com/bookinfo/{book_id}.html"
25
26
  BOOK_CATALOG_URL = "https://www.piaotia.com/html/{book_id}/index.html"
26
27
  CHAPTER_URL = "https://www.piaotia.com/html/{book_id}/{chapter_id}.html"
27
28
 
28
- def __init__(
29
- self,
30
- config: FetcherConfig,
31
- cookies: dict[str, str] | None = None,
32
- **kwargs: Any,
33
- ) -> None:
34
- super().__init__("piaotia", config, cookies, **kwargs)
35
-
36
29
  async def get_book_info(
37
30
  self,
38
31
  book_id: str,
@@ -62,13 +55,6 @@ class PiaotiaSession(BaseSession):
62
55
  chapter_id: str,
63
56
  **kwargs: Any,
64
57
  ) -> list[str]:
65
- """
66
- Fetch the raw HTML of a single chapter asynchronously.
67
-
68
- :param book_id: The book identifier.
69
- :param chapter_id: The chapter identifier.
70
- :return: The page content as string list.
71
- """
72
58
  book_id = book_id.replace("-", "/")
73
59
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
74
60
  return [await self.fetch(url, **kwargs)]
@@ -11,7 +11,6 @@ from lxml import html
11
11
 
12
12
  from novel_downloader.core.fetchers.base import BaseSession
13
13
  from novel_downloader.core.fetchers.registry import register_fetcher
14
- from novel_downloader.models import FetcherConfig
15
14
 
16
15
 
17
16
  @register_fetcher(
@@ -22,18 +21,12 @@ class QbtrSession(BaseSession):
22
21
  A session class for interacting with the 全本同人小说 (www.qbtr.cc) novel website.
23
22
  """
24
23
 
24
+ site_name: str = "qbtr"
25
+
25
26
  BASE_URL = "https://www.qbtr.cc"
26
27
  BOOK_INFO_URL = "https://www.qbtr.cc/{book_id}.html"
27
28
  CHAPTER_URL = "https://www.qbtr.cc/{book_id}/{chapter_id}.html"
28
29
 
29
- def __init__(
30
- self,
31
- config: FetcherConfig,
32
- cookies: dict[str, str] | None = None,
33
- **kwargs: Any,
34
- ) -> None:
35
- super().__init__("qbtr", config, cookies, **kwargs)
36
-
37
30
  async def get_book_info(
38
31
  self,
39
32
  book_id: str,
@@ -68,13 +61,6 @@ class QbtrSession(BaseSession):
68
61
  chapter_id: str,
69
62
  **kwargs: Any,
70
63
  ) -> list[str]:
71
- """
72
- Fetch the raw HTML of a single chapter asynchronously.
73
-
74
- :param book_id: The book identifier.
75
- :param chapter_id: The chapter identifier.
76
- :return: The page content as string list.
77
- """
78
64
  book_id = book_id.replace("-", "/")
79
65
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
80
66
  return [await self.fetch(url, **kwargs)]
@@ -10,7 +10,6 @@ from typing import Any
10
10
 
11
11
  from novel_downloader.core.fetchers.base import BaseSession
12
12
  from novel_downloader.core.fetchers.registry import register_fetcher
13
- from novel_downloader.models import FetcherConfig
14
13
 
15
14
 
16
15
  @register_fetcher(
@@ -21,23 +20,12 @@ class QianbiSession(BaseSession):
21
20
  A session class for interacting with the 铅笔小说 (www.23qb.com) novel website.
22
21
  """
23
22
 
24
- BASE_URLS = [
25
- "www.23qb.com",
26
- "www.23qb.net",
27
- ]
23
+ site_name: str = "qianbi"
28
24
 
29
25
  BOOK_INFO_URL = "https://www.23qb.com/book/{book_id}/"
30
26
  BOOK_CATALOG_URL = "https://www.23qb.com/book/{book_id}/catalog"
31
27
  CHAPTER_URL = "https://www.23qb.com/book/{book_id}/{chapter_id}.html"
32
28
 
33
- def __init__(
34
- self,
35
- config: FetcherConfig,
36
- cookies: dict[str, str] | None = None,
37
- **kwargs: Any,
38
- ) -> None:
39
- super().__init__("qianbi", config, cookies, **kwargs)
40
-
41
29
  async def get_book_info(
42
30
  self,
43
31
  book_id: str,
@@ -66,13 +54,6 @@ class QianbiSession(BaseSession):
66
54
  chapter_id: str,
67
55
  **kwargs: Any,
68
56
  ) -> list[str]:
69
- """
70
- Fetch the raw HTML of a single chapter asynchronously.
71
-
72
- :param book_id: The book identifier.
73
- :param chapter_id: The chapter identifier.
74
- :return: The page content as string list.
75
- """
76
57
  url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
77
58
  return [await self.fetch(url, **kwargs)]
78
59