novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (213)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/clean.py +97 -78
  3. novel_downloader/cli/config.py +177 -0
  4. novel_downloader/cli/download.py +132 -87
  5. novel_downloader/cli/export.py +77 -0
  6. novel_downloader/cli/main.py +21 -28
  7. novel_downloader/config/__init__.py +1 -25
  8. novel_downloader/config/adapter.py +32 -31
  9. novel_downloader/config/loader.py +3 -3
  10. novel_downloader/config/site_rules.py +1 -2
  11. novel_downloader/core/__init__.py +3 -6
  12. novel_downloader/core/downloaders/__init__.py +10 -13
  13. novel_downloader/core/downloaders/base.py +233 -0
  14. novel_downloader/core/downloaders/biquge.py +27 -0
  15. novel_downloader/core/downloaders/common.py +414 -0
  16. novel_downloader/core/downloaders/esjzone.py +27 -0
  17. novel_downloader/core/downloaders/linovelib.py +27 -0
  18. novel_downloader/core/downloaders/qianbi.py +27 -0
  19. novel_downloader/core/downloaders/qidian.py +352 -0
  20. novel_downloader/core/downloaders/sfacg.py +27 -0
  21. novel_downloader/core/downloaders/yamibo.py +27 -0
  22. novel_downloader/core/exporters/__init__.py +37 -0
  23. novel_downloader/core/{savers → exporters}/base.py +73 -44
  24. novel_downloader/core/exporters/biquge.py +25 -0
  25. novel_downloader/core/exporters/common/__init__.py +12 -0
  26. novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
  27. novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
  28. novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
  29. novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
  30. novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
  31. novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
  32. novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
  33. novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
  34. novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
  35. novel_downloader/core/exporters/esjzone.py +25 -0
  36. novel_downloader/core/exporters/linovelib/__init__.py +10 -0
  37. novel_downloader/core/exporters/linovelib/epub.py +449 -0
  38. novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
  39. novel_downloader/core/exporters/linovelib/txt.py +129 -0
  40. novel_downloader/core/exporters/qianbi.py +25 -0
  41. novel_downloader/core/{savers → exporters}/qidian.py +8 -8
  42. novel_downloader/core/exporters/sfacg.py +25 -0
  43. novel_downloader/core/exporters/yamibo.py +25 -0
  44. novel_downloader/core/factory/__init__.py +5 -17
  45. novel_downloader/core/factory/downloader.py +24 -126
  46. novel_downloader/core/factory/exporter.py +58 -0
  47. novel_downloader/core/factory/fetcher.py +96 -0
  48. novel_downloader/core/factory/parser.py +17 -12
  49. novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
  50. novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
  51. novel_downloader/core/fetchers/base/browser.py +383 -0
  52. novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
  53. novel_downloader/core/fetchers/base/session.py +419 -0
  54. novel_downloader/core/fetchers/biquge/__init__.py +14 -0
  55. novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
  56. novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
  57. novel_downloader/core/fetchers/common/__init__.py +14 -0
  58. novel_downloader/core/fetchers/common/browser.py +79 -0
  59. novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
  60. novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
  61. novel_downloader/core/fetchers/esjzone/browser.py +202 -0
  62. novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
  63. novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
  64. novel_downloader/core/fetchers/linovelib/browser.py +178 -0
  65. novel_downloader/core/fetchers/linovelib/session.py +178 -0
  66. novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
  67. novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
  68. novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
  69. novel_downloader/core/fetchers/qidian/__init__.py +14 -0
  70. novel_downloader/core/fetchers/qidian/browser.py +266 -0
  71. novel_downloader/core/fetchers/qidian/session.py +326 -0
  72. novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
  73. novel_downloader/core/fetchers/sfacg/browser.py +189 -0
  74. novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
  75. novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
  76. novel_downloader/core/fetchers/yamibo/browser.py +229 -0
  77. novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
  78. novel_downloader/core/interfaces/__init__.py +8 -12
  79. novel_downloader/core/interfaces/downloader.py +54 -0
  80. novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
  81. novel_downloader/core/interfaces/fetcher.py +162 -0
  82. novel_downloader/core/interfaces/parser.py +6 -7
  83. novel_downloader/core/parsers/__init__.py +5 -6
  84. novel_downloader/core/parsers/base.py +9 -13
  85. novel_downloader/core/parsers/biquge/main_parser.py +12 -13
  86. novel_downloader/core/parsers/common/helper.py +3 -3
  87. novel_downloader/core/parsers/common/main_parser.py +39 -34
  88. novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
  89. novel_downloader/core/parsers/linovelib/__init__.py +10 -0
  90. novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
  91. novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
  92. novel_downloader/core/parsers/qidian/__init__.py +2 -11
  93. novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
  94. novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
  95. novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
  96. novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
  97. novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
  98. novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
  99. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
  100. novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
  101. novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
  102. novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
  103. novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
  104. novel_downloader/locales/en.json +18 -2
  105. novel_downloader/locales/zh.json +18 -2
  106. novel_downloader/models/__init__.py +64 -0
  107. novel_downloader/models/browser.py +21 -0
  108. novel_downloader/models/chapter.py +25 -0
  109. novel_downloader/models/config.py +100 -0
  110. novel_downloader/models/login.py +20 -0
  111. novel_downloader/models/site_rules.py +99 -0
  112. novel_downloader/models/tasks.py +33 -0
  113. novel_downloader/models/types.py +15 -0
  114. novel_downloader/resources/config/settings.toml +31 -25
  115. novel_downloader/resources/json/linovelib_font_map.json +3573 -0
  116. novel_downloader/tui/__init__.py +7 -0
  117. novel_downloader/tui/app.py +32 -0
  118. novel_downloader/tui/main.py +17 -0
  119. novel_downloader/tui/screens/__init__.py +14 -0
  120. novel_downloader/tui/screens/home.py +191 -0
  121. novel_downloader/tui/screens/login.py +74 -0
  122. novel_downloader/tui/styles/home_layout.tcss +79 -0
  123. novel_downloader/tui/widgets/richlog_handler.py +24 -0
  124. novel_downloader/utils/__init__.py +6 -0
  125. novel_downloader/utils/chapter_storage.py +25 -38
  126. novel_downloader/utils/constants.py +15 -5
  127. novel_downloader/utils/cookies.py +66 -0
  128. novel_downloader/utils/crypto_utils.py +1 -74
  129. novel_downloader/utils/file_utils/io.py +1 -1
  130. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  131. novel_downloader/utils/fontocr/ocr_v2.py +2 -2
  132. novel_downloader/utils/hash_store.py +10 -18
  133. novel_downloader/utils/hash_utils.py +3 -2
  134. novel_downloader/utils/logger.py +2 -3
  135. novel_downloader/utils/network.py +53 -39
  136. novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
  137. novel_downloader/utils/text_utils/font_mapping.py +1 -1
  138. novel_downloader/utils/text_utils/text_cleaning.py +1 -1
  139. novel_downloader/utils/time_utils/datetime_utils.py +3 -3
  140. novel_downloader/utils/time_utils/sleep_utils.py +3 -3
  141. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
  142. novel_downloader-1.4.0.dist-info/RECORD +170 -0
  143. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
  144. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
  145. novel_downloader/cli/interactive.py +0 -66
  146. novel_downloader/cli/settings.py +0 -177
  147. novel_downloader/config/models.py +0 -187
  148. novel_downloader/core/downloaders/base/__init__.py +0 -14
  149. novel_downloader/core/downloaders/base/base_async.py +0 -153
  150. novel_downloader/core/downloaders/base/base_sync.py +0 -208
  151. novel_downloader/core/downloaders/biquge/__init__.py +0 -14
  152. novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
  153. novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
  154. novel_downloader/core/downloaders/common/__init__.py +0 -14
  155. novel_downloader/core/downloaders/common/common_async.py +0 -218
  156. novel_downloader/core/downloaders/common/common_sync.py +0 -210
  157. novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
  158. novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
  159. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
  160. novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
  161. novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
  162. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
  163. novel_downloader/core/downloaders/qidian/__init__.py +0 -10
  164. novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
  165. novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
  166. novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
  167. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
  168. novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
  169. novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
  170. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
  171. novel_downloader/core/factory/requester.py +0 -144
  172. novel_downloader/core/factory/saver.py +0 -56
  173. novel_downloader/core/interfaces/async_downloader.py +0 -36
  174. novel_downloader/core/interfaces/async_requester.py +0 -84
  175. novel_downloader/core/interfaces/sync_downloader.py +0 -36
  176. novel_downloader/core/interfaces/sync_requester.py +0 -82
  177. novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
  178. novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
  179. novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
  180. novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
  181. novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
  182. novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
  183. novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
  184. novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
  185. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
  186. novel_downloader/core/requesters/base/async_session.py +0 -410
  187. novel_downloader/core/requesters/base/browser.py +0 -337
  188. novel_downloader/core/requesters/base/session.py +0 -378
  189. novel_downloader/core/requesters/biquge/__init__.py +0 -14
  190. novel_downloader/core/requesters/common/__init__.py +0 -17
  191. novel_downloader/core/requesters/common/session.py +0 -113
  192. novel_downloader/core/requesters/esjzone/__init__.py +0 -13
  193. novel_downloader/core/requesters/esjzone/session.py +0 -235
  194. novel_downloader/core/requesters/qianbi/__init__.py +0 -13
  195. novel_downloader/core/requesters/qidian/__init__.py +0 -21
  196. novel_downloader/core/requesters/qidian/broswer.py +0 -307
  197. novel_downloader/core/requesters/qidian/session.py +0 -290
  198. novel_downloader/core/requesters/sfacg/__init__.py +0 -13
  199. novel_downloader/core/requesters/sfacg/session.py +0 -242
  200. novel_downloader/core/requesters/yamibo/__init__.py +0 -13
  201. novel_downloader/core/requesters/yamibo/session.py +0 -237
  202. novel_downloader/core/savers/__init__.py +0 -34
  203. novel_downloader/core/savers/biquge.py +0 -25
  204. novel_downloader/core/savers/common/__init__.py +0 -12
  205. novel_downloader/core/savers/epub_utils/__init__.py +0 -26
  206. novel_downloader/core/savers/esjzone.py +0 -25
  207. novel_downloader/core/savers/qianbi.py +0 -25
  208. novel_downloader/core/savers/sfacg.py +0 -25
  209. novel_downloader/core/savers/yamibo.py +0 -25
  210. novel_downloader/resources/config/rules.toml +0 -196
  211. novel_downloader-1.3.2.dist-info/RECORD +0 -165
  212. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
  213. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,218 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.common.common_async
4
- -----------------------------------------------------
5
-
6
- """
7
-
8
- import asyncio
9
- import json
10
- import logging
11
- from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
12
- from typing import Any
13
-
14
- from novel_downloader.config import DownloaderConfig
15
- from novel_downloader.core.downloaders.base import BaseAsyncDownloader
16
- from novel_downloader.core.interfaces import (
17
- AsyncRequesterProtocol,
18
- ParserProtocol,
19
- SaverProtocol,
20
- )
21
- from novel_downloader.utils.chapter_storage import ChapterDict, ChapterStorage
22
- from novel_downloader.utils.file_utils import save_as_json, save_as_txt
23
- from novel_downloader.utils.network import download_image_as_bytes
24
- from novel_downloader.utils.time_utils import calculate_time_difference
25
-
26
- logger = logging.getLogger(__name__)
27
-
28
-
29
- class CommonAsyncDownloader(BaseAsyncDownloader):
30
- """
31
- Specialized Async downloader for common novels.
32
- """
33
-
34
- def __init__(
35
- self,
36
- requester: AsyncRequesterProtocol,
37
- parser: ParserProtocol,
38
- saver: SaverProtocol,
39
- config: DownloaderConfig,
40
- site: str,
41
- ):
42
- """ """
43
- super().__init__(requester, parser, saver, config, site)
44
- self._is_logged_in = False
45
-
46
- async def prepare(self) -> None:
47
- """
48
- Perform login
49
- """
50
- if self.login_required and not self._is_logged_in:
51
- success = await self.requester.login()
52
- if not success:
53
- raise RuntimeError("Login failed")
54
- self._is_logged_in = True
55
-
56
- async def download_one(self, book_id: str) -> None:
57
- """
58
- The full download logic for a single book.
59
-
60
- :param book_id: The identifier of the book to download.
61
- """
62
- assert isinstance(self.requester, AsyncRequesterProtocol)
63
- await self.prepare()
64
-
65
- TAG = "[AsyncDownloader]"
66
- wait_time = self.config.request_interval
67
-
68
- raw_base = self.raw_data_dir / book_id
69
- cache_base = self.cache_dir / book_id
70
- info_path = raw_base / "book_info.json"
71
- chapters_html_dir = cache_base / "html"
72
-
73
- raw_base.mkdir(parents=True, exist_ok=True)
74
- if self.save_html:
75
- chapters_html_dir.mkdir(parents=True, exist_ok=True)
76
- normal_cs = ChapterStorage(
77
- raw_base=raw_base,
78
- namespace="chapters",
79
- backend_type=self._config.storage_backend,
80
- batch_size=self._config.storage_batch_size,
81
- )
82
-
83
- # load or fetch book_info
84
- book_info: dict[str, Any]
85
- re_fetch = True
86
- if info_path.exists():
87
- try:
88
- data = json.loads(info_path.read_text("utf-8"))
89
- days, *_ = calculate_time_difference(
90
- data.get("update_time", ""), "UTC+8"
91
- )
92
- re_fetch = days > 1
93
- except Exception:
94
- re_fetch = True
95
-
96
- if re_fetch:
97
- info_html = await self.requester.get_book_info(book_id)
98
- if self.save_html:
99
- for i, html in enumerate(info_html):
100
- save_as_txt(html, chapters_html_dir / f"info_{i}.html")
101
- book_info = self.parser.parse_book_info(info_html)
102
- if book_info.get("book_name") != "未找到书名":
103
- save_as_json(book_info, info_path)
104
- else:
105
- logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
106
- await asyncio.sleep(wait_time)
107
- else:
108
- book_info = json.loads(info_path.read_text("utf-8"))
109
-
110
- # download cover
111
- cover_url = book_info.get("cover_url", "")
112
- if cover_url:
113
- await asyncio.get_running_loop().run_in_executor(
114
- None, download_image_as_bytes, cover_url, raw_base
115
- )
116
-
117
- # setup queue, semaphore, executor
118
- semaphore = asyncio.Semaphore(self.download_workers)
119
- queue: asyncio.Queue[tuple[str, list[str]]] = asyncio.Queue()
120
- save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
121
- loop = asyncio.get_running_loop()
122
- executor = (
123
- ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
124
- )
125
-
126
- async def parser_worker(worker_id: int) -> None:
127
- while True:
128
- cid, html = await queue.get()
129
- try:
130
- chap_json = await loop.run_in_executor(
131
- executor, self.parser.parse_chapter, html, cid
132
- )
133
- if chap_json:
134
- await save_queue.put(chap_json)
135
- logger.info(
136
- "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
137
- )
138
- except Exception as e:
139
- logger.error(
140
- "%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
141
- )
142
- finally:
143
- queue.task_done()
144
-
145
- async def saver_loop(
146
- cs: ChapterStorage,
147
- queue: asyncio.Queue[ChapterDict],
148
- ) -> None:
149
- while True:
150
- data = await queue.get()
151
- try:
152
- cs.save(data)
153
- except Exception as e:
154
- logger.error(
155
- "[saver] Error saving chapter %s: %s",
156
- data.get("id"),
157
- e,
158
- )
159
- finally:
160
- queue.task_done()
161
-
162
- async def download_worker(chap: dict[str, Any]) -> None:
163
- cid = str(chap.get("chapterId") or "")
164
- if not cid:
165
- return
166
- if normal_cs.exists(cid) and self.skip_existing:
167
- logger.info("%s skipping existing chapter %s", TAG, cid)
168
- return
169
-
170
- try:
171
- async with semaphore:
172
- html = await self.requester.get_book_chapter(book_id, cid)
173
- await queue.put((cid, html))
174
- logger.info("%s downloaded chapter %s", TAG, cid)
175
- except Exception as e:
176
- logger.error("%s error downloading %s: %s", TAG, cid, e)
177
-
178
- # start parser workers
179
- parsers = [
180
- asyncio.create_task(parser_worker(i)) for i in range(self.parser_workers)
181
- ]
182
- chapter_saver = asyncio.create_task(saver_loop(normal_cs, save_queue))
183
-
184
- # enqueue + run downloads
185
- download_tasks = []
186
- for vol in book_info.get("volumes", []):
187
- for chap in vol.get("chapters", []):
188
- download_tasks.append(asyncio.create_task(download_worker(chap)))
189
-
190
- await asyncio.gather(*download_tasks)
191
- await queue.join() # wait until all parsed
192
- await save_queue.join()
193
- for p in parsers:
194
- p.cancel() # stop parser loops
195
- chapter_saver.cancel()
196
-
197
- # final save
198
- await loop.run_in_executor(executor, self.saver.save, book_id)
199
- executor.shutdown(wait=True)
200
-
201
- logger.info(
202
- "%s Novel '%s' download completed.",
203
- TAG,
204
- book_info.get("book_name", "unknown"),
205
- )
206
- return
207
-
208
- @property
209
- def parser_workers(self) -> int:
210
- return self.config.parser_workers
211
-
212
- @property
213
- def download_workers(self) -> int:
214
- return self.config.download_workers
215
-
216
- @property
217
- def use_process_pool(self) -> bool:
218
- return self.config.use_process_pool
@@ -1,210 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.common.common_sync
4
- ----------------------------------------------------
5
-
6
- This module defines `CommonDownloader`.
7
- """
8
-
9
- import json
10
- import logging
11
- from typing import Any
12
-
13
- from novel_downloader.config import DownloaderConfig
14
- from novel_downloader.core.downloaders.base import BaseDownloader
15
- from novel_downloader.core.interfaces import (
16
- ParserProtocol,
17
- SaverProtocol,
18
- SyncRequesterProtocol,
19
- )
20
- from novel_downloader.utils.chapter_storage import ChapterStorage
21
- from novel_downloader.utils.file_utils import save_as_json, save_as_txt
22
- from novel_downloader.utils.network import download_image_as_bytes
23
- from novel_downloader.utils.time_utils import (
24
- calculate_time_difference,
25
- sleep_with_random_delay,
26
- )
27
-
28
- logger = logging.getLogger(__name__)
29
-
30
-
31
- class CommonDownloader(BaseDownloader):
32
- """
33
- Specialized downloader for common novels.
34
- """
35
-
36
- def __init__(
37
- self,
38
- requester: SyncRequesterProtocol,
39
- parser: ParserProtocol,
40
- saver: SaverProtocol,
41
- config: DownloaderConfig,
42
- site: str,
43
- ):
44
- """
45
- Initialize the common novel downloader with site information.
46
-
47
- :param requester: Object implementing RequesterProtocol, used to fetch raw data.
48
- :param parser: Object implementing ParserProtocol, used to parse page content.
49
- :param saver: Object implementing SaverProtocol, used to save final output.
50
- :param config: Downloader configuration object.
51
- :param site: Identifier for the site the downloader is targeting.
52
- """
53
- super().__init__(requester, parser, saver, config, site)
54
- self._site = site
55
- self._is_logged_in = False
56
-
57
- def prepare(self) -> None:
58
- """
59
- Perform login
60
- """
61
- if self.login_required and not self._is_logged_in:
62
- success = self.requester.login()
63
- if not success:
64
- raise RuntimeError("Login failed")
65
- self._is_logged_in = True
66
-
67
- def download_one(self, book_id: str) -> None:
68
- """
69
- The full download logic for a single book.
70
-
71
- :param book_id: The identifier of the book to download.
72
- """
73
- self.prepare()
74
-
75
- TAG = "[Downloader]"
76
- save_html = self.config.save_html
77
- skip_existing = self.config.skip_existing
78
- wait_time = self.config.request_interval
79
-
80
- raw_base = self.raw_data_dir / book_id
81
- cache_base = self.cache_dir / book_id
82
- info_path = raw_base / "book_info.json"
83
- chapters_html_dir = cache_base / "html"
84
-
85
- raw_base.mkdir(parents=True, exist_ok=True)
86
- if self.save_html:
87
- chapters_html_dir.mkdir(parents=True, exist_ok=True)
88
- normal_cs = ChapterStorage(
89
- raw_base=raw_base,
90
- namespace="chapters",
91
- backend_type=self._config.storage_backend,
92
- batch_size=self._config.storage_batch_size,
93
- )
94
-
95
- book_info: dict[str, Any]
96
-
97
- try:
98
- if not info_path.exists():
99
- raise FileNotFoundError
100
- book_info = json.loads(info_path.read_text(encoding="utf-8"))
101
- days, hrs, mins, secs = calculate_time_difference(
102
- book_info.get("update_time", ""), "UTC+8"
103
- )
104
- logger.info(
105
- "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
106
- )
107
- if days > 1:
108
- raise FileNotFoundError # trigger re-fetch
109
- except Exception:
110
- info_html = self.requester.get_book_info(book_id)
111
- if save_html:
112
- for i, html in enumerate(info_html):
113
- save_as_txt(html, chapters_html_dir / f"info_{i}.html")
114
- book_info = self.parser.parse_book_info(info_html)
115
- if (
116
- book_info.get("book_name", "") != "未找到书名"
117
- and book_info.get("update_time", "") != "未找到更新时间"
118
- ):
119
- save_as_json(book_info, info_path)
120
- sleep_with_random_delay(wait_time, mul_spread=1.1, max_sleep=wait_time + 2)
121
-
122
- # download cover
123
- cover_url = book_info.get("cover_url", "")
124
- if cover_url:
125
- cover_bytes = download_image_as_bytes(cover_url, raw_base)
126
- if not cover_bytes:
127
- logger.warning("%s Failed to download cover: %s", TAG, cover_url)
128
-
129
- # enqueue chapters
130
- for vol in book_info.get("volumes", []):
131
- vol_name = vol.get("volume_name", "")
132
- logger.info("%s Enqueuing volume: %s", TAG, vol_name)
133
-
134
- for chap in vol.get("chapters", []):
135
- cid = chap.get("chapterId")
136
- if not cid:
137
- logger.warning("%s Skipping chapter without chapterId", TAG)
138
- continue
139
-
140
- if normal_cs.exists(cid) and skip_existing:
141
- logger.debug(
142
- "%s Chapter already exists, skipping: %s",
143
- TAG,
144
- cid,
145
- )
146
- continue
147
-
148
- chap_title = chap.get("title", "")
149
- logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
150
- try:
151
- chap_html = self.requester.get_book_chapter(book_id, cid)
152
-
153
- if save_html:
154
- for i, html in enumerate(chap_html):
155
- html_path = chapters_html_dir / f"{cid}_{i}.html"
156
- save_as_txt(html, html_path, on_exist="skip")
157
-
158
- chap_json = self.parser.parse_chapter(chap_html, cid)
159
-
160
- sleep_with_random_delay(
161
- wait_time, mul_spread=1.1, max_sleep=wait_time + 2
162
- )
163
- if not chap_json:
164
- logger.warning(
165
- "%s Parsed chapter json is empty, skipping: %s (%s)",
166
- TAG,
167
- chap_title,
168
- cid,
169
- )
170
- continue
171
- except Exception as e:
172
- logger.warning(
173
- "%s Error while processing chapter %s (%s): %s",
174
- TAG,
175
- chap_title,
176
- cid,
177
- str(e),
178
- )
179
- continue
180
-
181
- normal_cs.save(chap_json)
182
- logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)
183
-
184
- normal_cs.close()
185
- self.saver.save(book_id)
186
-
187
- logger.info(
188
- "%s Novel '%s' download completed.",
189
- TAG,
190
- book_info.get("book_name", "unknown"),
191
- )
192
- return
193
-
194
- @property
195
- def site(self) -> str:
196
- """
197
- Get the site identifier.
198
-
199
- :return: The site string.
200
- """
201
- return self._site
202
-
203
- @site.setter
204
- def site(self, value: str) -> None:
205
- """
206
- Set the site identifier.
207
-
208
- :param value: New site string to set.
209
- """
210
- self._site = value
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.esjzone
4
- -----------------------------------------
5
-
6
- """
7
-
8
- from .esjzone_async import EsjzoneAsyncDownloader
9
- from .esjzone_sync import EsjzoneDownloader
10
-
11
- __all__ = [
12
- "EsjzoneAsyncDownloader",
13
- "EsjzoneDownloader",
14
- ]
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.esjzone.esjzone_async
4
- -------------------------------------------------------
5
-
6
- """
7
-
8
- from novel_downloader.config.models import DownloaderConfig
9
- from novel_downloader.core.downloaders.common import CommonAsyncDownloader
10
- from novel_downloader.core.interfaces import (
11
- AsyncRequesterProtocol,
12
- ParserProtocol,
13
- SaverProtocol,
14
- )
15
-
16
-
17
- class EsjzoneAsyncDownloader(CommonAsyncDownloader):
18
- """"""
19
-
20
- def __init__(
21
- self,
22
- requester: AsyncRequesterProtocol,
23
- parser: ParserProtocol,
24
- saver: SaverProtocol,
25
- config: DownloaderConfig,
26
- ):
27
- super().__init__(requester, parser, saver, config, "esjzone")
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.esjzone.esjzone_sync
4
- ------------------------------------------------------
5
-
6
- """
7
-
8
- from novel_downloader.config.models import DownloaderConfig
9
- from novel_downloader.core.downloaders.common import CommonDownloader
10
- from novel_downloader.core.interfaces import (
11
- ParserProtocol,
12
- SaverProtocol,
13
- SyncRequesterProtocol,
14
- )
15
-
16
-
17
- class EsjzoneDownloader(CommonDownloader):
18
- """"""
19
-
20
- def __init__(
21
- self,
22
- requester: SyncRequesterProtocol,
23
- parser: ParserProtocol,
24
- saver: SaverProtocol,
25
- config: DownloaderConfig,
26
- ):
27
- super().__init__(requester, parser, saver, config, "esjzone")
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.qianbi
4
- ----------------------------------------
5
-
6
- """
7
-
8
- from .qianbi_async import QianbiAsyncDownloader
9
- from .qianbi_sync import QianbiDownloader
10
-
11
- __all__ = [
12
- "QianbiAsyncDownloader",
13
- "QianbiDownloader",
14
- ]
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.qianbi.qianbi_async
4
- -----------------------------------------------------
5
-
6
- """
7
-
8
- from novel_downloader.config.models import DownloaderConfig
9
- from novel_downloader.core.downloaders.common import CommonAsyncDownloader
10
- from novel_downloader.core.interfaces import (
11
- AsyncRequesterProtocol,
12
- ParserProtocol,
13
- SaverProtocol,
14
- )
15
-
16
-
17
- class QianbiAsyncDownloader(CommonAsyncDownloader):
18
- """"""
19
-
20
- def __init__(
21
- self,
22
- requester: AsyncRequesterProtocol,
23
- parser: ParserProtocol,
24
- saver: SaverProtocol,
25
- config: DownloaderConfig,
26
- ):
27
- super().__init__(requester, parser, saver, config, "qianbi")
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.qianbi.qianbi_sync
4
- ----------------------------------------------------
5
-
6
- """
7
-
8
- from novel_downloader.config.models import DownloaderConfig
9
- from novel_downloader.core.downloaders.common import CommonDownloader
10
- from novel_downloader.core.interfaces import (
11
- ParserProtocol,
12
- SaverProtocol,
13
- SyncRequesterProtocol,
14
- )
15
-
16
-
17
- class QianbiDownloader(CommonDownloader):
18
- """"""
19
-
20
- def __init__(
21
- self,
22
- requester: SyncRequesterProtocol,
23
- parser: ParserProtocol,
24
- saver: SaverProtocol,
25
- config: DownloaderConfig,
26
- ):
27
- super().__init__(requester, parser, saver, config, "qianbi")
@@ -1,10 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.qidian
4
- ----------------------------------------
5
-
6
- """
7
-
8
- from .qidian_sync import QidianDownloader
9
-
10
- __all__ = ["QidianDownloader"]