novel-downloader 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/clean.py +97 -78
  3. novel_downloader/cli/config.py +177 -0
  4. novel_downloader/cli/download.py +132 -87
  5. novel_downloader/cli/export.py +77 -0
  6. novel_downloader/cli/main.py +21 -28
  7. novel_downloader/config/__init__.py +1 -25
  8. novel_downloader/config/adapter.py +32 -31
  9. novel_downloader/config/loader.py +3 -3
  10. novel_downloader/config/site_rules.py +1 -2
  11. novel_downloader/core/__init__.py +3 -6
  12. novel_downloader/core/downloaders/__init__.py +10 -13
  13. novel_downloader/core/downloaders/base.py +233 -0
  14. novel_downloader/core/downloaders/biquge.py +27 -0
  15. novel_downloader/core/downloaders/common.py +414 -0
  16. novel_downloader/core/downloaders/esjzone.py +27 -0
  17. novel_downloader/core/downloaders/linovelib.py +27 -0
  18. novel_downloader/core/downloaders/qianbi.py +27 -0
  19. novel_downloader/core/downloaders/qidian.py +352 -0
  20. novel_downloader/core/downloaders/sfacg.py +27 -0
  21. novel_downloader/core/downloaders/yamibo.py +27 -0
  22. novel_downloader/core/exporters/__init__.py +37 -0
  23. novel_downloader/core/{savers → exporters}/base.py +73 -39
  24. novel_downloader/core/exporters/biquge.py +25 -0
  25. novel_downloader/core/exporters/common/__init__.py +12 -0
  26. novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
  27. novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
  28. novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
  29. novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
  30. novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
  31. novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
  32. novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
  33. novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
  34. novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
  35. novel_downloader/core/exporters/esjzone.py +25 -0
  36. novel_downloader/core/exporters/linovelib/__init__.py +10 -0
  37. novel_downloader/core/exporters/linovelib/epub.py +449 -0
  38. novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
  39. novel_downloader/core/exporters/linovelib/txt.py +129 -0
  40. novel_downloader/core/exporters/qianbi.py +25 -0
  41. novel_downloader/core/{savers → exporters}/qidian.py +8 -8
  42. novel_downloader/core/exporters/sfacg.py +25 -0
  43. novel_downloader/core/exporters/yamibo.py +25 -0
  44. novel_downloader/core/factory/__init__.py +5 -17
  45. novel_downloader/core/factory/downloader.py +24 -126
  46. novel_downloader/core/factory/exporter.py +58 -0
  47. novel_downloader/core/factory/fetcher.py +96 -0
  48. novel_downloader/core/factory/parser.py +17 -12
  49. novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
  50. novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
  51. novel_downloader/core/fetchers/base/browser.py +383 -0
  52. novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
  53. novel_downloader/core/fetchers/base/session.py +419 -0
  54. novel_downloader/core/fetchers/biquge/__init__.py +14 -0
  55. novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
  56. novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
  57. novel_downloader/core/fetchers/common/__init__.py +14 -0
  58. novel_downloader/core/fetchers/common/browser.py +79 -0
  59. novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
  60. novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
  61. novel_downloader/core/fetchers/esjzone/browser.py +202 -0
  62. novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
  63. novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
  64. novel_downloader/core/fetchers/linovelib/browser.py +193 -0
  65. novel_downloader/core/fetchers/linovelib/session.py +193 -0
  66. novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
  67. novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
  68. novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
  69. novel_downloader/core/fetchers/qidian/__init__.py +14 -0
  70. novel_downloader/core/fetchers/qidian/browser.py +266 -0
  71. novel_downloader/core/fetchers/qidian/session.py +326 -0
  72. novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
  73. novel_downloader/core/fetchers/sfacg/browser.py +189 -0
  74. novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
  75. novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
  76. novel_downloader/core/fetchers/yamibo/browser.py +229 -0
  77. novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
  78. novel_downloader/core/interfaces/__init__.py +8 -12
  79. novel_downloader/core/interfaces/downloader.py +54 -0
  80. novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
  81. novel_downloader/core/interfaces/fetcher.py +162 -0
  82. novel_downloader/core/interfaces/parser.py +6 -7
  83. novel_downloader/core/parsers/__init__.py +5 -6
  84. novel_downloader/core/parsers/base.py +9 -13
  85. novel_downloader/core/parsers/biquge/main_parser.py +12 -13
  86. novel_downloader/core/parsers/common/helper.py +3 -3
  87. novel_downloader/core/parsers/common/main_parser.py +39 -34
  88. novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
  89. novel_downloader/core/parsers/linovelib/__init__.py +10 -0
  90. novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
  91. novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
  92. novel_downloader/core/parsers/qidian/__init__.py +2 -11
  93. novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
  94. novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
  95. novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
  96. novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
  97. novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
  98. novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
  99. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
  100. novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
  101. novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
  102. novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
  103. novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
  104. novel_downloader/locales/en.json +18 -2
  105. novel_downloader/locales/zh.json +18 -2
  106. novel_downloader/models/__init__.py +64 -0
  107. novel_downloader/models/browser.py +21 -0
  108. novel_downloader/models/chapter.py +25 -0
  109. novel_downloader/models/config.py +100 -0
  110. novel_downloader/models/login.py +20 -0
  111. novel_downloader/models/site_rules.py +99 -0
  112. novel_downloader/models/tasks.py +33 -0
  113. novel_downloader/models/types.py +15 -0
  114. novel_downloader/resources/config/settings.toml +31 -25
  115. novel_downloader/resources/json/linovelib_font_map.json +3573 -0
  116. novel_downloader/tui/__init__.py +7 -0
  117. novel_downloader/tui/app.py +32 -0
  118. novel_downloader/tui/main.py +17 -0
  119. novel_downloader/tui/screens/__init__.py +14 -0
  120. novel_downloader/tui/screens/home.py +191 -0
  121. novel_downloader/tui/screens/login.py +74 -0
  122. novel_downloader/tui/styles/home_layout.tcss +79 -0
  123. novel_downloader/tui/widgets/richlog_handler.py +24 -0
  124. novel_downloader/utils/__init__.py +6 -0
  125. novel_downloader/utils/chapter_storage.py +25 -38
  126. novel_downloader/utils/constants.py +11 -5
  127. novel_downloader/utils/cookies.py +66 -0
  128. novel_downloader/utils/crypto_utils.py +1 -74
  129. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  130. novel_downloader/utils/fontocr/ocr_v2.py +2 -2
  131. novel_downloader/utils/hash_store.py +10 -18
  132. novel_downloader/utils/hash_utils.py +3 -2
  133. novel_downloader/utils/logger.py +2 -3
  134. novel_downloader/utils/network.py +2 -1
  135. novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
  136. novel_downloader/utils/text_utils/font_mapping.py +1 -1
  137. novel_downloader/utils/text_utils/text_cleaning.py +1 -1
  138. novel_downloader/utils/time_utils/datetime_utils.py +3 -3
  139. novel_downloader/utils/time_utils/sleep_utils.py +1 -1
  140. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/METADATA +69 -35
  141. novel_downloader-1.4.1.dist-info/RECORD +170 -0
  142. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/WHEEL +1 -1
  143. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/entry_points.txt +1 -0
  144. novel_downloader/cli/interactive.py +0 -66
  145. novel_downloader/cli/settings.py +0 -177
  146. novel_downloader/config/models.py +0 -187
  147. novel_downloader/core/downloaders/base/__init__.py +0 -14
  148. novel_downloader/core/downloaders/base/base_async.py +0 -153
  149. novel_downloader/core/downloaders/base/base_sync.py +0 -208
  150. novel_downloader/core/downloaders/biquge/__init__.py +0 -14
  151. novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
  152. novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
  153. novel_downloader/core/downloaders/common/__init__.py +0 -14
  154. novel_downloader/core/downloaders/common/common_async.py +0 -210
  155. novel_downloader/core/downloaders/common/common_sync.py +0 -202
  156. novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
  157. novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
  158. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
  159. novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
  160. novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
  161. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
  162. novel_downloader/core/downloaders/qidian/__init__.py +0 -10
  163. novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
  164. novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
  165. novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
  166. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
  167. novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
  168. novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
  169. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
  170. novel_downloader/core/factory/requester.py +0 -144
  171. novel_downloader/core/factory/saver.py +0 -56
  172. novel_downloader/core/interfaces/async_downloader.py +0 -36
  173. novel_downloader/core/interfaces/async_requester.py +0 -84
  174. novel_downloader/core/interfaces/sync_downloader.py +0 -36
  175. novel_downloader/core/interfaces/sync_requester.py +0 -82
  176. novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
  177. novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
  178. novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
  179. novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
  180. novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
  181. novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
  182. novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
  183. novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
  184. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
  185. novel_downloader/core/requesters/base/async_session.py +0 -410
  186. novel_downloader/core/requesters/base/browser.py +0 -337
  187. novel_downloader/core/requesters/base/session.py +0 -378
  188. novel_downloader/core/requesters/biquge/__init__.py +0 -14
  189. novel_downloader/core/requesters/common/__init__.py +0 -17
  190. novel_downloader/core/requesters/common/session.py +0 -113
  191. novel_downloader/core/requesters/esjzone/__init__.py +0 -13
  192. novel_downloader/core/requesters/esjzone/session.py +0 -235
  193. novel_downloader/core/requesters/qianbi/__init__.py +0 -13
  194. novel_downloader/core/requesters/qidian/__init__.py +0 -21
  195. novel_downloader/core/requesters/qidian/broswer.py +0 -307
  196. novel_downloader/core/requesters/qidian/session.py +0 -290
  197. novel_downloader/core/requesters/sfacg/__init__.py +0 -13
  198. novel_downloader/core/requesters/sfacg/session.py +0 -242
  199. novel_downloader/core/requesters/yamibo/__init__.py +0 -13
  200. novel_downloader/core/requesters/yamibo/session.py +0 -237
  201. novel_downloader/core/savers/__init__.py +0 -34
  202. novel_downloader/core/savers/biquge.py +0 -25
  203. novel_downloader/core/savers/common/__init__.py +0 -12
  204. novel_downloader/core/savers/esjzone.py +0 -25
  205. novel_downloader/core/savers/qianbi.py +0 -25
  206. novel_downloader/core/savers/sfacg.py +0 -25
  207. novel_downloader/core/savers/yamibo.py +0 -25
  208. novel_downloader/resources/config/rules.toml +0 -196
  209. novel_downloader-1.3.3.dist-info/RECORD +0 -166
  210. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/licenses/LICENSE +0 -0
  211. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/top_level.txt +0 -0
@@ -1,153 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.base.base_async
4
- -------------------------------------------------
5
-
6
- Defines the abstract base class `BaseAsyncDownloader`, which provides a
7
- common interface and reusable logic for all downloader implementations.
8
- """
9
-
10
- import abc
11
- import logging
12
- from pathlib import Path
13
-
14
- from novel_downloader.config import DownloaderConfig
15
- from novel_downloader.core.interfaces import (
16
- AsyncDownloaderProtocol,
17
- AsyncRequesterProtocol,
18
- ParserProtocol,
19
- SaverProtocol,
20
- )
21
-
22
-
23
- class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
24
- """
25
- Abstract downloader that defines the initialization interface
26
- and the general batch download flow.
27
-
28
- Subclasses must implement the logic for downloading a single book.
29
- """
30
-
31
- def __init__(
32
- self,
33
- requester: AsyncRequesterProtocol,
34
- parser: ParserProtocol,
35
- saver: SaverProtocol,
36
- config: DownloaderConfig,
37
- site: str,
38
- ):
39
- self._requester = requester
40
- self._parser = parser
41
- self._saver = saver
42
- self._config = config
43
- self._site = site
44
-
45
- self._raw_data_dir = Path(config.raw_data_dir) / site
46
- self._cache_dir = Path(config.cache_dir) / site
47
- self._raw_data_dir.mkdir(parents=True, exist_ok=True)
48
- self._cache_dir.mkdir(parents=True, exist_ok=True)
49
-
50
- self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
51
-
52
- async def download(self, book_ids: list[str]) -> None:
53
- """
54
- The general batch download process:
55
- 1. Iterate over all book IDs
56
- 2. For each ID, call `download_one()`
57
-
58
- :param book_ids: A list of book identifiers to download.
59
- """
60
- await self.prepare()
61
-
62
- # 2) batch download
63
- for idx, book_id in enumerate(book_ids, start=1):
64
- self.logger.debug(
65
- "[%s] Starting download for %r (%s/%s)",
66
- self.__class__.__name__,
67
- book_id,
68
- idx,
69
- len(book_ids),
70
- )
71
- try:
72
- await self.download_one(book_id)
73
- except Exception as e:
74
- self._handle_download_exception(book_id, e)
75
-
76
- @abc.abstractmethod
77
- async def download_one(self, book_id: str) -> None:
78
- """
79
- The full download logic for a single book.
80
-
81
- Subclasses must implement this method.
82
-
83
- :param book_id: The identifier of the book to download.
84
- """
85
- ...
86
-
87
- async def prepare(self) -> None:
88
- """
89
- Optional hook called before downloading each book.
90
-
91
- Subclasses can override this method to perform pre-download setup.
92
- """
93
- return
94
-
95
- @property
96
- def requester(self) -> AsyncRequesterProtocol:
97
- return self._requester
98
-
99
- @property
100
- def parser(self) -> ParserProtocol:
101
- return self._parser
102
-
103
- @property
104
- def saver(self) -> SaverProtocol:
105
- return self._saver
106
-
107
- @property
108
- def config(self) -> DownloaderConfig:
109
- return self._config
110
-
111
- @property
112
- def raw_data_dir(self) -> Path:
113
- return self._raw_data_dir
114
-
115
- @property
116
- def cache_dir(self) -> Path:
117
- return self._cache_dir
118
-
119
- @property
120
- def site(self) -> str:
121
- return self._site
122
-
123
- @property
124
- def save_html(self) -> bool:
125
- return self._config.save_html
126
-
127
- @property
128
- def skip_existing(self) -> bool:
129
- return self._config.skip_existing
130
-
131
- @property
132
- def login_required(self) -> bool:
133
- return self._config.login_required
134
-
135
- @property
136
- def request_interval(self) -> float:
137
- return self._config.request_interval
138
-
139
- def _handle_download_exception(self, book_id: str, error: Exception) -> None:
140
- """
141
- Handle download errors in a consistent way.
142
-
143
- This method can be overridden or extended to implement retry logic, etc.
144
-
145
- :param book_id: The ID of the book that failed.
146
- :param error: The exception raised during download.
147
- """
148
- self.logger.warning(
149
- "[%s] Failed to download %r: %s",
150
- self.__class__.__name__,
151
- book_id,
152
- error,
153
- )
@@ -1,208 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.base.base_sync
4
- ------------------------------------------------
5
-
6
- Defines the abstract base class `BaseDownloader`, which provides a
7
- common interface and reusable logic for all downloader implementations.
8
- """
9
-
10
- import abc
11
- import logging
12
- from pathlib import Path
13
-
14
- from novel_downloader.config import DownloaderConfig
15
- from novel_downloader.core.interfaces import (
16
- ParserProtocol,
17
- SaverProtocol,
18
- SyncDownloaderProtocol,
19
- SyncRequesterProtocol,
20
- )
21
-
22
-
23
- class BaseDownloader(SyncDownloaderProtocol, abc.ABC):
24
- """
25
- Abstract downloader that defines the initialization interface
26
- and the general batch download flow.
27
-
28
- Subclasses must implement the logic for downloading a single book.
29
- """
30
-
31
- def __init__(
32
- self,
33
- requester: SyncRequesterProtocol,
34
- parser: ParserProtocol,
35
- saver: SaverProtocol,
36
- config: DownloaderConfig,
37
- site: str,
38
- ):
39
- """
40
- Initialize the downloader with its components.
41
-
42
- :param requester: Object implementing RequesterProtocol, used to fetch raw data.
43
- :param parser: Object implementing ParserProtocol, used to parse page content.
44
- :param saver: Object implementing SaverProtocol, used to save final output.
45
- :param config: Downloader configuration object.
46
- """
47
- self._requester = requester
48
- self._parser = parser
49
- self._saver = saver
50
- self._config = config
51
- self._site = site
52
-
53
- self._raw_data_dir = Path(config.raw_data_dir) / site
54
- self._cache_dir = Path(config.cache_dir) / site
55
- self._raw_data_dir.mkdir(parents=True, exist_ok=True)
56
- self._cache_dir.mkdir(parents=True, exist_ok=True)
57
-
58
- self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
59
-
60
- def download(self, book_ids: list[str]) -> None:
61
- """
62
- The general batch download process:
63
- 1. Iterate over all book IDs
64
- 2. For each ID, call `download_one()`
65
-
66
- :param book_ids: A list of book identifiers to download.
67
- """
68
- self.prepare()
69
-
70
- for idx, book_id in enumerate(book_ids, start=1):
71
- self.logger.debug(
72
- "[downloader] Starting download for book_id: %s (%s/%s)",
73
- book_id,
74
- idx,
75
- len(book_ids),
76
- )
77
- try:
78
- self.download_one(book_id)
79
- except Exception as e:
80
- self._handle_download_exception(book_id, e)
81
-
82
- @abc.abstractmethod
83
- def download_one(self, book_id: str) -> None:
84
- """
85
- The full download logic for a single book.
86
-
87
- Subclasses must implement this method.
88
-
89
- :param book_id: The identifier of the book to download.
90
- """
91
- ...
92
-
93
- def prepare(self) -> None:
94
- """
95
- Optional hook called before downloading each book.
96
-
97
- Subclasses can override this method to perform pre-download setup.
98
- """
99
- return
100
-
101
- @property
102
- def requester(self) -> SyncRequesterProtocol:
103
- """
104
- Access the current requester.
105
-
106
- :return: The internal requester instance.
107
- """
108
- return self._requester
109
-
110
- @property
111
- def parser(self) -> ParserProtocol:
112
- """
113
- Access the current parser.
114
-
115
- :return: The internal parser instance.
116
- """
117
- return self._parser
118
-
119
- @property
120
- def saver(self) -> SaverProtocol:
121
- """
122
- Access the current saver.
123
-
124
- :return: The internal saver instance.
125
- """
126
- return self._saver
127
-
128
- @property
129
- def config(self) -> DownloaderConfig:
130
- """
131
- Access the downloader configuration.
132
-
133
- :return: The internal DownloaderConfig object.
134
- """
135
- return self._config
136
-
137
- @property
138
- def raw_data_dir(self) -> Path:
139
- """
140
- Access the root directory for storing raw downloaded data.
141
-
142
- :return: Path to the raw data directory.
143
- """
144
- return self._raw_data_dir
145
-
146
- @property
147
- def cache_dir(self) -> Path:
148
- """
149
- Access the directory used for temporary caching during download.
150
-
151
- :return: Path to the cache directory.
152
- """
153
- return self._cache_dir
154
-
155
- @property
156
- def site(self) -> str:
157
- return self._site
158
-
159
- @property
160
- def save_html(self) -> bool:
161
- return self._config.save_html
162
-
163
- @property
164
- def skip_existing(self) -> bool:
165
- return self._config.skip_existing
166
-
167
- @property
168
- def login_required(self) -> bool:
169
- return self._config.login_required
170
-
171
- @property
172
- def request_interval(self) -> float:
173
- return self._config.request_interval
174
-
175
- def set_requester(self, requester: SyncRequesterProtocol) -> None:
176
- """
177
- Replace the requester instance with a new one.
178
-
179
- :param requester: The new requester to be used.
180
- """
181
- self._requester = requester
182
-
183
- def set_parser(self, parser: ParserProtocol) -> None:
184
- """
185
- Replace the parser instance with a new one.
186
-
187
- :param parser: The new parser to be used.
188
- """
189
- self._parser = parser
190
-
191
- def set_saver(self, saver: SaverProtocol) -> None:
192
- """
193
- Replace the saver instance with a new one.
194
-
195
- :param saver: The new saver to be used.
196
- """
197
- self._saver = saver
198
-
199
- def _handle_download_exception(self, book_id: str, error: Exception) -> None:
200
- """
201
- Handle download errors in a consistent way.
202
-
203
- This method can be overridden or extended to implement retry logic, etc.
204
-
205
- :param book_id: The ID of the book that failed.
206
- :param error: The exception raised during download.
207
- """
208
- self.logger.warning("[downloader] Failed to download %s: %s", book_id, error)
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.biquge
4
- ----------------------------------------
5
-
6
- """
7
-
8
- from .biquge_async import BiqugeAsyncDownloader
9
- from .biquge_sync import BiqugeDownloader
10
-
11
- __all__ = [
12
- "BiqugeAsyncDownloader",
13
- "BiqugeDownloader",
14
- ]
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.biquge.biquge_async
4
- -----------------------------------------------------
5
-
6
- """
7
-
8
- from novel_downloader.config.models import DownloaderConfig
9
- from novel_downloader.core.downloaders.common import CommonAsyncDownloader
10
- from novel_downloader.core.interfaces import (
11
- AsyncRequesterProtocol,
12
- ParserProtocol,
13
- SaverProtocol,
14
- )
15
-
16
-
17
- class BiqugeAsyncDownloader(CommonAsyncDownloader):
18
- """"""
19
-
20
- def __init__(
21
- self,
22
- requester: AsyncRequesterProtocol,
23
- parser: ParserProtocol,
24
- saver: SaverProtocol,
25
- config: DownloaderConfig,
26
- ):
27
- super().__init__(requester, parser, saver, config, "biquge")
@@ -1,27 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.biquge.biquge_sync
4
- ----------------------------------------------------
5
-
6
- """
7
-
8
- from novel_downloader.config.models import DownloaderConfig
9
- from novel_downloader.core.downloaders.common import CommonDownloader
10
- from novel_downloader.core.interfaces import (
11
- ParserProtocol,
12
- SaverProtocol,
13
- SyncRequesterProtocol,
14
- )
15
-
16
-
17
- class BiqugeDownloader(CommonDownloader):
18
- """"""
19
-
20
- def __init__(
21
- self,
22
- requester: SyncRequesterProtocol,
23
- parser: ParserProtocol,
24
- saver: SaverProtocol,
25
- config: DownloaderConfig,
26
- ):
27
- super().__init__(requester, parser, saver, config, "biquge")
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.common
4
- ----------------------------------------
5
-
6
- """
7
-
8
- from .common_async import CommonAsyncDownloader
9
- from .common_sync import CommonDownloader
10
-
11
- __all__ = [
12
- "CommonAsyncDownloader",
13
- "CommonDownloader",
14
- ]
@@ -1,210 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.downloaders.common.common_async
4
- -----------------------------------------------------
5
-
6
- """
7
-
8
- import asyncio
9
- import json
10
- import logging
11
- from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
12
- from typing import Any
13
-
14
- from novel_downloader.config import DownloaderConfig
15
- from novel_downloader.core.downloaders.base import BaseAsyncDownloader
16
- from novel_downloader.core.interfaces import (
17
- AsyncRequesterProtocol,
18
- ParserProtocol,
19
- SaverProtocol,
20
- )
21
- from novel_downloader.utils.chapter_storage import ChapterDict, ChapterStorage
22
- from novel_downloader.utils.file_utils import save_as_json, save_as_txt
23
- from novel_downloader.utils.time_utils import calculate_time_difference
24
-
25
- logger = logging.getLogger(__name__)
26
-
27
-
28
- class CommonAsyncDownloader(BaseAsyncDownloader):
29
- """
30
- Specialized Async downloader for common novels.
31
- """
32
-
33
- def __init__(
34
- self,
35
- requester: AsyncRequesterProtocol,
36
- parser: ParserProtocol,
37
- saver: SaverProtocol,
38
- config: DownloaderConfig,
39
- site: str,
40
- ):
41
- """ """
42
- super().__init__(requester, parser, saver, config, site)
43
- self._is_logged_in = False
44
-
45
- async def prepare(self) -> None:
46
- """
47
- Perform login
48
- """
49
- if self.login_required and not self._is_logged_in:
50
- success = await self.requester.login()
51
- if not success:
52
- raise RuntimeError("Login failed")
53
- self._is_logged_in = True
54
-
55
- async def download_one(self, book_id: str) -> None:
56
- """
57
- The full download logic for a single book.
58
-
59
- :param book_id: The identifier of the book to download.
60
- """
61
- assert isinstance(self.requester, AsyncRequesterProtocol)
62
- await self.prepare()
63
-
64
- TAG = "[AsyncDownloader]"
65
- wait_time = self.config.request_interval
66
-
67
- raw_base = self.raw_data_dir / book_id
68
- cache_base = self.cache_dir / book_id
69
- info_path = raw_base / "book_info.json"
70
- chapters_html_dir = cache_base / "html"
71
-
72
- raw_base.mkdir(parents=True, exist_ok=True)
73
- if self.save_html:
74
- chapters_html_dir.mkdir(parents=True, exist_ok=True)
75
- normal_cs = ChapterStorage(
76
- raw_base=raw_base,
77
- namespace="chapters",
78
- backend_type=self._config.storage_backend,
79
- batch_size=self._config.storage_batch_size,
80
- )
81
-
82
- # load or fetch book_info
83
- book_info: dict[str, Any]
84
- re_fetch = True
85
- if info_path.exists():
86
- try:
87
- data = json.loads(info_path.read_text("utf-8"))
88
- days, *_ = calculate_time_difference(
89
- data.get("update_time", ""), "UTC+8"
90
- )
91
- re_fetch = days > 1
92
- except Exception:
93
- re_fetch = True
94
-
95
- if re_fetch:
96
- info_html = await self.requester.get_book_info(book_id)
97
- if self.save_html:
98
- for i, html in enumerate(info_html):
99
- save_as_txt(html, chapters_html_dir / f"info_{i}.html")
100
- book_info = self.parser.parse_book_info(info_html)
101
- if book_info.get("book_name") != "未找到书名":
102
- save_as_json(book_info, info_path)
103
- else:
104
- logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
105
- await asyncio.sleep(wait_time)
106
- else:
107
- book_info = json.loads(info_path.read_text("utf-8"))
108
-
109
- # setup queue, semaphore, executor
110
- semaphore = asyncio.Semaphore(self.download_workers)
111
- queue: asyncio.Queue[tuple[str, list[str]]] = asyncio.Queue()
112
- save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
113
- loop = asyncio.get_running_loop()
114
- executor = (
115
- ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
116
- )
117
-
118
- async def parser_worker(worker_id: int) -> None:
119
- while True:
120
- cid, html = await queue.get()
121
- try:
122
- chap_json = await loop.run_in_executor(
123
- executor, self.parser.parse_chapter, html, cid
124
- )
125
- if chap_json:
126
- await save_queue.put(chap_json)
127
- logger.info(
128
- "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
129
- )
130
- except Exception as e:
131
- logger.error(
132
- "%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
133
- )
134
- finally:
135
- queue.task_done()
136
-
137
- async def saver_loop(
138
- cs: ChapterStorage,
139
- queue: asyncio.Queue[ChapterDict],
140
- ) -> None:
141
- while True:
142
- data = await queue.get()
143
- try:
144
- cs.save(data)
145
- except Exception as e:
146
- logger.error(
147
- "[saver] Error saving chapter %s: %s",
148
- data.get("id"),
149
- e,
150
- )
151
- finally:
152
- queue.task_done()
153
-
154
- async def download_worker(chap: dict[str, Any]) -> None:
155
- cid = str(chap.get("chapterId") or "")
156
- if not cid:
157
- return
158
- if normal_cs.exists(cid) and self.skip_existing:
159
- logger.info("%s skipping existing chapter %s", TAG, cid)
160
- return
161
-
162
- try:
163
- async with semaphore:
164
- html = await self.requester.get_book_chapter(book_id, cid)
165
- await queue.put((cid, html))
166
- logger.info("%s downloaded chapter %s", TAG, cid)
167
- except Exception as e:
168
- logger.error("%s error downloading %s: %s", TAG, cid, e)
169
-
170
- # start parser workers
171
- parsers = [
172
- asyncio.create_task(parser_worker(i)) for i in range(self.parser_workers)
173
- ]
174
- chapter_saver = asyncio.create_task(saver_loop(normal_cs, save_queue))
175
-
176
- # enqueue + run downloads
177
- download_tasks = []
178
- for vol in book_info.get("volumes", []):
179
- for chap in vol.get("chapters", []):
180
- download_tasks.append(asyncio.create_task(download_worker(chap)))
181
-
182
- await asyncio.gather(*download_tasks)
183
- await queue.join() # wait until all parsed
184
- await save_queue.join()
185
- for p in parsers:
186
- p.cancel() # stop parser loops
187
- chapter_saver.cancel()
188
-
189
- # final save
190
- await loop.run_in_executor(executor, self.saver.save, book_id)
191
- executor.shutdown(wait=True)
192
-
193
- logger.info(
194
- "%s Novel '%s' download completed.",
195
- TAG,
196
- book_info.get("book_name", "unknown"),
197
- )
198
- return
199
-
200
- @property
201
- def parser_workers(self) -> int:
202
- return self.config.parser_workers
203
-
204
- @property
205
- def download_workers(self) -> int:
206
- return self.config.download_workers
207
-
208
- @property
209
- def use_process_pool(self) -> bool:
210
- return self.config.use_process_pool