novel-downloader 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. novel_downloader/__init__.py +14 -0
  2. novel_downloader/cli/__init__.py +14 -0
  3. novel_downloader/cli/clean.py +134 -0
  4. novel_downloader/cli/download.py +132 -0
  5. novel_downloader/cli/interactive.py +67 -0
  6. novel_downloader/cli/main.py +45 -0
  7. novel_downloader/cli/settings.py +177 -0
  8. novel_downloader/config/__init__.py +52 -0
  9. novel_downloader/config/adapter.py +153 -0
  10. novel_downloader/config/loader.py +177 -0
  11. novel_downloader/config/models.py +173 -0
  12. novel_downloader/config/site_rules.py +97 -0
  13. novel_downloader/core/__init__.py +25 -0
  14. novel_downloader/core/downloaders/__init__.py +22 -0
  15. novel_downloader/core/downloaders/base_async_downloader.py +157 -0
  16. novel_downloader/core/downloaders/base_downloader.py +187 -0
  17. novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
  18. novel_downloader/core/downloaders/common_downloader.py +191 -0
  19. novel_downloader/core/downloaders/qidian_downloader.py +208 -0
  20. novel_downloader/core/factory/__init__.py +33 -0
  21. novel_downloader/core/factory/downloader_factory.py +149 -0
  22. novel_downloader/core/factory/parser_factory.py +62 -0
  23. novel_downloader/core/factory/requester_factory.py +106 -0
  24. novel_downloader/core/factory/saver_factory.py +49 -0
  25. novel_downloader/core/interfaces/__init__.py +32 -0
  26. novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
  27. novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
  28. novel_downloader/core/interfaces/downloader_protocol.py +37 -0
  29. novel_downloader/core/interfaces/parser_protocol.py +40 -0
  30. novel_downloader/core/interfaces/requester_protocol.py +65 -0
  31. novel_downloader/core/interfaces/saver_protocol.py +61 -0
  32. novel_downloader/core/parsers/__init__.py +28 -0
  33. novel_downloader/core/parsers/base_parser.py +96 -0
  34. novel_downloader/core/parsers/common_parser/__init__.py +14 -0
  35. novel_downloader/core/parsers/common_parser/helper.py +321 -0
  36. novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
  37. novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
  38. novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
  39. novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
  40. novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
  41. novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
  42. novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
  43. novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
  44. novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
  45. novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
  46. novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
  47. novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
  48. novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
  49. novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
  50. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
  51. novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
  52. novel_downloader/core/requesters/__init__.py +31 -0
  53. novel_downloader/core/requesters/base_async_session.py +297 -0
  54. novel_downloader/core/requesters/base_browser.py +210 -0
  55. novel_downloader/core/requesters/base_session.py +243 -0
  56. novel_downloader/core/requesters/common_requester/__init__.py +18 -0
  57. novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
  58. novel_downloader/core/requesters/common_requester/common_session.py +126 -0
  59. novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
  60. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
  61. novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
  62. novel_downloader/core/savers/__init__.py +20 -0
  63. novel_downloader/core/savers/base_saver.py +169 -0
  64. novel_downloader/core/savers/common_saver/__init__.py +13 -0
  65. novel_downloader/core/savers/common_saver/common_epub.py +232 -0
  66. novel_downloader/core/savers/common_saver/common_txt.py +176 -0
  67. novel_downloader/core/savers/common_saver/main_saver.py +86 -0
  68. novel_downloader/core/savers/epub_utils/__init__.py +27 -0
  69. novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
  70. novel_downloader/core/savers/epub_utils/initializer.py +98 -0
  71. novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
  72. novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
  73. novel_downloader/core/savers/qidian_saver.py +22 -0
  74. novel_downloader/locales/en.json +91 -0
  75. novel_downloader/locales/zh.json +91 -0
  76. novel_downloader/resources/config/rules.toml +196 -0
  77. novel_downloader/resources/config/settings.yaml +73 -0
  78. novel_downloader/resources/css_styles/main.css +104 -0
  79. novel_downloader/resources/css_styles/volume-intro.css +56 -0
  80. novel_downloader/resources/images/volume_border.png +0 -0
  81. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
  82. novel_downloader/resources/json/replace_word_map.json +4 -0
  83. novel_downloader/resources/text/blacklist.txt +22 -0
  84. novel_downloader/utils/__init__.py +0 -0
  85. novel_downloader/utils/cache.py +24 -0
  86. novel_downloader/utils/constants.py +158 -0
  87. novel_downloader/utils/crypto_utils.py +144 -0
  88. novel_downloader/utils/file_utils/__init__.py +43 -0
  89. novel_downloader/utils/file_utils/io.py +252 -0
  90. novel_downloader/utils/file_utils/normalize.py +68 -0
  91. novel_downloader/utils/file_utils/sanitize.py +77 -0
  92. novel_downloader/utils/fontocr/__init__.py +23 -0
  93. novel_downloader/utils/fontocr/ocr_v1.py +304 -0
  94. novel_downloader/utils/fontocr/ocr_v2.py +658 -0
  95. novel_downloader/utils/hash_store.py +288 -0
  96. novel_downloader/utils/hash_utils.py +103 -0
  97. novel_downloader/utils/i18n.py +41 -0
  98. novel_downloader/utils/logger.py +104 -0
  99. novel_downloader/utils/model_loader.py +72 -0
  100. novel_downloader/utils/network.py +287 -0
  101. novel_downloader/utils/state.py +156 -0
  102. novel_downloader/utils/text_utils/__init__.py +27 -0
  103. novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
  104. novel_downloader/utils/text_utils/diff_display.py +75 -0
  105. novel_downloader/utils/text_utils/font_mapping.py +31 -0
  106. novel_downloader/utils/text_utils/text_cleaning.py +57 -0
  107. novel_downloader/utils/time_utils/__init__.py +22 -0
  108. novel_downloader/utils/time_utils/datetime_utils.py +146 -0
  109. novel_downloader/utils/time_utils/sleep_utils.py +49 -0
  110. novel_downloader-1.1.0.dist-info/METADATA +157 -0
  111. novel_downloader-1.1.0.dist-info/RECORD +115 -0
  112. novel_downloader-1.1.0.dist-info/WHEEL +5 -0
  113. novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
  114. novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
  115. novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.base_async_downloader
5
+ -------------------------------------------------------
6
+
7
+ Defines the abstract base class `BaseAsyncDownloader`, which provides a
8
+ common interface and reusable logic for all downloader implementations.
9
+ """
10
+
11
+ import abc
12
+ import logging
13
+ from pathlib import Path
14
+ from typing import List
15
+
16
+ from novel_downloader.config import DownloaderConfig
17
+ from novel_downloader.core.interfaces import (
18
+ AsyncRequesterProtocol,
19
+ ParserProtocol,
20
+ SaverProtocol,
21
+ )
22
+ from novel_downloader.core.interfaces.async_downloader_protocol import (
23
+ AsyncDownloaderProtocol,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
    """
    Shared foundation for asynchronous downloaders.

    Stores the requester / parser / saver / config components, creates the
    per-site raw-data and cache directories, and drives the batch loop in
    :meth:`download`. Concrete subclasses must provide :meth:`download_one`.
    """

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Store the components and make sure the working directories exist.

        :param requester: Async requester used to fetch raw pages.
        :param parser: Parser used to turn fetched pages into structured data.
        :param saver: Saver used to write the final output.
        :param config: Downloader configuration object.
        :param site: Site name; appended to the configured raw-data and
                     cache directories as a sub-directory.
        """
        self._requester = requester
        self._parser = parser
        self._saver = saver
        self._config = config
        self._site = site

        # Both roots are namespaced by site: <configured dir>/<site>
        self._raw_data_dir = Path(config.raw_data_dir) / site
        self._cache_dir = Path(config.cache_dir) / site
        for directory in (self._raw_data_dir, self._cache_dir):
            directory.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Component accessors
    # ------------------------------------------------------------------

    @property
    def requester(self) -> AsyncRequesterProtocol:
        """The requester instance passed to the constructor."""
        return self._requester

    @property
    def parser(self) -> ParserProtocol:
        """The parser instance passed to the constructor."""
        return self._parser

    @property
    def saver(self) -> SaverProtocol:
        """The saver instance passed to the constructor."""
        return self._saver

    @property
    def config(self) -> DownloaderConfig:
        """The downloader configuration passed to the constructor."""
        return self._config

    @property
    def raw_data_dir(self) -> Path:
        """Site-specific directory under ``config.raw_data_dir``."""
        return self._raw_data_dir

    @property
    def cache_dir(self) -> Path:
        """Site-specific directory under ``config.cache_dir``."""
        return self._cache_dir

    @property
    def site(self) -> str:
        """The site name passed to the constructor."""
        return self._site

    # ------------------------------------------------------------------
    # Configuration shortcuts (each forwards to the config object)
    # ------------------------------------------------------------------

    @property
    def save_html(self) -> bool:
        """Value of ``config.save_html``."""
        return self._config.save_html

    @property
    def skip_existing(self) -> bool:
        """Value of ``config.skip_existing``."""
        return self._config.skip_existing

    @property
    def login_required(self) -> bool:
        """Value of ``config.login_required``."""
        return self._config.login_required

    @property
    def request_interval(self) -> int:
        """Value of ``config.request_interval``."""
        return self._config.request_interval

    # ------------------------------------------------------------------
    # Download flow
    # ------------------------------------------------------------------

    async def prepare(self) -> None:
        """
        Optional setup hook; the default implementation does nothing.

        :meth:`download` awaits this once, before it starts iterating over
        the book IDs. Subclasses can override it to perform pre-download
        setup.
        """
        return None

    async def download(self, book_ids: List[str]) -> None:
        """
        Run the batch download:

        1. Await :meth:`prepare` once.
        2. For every book ID, await :meth:`download_one`; any ``Exception``
           is passed to :meth:`_handle_download_exception` and the loop
           moves on to the next ID.

        :param book_ids: A list of book identifiers to download.
        """
        await self.prepare()

        for position, book_id in enumerate(book_ids, start=1):
            logger.debug(
                "[%s] Starting download for %r (%s/%s)",
                type(self).__name__,
                book_id,
                position,
                len(book_ids),
            )
            try:
                await self.download_one(book_id)
            except Exception as exc:
                self._handle_download_exception(book_id, exc)

    @abc.abstractmethod
    async def download_one(self, book_id: str) -> None:
        """
        Download a single book in full.

        Abstract: every concrete subclass must implement this coroutine.

        :param book_id: The identifier of the book to download.
        """

    def _handle_download_exception(self, book_id: str, error: Exception) -> None:
        """
        Report a failed book download.

        The default implementation only logs a warning; subclasses may
        override or extend it (for example to add retry logic).

        :param book_id: The ID of the book that failed.
        :param error: The exception raised during download.
        """
        logger.warning(
            "[%s] Failed to download %r: %s",
            type(self).__name__,
            book_id,
            error,
        )
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.base_downloader
5
+ -------------------------------------------------
6
+
7
+ Defines the abstract base class `BaseDownloader`, which provides a
8
+ common interface and reusable logic for all downloader implementations.
9
+ """
10
+
11
+ import abc
12
+ import logging
13
+ from pathlib import Path
14
+ from typing import List
15
+
16
+ from novel_downloader.config import DownloaderConfig
17
+ from novel_downloader.core.interfaces import (
18
+ DownloaderProtocol,
19
+ ParserProtocol,
20
+ RequesterProtocol,
21
+ SaverProtocol,
22
+ )
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class BaseDownloader(DownloaderProtocol, abc.ABC):
    """
    Shared foundation for synchronous downloaders.

    Stores the requester / parser / saver / config components, creates the
    raw-data and cache directories, and drives the batch loop in
    :meth:`download`. Concrete subclasses must provide :meth:`download_one`.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Store the components and make sure the working directories exist.

        :param requester: Object implementing RequesterProtocol; fetches raw data.
        :param parser: Object implementing ParserProtocol; parses page content.
        :param saver: Object implementing SaverProtocol; saves the final output.
        :param config: Downloader configuration object.
        """
        self._requester = requester
        self._parser = parser
        self._saver = saver
        self._config = config
        self._raw_data_dir = Path(config.raw_data_dir)
        self._cache_dir = Path(config.cache_dir)
        for directory in (self._raw_data_dir, self._cache_dir):
            directory.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Download flow
    # ------------------------------------------------------------------

    @abc.abstractmethod
    def download_one(self, book_id: str) -> None:
        """
        Download a single book in full.

        Abstract: every concrete subclass must implement this method.

        :param book_id: The identifier of the book to download.
        """

    def download(self, book_ids: List[str]) -> None:
        """
        Run the batch download.

        Each book ID is handed to :meth:`download_one`; any ``Exception``
        raised for a book is passed to :meth:`_handle_download_exception`
        and the loop moves on to the next ID.

        :param book_ids: A list of book identifiers to download.
        """
        for position, book_id in enumerate(book_ids, start=1):
            try:
                logger.debug(
                    "[downloader] Starting download for book_id: %s (%s/%s)",
                    book_id,
                    position,
                    len(book_ids),
                )
                self.download_one(book_id)
            except Exception as exc:
                self._handle_download_exception(book_id, exc)

    def before_download(self, book_id: str) -> None:
        """
        Optional pre-download hook; the default implementation does nothing.

        Subclasses can override this method to perform pre-download setup.

        NOTE(review): :meth:`download` in this class does not call this hook
        itself; presumably subclasses invoke it from ``download_one`` --
        confirm against the concrete downloaders.

        :param book_id: The book ID about to be processed.
        """
        return None

    def _handle_download_exception(self, book_id: str, error: Exception) -> None:
        """
        Report a failed book download.

        The default implementation only logs a warning; subclasses may
        override or extend it (for example to add retry logic).

        :param book_id: The ID of the book that failed.
        :param error: The exception raised during download.
        """
        logger.warning("[downloader] Failed to download %s: %s", book_id, error)

    # ------------------------------------------------------------------
    # Read-only accessors
    # ------------------------------------------------------------------

    @property
    def requester(self) -> RequesterProtocol:
        """
        The requester currently in use.

        :return: The internal requester instance.
        """
        return self._requester

    @property
    def parser(self) -> ParserProtocol:
        """
        The parser currently in use.

        :return: The internal parser instance.
        """
        return self._parser

    @property
    def saver(self) -> SaverProtocol:
        """
        The saver currently in use.

        :return: The internal saver instance.
        """
        return self._saver

    @property
    def config(self) -> DownloaderConfig:
        """
        The downloader configuration.

        :return: The internal DownloaderConfig object.
        """
        return self._config

    @property
    def raw_data_dir(self) -> Path:
        """
        Directory built from ``config.raw_data_dir``.

        :return: Path to the raw data directory.
        """
        return self._raw_data_dir

    @property
    def cache_dir(self) -> Path:
        """
        Directory built from ``config.cache_dir``.

        :return: Path to the cache directory.
        """
        return self._cache_dir

    # ------------------------------------------------------------------
    # Component replacement
    # ------------------------------------------------------------------

    def set_requester(self, requester: RequesterProtocol) -> None:
        """
        Swap in a different requester.

        :param requester: The new requester to be used.
        """
        self._requester = requester

    def set_parser(self, parser: ParserProtocol) -> None:
        """
        Swap in a different parser.

        :param parser: The new parser to be used.
        """
        self._parser = parser

    def set_saver(self, saver: SaverProtocol) -> None:
        """
        Swap in a different saver.

        :param saver: The new saver to be used.
        """
        self._saver = saver
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.common_asynb_downloader
5
+ ---------------------------------------------------------
6
+
7
+ This module defines `CommonAsyncDownloader`.
8
+ """
9
+
10
+ import asyncio
11
+ import json
12
+ import logging
13
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
14
+ from typing import Any, Dict, Tuple
15
+
16
+ from novel_downloader.config import DownloaderConfig
17
+ from novel_downloader.core.interfaces import (
18
+ AsyncRequesterProtocol,
19
+ ParserProtocol,
20
+ SaverProtocol,
21
+ )
22
+ from novel_downloader.utils.file_utils import save_as_json, save_as_txt
23
+ from novel_downloader.utils.network import download_image_as_bytes
24
+ from novel_downloader.utils.time_utils import calculate_time_difference
25
+
26
+ from .base_async_downloader import BaseAsyncDownloader
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class CommonAsyncDownloader(BaseAsyncDownloader):
    """
    Specialized Async downloader for common novels.

    Chapters are fetched concurrently (bounded by ``download_workers``),
    handed through an in-memory queue to ``parser_workers`` parser tasks,
    and parsed / written to disk on a thread or process pool.
    """

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Initialize the downloader with its components.

        :param requester: Async requester used to fetch raw pages.
        :param parser: Parser used to parse book info and chapters.
        :param saver: Saver used to write the final output.
        :param config: Downloader configuration object.
        :param site: Site name, forwarded to the base class.
        """
        super().__init__(requester, parser, saver, config, site)
        # Remember a successful login so later prepare() calls skip it.
        self._is_logged_in = False

    async def prepare(self) -> None:
        """
        Log in through the requester when the configuration requires it.

        Does nothing when login is not required or a previous call already
        succeeded.

        :raises RuntimeError: If the requester reports that login failed.
        """
        if self.login_required and not self._is_logged_in:
            success = await self.requester.login(max_retries=3)
            if not success:
                raise RuntimeError("Login failed")
            self._is_logged_in = True

    async def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Steps:

        1. Load the cached ``book_info.json`` or fetch and parse it again.
        2. Download the cover image, if a cover URL is present.
        3. Fetch every chapter concurrently and parse / save each one.
        4. Ask the saver to write the final output.

        The worker pool and the parser tasks are always released, even when
        a step raises or this coroutine is cancelled.

        :param book_id: The identifier of the book to download.
        """
        assert isinstance(self.requester, AsyncRequesterProtocol)

        TAG = "[AsyncDownloader]"
        raw_base = self.raw_data_dir / book_id
        cache_base = self.cache_dir / book_id
        info_path = raw_base / "book_info.json"
        chapters_html_dir = cache_base / "html"
        chapter_dir = raw_base / "chapters"

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)
        if self.save_html:
            chapters_html_dir.mkdir(parents=True, exist_ok=True)

        # Load or fetch book_info. The cached copy is reused unless the first
        # value returned by calculate_time_difference (presumably the number
        # of elapsed days since "update_time") exceeds 1, or the cache cannot
        # be read / interpreted at all.
        book_info: Dict[str, Any] = {}
        re_fetch = True
        if info_path.exists():
            try:
                cached = json.loads(info_path.read_text("utf-8"))
                days, *_ = calculate_time_difference(
                    cached.get("update_time", ""), "UTC+8"
                )
                re_fetch = days > 1
                if not re_fetch:
                    # Reuse the data just parsed instead of reading the
                    # same file from disk a second time.
                    book_info = cached
            except Exception:
                re_fetch = True

        if re_fetch:
            info_html = await self.requester.get_book_info(
                book_id, self.request_interval
            )
            if self.save_html:
                save_as_txt(info_html, chapters_html_dir / "info.html")
            book_info = self.parser.parse_book_info(info_html)
            # "未找到书名" is the value compared against to detect that the
            # parser found no book name; such a result is not cached.
            if book_info.get("book_name") != "未找到书名":
                save_as_json(book_info, info_path)
            else:
                logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)

        loop = asyncio.get_running_loop()

        # Download cover on the loop's default executor.
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            await loop.run_in_executor(
                None, download_image_as_bytes, cover_url, raw_base
            )

        # Set up queue, semaphore, executor.
        semaphore = asyncio.Semaphore(self.download_workers)
        queue: asyncio.Queue[Tuple[str, str]] = asyncio.Queue()
        executor = (
            ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
        )

        async def parser_worker(worker_id: int) -> None:
            # Runs until cancelled: take (chapter id, html) pairs off the
            # queue, parse them on the executor and save non-empty results.
            while True:
                cid, html = await queue.get()
                try:
                    chap_json = await loop.run_in_executor(
                        executor, self.parser.parse_chapter, html, cid
                    )
                    if chap_json:
                        await loop.run_in_executor(
                            executor,
                            save_as_json,
                            chap_json,
                            chapter_dir / f"{cid}.json",
                        )
                        logger.info(
                            "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
                        )
                except Exception as e:
                    logger.error(
                        "%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
                    )
                finally:
                    # Always acknowledge the item so queue.join() can return.
                    queue.task_done()

        async def download_worker(chap: Dict[str, Any]) -> None:
            # Fetch one chapter (unless it can be skipped) and enqueue its
            # HTML for the parser workers. Errors are logged, not raised.
            cid = str(chap.get("chapterId") or "")
            if not cid:
                return
            target = chapter_dir / f"{cid}.json"
            if target.exists() and self.skip_existing:
                logger.info("%s skipping existing chapter %s", TAG, cid)
                return

            try:
                async with semaphore:
                    html = await self.requester.get_book_chapter(
                        book_id, cid, self.request_interval
                    )
                if self.save_html:
                    await loop.run_in_executor(
                        executor,
                        save_as_txt,
                        html,
                        chapters_html_dir / f"{cid}.html",
                    )
                await queue.put((cid, html))
                logger.info("%s downloaded chapter %s", TAG, cid)
            except Exception as e:
                logger.error("%s error downloading %s: %s", TAG, cid, e)

        try:
            # Start parser workers.
            parsers = [
                asyncio.create_task(parser_worker(i))
                for i in range(self.parser_workers)
            ]
            try:
                # Enqueue + run downloads.
                download_tasks = [
                    asyncio.create_task(download_worker(chap))
                    for vol in book_info.get("volumes", [])
                    for chap in vol.get("chapters", [])
                ]
                await asyncio.gather(*download_tasks)
                await queue.join()  # wait until all parsed
            finally:
                # Stop the parser loops on every exit path and wait for the
                # cancellations to be delivered before moving on.
                for p in parsers:
                    p.cancel()
                await asyncio.gather(*parsers, return_exceptions=True)

            # Final save.
            await loop.run_in_executor(executor, self.saver.save, book_id)
        finally:
            # Release the pool even if a step above raised or was cancelled.
            executor.shutdown(wait=True)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @property
    def parser_workers(self) -> int:
        """Value of ``config.parser_workers``; number of parser tasks started."""
        return self.config.parser_workers

    @property
    def download_workers(self) -> int:
        """Value of ``config.download_workers``; size of the fetch semaphore."""
        return self.config.download_workers

    @property
    def use_process_pool(self) -> bool:
        """Value of ``config.use_process_pool``; selects process vs. thread pool."""
        return self.config.use_process_pool