novel-downloader 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. novel_downloader/__init__.py +1 -2
  2. novel_downloader/cli/__init__.py +0 -1
  3. novel_downloader/cli/clean.py +2 -10
  4. novel_downloader/cli/download.py +16 -22
  5. novel_downloader/cli/interactive.py +0 -1
  6. novel_downloader/cli/main.py +1 -3
  7. novel_downloader/cli/settings.py +8 -8
  8. novel_downloader/config/__init__.py +0 -1
  9. novel_downloader/config/adapter.py +32 -27
  10. novel_downloader/config/loader.py +116 -108
  11. novel_downloader/config/models.py +35 -29
  12. novel_downloader/config/site_rules.py +2 -4
  13. novel_downloader/core/__init__.py +0 -1
  14. novel_downloader/core/downloaders/__init__.py +4 -4
  15. novel_downloader/core/downloaders/base/__init__.py +14 -0
  16. novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
  17. novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
  18. novel_downloader/core/downloaders/biquge/__init__.py +12 -0
  19. novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
  20. novel_downloader/core/downloaders/common/__init__.py +14 -0
  21. novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
  22. novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
  23. novel_downloader/core/downloaders/qidian/__init__.py +10 -0
  24. novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
  25. novel_downloader/core/factory/__init__.py +4 -5
  26. novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
  27. novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
  28. novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
  29. novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
  30. novel_downloader/core/interfaces/__init__.py +8 -9
  31. novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
  32. novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
  33. novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
  34. novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
  35. novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
  36. novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
  37. novel_downloader/core/parsers/__init__.py +5 -4
  38. novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
  39. novel_downloader/core/parsers/biquge/__init__.py +10 -0
  40. novel_downloader/core/parsers/biquge/main_parser.py +126 -0
  41. novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
  42. novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
  43. novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
  44. novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
  45. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
  46. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
  47. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
  48. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
  49. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
  50. novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
  51. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
  52. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
  53. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
  54. novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
  55. novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
  56. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
  57. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
  58. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
  59. novel_downloader/core/requesters/__init__.py +9 -5
  60. novel_downloader/core/requesters/base/__init__.py +16 -0
  61. novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
  62. novel_downloader/core/requesters/base/browser.py +340 -0
  63. novel_downloader/core/requesters/base/session.py +364 -0
  64. novel_downloader/core/requesters/biquge/__init__.py +12 -0
  65. novel_downloader/core/requesters/biquge/session.py +90 -0
  66. novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
  67. novel_downloader/core/requesters/common/async_session.py +96 -0
  68. novel_downloader/core/requesters/common/session.py +113 -0
  69. novel_downloader/core/requesters/qidian/__init__.py +21 -0
  70. novel_downloader/core/requesters/qidian/broswer.py +306 -0
  71. novel_downloader/core/requesters/qidian/session.py +287 -0
  72. novel_downloader/core/savers/__init__.py +5 -3
  73. novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
  74. novel_downloader/core/savers/biquge.py +25 -0
  75. novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
  76. novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
  77. novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
  78. novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
  79. novel_downloader/core/savers/epub_utils/__init__.py +0 -1
  80. novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
  81. novel_downloader/core/savers/epub_utils/initializer.py +4 -5
  82. novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
  83. novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
  84. novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
  85. novel_downloader/locales/en.json +8 -4
  86. novel_downloader/locales/zh.json +5 -1
  87. novel_downloader/resources/config/settings.toml +88 -0
  88. novel_downloader/utils/cache.py +2 -2
  89. novel_downloader/utils/chapter_storage.py +340 -0
  90. novel_downloader/utils/constants.py +6 -4
  91. novel_downloader/utils/crypto_utils.py +3 -3
  92. novel_downloader/utils/file_utils/__init__.py +0 -1
  93. novel_downloader/utils/file_utils/io.py +12 -17
  94. novel_downloader/utils/file_utils/normalize.py +1 -3
  95. novel_downloader/utils/file_utils/sanitize.py +2 -9
  96. novel_downloader/utils/fontocr/__init__.py +0 -1
  97. novel_downloader/utils/fontocr/ocr_v1.py +19 -22
  98. novel_downloader/utils/fontocr/ocr_v2.py +147 -60
  99. novel_downloader/utils/hash_store.py +19 -20
  100. novel_downloader/utils/hash_utils.py +0 -1
  101. novel_downloader/utils/i18n.py +3 -4
  102. novel_downloader/utils/logger.py +5 -6
  103. novel_downloader/utils/model_loader.py +5 -8
  104. novel_downloader/utils/network.py +9 -10
  105. novel_downloader/utils/state.py +6 -7
  106. novel_downloader/utils/text_utils/__init__.py +0 -1
  107. novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
  108. novel_downloader/utils/text_utils/diff_display.py +0 -1
  109. novel_downloader/utils/text_utils/font_mapping.py +1 -4
  110. novel_downloader/utils/text_utils/text_cleaning.py +0 -1
  111. novel_downloader/utils/time_utils/__init__.py +0 -1
  112. novel_downloader/utils/time_utils/datetime_utils.py +8 -10
  113. novel_downloader/utils/time_utils/sleep_utils.py +1 -3
  114. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
  115. novel_downloader-1.3.0.dist-info/RECORD +127 -0
  116. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
  117. novel_downloader/core/requesters/base_browser.py +0 -214
  118. novel_downloader/core/requesters/base_session.py +0 -246
  119. novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
  120. novel_downloader/core/requesters/common_requester/common_session.py +0 -126
  121. novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
  122. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
  123. novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
  124. novel_downloader/resources/config/settings.yaml +0 -76
  125. novel_downloader-1.2.2.dist-info/RECORD +0 -115
  126. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
  127. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
  128. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.downloaders.base_downloader
5
- -------------------------------------------------
3
+ novel_downloader.core.downloaders.base.base_sync
4
+ ------------------------------------------------
6
5
 
7
6
  Defines the abstract base class `BaseDownloader`, which provides a
8
7
  common interface and reusable logic for all downloader implementations.
@@ -11,20 +10,17 @@ common interface and reusable logic for all downloader implementations.
11
10
  import abc
12
11
  import logging
13
12
  from pathlib import Path
14
- from typing import List
15
13
 
16
14
  from novel_downloader.config import DownloaderConfig
17
15
  from novel_downloader.core.interfaces import (
18
- DownloaderProtocol,
19
16
  ParserProtocol,
20
- RequesterProtocol,
21
17
  SaverProtocol,
18
+ SyncDownloaderProtocol,
19
+ SyncRequesterProtocol,
22
20
  )
23
21
 
24
- logger = logging.getLogger(__name__)
25
22
 
26
-
27
- class BaseDownloader(DownloaderProtocol, abc.ABC):
23
+ class BaseDownloader(SyncDownloaderProtocol, abc.ABC):
28
24
  """
29
25
  Abstract downloader that defines the initialization interface
30
26
  and the general batch download flow.
@@ -34,10 +30,11 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
34
30
 
35
31
  def __init__(
36
32
  self,
37
- requester: RequesterProtocol,
33
+ requester: SyncRequesterProtocol,
38
34
  parser: ParserProtocol,
39
35
  saver: SaverProtocol,
40
36
  config: DownloaderConfig,
37
+ site: str,
41
38
  ):
42
39
  """
43
40
  Initialize the downloader with its components.
@@ -51,23 +48,16 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
51
48
  self._parser = parser
52
49
  self._saver = saver
53
50
  self._config = config
54
- self._raw_data_dir = Path(config.raw_data_dir)
55
- self._cache_dir = Path(config.cache_dir)
51
+ self._site = site
52
+
53
+ self._raw_data_dir = Path(config.raw_data_dir) / site
54
+ self._cache_dir = Path(config.cache_dir) / site
56
55
  self._raw_data_dir.mkdir(parents=True, exist_ok=True)
57
56
  self._cache_dir.mkdir(parents=True, exist_ok=True)
58
57
 
59
- @abc.abstractmethod
60
- def download_one(self, book_id: str) -> None:
61
- """
62
- The full download logic for a single book.
58
+ self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
63
59
 
64
- Subclasses must implement this method.
65
-
66
- :param book_id: The identifier of the book to download.
67
- """
68
- ...
69
-
70
- def download(self, book_ids: List[str]) -> None:
60
+ def download(self, book_ids: list[str]) -> None:
71
61
  """
72
62
  The general batch download process:
73
63
  1. Iterate over all book IDs
@@ -75,41 +65,41 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
75
65
 
76
66
  :param book_ids: A list of book identifiers to download.
77
67
  """
68
+ self.prepare()
69
+
78
70
  for idx, book_id in enumerate(book_ids, start=1):
71
+ self.logger.debug(
72
+ "[downloader] Starting download for book_id: %s (%s/%s)",
73
+ book_id,
74
+ idx,
75
+ len(book_ids),
76
+ )
79
77
  try:
80
- logger.debug(
81
- "[downloader] Starting download for book_id: %s (%s/%s)",
82
- book_id,
83
- idx,
84
- len(book_ids),
85
- )
86
78
  self.download_one(book_id)
87
79
  except Exception as e:
88
80
  self._handle_download_exception(book_id, e)
89
81
 
90
- def before_download(self, book_id: str) -> None:
82
+ @abc.abstractmethod
83
+ def download_one(self, book_id: str) -> None:
91
84
  """
92
- Optional hook called before downloading each book.
85
+ The full download logic for a single book.
93
86
 
94
- Subclasses can override this method to perform pre-download setup.
87
+ Subclasses must implement this method.
95
88
 
96
- :param book_id: The book ID about to be processed.
89
+ :param book_id: The identifier of the book to download.
97
90
  """
98
- pass
91
+ ...
99
92
 
100
- def _handle_download_exception(self, book_id: str, error: Exception) -> None:
93
+ def prepare(self) -> None:
101
94
  """
102
- Handle download errors in a consistent way.
103
-
104
- This method can be overridden or extended to implement retry logic, etc.
95
+ Optional hook called before downloading each book.
105
96
 
106
- :param book_id: The ID of the book that failed.
107
- :param error: The exception raised during download.
97
+ Subclasses can override this method to perform pre-download setup.
108
98
  """
109
- logger.warning("[downloader] Failed to download %s: %s", book_id, error)
99
+ return
110
100
 
111
101
  @property
112
- def requester(self) -> RequesterProtocol:
102
+ def requester(self) -> SyncRequesterProtocol:
113
103
  """
114
104
  Access the current requester.
115
105
 
@@ -162,7 +152,27 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
162
152
  """
163
153
  return self._cache_dir
164
154
 
165
- def set_requester(self, requester: RequesterProtocol) -> None:
155
+ @property
156
+ def site(self) -> str:
157
+ return self._site
158
+
159
+ @property
160
+ def save_html(self) -> bool:
161
+ return self._config.save_html
162
+
163
+ @property
164
+ def skip_existing(self) -> bool:
165
+ return self._config.skip_existing
166
+
167
+ @property
168
+ def login_required(self) -> bool:
169
+ return self._config.login_required
170
+
171
+ @property
172
+ def request_interval(self) -> float:
173
+ return self._config.request_interval
174
+
175
+ def set_requester(self, requester: SyncRequesterProtocol) -> None:
166
176
  """
167
177
  Replace the requester instance with a new one.
168
178
 
@@ -185,3 +195,14 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
185
195
  :param saver: The new saver to be used.
186
196
  """
187
197
  self._saver = saver
198
+
199
+ def _handle_download_exception(self, book_id: str, error: Exception) -> None:
200
+ """
201
+ Handle download errors in a consistent way.
202
+
203
+ This method can be overridden or extended to implement retry logic, etc.
204
+
205
+ :param book_id: The ID of the book that failed.
206
+ :param error: The exception raised during download.
207
+ """
208
+ self.logger.warning("[downloader] Failed to download %s: %s", book_id, error)
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.biquge
4
+ ----------------------------------------
5
+
6
+ """
7
+
8
+ from .biquge_sync import BiqugeDownloader
9
+
10
+ __all__ = [
11
+ "BiqugeDownloader",
12
+ ]
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.biquge.biquge_sync
4
+ ----------------------------------------------------
5
+
6
+ """
7
+
8
+ from novel_downloader.config.models import DownloaderConfig
9
+ from novel_downloader.core.downloaders.common import CommonDownloader
10
+ from novel_downloader.core.interfaces.parser import ParserProtocol
11
+ from novel_downloader.core.interfaces.saver import SaverProtocol
12
+ from novel_downloader.core.interfaces.sync_requester import SyncRequesterProtocol
13
+
14
+
15
+ class BiqugeDownloader(CommonDownloader):
16
+ """"""
17
+
18
+ def __init__(
19
+ self,
20
+ requester: SyncRequesterProtocol,
21
+ parser: ParserProtocol,
22
+ saver: SaverProtocol,
23
+ config: DownloaderConfig,
24
+ ):
25
+ super().__init__(requester, parser, saver, config, "biquge")
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.common
4
+ ----------------------------------------
5
+
6
+ """
7
+
8
+ from .common_async import CommonAsyncDownloader
9
+ from .common_sync import CommonDownloader
10
+
11
+ __all__ = [
12
+ "CommonAsyncDownloader",
13
+ "CommonDownloader",
14
+ ]
@@ -1,30 +1,28 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.downloaders.common_asynb_downloader
5
- ---------------------------------------------------------
3
+ novel_downloader.core.downloaders.common.common_async
4
+ -----------------------------------------------------
6
5
 
7
- This module defines `CommonAsynbDownloader`.
8
6
  """
9
7
 
10
8
  import asyncio
11
9
  import json
12
10
  import logging
13
11
  from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
14
- from typing import Any, Dict, Tuple
12
+ from typing import Any
15
13
 
16
14
  from novel_downloader.config import DownloaderConfig
15
+ from novel_downloader.core.downloaders.base import BaseAsyncDownloader
17
16
  from novel_downloader.core.interfaces import (
18
17
  AsyncRequesterProtocol,
19
18
  ParserProtocol,
20
19
  SaverProtocol,
21
20
  )
21
+ from novel_downloader.utils.chapter_storage import ChapterDict, ChapterStorage
22
22
  from novel_downloader.utils.file_utils import save_as_json, save_as_txt
23
23
  from novel_downloader.utils.network import download_image_as_bytes
24
24
  from novel_downloader.utils.time_utils import calculate_time_difference
25
25
 
26
- from .base_async_downloader import BaseAsyncDownloader
27
-
28
26
  logger = logging.getLogger(__name__)
29
27
 
30
28
 
@@ -64,19 +62,25 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
64
62
  assert isinstance(self.requester, AsyncRequesterProtocol)
65
63
 
66
64
  TAG = "[AsyncDownloader]"
65
+ wait_time = self.config.request_interval
66
+
67
67
  raw_base = self.raw_data_dir / book_id
68
68
  cache_base = self.cache_dir / book_id
69
69
  info_path = raw_base / "book_info.json"
70
70
  chapters_html_dir = cache_base / "html"
71
- chapter_dir = raw_base / "chapters"
72
71
 
73
72
  raw_base.mkdir(parents=True, exist_ok=True)
74
- chapter_dir.mkdir(parents=True, exist_ok=True)
75
73
  if self.save_html:
76
74
  chapters_html_dir.mkdir(parents=True, exist_ok=True)
75
+ normal_cs = ChapterStorage(
76
+ raw_base=raw_base,
77
+ namespace="chapters",
78
+ backend_type=self._config.storage_backend,
79
+ batch_size=self._config.storage_batch_size,
80
+ )
77
81
 
78
82
  # load or fetch book_info
79
- book_info: Dict[str, Any]
83
+ book_info: dict[str, Any]
80
84
  re_fetch = True
81
85
  if info_path.exists():
82
86
  try:
@@ -89,9 +93,7 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
89
93
  re_fetch = True
90
94
 
91
95
  if re_fetch:
92
- info_html = await self.requester.get_book_info(
93
- book_id, self.request_interval
94
- )
96
+ info_html = await self.requester.get_book_info(book_id)
95
97
  if self.save_html:
96
98
  save_as_txt(info_html, chapters_html_dir / "info.html")
97
99
  book_info = self.parser.parse_book_info(info_html)
@@ -99,6 +101,7 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
99
101
  save_as_json(book_info, info_path)
100
102
  else:
101
103
  logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
104
+ await asyncio.sleep(wait_time)
102
105
  else:
103
106
  book_info = json.loads(info_path.read_text("utf-8"))
104
107
 
@@ -111,7 +114,8 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
111
114
 
112
115
  # setup queue, semaphore, executor
113
116
  semaphore = asyncio.Semaphore(self.download_workers)
114
- queue: asyncio.Queue[Tuple[str, str]] = asyncio.Queue()
117
+ queue: asyncio.Queue[tuple[str, str]] = asyncio.Queue()
118
+ save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
115
119
  loop = asyncio.get_running_loop()
116
120
  executor = (
117
121
  ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
@@ -125,12 +129,7 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
125
129
  executor, self.parser.parse_chapter, html, cid
126
130
  )
127
131
  if chap_json:
128
- await loop.run_in_executor(
129
- executor,
130
- save_as_json,
131
- chap_json,
132
- chapter_dir / f"{cid}.json",
133
- )
132
+ await save_queue.put(chap_json)
134
133
  logger.info(
135
134
  "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
136
135
  )
@@ -141,27 +140,34 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
141
140
  finally:
142
141
  queue.task_done()
143
142
 
144
- async def download_worker(chap: Dict[str, Any]) -> None:
143
+ async def saver_loop(
144
+ cs: ChapterStorage,
145
+ queue: asyncio.Queue[ChapterDict],
146
+ ) -> None:
147
+ while True:
148
+ data = await queue.get()
149
+ try:
150
+ cs.save(data)
151
+ except Exception as e:
152
+ logger.error(
153
+ "[saver] Error saving chapter %s: %s",
154
+ data.get("id"),
155
+ e,
156
+ )
157
+ finally:
158
+ queue.task_done()
159
+
160
+ async def download_worker(chap: dict[str, Any]) -> None:
145
161
  cid = str(chap.get("chapterId") or "")
146
162
  if not cid:
147
163
  return
148
- target = chapter_dir / f"{cid}.json"
149
- if target.exists() and self.skip_existing:
164
+ if normal_cs.exists(cid) and self.skip_existing:
150
165
  logger.info("%s skipping existing chapter %s", TAG, cid)
151
166
  return
152
167
 
153
168
  try:
154
169
  async with semaphore:
155
- html = await self.requester.get_book_chapter(
156
- book_id, cid, self.request_interval
157
- )
158
- if self.save_html:
159
- await loop.run_in_executor(
160
- executor,
161
- save_as_txt,
162
- html,
163
- chapters_html_dir / f"{cid}.html",
164
- )
170
+ html = await self.requester.get_book_chapter(book_id, cid)
165
171
  await queue.put((cid, html))
166
172
  logger.info("%s downloaded chapter %s", TAG, cid)
167
173
  except Exception as e:
@@ -171,6 +177,7 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
171
177
  parsers = [
172
178
  asyncio.create_task(parser_worker(i)) for i in range(self.parser_workers)
173
179
  ]
180
+ chapter_saver = asyncio.create_task(saver_loop(normal_cs, save_queue))
174
181
 
175
182
  # enqueue + run downloads
176
183
  download_tasks = []
@@ -180,8 +187,10 @@ class CommonAsyncDownloader(BaseAsyncDownloader):
180
187
 
181
188
  await asyncio.gather(*download_tasks)
182
189
  await queue.join() # wait until all parsed
190
+ await save_queue.join()
183
191
  for p in parsers:
184
192
  p.cancel() # stop parser loops
193
+ chapter_saver.cancel()
185
194
 
186
195
  # final save
187
196
  await loop.run_in_executor(executor, self.saver.save, book_id)
@@ -1,27 +1,29 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.downloaders.common_downloader
5
- ---------------------------------------------------
3
+ novel_downloader.core.downloaders.common.common_sync
4
+ ----------------------------------------------------
6
5
 
7
6
  This module defines `CommonDownloader`.
8
7
  """
9
8
 
10
9
  import json
11
10
  import logging
12
- from typing import Any, Dict
11
+ from typing import Any
13
12
 
14
13
  from novel_downloader.config import DownloaderConfig
14
+ from novel_downloader.core.downloaders.base import BaseDownloader
15
15
  from novel_downloader.core.interfaces import (
16
16
  ParserProtocol,
17
- RequesterProtocol,
18
17
  SaverProtocol,
18
+ SyncRequesterProtocol,
19
19
  )
20
+ from novel_downloader.utils.chapter_storage import ChapterStorage
20
21
  from novel_downloader.utils.file_utils import save_as_json, save_as_txt
21
22
  from novel_downloader.utils.network import download_image_as_bytes
22
- from novel_downloader.utils.time_utils import calculate_time_difference
23
-
24
- from .base_downloader import BaseDownloader
23
+ from novel_downloader.utils.time_utils import (
24
+ calculate_time_difference,
25
+ sleep_with_random_delay,
26
+ )
25
27
 
26
28
  logger = logging.getLogger(__name__)
27
29
 
@@ -33,7 +35,7 @@ class CommonDownloader(BaseDownloader):
33
35
 
34
36
  def __init__(
35
37
  self,
36
- requester: RequesterProtocol,
38
+ requester: SyncRequesterProtocol,
37
39
  parser: ParserProtocol,
38
40
  saver: SaverProtocol,
39
41
  config: DownloaderConfig,
@@ -48,7 +50,7 @@ class CommonDownloader(BaseDownloader):
48
50
  :param config: Downloader configuration object.
49
51
  :param site: Identifier for the site the downloader is targeting.
50
52
  """
51
- super().__init__(requester, parser, saver, config)
53
+ super().__init__(requester, parser, saver, config, site)
52
54
  self._site = site
53
55
 
54
56
  def download_one(self, book_id: str) -> None:
@@ -60,19 +62,24 @@ class CommonDownloader(BaseDownloader):
60
62
  TAG = "[Downloader]"
61
63
  save_html = self.config.save_html
62
64
  skip_existing = self.config.skip_existing
63
- site = self.site
64
65
  wait_time = self.config.request_interval
65
66
 
66
- raw_base = self.raw_data_dir / site / book_id
67
- cache_base = self.cache_dir / site / book_id
67
+ raw_base = self.raw_data_dir / book_id
68
+ cache_base = self.cache_dir / book_id
68
69
  info_path = raw_base / "book_info.json"
69
- chapter_dir = raw_base / "chapters"
70
70
  chapters_html_dir = cache_base / "html"
71
71
 
72
72
  raw_base.mkdir(parents=True, exist_ok=True)
73
- chapter_dir.mkdir(parents=True, exist_ok=True)
73
+ if self.save_html:
74
+ chapters_html_dir.mkdir(parents=True, exist_ok=True)
75
+ normal_cs = ChapterStorage(
76
+ raw_base=raw_base,
77
+ namespace="chapters",
78
+ backend_type=self._config.storage_backend,
79
+ batch_size=self._config.storage_batch_size,
80
+ )
74
81
 
75
- book_info: Dict[str, Any]
82
+ book_info: dict[str, Any]
76
83
 
77
84
  try:
78
85
  if not info_path.exists():
@@ -87,7 +94,7 @@ class CommonDownloader(BaseDownloader):
87
94
  if days > 1:
88
95
  raise FileNotFoundError # trigger re-fetch
89
96
  except Exception:
90
- info_html = self.requester.get_book_info(book_id, wait_time)
97
+ info_html = self.requester.get_book_info(book_id)
91
98
  if save_html:
92
99
  info_html_path = chapters_html_dir / "info.html"
93
100
  save_as_txt(info_html, info_html_path)
@@ -97,6 +104,7 @@ class CommonDownloader(BaseDownloader):
97
104
  and book_info.get("update_time", "") != "未找到更新时间"
98
105
  ):
99
106
  save_as_json(book_info, info_path)
107
+ sleep_with_random_delay(wait_time, mul_spread=1.1, max_sleep=wait_time + 2)
100
108
 
101
109
  # download cover
102
110
  cover_url = book_info.get("cover_url", "")
@@ -116,8 +124,7 @@ class CommonDownloader(BaseDownloader):
116
124
  logger.warning("%s Skipping chapter without chapterId", TAG)
117
125
  continue
118
126
 
119
- chap_path = chapter_dir / f"{cid}.json"
120
- if chap_path.exists() and skip_existing:
127
+ if normal_cs.exists(cid) and skip_existing:
121
128
  logger.debug(
122
129
  "%s Chapter already exists, skipping: %s",
123
130
  TAG,
@@ -128,7 +135,7 @@ class CommonDownloader(BaseDownloader):
128
135
  chap_title = chap.get("title", "")
129
136
  logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
130
137
  try:
131
- chap_html = self.requester.get_book_chapter(book_id, cid, wait_time)
138
+ chap_html = self.requester.get_book_chapter(book_id, cid)
132
139
 
133
140
  if save_html:
134
141
  html_path = chapters_html_dir / f"{cid}.html"
@@ -141,6 +148,10 @@ class CommonDownloader(BaseDownloader):
141
148
  )
142
149
 
143
150
  chap_json = self.parser.parse_chapter(chap_html, cid)
151
+
152
+ sleep_with_random_delay(
153
+ wait_time, mul_spread=1.1, max_sleep=wait_time + 2
154
+ )
144
155
  if not chap_json:
145
156
  logger.warning(
146
157
  "%s Parsed chapter json is empty, skipping: %s (%s)",
@@ -159,9 +170,10 @@ class CommonDownloader(BaseDownloader):
159
170
  )
160
171
  continue
161
172
 
162
- save_as_json(chap_json, chap_path)
173
+ normal_cs.save(chap_json)
163
174
  logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)
164
175
 
176
+ normal_cs.close()
165
177
  self.saver.save(book_id)
166
178
 
167
179
  logger.info(
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.qidian
4
+ ----------------------------------------
5
+
6
+ """
7
+
8
+ from .qidian_sync import QidianDownloader
9
+
10
+ __all__ = ["QidianDownloader"]