novel_downloader-2.0.1-py3-none-any.whl → novel_downloader-2.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +11 -8
  3. novel_downloader/cli/export.py +17 -17
  4. novel_downloader/cli/ui.py +28 -1
  5. novel_downloader/config/adapter.py +27 -1
  6. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  7. novel_downloader/core/downloaders/__init__.py +2 -0
  8. novel_downloader/core/downloaders/base.py +34 -85
  9. novel_downloader/core/downloaders/common.py +147 -171
  10. novel_downloader/core/downloaders/qianbi.py +30 -64
  11. novel_downloader/core/downloaders/qidian.py +157 -184
  12. novel_downloader/core/downloaders/qqbook.py +292 -0
  13. novel_downloader/core/downloaders/registry.py +2 -2
  14. novel_downloader/core/exporters/__init__.py +2 -0
  15. novel_downloader/core/exporters/base.py +37 -59
  16. novel_downloader/core/exporters/common.py +620 -0
  17. novel_downloader/core/exporters/linovelib.py +47 -0
  18. novel_downloader/core/exporters/qidian.py +41 -12
  19. novel_downloader/core/exporters/qqbook.py +28 -0
  20. novel_downloader/core/exporters/registry.py +2 -2
  21. novel_downloader/core/fetchers/__init__.py +4 -2
  22. novel_downloader/core/fetchers/aaatxt.py +2 -22
  23. novel_downloader/core/fetchers/b520.py +3 -23
  24. novel_downloader/core/fetchers/base.py +80 -105
  25. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  26. novel_downloader/core/fetchers/dxmwx.py +10 -22
  27. novel_downloader/core/fetchers/esjzone.py +6 -29
  28. novel_downloader/core/fetchers/guidaye.py +2 -22
  29. novel_downloader/core/fetchers/hetushu.py +9 -29
  30. novel_downloader/core/fetchers/i25zw.py +2 -16
  31. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  32. novel_downloader/core/fetchers/jpxs123.py +2 -16
  33. novel_downloader/core/fetchers/lewenn.py +2 -22
  34. novel_downloader/core/fetchers/linovelib.py +4 -20
  35. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  36. novel_downloader/core/fetchers/piaotia.py +2 -16
  37. novel_downloader/core/fetchers/qbtr.py +2 -16
  38. novel_downloader/core/fetchers/qianbi.py +1 -20
  39. novel_downloader/core/fetchers/qidian.py +7 -33
  40. novel_downloader/core/fetchers/qqbook.py +177 -0
  41. novel_downloader/core/fetchers/quanben5.py +9 -29
  42. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  43. novel_downloader/core/fetchers/sfacg.py +3 -16
  44. novel_downloader/core/fetchers/shencou.py +2 -16
  45. novel_downloader/core/fetchers/shuhaige.py +2 -22
  46. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  47. novel_downloader/core/fetchers/ttkan.py +3 -14
  48. novel_downloader/core/fetchers/wanbengo.py +2 -22
  49. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  50. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  51. novel_downloader/core/fetchers/xs63b.py +3 -15
  52. novel_downloader/core/fetchers/xshbook.py +2 -22
  53. novel_downloader/core/fetchers/yamibo.py +4 -28
  54. novel_downloader/core/fetchers/yibige.py +13 -26
  55. novel_downloader/core/interfaces/exporter.py +19 -7
  56. novel_downloader/core/interfaces/fetcher.py +21 -47
  57. novel_downloader/core/parsers/__init__.py +4 -2
  58. novel_downloader/core/parsers/b520.py +2 -2
  59. novel_downloader/core/parsers/base.py +4 -39
  60. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
  61. novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
  62. novel_downloader/core/parsers/qqbook.py +709 -0
  63. novel_downloader/core/parsers/xiguashuwu.py +3 -4
  64. novel_downloader/core/searchers/__init__.py +2 -2
  65. novel_downloader/core/searchers/b520.py +1 -1
  66. novel_downloader/core/searchers/base.py +2 -2
  67. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  68. novel_downloader/models/__init__.py +2 -0
  69. novel_downloader/models/book.py +1 -0
  70. novel_downloader/models/config.py +12 -0
  71. novel_downloader/resources/config/settings.toml +23 -5
  72. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  73. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  74. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  75. novel_downloader/utils/constants.py +6 -0
  76. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  77. novel_downloader/utils/epub/constants.py +1 -6
  78. novel_downloader/utils/fontocr/core.py +2 -0
  79. novel_downloader/utils/fontocr/loader.py +10 -8
  80. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  81. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  82. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  83. novel_downloader/web/pages/download.py +1 -1
  84. novel_downloader/web/pages/search.py +1 -1
  85. novel_downloader/web/services/task_manager.py +2 -0
  86. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
  87. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
  88. novel_downloader/core/exporters/common/__init__.py +0 -11
  89. novel_downloader/core/exporters/common/epub.py +0 -198
  90. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  91. novel_downloader/core/exporters/common/txt.py +0 -146
  92. novel_downloader/core/exporters/epub_util.py +0 -215
  93. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  94. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  95. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  96. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  97. novel_downloader/core/exporters/txt_util.py +0 -67
  98. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  99. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
  100. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  101. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  102. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  103. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  104. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -12,19 +12,9 @@ from pathlib import Path
12
12
  from typing import Any
13
13
 
14
14
  from novel_downloader.core.downloaders.base import BaseDownloader
15
- from novel_downloader.core.downloaders.signals import (
16
- STOP,
17
- Progress,
18
- StopToken,
19
- )
20
- from novel_downloader.models import (
21
- BookConfig,
22
- ChapterDict,
23
- )
24
- from novel_downloader.utils import (
25
- ChapterStorage,
26
- async_jitter_sleep,
27
- )
15
+ from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
16
+ from novel_downloader.models import BookConfig, ChapterDict
17
+ from novel_downloader.utils import ChapterStorage, async_jitter_sleep
28
18
 
29
19
 
30
20
  class CommonDownloader(BaseDownloader):
@@ -41,7 +31,7 @@ class CommonDownloader(BaseDownloader):
41
31
  **kwargs: Any,
42
32
  ) -> None:
43
33
  """
44
- Sentinel-based pipeline with graceful cancellation:
34
+ Sentinel-based pipeline with cancellation:
45
35
 
46
36
  Producer -> ChapterWorkers -> StorageWorker.
47
37
 
@@ -59,181 +49,167 @@ class CommonDownloader(BaseDownloader):
59
49
  raw_base.mkdir(parents=True, exist_ok=True)
60
50
  html_dir = self._debug_dir / book_id / "html"
61
51
 
62
- chapter_storage = ChapterStorage(
63
- raw_base=raw_base,
64
- priorities=self.PRIORITIES_MAP,
65
- )
66
- chapter_storage.connect()
67
-
68
52
  def cancelled() -> bool:
69
53
  return bool(cancel_event and cancel_event.is_set())
70
54
 
71
- try:
72
- # --- metadata ---
73
- book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
74
- if not book_info:
75
- return
76
-
77
- vols = book_info["volumes"]
78
- total_chapters = sum(len(v["chapters"]) for v in vols)
79
- if total_chapters == 0:
80
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
81
- return
55
+ # --- metadata ---
56
+ book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
57
+ if not book_info:
58
+ return
82
59
 
83
- progress = Progress(total_chapters, progress_hook)
60
+ vols = book_info["volumes"]
61
+ plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
62
+ if not plan:
63
+ self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
64
+ return
84
65
 
85
- # --- queues & batching ---
86
- cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
87
- save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
88
- batch: list[ChapterDict] = []
66
+ progress = Progress(total=len(plan), hook=progress_hook)
89
67
 
90
- async def flush_batch() -> None:
91
- if not batch:
92
- return
93
- try:
94
- chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
95
- except Exception as e:
96
- self.logger.error(
97
- "[Storage] batch upsert failed (size=%d): %s",
98
- len(batch),
99
- e,
100
- exc_info=True,
101
- )
102
- else:
103
- await progress.bump(len(batch))
104
- finally:
105
- batch.clear()
106
-
107
- # --- stage: storage worker ---
108
- async def storage_worker() -> None:
109
- """
110
- Consumes parsed chapters, writes in batches.
111
-
112
- Terminates after receiving STOP from each chapter worker.
113
-
114
- On cancel: keeps consuming (to avoid blocking producers),
115
- flushes, and exits once all STOPs are seen.
116
- """
117
- stop_count = 0
118
- while True:
119
- item = await save_q.get()
120
- if isinstance(item, StopToken):
121
- stop_count += 1
122
- if stop_count == self.workers:
123
- # All chapter workers have exited.
124
- await flush_batch()
125
- return
126
- # else keep waiting for remaining STOPs
127
- continue
128
-
129
- # Normal chapter
130
- batch.append(item)
131
- if len(batch) >= self.storage_batch_size:
132
- await flush_batch()
68
+ # --- queues & batching ---
69
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
70
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
71
+ maxsize=self._workers * 2
72
+ )
73
+ batch: list[ChapterDict] = []
133
74
 
134
- if cancelled():
135
- # Drain whatever is already in the queue
136
- try:
137
- while True:
138
- nxt = save_q.get_nowait()
139
- if isinstance(nxt, StopToken):
140
- stop_count += 1
141
- else:
142
- batch.append(nxt)
143
- except asyncio.QueueEmpty:
144
- pass
145
- # Final flush of everything
75
+ async def flush_batch() -> None:
76
+ if not batch:
77
+ return
78
+ try:
79
+ storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
80
+ except Exception as e:
81
+ self.logger.error(
82
+ "[Storage] batch upsert failed (size=%d): %s",
83
+ len(batch),
84
+ e,
85
+ exc_info=True,
86
+ )
87
+ else:
88
+ await progress.bump(len(batch))
89
+ finally:
90
+ batch.clear()
91
+
92
+ # --- stage: storage worker ---
93
+ async def storage_worker() -> None:
94
+ """
95
+ Consumes parsed chapters, writes in batches.
96
+
97
+ Terminates after receiving STOP from each chapter worker.
98
+
99
+ On cancel: keeps consuming (to avoid blocking producers),
100
+ flushes, and exits once all STOPs are seen.
101
+ """
102
+ stop_count = 0
103
+ while True:
104
+ item = await save_q.get()
105
+ if isinstance(item, StopToken):
106
+ stop_count += 1
107
+ if stop_count == self._workers:
108
+ # All chapter workers have exited.
146
109
  await flush_batch()
147
- # Wait for remaining STOPs so chapter workers can finish.
148
- while stop_count < self.workers:
149
- nxt = await save_q.get()
110
+ return
111
+ # else keep waiting for remaining STOPs
112
+ continue
113
+
114
+ # Normal chapter
115
+ batch.append(item)
116
+ if len(batch) >= self._storage_batch_size:
117
+ await flush_batch()
118
+
119
+ if cancelled():
120
+ # Drain whatever is already in the queue
121
+ try:
122
+ while True:
123
+ nxt = save_q.get_nowait()
150
124
  if isinstance(nxt, StopToken):
151
125
  stop_count += 1
152
- return
153
-
154
- # --- stage: chapter worker ---
155
- sem = asyncio.Semaphore(self.workers)
126
+ else:
127
+ batch.append(nxt)
128
+ except asyncio.QueueEmpty:
129
+ pass
130
+ # Final flush of everything
131
+ await flush_batch()
132
+ # Wait for remaining STOPs so chapter workers can finish.
133
+ while stop_count < self._workers:
134
+ nxt = await save_q.get()
135
+ if isinstance(nxt, StopToken):
136
+ stop_count += 1
137
+ return
156
138
 
157
- async def chapter_worker() -> None:
158
- """
159
- Fetch + parse with retry, then enqueue to save_q.
139
+ # --- stage: chapter worker ---
140
+ async def chapter_worker() -> None:
141
+ """
142
+ Fetch + parse with retry, then enqueue to save_q.
143
+
144
+ Exits on STOP, or early if cancel is set before starting a new fetch.
145
+ """
146
+ while True:
147
+ cid = await cid_q.get()
148
+ if isinstance(cid, StopToken):
149
+ # Propagate one STOP to storage and exit.
150
+ await save_q.put(STOP)
151
+ return
160
152
 
161
- Exits on STOP, or early if cancel is set before starting a new fetch.
162
- """
163
- while True:
164
- cid = await cid_q.get()
165
- if isinstance(cid, StopToken):
166
- # Propagate one STOP to storage and exit.
167
- await save_q.put(STOP)
168
- return
153
+ # If cancelled, don't start a new network call; let storage finish.
154
+ if cancelled():
155
+ await save_q.put(STOP)
156
+ return
169
157
 
170
- if not cid or cid in ignore_set:
171
- # Ignore silently and continue.
172
- continue
158
+ chap = await self._process_chapter(book_id, cid, html_dir)
159
+ if chap:
160
+ await save_q.put(chap)
173
161
 
174
- # If cancelled, don't start a new network call; let storage finish.
175
- if cancelled():
176
- await save_q.put(STOP)
177
- return
162
+ # polite pacing
163
+ await async_jitter_sleep(
164
+ self._request_interval,
165
+ mul_spread=1.1,
166
+ max_sleep=self._request_interval + 2,
167
+ )
178
168
 
179
- async with sem:
180
- chap = await self._process_chapter(book_id, cid, html_dir)
181
- if chap:
182
- await save_q.put(chap)
169
+ # --- stage: producer ---
170
+ async def producer() -> None:
171
+ """
172
+ Enqueue chapter IDs (respecting start/end/skip_existing).
183
173
 
184
- # polite pacing
185
- await async_jitter_sleep(
186
- self.request_interval,
187
- mul_spread=1.1,
188
- max_sleep=self.request_interval + 2,
189
- )
190
-
191
- # --- stage: producer ---
192
- async def producer() -> None:
193
- """
194
- Enqueue chapter IDs (respecting start/end/skip_existing).
195
-
196
- Always sends STOP x workers at the end (even if cancelled early),
197
- so chapter workers can exit deterministically.
198
- """
199
- try:
200
- async for cid in self._chapter_ids(vols, start_id, end_id):
201
- if cancelled():
202
- break
203
- if self.skip_existing and chapter_storage.exists(cid):
204
- # Count as completed but don't enqueue.
205
- await progress.bump(1)
206
- else:
207
- await cid_q.put(cid)
208
- finally:
209
- for _ in range(self.workers):
210
- await cid_q.put(STOP)
211
-
212
- # --- run the pipeline ---
174
+ Always sends STOP x workers at the end (even if cancelled early),
175
+ so chapter workers can exit deterministically.
176
+ """
177
+ try:
178
+ for cid in plan:
179
+ if cancelled():
180
+ break
181
+ if self._skip_existing and storage.exists(cid):
182
+ # Count as completed but don't enqueue.
183
+ await progress.bump(1)
184
+ else:
185
+ await cid_q.put(cid)
186
+ finally:
187
+ for _ in range(self._workers):
188
+ await cid_q.put(STOP)
189
+
190
+ # --- run the pipeline ---
191
+ with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
213
192
  async with asyncio.TaskGroup() as tg:
214
193
  tg.create_task(storage_worker())
215
- for _ in range(self.workers):
194
+ for _ in range(self._workers):
216
195
  tg.create_task(chapter_worker())
217
196
  tg.create_task(producer())
218
197
 
219
- # --- done ---
220
- if cancelled():
221
- self.logger.info(
222
- "%s Novel '%s' cancelled: flushed %d/%d chapters.",
223
- TAG,
224
- book_info.get("book_name", "unknown"),
225
- progress.done,
226
- progress.total,
227
- )
228
- else:
229
- self.logger.info(
230
- "%s Novel '%s' download completed.",
231
- TAG,
232
- book_info.get("book_name", "unknown"),
233
- )
234
-
235
- finally:
236
- chapter_storage.close()
198
+ # --- done ---
199
+ if cancelled():
200
+ self.logger.info(
201
+ "%s Novel '%s' cancelled: flushed %d/%d chapters.",
202
+ TAG,
203
+ book_info.get("book_name", "unknown"),
204
+ progress.done,
205
+ progress.total,
206
+ )
207
+ else:
208
+ self.logger.info(
209
+ "%s Novel '%s' download completed.",
210
+ TAG,
211
+ book_info.get("book_name", "unknown"),
212
+ )
237
213
 
238
214
  async def _process_chapter(
239
215
  self,
@@ -247,7 +223,7 @@ class CommonDownloader(BaseDownloader):
247
223
 
248
224
  :return: ChapterDict on success, or None on failure.
249
225
  """
250
- for attempt in range(self.retry_times + 1):
226
+ for attempt in range(self._retry_times + 1):
251
227
  try:
252
228
  html_list = await self.fetcher.get_book_chapter(book_id, cid)
253
229
  self._save_html_pages(html_dir, cid, html_list)
@@ -258,11 +234,11 @@ class CommonDownloader(BaseDownloader):
258
234
  raise ValueError("Empty parse result")
259
235
  return chap
260
236
  except Exception as e:
261
- if attempt < self.retry_times:
237
+ if attempt < self._retry_times:
262
238
  self.logger.info(
263
239
  "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
264
240
  )
265
- backoff = self.backoff_factor * (2**attempt)
241
+ backoff = self._backoff_factor * (2**attempt)
266
242
  await async_jitter_sleep(
267
243
  base=backoff, mul_spread=1.2, max_sleep=backoff + 3
268
244
  )
@@ -13,25 +13,13 @@ from typing import Any
13
13
 
14
14
  from novel_downloader.core.downloaders.base import BaseDownloader
15
15
  from novel_downloader.core.downloaders.registry import register_downloader
16
- from novel_downloader.core.downloaders.signals import (
17
- STOP,
18
- Progress,
19
- StopToken,
20
- )
21
- from novel_downloader.core.interfaces import (
22
- FetcherProtocol,
23
- ParserProtocol,
24
- )
16
+ from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
25
17
  from novel_downloader.models import (
26
18
  BookConfig,
27
19
  BookInfoDict,
28
20
  ChapterDict,
29
- DownloaderConfig,
30
- )
31
- from novel_downloader.utils import (
32
- ChapterStorage,
33
- async_jitter_sleep,
34
21
  )
22
+ from novel_downloader.utils import ChapterStorage, async_jitter_sleep
35
23
 
36
24
 
37
25
  @register_downloader(site_keys=["qianbi"])
@@ -43,16 +31,6 @@ class QianbiDownloader(BaseDownloader):
43
31
  each chapter as a unit (fetch -> parse -> enqueue storage).
44
32
  """
45
33
 
46
- DEFAULT_SOURCE_ID = 0
47
-
48
- def __init__(
49
- self,
50
- fetcher: FetcherProtocol,
51
- parser: ParserProtocol,
52
- config: DownloaderConfig,
53
- ):
54
- super().__init__(fetcher, parser, config, "qianbi")
55
-
56
34
  async def _download_one(
57
35
  self,
58
36
  book: BookConfig,
@@ -77,16 +55,10 @@ class QianbiDownloader(BaseDownloader):
77
55
  raw_base.mkdir(parents=True, exist_ok=True)
78
56
  html_dir = self._debug_dir / book_id / "html"
79
57
 
80
- chapter_storage = ChapterStorage(
81
- raw_base=raw_base,
82
- priorities=self.PRIORITIES_MAP,
83
- )
84
- chapter_storage.connect()
85
-
86
58
  def cancelled() -> bool:
87
59
  return bool(cancel_event and cancel_event.is_set())
88
60
 
89
- try:
61
+ with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
90
62
  # --- metadata ---
91
63
  book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
92
64
  if not book_info:
@@ -95,28 +67,32 @@ class QianbiDownloader(BaseDownloader):
95
67
  book_info = await self._repair_chapter_ids(
96
68
  book_id,
97
69
  book_info,
98
- chapter_storage,
70
+ storage,
99
71
  html_dir,
100
72
  )
101
73
 
102
74
  vols = book_info["volumes"]
103
- total_chapters = sum(len(v["chapters"]) for v in vols)
104
- if total_chapters == 0:
105
- self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
75
+ plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
76
+ if not plan:
77
+ self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
106
78
  return
107
79
 
108
- progress = Progress(total_chapters, progress_hook)
80
+ progress = Progress(total=len(plan), hook=progress_hook)
109
81
 
110
82
  # --- queues & batching ---
111
- cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
112
- save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
83
+ cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(
84
+ maxsize=self._workers * 2
85
+ )
86
+ save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
87
+ maxsize=self._workers * 2
88
+ )
113
89
  batch: list[ChapterDict] = []
114
90
 
115
91
  async def flush_batch() -> None:
116
92
  if not batch:
117
93
  return
118
94
  try:
119
- chapter_storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
95
+ storage.upsert_chapters(batch, self.DEFAULT_SOURCE_ID)
120
96
  except Exception as e:
121
97
  self.logger.error(
122
98
  "[Storage] batch upsert failed (size=%d): %s",
@@ -144,7 +120,7 @@ class QianbiDownloader(BaseDownloader):
144
120
  item = await save_q.get()
145
121
  if isinstance(item, StopToken):
146
122
  stop_count += 1
147
- if stop_count == self.workers:
123
+ if stop_count == self._workers:
148
124
  # All chapter workers have exited.
149
125
  await flush_batch()
150
126
  return
@@ -153,7 +129,7 @@ class QianbiDownloader(BaseDownloader):
153
129
 
154
130
  # Normal chapter
155
131
  batch.append(item)
156
- if len(batch) >= self.storage_batch_size:
132
+ if len(batch) >= self._storage_batch_size:
157
133
  await flush_batch()
158
134
 
159
135
  if cancelled():
@@ -170,15 +146,13 @@ class QianbiDownloader(BaseDownloader):
170
146
  # Final flush of everything
171
147
  await flush_batch()
172
148
  # Wait for remaining STOPs so chapter workers can finish.
173
- while stop_count < self.workers:
149
+ while stop_count < self._workers:
174
150
  nxt = await save_q.get()
175
151
  if isinstance(nxt, StopToken):
176
152
  stop_count += 1
177
153
  return
178
154
 
179
155
  # --- stage: chapter worker ---
180
- sem = asyncio.Semaphore(self.workers)
181
-
182
156
  async def chapter_worker() -> None:
183
157
  """
184
158
  Fetch + parse with retry, then enqueue to save_q.
@@ -192,25 +166,20 @@ class QianbiDownloader(BaseDownloader):
192
166
  await save_q.put(STOP)
193
167
  return
194
168
 
195
- if not cid or cid in ignore_set:
196
- # Ignore silently and continue.
197
- continue
198
-
199
169
  # If cancelled, don't start a new network call; let storage finish.
200
170
  if cancelled():
201
171
  await save_q.put(STOP)
202
172
  return
203
173
 
204
- async with sem:
205
- chap = await self._process_chapter(book_id, cid, html_dir)
174
+ chap = await self._process_chapter(book_id, cid, html_dir)
206
175
  if chap:
207
176
  await save_q.put(chap)
208
177
 
209
178
  # polite pacing
210
179
  await async_jitter_sleep(
211
- self.request_interval,
180
+ self._request_interval,
212
181
  mul_spread=1.1,
213
- max_sleep=self.request_interval + 2,
182
+ max_sleep=self._request_interval + 2,
214
183
  )
215
184
 
216
185
  # --- stage: producer ---
@@ -221,22 +190,22 @@ class QianbiDownloader(BaseDownloader):
221
190
  so chapter workers can exit deterministically.
222
191
  """
223
192
  try:
224
- async for cid in self._chapter_ids(vols, start_id, end_id):
193
+ for cid in plan:
225
194
  if cancelled():
226
195
  break
227
- if self.skip_existing and chapter_storage.exists(cid):
196
+ if self._skip_existing and storage.exists(cid):
228
197
  # Count as completed but don't enqueue.
229
198
  await progress.bump(1)
230
199
  else:
231
200
  await cid_q.put(cid)
232
201
  finally:
233
- for _ in range(self.workers):
202
+ for _ in range(self._workers):
234
203
  await cid_q.put(STOP)
235
204
 
236
205
  # --- run the pipeline ---
237
206
  async with asyncio.TaskGroup() as tg:
238
207
  tg.create_task(storage_worker())
239
- for _ in range(self.workers):
208
+ for _ in range(self._workers):
240
209
  tg.create_task(chapter_worker())
241
210
  tg.create_task(producer())
242
211
 
@@ -256,9 +225,6 @@ class QianbiDownloader(BaseDownloader):
256
225
  book_info.get("book_name", "unknown"),
257
226
  )
258
227
 
259
- finally:
260
- chapter_storage.close()
261
-
262
228
  async def _repair_chapter_ids(
263
229
  self,
264
230
  book_id: str,
@@ -295,9 +261,9 @@ class QianbiDownloader(BaseDownloader):
295
261
  continue
296
262
  storage.upsert_chapter(data, self.DEFAULT_SOURCE_ID)
297
263
  await async_jitter_sleep(
298
- self.request_interval,
264
+ self._request_interval,
299
265
  mul_spread=1.1,
300
- max_sleep=self.request_interval + 2,
266
+ max_sleep=self._request_interval + 2,
301
267
  )
302
268
 
303
269
  next_cid = data.get("extra", {}).get("next_chapter_id")
@@ -331,7 +297,7 @@ class QianbiDownloader(BaseDownloader):
331
297
 
332
298
  :return: ChapterDict on success, or None on failure.
333
299
  """
334
- for attempt in range(self.retry_times + 1):
300
+ for attempt in range(self._retry_times + 1):
335
301
  try:
336
302
  html_list = await self.fetcher.get_book_chapter(book_id, cid)
337
303
  self._save_html_pages(html_dir, cid, html_list)
@@ -342,9 +308,9 @@ class QianbiDownloader(BaseDownloader):
342
308
  raise ValueError("Empty parse result")
343
309
  return chap
344
310
  except Exception as e:
345
- if attempt < self.retry_times:
311
+ if attempt < self._retry_times:
346
312
  self.logger.info(f"[ChapterWorker] Retry {cid} ({attempt+1}): {e}")
347
- backoff = self.backoff_factor * (2**attempt)
313
+ backoff = self._backoff_factor * (2**attempt)
348
314
  await async_jitter_sleep(
349
315
  base=backoff, mul_spread=1.2, max_sleep=backoff + 3
350
316
  )