novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. novel_downloader/__init__.py +1 -2
  2. novel_downloader/cli/__init__.py +0 -1
  3. novel_downloader/cli/clean.py +2 -10
  4. novel_downloader/cli/download.py +18 -22
  5. novel_downloader/cli/interactive.py +0 -1
  6. novel_downloader/cli/main.py +1 -3
  7. novel_downloader/cli/settings.py +8 -8
  8. novel_downloader/config/__init__.py +0 -1
  9. novel_downloader/config/adapter.py +48 -18
  10. novel_downloader/config/loader.py +116 -108
  11. novel_downloader/config/models.py +41 -32
  12. novel_downloader/config/site_rules.py +2 -4
  13. novel_downloader/core/__init__.py +0 -1
  14. novel_downloader/core/downloaders/__init__.py +4 -4
  15. novel_downloader/core/downloaders/base/__init__.py +14 -0
  16. novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
  17. novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
  18. novel_downloader/core/downloaders/biquge/__init__.py +12 -0
  19. novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
  20. novel_downloader/core/downloaders/common/__init__.py +14 -0
  21. novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
  22. novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +34 -23
  23. novel_downloader/core/downloaders/qidian/__init__.py +10 -0
  24. novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +80 -64
  25. novel_downloader/core/factory/__init__.py +4 -5
  26. novel_downloader/core/factory/{downloader_factory.py → downloader.py} +36 -35
  27. novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
  28. novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
  29. novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
  30. novel_downloader/core/interfaces/__init__.py +8 -9
  31. novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
  32. novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +26 -12
  33. novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
  34. novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
  35. novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
  36. novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +34 -17
  37. novel_downloader/core/parsers/__init__.py +5 -4
  38. novel_downloader/core/parsers/{base_parser.py → base.py} +20 -11
  39. novel_downloader/core/parsers/biquge/__init__.py +10 -0
  40. novel_downloader/core/parsers/biquge/main_parser.py +126 -0
  41. novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
  42. novel_downloader/core/parsers/{common_parser → common}/helper.py +20 -18
  43. novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
  44. novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
  45. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
  46. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +41 -49
  47. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
  48. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
  49. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +16 -12
  50. novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
  51. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +37 -45
  52. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
  53. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
  54. novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +16 -12
  55. novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
  56. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
  57. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +150 -0
  58. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +9 -10
  59. novel_downloader/core/requesters/__init__.py +9 -5
  60. novel_downloader/core/requesters/base/__init__.py +16 -0
  61. novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +180 -73
  62. novel_downloader/core/requesters/base/browser.py +340 -0
  63. novel_downloader/core/requesters/base/session.py +364 -0
  64. novel_downloader/core/requesters/biquge/__init__.py +12 -0
  65. novel_downloader/core/requesters/biquge/session.py +90 -0
  66. novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
  67. novel_downloader/core/requesters/common/async_session.py +96 -0
  68. novel_downloader/core/requesters/common/session.py +113 -0
  69. novel_downloader/core/requesters/qidian/__init__.py +21 -0
  70. novel_downloader/core/requesters/qidian/broswer.py +306 -0
  71. novel_downloader/core/requesters/qidian/session.py +287 -0
  72. novel_downloader/core/savers/__init__.py +5 -3
  73. novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
  74. novel_downloader/core/savers/biquge.py +25 -0
  75. novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
  76. novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +24 -52
  77. novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
  78. novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
  79. novel_downloader/core/savers/epub_utils/__init__.py +0 -1
  80. novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
  81. novel_downloader/core/savers/epub_utils/initializer.py +4 -5
  82. novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
  83. novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
  84. novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
  85. novel_downloader/locales/en.json +12 -4
  86. novel_downloader/locales/zh.json +9 -1
  87. novel_downloader/resources/config/settings.toml +88 -0
  88. novel_downloader/utils/cache.py +2 -2
  89. novel_downloader/utils/chapter_storage.py +340 -0
  90. novel_downloader/utils/constants.py +8 -5
  91. novel_downloader/utils/crypto_utils.py +3 -3
  92. novel_downloader/utils/file_utils/__init__.py +0 -1
  93. novel_downloader/utils/file_utils/io.py +12 -17
  94. novel_downloader/utils/file_utils/normalize.py +1 -3
  95. novel_downloader/utils/file_utils/sanitize.py +2 -9
  96. novel_downloader/utils/fontocr/__init__.py +0 -1
  97. novel_downloader/utils/fontocr/ocr_v1.py +19 -22
  98. novel_downloader/utils/fontocr/ocr_v2.py +147 -60
  99. novel_downloader/utils/hash_store.py +19 -20
  100. novel_downloader/utils/hash_utils.py +0 -1
  101. novel_downloader/utils/i18n.py +3 -4
  102. novel_downloader/utils/logger.py +5 -6
  103. novel_downloader/utils/model_loader.py +5 -8
  104. novel_downloader/utils/network.py +9 -10
  105. novel_downloader/utils/state.py +6 -7
  106. novel_downloader/utils/text_utils/__init__.py +0 -1
  107. novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
  108. novel_downloader/utils/text_utils/diff_display.py +0 -1
  109. novel_downloader/utils/text_utils/font_mapping.py +1 -4
  110. novel_downloader/utils/text_utils/text_cleaning.py +0 -1
  111. novel_downloader/utils/time_utils/__init__.py +0 -1
  112. novel_downloader/utils/time_utils/datetime_utils.py +9 -11
  113. novel_downloader/utils/time_utils/sleep_utils.py +27 -13
  114. {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
  115. novel_downloader-1.3.0.dist-info/RECORD +127 -0
  116. {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
  117. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -95
  118. novel_downloader/core/requesters/base_browser.py +0 -210
  119. novel_downloader/core/requesters/base_session.py +0 -243
  120. novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
  121. novel_downloader/core/requesters/common_requester/common_session.py +0 -126
  122. novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
  123. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -377
  124. novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
  125. novel_downloader/resources/config/settings.yaml +0 -76
  126. novel_downloader-1.2.1.dist-info/RECORD +0 -115
  127. {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
  128. {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
  129. {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.common_parser.helpers
5
- ---------------------------------------------------
3
+ novel_downloader.core.parsers.common.helpers
4
+ --------------------------------------------
6
5
 
7
6
  Shared utility functions for parsing Common pages.
8
7
  """
9
8
 
10
9
  import logging
11
10
  import re
12
- from typing import Any, Dict, Iterable, Iterator, List, Optional, cast
11
+ from collections.abc import Iterable, Iterator
12
+ from typing import Any, cast
13
13
 
14
14
  from bs4 import BeautifulSoup, Tag
15
15
 
@@ -47,7 +47,7 @@ class HTMLExtractor:
47
47
  self._html = html
48
48
  self._soup = html_to_soup(html)
49
49
 
50
- def extract_book_info(self, rules: BookInfoRules) -> Dict[str, Any]:
50
+ def extract_book_info(self, rules: BookInfoRules) -> dict[str, Any]:
51
51
  """
52
52
  Extract structured book information from HTML according to the given rules.
53
53
 
@@ -56,7 +56,7 @@ class HTMLExtractor:
56
56
  :param rules: Extraction configuration specifying how to extract.
57
57
  :return: A dictionary containing extracted book information.
58
58
  """
59
- book_info: Dict[str, Any] = {}
59
+ book_info: dict[str, Any] = {}
60
60
 
61
61
  for field_name, field_rules in rules.items():
62
62
  if field_rules is None:
@@ -72,7 +72,7 @@ class HTMLExtractor:
72
72
 
73
73
  return book_info
74
74
 
75
- def extract_field(self, steps: List[RuleStep]) -> str:
75
+ def extract_field(self, steps: list[RuleStep]) -> str:
76
76
  """
77
77
  Execute a list of extraction steps on the given HTML.
78
78
 
@@ -188,7 +188,7 @@ class HTMLExtractor:
188
188
  current = sep.join(current)
189
189
 
190
190
  elif t == "attr":
191
- name = step.get("attr")
191
+ name = step.get("attr") or ""
192
192
  if isinstance(current, list):
193
193
  current = [elem.get(name, "") for elem in current]
194
194
  elif isinstance(current, Tag):
@@ -209,16 +209,16 @@ class HTMLExtractor:
209
209
  return str(current.get_text().strip())
210
210
  return str(current or "").strip()
211
211
 
212
- def extract_mixed_volumes(self, volume_rule: VolumesRules) -> List[Dict[str, Any]]:
212
+ def extract_mixed_volumes(self, volume_rule: VolumesRules) -> list[dict[str, Any]]:
213
213
  """
214
214
  Special mode: mixed <volume> and <chapter> under same parent.
215
215
  (e.g., dt / dd pattern in BiQuGe)
216
216
  """
217
217
  list_selector = volume_rule.get("list_selector")
218
218
  volume_selector = volume_rule.get("volume_selector")
219
- chapter_selector = volume_rule.get("chapter_selector")
220
219
  volume_name_steps = volume_rule.get("volume_name_steps")
221
- chapter_steps_list = volume_rule.get("chapter_steps")
220
+ chapter_selector = volume_rule["chapter_selector"]
221
+ chapter_steps_list = volume_rule["chapter_steps"]
222
222
 
223
223
  if not (
224
224
  list_selector and volume_selector and chapter_selector and volume_name_steps
@@ -228,8 +228,8 @@ class HTMLExtractor:
228
228
  "chapter_selector 和 volume_name_steps"
229
229
  )
230
230
 
231
- volumes: List[Dict[str, Any]] = []
232
- current_volume: Optional[Dict[str, Any]] = None
231
+ volumes: list[dict[str, Any]] = []
232
+ current_volume: dict[str, Any] | None = None
233
233
  if not chapter_steps_list:
234
234
  chapter_steps_list = []
235
235
  chapter_info_steps = {item["key"]: item["steps"] for item in chapter_steps_list}
@@ -241,6 +241,8 @@ class HTMLExtractor:
241
241
  for elem in list_area.find_all(
242
242
  [volume_selector, chapter_selector], recursive=True
243
243
  ):
244
+ if not isinstance(elem, Tag):
245
+ continue
244
246
  if elem.name == volume_selector:
245
247
  extractor = HTMLExtractor(str(elem))
246
248
  volume_name = extractor.extract_field(volume_name_steps)
@@ -256,10 +258,10 @@ class HTMLExtractor:
256
258
 
257
259
  return volumes
258
260
 
259
- def extract_volume_blocks(self, volume_rule: VolumesRules) -> List[Dict[str, Any]]:
260
- volume_selector = volume_rule["volume_selector"]
261
+ def extract_volume_blocks(self, volume_rule: VolumesRules) -> list[dict[str, Any]]:
262
+ volume_selector = volume_rule.get("volume_selector")
263
+ volume_name_steps = volume_rule.get("volume_name_steps")
261
264
  chapter_selector = volume_rule["chapter_selector"]
262
- volume_name_steps = volume_rule["volume_name_steps"]
263
265
  chapter_steps_list = volume_rule["chapter_steps"]
264
266
  if not (volume_selector and volume_name_steps):
265
267
  raise ValueError(
@@ -283,7 +285,7 @@ class HTMLExtractor:
283
285
 
284
286
  return volumes
285
287
 
286
- def extract_flat_chapters(self, volume_rule: VolumesRules) -> List[Dict[str, Any]]:
288
+ def extract_flat_chapters(self, volume_rule: VolumesRules) -> list[dict[str, Any]]:
287
289
  chapter_selector = volume_rule["chapter_selector"]
288
290
  chapter_steps_list = volume_rule["chapter_steps"]
289
291
  volume_selector = volume_rule.get("volume_selector")
@@ -310,7 +312,7 @@ class HTMLExtractor:
310
312
 
311
313
  def extract_volumes_structure(
312
314
  self, volume_rule: VolumesRules
313
- ) -> List[Dict[str, Any]]:
315
+ ) -> list[dict[str, Any]]:
314
316
  volume_mode = volume_rule.get("volume_mode", "normal")
315
317
  if volume_mode == "mixed":
316
318
  return self.extract_mixed_volumes(volume_rule)
@@ -1,18 +1,18 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.common_parser.main_parser
5
- -------------------------------------------------------
3
+ novel_downloader.core.parsers.common.main_parser
4
+ ------------------------------------------------
6
5
 
7
6
  This package provides parsing components for handling
8
7
  Common pages.
9
8
  """
10
9
 
11
- from typing import Any, Dict
10
+ from typing import Any
12
11
 
13
12
  from novel_downloader.config import ParserConfig, SiteRules
13
+ from novel_downloader.core.parsers.base import BaseParser
14
+ from novel_downloader.utils.chapter_storage import ChapterDict
14
15
 
15
- from ..base_parser import BaseParser
16
16
  from .helper import HTMLExtractor
17
17
 
18
18
 
@@ -35,7 +35,7 @@ class CommonParser(BaseParser):
35
35
  self._site = site
36
36
  self._site_rule = site_rule
37
37
 
38
- def parse_book_info(self, html_str: str) -> Dict[str, Any]:
38
+ def parse_book_info(self, html_str: str) -> dict[str, Any]:
39
39
  """
40
40
  Parse a book info page and extract metadata and chapter structure.
41
41
 
@@ -46,7 +46,11 @@ class CommonParser(BaseParser):
46
46
  rules = self._site_rule["book_info"]
47
47
  return extractor.extract_book_info(rules)
48
48
 
49
- def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
49
+ def parse_chapter(
50
+ self,
51
+ html_str: str,
52
+ chapter_id: str,
53
+ ) -> ChapterDict | None:
50
54
  """
51
55
  Parse a single chapter page and extract clean text or simplified HTML.
52
56
 
@@ -66,13 +70,15 @@ class CommonParser(BaseParser):
66
70
  title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
67
71
  content = extractor.extract_field(content_steps["steps"])
68
72
  if not content:
69
- return {}
73
+ return None
70
74
 
71
75
  return {
72
76
  "id": chapter_id,
73
77
  "title": title or "Untitled",
74
78
  "content": content,
75
- "site": self._site,
79
+ "extra": {
80
+ "site": self._site,
81
+ },
76
82
  }
77
83
 
78
84
  @property
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.qidian_parser
5
- -------------------------------------------
3
+ novel_downloader.core.parsers.qidian
4
+ ------------------------------------
6
5
 
7
6
  This package provides parsing implementations for the Qidian platform.
8
7
 
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.qidian_parser.browser
5
- ---------------------------------------------------
3
+ novel_downloader.core.parsers.qidian.browser
4
+ --------------------------------------------
6
5
 
7
6
  This package provides parsing components for handling Qidian
8
7
  pages that have been rendered by a browser engine.
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.qidian_parser.browser.chapter_encrypted
5
- ---------------------------------------------------------------------
3
+ novel_downloader.core.parsers.qidian.browser.chapter_encrypted
4
+ --------------------------------------------------------------
6
5
 
7
6
  Support for parsing encrypted chapters from Qidian using font OCR mapping,
8
7
  CSS rules, and custom rendering logic.
@@ -19,11 +18,12 @@ from __future__ import annotations
19
18
  import json
20
19
  import logging
21
20
  from pathlib import Path
22
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
21
+ from typing import TYPE_CHECKING, Any
23
22
 
24
23
  import tinycss2
25
24
  from bs4 import BeautifulSoup, Tag
26
25
 
26
+ from novel_downloader.utils.chapter_storage import ChapterDict
27
27
  from novel_downloader.utils.network import download_font_file
28
28
  from novel_downloader.utils.text_utils import apply_font_mapping
29
29
 
@@ -43,7 +43,7 @@ def parse_encrypted_chapter(
43
43
  parser: QidianBrowserParser,
44
44
  soup: BeautifulSoup,
45
45
  chapter_id: str,
46
- ) -> Dict[str, Any]:
46
+ ) -> ChapterDict | None:
47
47
  """
48
48
  Extract and return the formatted textual content of an encrypted chapter.
49
49
 
@@ -61,15 +61,15 @@ def parse_encrypted_chapter(
61
61
  """
62
62
  try:
63
63
  if not (parser._decode_font and parser._font_ocr):
64
- return {}
64
+ return None
65
65
  ssr_data = find_ssr_page_context(soup)
66
66
  chapter_info = extract_chapter_info(ssr_data)
67
67
  if not chapter_info:
68
68
  logger.warning(
69
69
  "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
70
70
  )
71
- return {}
72
- debug_base_dir: Optional[Path] = None
71
+ return None
72
+ debug_base_dir: Path | None = None
73
73
  if parser._font_debug_dir:
74
74
  debug_base_dir = parser._font_debug_dir / chapter_id
75
75
  debug_base_dir.mkdir(parents=True, exist_ok=True)
@@ -85,10 +85,7 @@ def parse_encrypted_chapter(
85
85
  update_timestamp = chapter_info.get("updateTimestamp", 0)
86
86
  modify_time = chapter_info.get("modifyTime", 0)
87
87
  word_count = chapter_info.get("wordsCount", 0)
88
- vip = bool(chapter_info.get("vipStatus", 0))
89
- is_buy = bool(chapter_info.get("isBuy", 0))
90
88
  seq = chapter_info.get("seq", None)
91
- order = chapter_info.get("chapterOrder", None)
92
89
  volume = chapter_info.get("extra", {}).get("volumeName", "")
93
90
 
94
91
  # extract + save font
@@ -133,7 +130,7 @@ def parse_encrypted_chapter(
133
130
  logger.warning(
134
131
  f"[Parser] No end_number found after parsing chapter '{chapter_id}'"
135
132
  )
136
- return {}
133
+ return None
137
134
 
138
135
  paragraphs_str, refl_list = render_paragraphs(
139
136
  main_paragraphs, paragraphs_rules, end_number
@@ -143,7 +140,7 @@ def parse_encrypted_chapter(
143
140
  paragraphs_str_path.write_text(paragraphs_str, encoding="utf-8")
144
141
 
145
142
  # Run OCR + fallback mapping
146
- char_set = set(c for c in paragraphs_str if c not in {" ", "\n", "\u3000"})
143
+ char_set = {c for c in paragraphs_str if c not in {" ", "\n", "\u3000"}}
147
144
  refl_set = set(refl_list)
148
145
  char_set = char_set - refl_set
149
146
  if debug_base_dir:
@@ -174,33 +171,31 @@ def parse_encrypted_chapter(
174
171
  final_paragraphs_str = "\n\n".join(
175
172
  line.strip() for line in original_text.splitlines() if line.strip()
176
173
  )
177
- chapter_info = {
174
+ return {
178
175
  "id": str(chapter_id),
179
176
  "title": title,
180
177
  "content": final_paragraphs_str,
181
- "author_say": author_say.strip() if author_say else "",
182
- "updated_at": update_time,
183
- "update_timestamp": update_timestamp,
184
- "modify_time": modify_time,
185
- "word_count": word_count,
186
- "vip": vip,
187
- "purchased": is_buy,
188
- "order": order,
189
- "seq": seq,
190
- "volume": volume,
178
+ "extra": {
179
+ "author_say": author_say.strip() if author_say else "",
180
+ "updated_at": update_time,
181
+ "update_timestamp": update_timestamp,
182
+ "modify_time": modify_time,
183
+ "word_count": word_count,
184
+ "seq": seq,
185
+ "volume": volume,
186
+ },
191
187
  }
192
- return chapter_info
193
188
 
194
189
  except Exception as e:
195
190
  logger.warning(
196
191
  "[Parser] parse error for encrypted chapter '%s': %s", chapter_id, e
197
192
  )
198
- return {}
193
+ return None
199
194
 
200
195
 
201
196
  def extract_paragraphs_recursively(
202
197
  soup: BeautifulSoup, chapter_id: str = ""
203
- ) -> List[Dict[str, Any]]:
198
+ ) -> list[dict[str, Any]]:
204
199
  """
205
200
  Extracts paragraph elements under <main id="c-{chapter_id}"> from HTML
206
201
  and converts them to a nested data structure for further processing.
@@ -211,7 +206,7 @@ def extract_paragraphs_recursively(
211
206
  :return list: List of parsed <p> paragraph data.
212
207
  """
213
208
 
214
- def parse_element(elem: Any) -> Union[Dict[str, Any], None]:
209
+ def parse_element(elem: Any) -> dict[str, Any] | None:
215
210
  if not isinstance(elem, Tag):
216
211
  return None
217
212
  result = {"tag": elem.name, "attrs": dict(elem.attrs), "data": []}
@@ -229,7 +224,7 @@ def extract_paragraphs_recursively(
229
224
  if chapter_id:
230
225
  main_id = f"c-{chapter_id}"
231
226
  main_tag = soup.find("main", id=main_id)
232
- if not main_tag:
227
+ if not isinstance(main_tag, Tag):
233
228
  return []
234
229
  else:
235
230
  main_tag = soup
@@ -243,7 +238,7 @@ def extract_paragraphs_recursively(
243
238
  return result
244
239
 
245
240
 
246
- def parse_rule(css_str: str) -> Dict[str, Any]:
241
+ def parse_rule(css_str: str) -> dict[str, Any]:
247
242
  """
248
243
  Parse a CSS string and extract style rules for rendering.
249
244
 
@@ -258,7 +253,7 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
258
253
  :return: Dict with "rules" and "orders" for rendering.
259
254
  """
260
255
 
261
- rules: Dict[str, Any] = {}
256
+ rules: dict[str, Any] = {}
262
257
  orders = []
263
258
 
264
259
  stylesheet = tinycss2.parse_stylesheet(
@@ -322,7 +317,7 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
322
317
  return {"rules": rules, "orders": orders}
323
318
 
324
319
 
325
- def parse_paragraph_names(rules: Dict[str, Any]) -> Set[str]:
320
+ def parse_paragraph_names(rules: dict[str, Any]) -> set[str]:
326
321
  """
327
322
  Extract all paragraph selector names from parsed rules, excluding "sy".
328
323
  """
@@ -335,16 +330,16 @@ def parse_paragraph_names(rules: Dict[str, Any]) -> Set[str]:
335
330
 
336
331
 
337
332
  def parse_end_number(
338
- main_paragraphs: List[Dict[str, Any]], paragraph_names: Set[str]
339
- ) -> Optional[int]:
333
+ main_paragraphs: list[dict[str, Any]], paragraph_names: set[str]
334
+ ) -> int | None:
340
335
  """
341
336
  Find the most frequent numeric suffix from tag names
342
337
  matched by given paragraph prefixes.
343
338
  """
344
- end_numbers: Dict[int, int] = {}
339
+ end_numbers: dict[int, int] = {}
345
340
  sorted_names = sorted(paragraph_names, key=len, reverse=True)
346
341
 
347
- def rec_parse(item: Union[List[Any], Dict[str, Any]]) -> None:
342
+ def rec_parse(item: list[Any] | dict[str, Any]) -> None:
348
343
  if isinstance(item, list):
349
344
  for element in item:
350
345
  rec_parse(element)
@@ -359,7 +354,7 @@ def parse_end_number(
359
354
  end_numbers[num] = end_numbers.get(num, 0) + 1
360
355
  break
361
356
  for val in item.values():
362
- if isinstance(val, (list, dict)):
357
+ if isinstance(val, (list | dict)):
363
358
  rec_parse(val)
364
359
 
365
360
  rec_parse(main_paragraphs)
@@ -381,10 +376,10 @@ def parse_end_number(
381
376
 
382
377
 
383
378
  def render_paragraphs(
384
- main_paragraphs: List[Dict[str, Any]],
385
- rules: Dict[str, Any],
379
+ main_paragraphs: list[dict[str, Any]],
380
+ rules: dict[str, Any],
386
381
  end_number: int,
387
- ) -> Tuple[str, List[str]]:
382
+ ) -> tuple[str, list[str]]:
388
383
  """
389
384
  Applies the parsed CSS rules to the paragraph structure and
390
385
  reconstructs the visible text.
@@ -403,11 +398,11 @@ def render_paragraphs(
403
398
  - A reconstructed paragraph string with line breaks.
404
399
  - A list of mirrored (reflected) characters for later OCR processing.
405
400
  """
406
- orders: List[Tuple[str, str]] = rules.get("orders", [])
401
+ orders: list[tuple[str, str]] = rules.get("orders", [])
407
402
  rules = rules.get("rules", {})
408
- refl_list: List[str] = []
403
+ refl_list: list[str] = []
409
404
 
410
- def apply_rule(data: Dict[str, Any], rule: Dict[str, Any]) -> str:
405
+ def apply_rule(data: dict[str, Any], rule: dict[str, Any]) -> str:
411
406
  if rule.get("delete-all", False):
412
407
  return ""
413
408
 
@@ -418,10 +413,7 @@ def render_paragraphs(
418
413
  curr_str += first_data
419
414
 
420
415
  if rule.get("delete-first", False):
421
- if len(curr_str) <= 1:
422
- curr_str = ""
423
- else:
424
- curr_str = curr_str[1:]
416
+ curr_str = "" if len(curr_str) <= 1 else curr_str[1:]
425
417
 
426
418
  curr_str += rule.get("append-end-char", "")
427
419
 
@@ -480,7 +472,7 @@ def render_paragraphs(
480
472
  logger.debug(f"[parser] not find p_class_str: {class_list}")
481
473
  continue
482
474
  # 普通标签处理,根据 orders 顺序匹配
483
- for ord_selector, ord_id in orders:
475
+ for ord_selector, _ in orders:
484
476
  tag_name = f"{ord_selector}{end_number}"
485
477
  if data.get("tag") != tag_name:
486
478
  continue
@@ -489,7 +481,7 @@ def render_paragraphs(
489
481
  ordered_cache[ord_selector] = apply_rule(data, curr_rule)
490
482
  break
491
483
  # 最后按 orders 顺序拼接
492
- for ord_selector, ord_id in orders:
484
+ for ord_selector, _ in orders:
493
485
  if ord_selector in ordered_cache:
494
486
  paragraphs_str += ordered_cache[ord_selector]
495
487
 
@@ -1,18 +1,18 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.qidian_parser.browser.chapter_normal
5
- ------------------------------------------------------------------
3
+ novel_downloader.core.parsers.qidian.browser.chapter_normal
4
+ -----------------------------------------------------------
6
5
 
7
6
  Parser logic for extracting readable text from Qidian chapters
8
7
  that use plain (non-encrypted) browser-rendered HTML.
9
8
  """
10
9
 
11
10
  import logging
12
- from typing import Any, Dict
13
11
 
14
12
  from bs4 import BeautifulSoup
15
13
 
14
+ from novel_downloader.utils.chapter_storage import ChapterDict
15
+
16
16
  from ..shared import (
17
17
  extract_chapter_info,
18
18
  find_ssr_page_context,
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
24
24
  def parse_normal_chapter(
25
25
  soup: BeautifulSoup,
26
26
  chapter_id: str,
27
- ) -> Dict[str, Any]:
27
+ ) -> ChapterDict | None:
28
28
  """
29
29
  Extract and format the chapter text from a normal Qidian page.
30
30
  Returns empty string if VIP/encrypted.
@@ -44,7 +44,7 @@ def parse_normal_chapter(
44
44
  main = soup.select_one("div#app div#reader-content main")
45
45
  if not main:
46
46
  logger.warning("[Parser] Main content not found for chapter")
47
- return {}
47
+ return None
48
48
 
49
49
  ssr_data = find_ssr_page_context(soup)
50
50
  chapter_info = extract_chapter_info(ssr_data)
@@ -52,7 +52,7 @@ def parse_normal_chapter(
52
52
  logger.warning(
53
53
  "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
54
54
  )
55
- return {}
55
+ return None
56
56
 
57
57
  title = chapter_info.get("chapterName", "Untitled")
58
58
  chapter_id = chapter_info.get("chapterId", "")
@@ -61,10 +61,7 @@ def parse_normal_chapter(
61
61
  update_timestamp = chapter_info.get("updateTimestamp", 0)
62
62
  modify_time = chapter_info.get("modifyTime", 0)
63
63
  word_count = chapter_info.get("wordsCount", 0)
64
- vip = bool(chapter_info.get("vipStatus", 0))
65
- is_buy = bool(chapter_info.get("isBuy", 0))
66
64
  seq = chapter_info.get("seq", None)
67
- order = chapter_info.get("chapterOrder", None)
68
65
  volume = chapter_info.get("extra", {}).get("volumeName", "")
69
66
 
70
67
  # remove review spans
@@ -78,20 +75,19 @@ def parse_normal_chapter(
78
75
  "id": str(chapter_id),
79
76
  "title": title,
80
77
  "content": chapter_text,
81
- "author_say": author_say.strip() if author_say else "",
82
- "updated_at": update_time,
83
- "update_timestamp": update_timestamp,
84
- "modify_time": modify_time,
85
- "word_count": word_count,
86
- "vip": vip,
87
- "purchased": is_buy,
88
- "order": order,
89
- "seq": seq,
90
- "volume": volume,
78
+ "extra": {
79
+ "author_say": author_say.strip() if author_say else "",
80
+ "updated_at": update_time,
81
+ "update_timestamp": update_timestamp,
82
+ "modify_time": modify_time,
83
+ "word_count": word_count,
84
+ "seq": seq,
85
+ "volume": volume,
86
+ },
91
87
  }
92
88
 
93
89
  except Exception as e:
94
90
  logger.warning(
95
91
  "[Parser] parse error for normal chapter '%s': %s", chapter_id, e
96
92
  )
97
- return {}
93
+ return None
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.parsers.qidian_parser.browser.chapter_router
5
- ------------------------------------------------------------------
3
+ novel_downloader.core.parsers.qidian.browser.chapter_router
4
+ -----------------------------------------------------------
6
5
 
7
6
  Routing logic for selecting the correct chapter parser for Qidian browser pages.
8
7
 
@@ -13,7 +12,9 @@ routes the parsing task to either the encrypted or normal chapter parser.
13
12
  from __future__ import annotations
14
13
 
15
14
  import logging
16
- from typing import TYPE_CHECKING, Any, Dict
15
+ from typing import TYPE_CHECKING
16
+
17
+ from novel_downloader.utils.chapter_storage import ChapterDict
17
18
 
18
19
  from ..shared import (
19
20
  can_view_chapter,
@@ -32,7 +33,7 @@ def parse_chapter(
32
33
  parser: QidianBrowserParser,
33
34
  html_str: str,
34
35
  chapter_id: str,
35
- ) -> Dict[str, Any]:
36
+ ) -> ChapterDict | None:
36
37
  """
37
38
  Extract and return the formatted textual content of chapter.
38
39
 
@@ -48,11 +49,11 @@ def parse_chapter(
48
49
  logger.warning(
49
50
  "[Parser] Chapter '%s' is not purchased or inaccessible.", chapter_id
50
51
  )
51
- return {}
52
+ return None
52
53
 
53
54
  if is_encrypted(soup):
54
55
  if not parser._decode_font:
55
- return {}
56
+ return None
56
57
  try:
57
58
  from .chapter_encrypted import parse_encrypted_chapter
58
59
 
@@ -62,9 +63,9 @@ def parse_chapter(
62
63
  "[Parser] Encrypted chapter '%s' requires extra dependencies.",
63
64
  chapter_id,
64
65
  )
65
- return {}
66
+ return None
66
67
 
67
68
  return parse_normal_chapter(soup, chapter_id)
68
69
  except Exception as e:
69
70
  logger.warning("[Parser] parse error for chapter '%s': %s", chapter_id, e)
70
- return {}
71
+ return None