novel-downloader 1.2.1__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/PKG-INFO +1 -1
  2. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/__init__.py +1 -1
  3. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/cli/download.py +2 -0
  4. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/config/adapter.py +29 -4
  5. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/config/models.py +7 -4
  6. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/downloaders/common_downloader.py +1 -2
  7. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/downloaders/qidian_downloader.py +1 -2
  8. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/factory/downloader_factory.py +13 -11
  9. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/async_requester_protocol.py +4 -1
  10. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/requester_protocol.py +4 -1
  11. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/base_parser.py +3 -3
  12. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/common_parser/helper.py +7 -5
  13. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +1 -1
  14. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +3 -3
  15. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +1 -1
  16. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/session/main_parser.py +3 -3
  17. novel_downloader-1.2.2/novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +151 -0
  18. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/shared/helpers.py +2 -2
  19. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/base_async_session.py +4 -1
  20. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/base_browser.py +9 -5
  21. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/base_session.py +4 -1
  22. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/common_requester/common_session.py +2 -2
  23. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +35 -16
  24. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/qidian_requester/qidian_session.py +3 -3
  25. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/common_saver/common_epub.py +1 -1
  26. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/locales/en.json +4 -0
  27. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/locales/zh.json +4 -0
  28. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/constants.py +2 -1
  29. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/time_utils/datetime_utils.py +1 -1
  30. novel_downloader-1.2.2/novel_downloader/utils/time_utils/sleep_utils.py +65 -0
  31. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader.egg-info/PKG-INFO +1 -1
  32. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/pyproject.toml +1 -1
  33. novel_downloader-1.2.1/novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -95
  34. novel_downloader-1.2.1/novel_downloader/utils/time_utils/sleep_utils.py +0 -49
  35. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/LICENSE +0 -0
  36. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/README.md +0 -0
  37. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/cli/__init__.py +0 -0
  38. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/cli/clean.py +0 -0
  39. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/cli/interactive.py +0 -0
  40. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/cli/main.py +0 -0
  41. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/cli/settings.py +0 -0
  42. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/config/__init__.py +0 -0
  43. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/config/loader.py +0 -0
  44. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/config/site_rules.py +0 -0
  45. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/__init__.py +0 -0
  46. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/downloaders/__init__.py +0 -0
  47. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/downloaders/base_async_downloader.py +0 -0
  48. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/downloaders/base_downloader.py +0 -0
  49. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/downloaders/common_asynb_downloader.py +0 -0
  50. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/factory/__init__.py +0 -0
  51. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/factory/parser_factory.py +0 -0
  52. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/factory/requester_factory.py +0 -0
  53. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/factory/saver_factory.py +0 -0
  54. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/__init__.py +0 -0
  55. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/async_downloader_protocol.py +0 -0
  56. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/downloader_protocol.py +0 -0
  57. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/parser_protocol.py +0 -0
  58. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/interfaces/saver_protocol.py +0 -0
  59. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/__init__.py +0 -0
  60. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/common_parser/__init__.py +0 -0
  61. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/common_parser/main_parser.py +0 -0
  62. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/__init__.py +0 -0
  63. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/browser/__init__.py +0 -0
  64. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +0 -0
  65. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +0 -0
  66. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/session/__init__.py +0 -0
  67. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +0 -0
  68. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +0 -0
  69. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +0 -0
  70. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/parsers/qidian_parser/shared/__init__.py +0 -0
  71. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/__init__.py +0 -0
  72. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/common_requester/__init__.py +0 -0
  73. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/common_requester/common_async_session.py +0 -0
  74. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/requesters/qidian_requester/__init__.py +0 -0
  75. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/__init__.py +0 -0
  76. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/base_saver.py +0 -0
  77. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/common_saver/__init__.py +0 -0
  78. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/common_saver/common_txt.py +0 -0
  79. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/common_saver/main_saver.py +0 -0
  80. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/epub_utils/__init__.py +0 -0
  81. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/epub_utils/css_builder.py +0 -0
  82. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/epub_utils/initializer.py +0 -0
  83. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/epub_utils/text_to_html.py +0 -0
  84. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/epub_utils/volume_intro.py +0 -0
  85. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/core/savers/qidian_saver.py +0 -0
  86. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/config/rules.toml +0 -0
  87. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/config/settings.yaml +0 -0
  88. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/css_styles/main.css +0 -0
  89. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/css_styles/volume-intro.css +0 -0
  90. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/images/volume_border.png +0 -0
  91. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/js_scripts/qidian_decrypt_node.js +0 -0
  92. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/json/replace_word_map.json +0 -0
  93. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/resources/text/blacklist.txt +0 -0
  94. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/__init__.py +0 -0
  95. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/cache.py +0 -0
  96. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/crypto_utils.py +0 -0
  97. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/file_utils/__init__.py +0 -0
  98. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/file_utils/io.py +0 -0
  99. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/file_utils/normalize.py +0 -0
  100. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/file_utils/sanitize.py +0 -0
  101. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/fontocr/__init__.py +0 -0
  102. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/fontocr/ocr_v1.py +0 -0
  103. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/fontocr/ocr_v2.py +0 -0
  104. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/hash_store.py +0 -0
  105. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/hash_utils.py +0 -0
  106. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/i18n.py +0 -0
  107. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/logger.py +0 -0
  108. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/model_loader.py +0 -0
  109. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/network.py +0 -0
  110. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/state.py +0 -0
  111. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/text_utils/__init__.py +0 -0
  112. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/text_utils/chapter_formatting.py +0 -0
  113. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/text_utils/diff_display.py +0 -0
  114. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/text_utils/font_mapping.py +0 -0
  115. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/text_utils/text_cleaning.py +0 -0
  116. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader/utils/time_utils/__init__.py +0 -0
  117. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader.egg-info/SOURCES.txt +0 -0
  118. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader.egg-info/dependency_links.txt +0 -0
  119. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader.egg-info/entry_points.txt +0 -0
  120. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader.egg-info/requires.txt +0 -0
  121. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/novel_downloader.egg-info/top_level.txt +0 -0
  122. {novel_downloader-1.2.1 → novel_downloader-1.2.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: novel-downloader
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: A command-line tool for downloading Chinese web novels from Qidian and similar platforms.
5
5
  Author-email: Saudade Z <saudadez217@gmail.com>
6
6
  License: MIT License
@@ -7,7 +7,7 @@ novel_downloader
7
7
  Core package for the Novel Downloader project.
8
8
  """
9
9
 
10
- __version__ = "1.2.1"
10
+ __version__ = "1.2.2"
11
11
 
12
12
  __author__ = "Saudade Z"
13
13
  __email__ = "saudadez217@gmail.com"
@@ -57,6 +57,8 @@ def download_cli(ctx: Context, book_ids: List[str], site: str) -> None:
57
57
  parser_cfg = adapter.get_parser_config()
58
58
  saver_cfg = adapter.get_saver_config()
59
59
 
60
+ click.echo(t("download_site_mode", mode=downloader_cfg.mode))
61
+
60
62
  # If no book_ids provided on the command line, try to load them from config
61
63
  if not book_ids:
62
64
  try:
@@ -23,6 +23,7 @@ from .models import (
23
23
  RequesterConfig,
24
24
  SaverConfig,
25
25
  )
26
+ from .site_rules import load_site_rules
26
27
 
27
28
 
28
29
  class ConfigAdapter:
@@ -38,19 +39,43 @@ class ConfigAdapter:
38
39
  self._config = config
39
40
  self._site = site
40
41
 
42
+ site_rules = load_site_rules() # -> Dict[str, SiteRules]
43
+ self._supported_sites = set(site_rules.keys())
44
+
41
45
  def set_site(self, site: str) -> None:
42
46
  """
43
47
  切换当前适配的站点
44
48
  """
45
49
  self._site = site
46
50
 
51
+ def _get_site_cfg(self) -> Dict[str, Any]:
52
+ """
53
+ 统一获取站点配置:
54
+
55
+ 1. 先尝试从 self._config["sites"][self._site] 取配置
56
+ 2. 如果没有配置, 且 self._site 在 self._supported_sites 中, 则取 sites["common"]
57
+ 3. 否则返回空 dict
58
+ """
59
+ sites_cfg = self._config.get("sites", {}) or {}
60
+
61
+ # 1. site-specific config
62
+ if self._site in sites_cfg:
63
+ return sites_cfg[self._site] or {}
64
+
65
+ # 2. fallback to "common" only if site is supported
66
+ if self._site in self._supported_sites:
67
+ return sites_cfg.get("common", {}) or {}
68
+
69
+ # 3. completely unsupported site
70
+ return {}
71
+
47
72
  def get_requester_config(self) -> RequesterConfig:
48
73
  """
49
74
  从 config["requests"] 中读取通用请求配置 (含 DrissionPage 设置)
50
75
  返回 RequesterConfig 实例
51
76
  """
52
77
  req = self._config.get("requests", {})
53
- site_cfg = self._config.get("sites", {}).get(self._site, {})
78
+ site_cfg = self._get_site_cfg()
54
79
  return RequesterConfig(
55
80
  wait_time=req.get("wait_time", 5),
56
81
  retry_times=req.get("retry_times", 3),
@@ -73,7 +98,7 @@ class ConfigAdapter:
73
98
  """
74
99
  gen = self._config.get("general", {})
75
100
  debug = gen.get("debug", {})
76
- site_cfg = self._config.get("sites", {}).get(self._site, {})
101
+ site_cfg = self._get_site_cfg()
77
102
  return DownloaderConfig(
78
103
  request_interval=gen.get("request_interval", 5),
79
104
  raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
@@ -94,7 +119,7 @@ class ConfigAdapter:
94
119
  """
95
120
  gen = self._config.get("general", {})
96
121
  font_ocr = gen.get("font_ocr", {})
97
- site_cfg = self._config.get("sites", {}).get(self._site, {})
122
+ site_cfg = self._get_site_cfg()
98
123
  return ParserConfig(
99
124
  cache_dir=gen.get("cache_dir", "./cache"),
100
125
  decode_font=font_ocr.get("decode_font", False),
@@ -139,7 +164,7 @@ class ConfigAdapter:
139
164
  """
140
165
  从 config["sites"][site]["book_ids"] 中提取目标书籍列表
141
166
  """
142
- site_cfg = self._config.get("sites", {}).get(self._site, {})
167
+ site_cfg = self._get_site_cfg()
143
168
  raw_ids = site_cfg.get("book_ids", [])
144
169
 
145
170
  if isinstance(raw_ids, str):
@@ -135,16 +135,19 @@ class ChapterFieldRules(TypedDict):
135
135
  steps: List[RuleStep]
136
136
 
137
137
 
138
- class VolumesRules(TypedDict, total=False):
139
- has_volume: bool # 是否存在卷,false=未分卷
138
+ class VolumesRulesOptional(TypedDict, total=False):
140
139
  volume_selector: str # 有卷时选择 volume 块的 selector
141
- chapter_selector: str # 选择 chapter 节点的 selector
142
140
  volume_name_steps: List[RuleStep]
143
- chapter_steps: List[ChapterFieldRules] # 提取章节信息的步骤列表
144
141
  volume_mode: str # Optional: "normal" (default) or "mixed"
145
142
  list_selector: str # Optional: If "mixed" mode, parent container selector
146
143
 
147
144
 
145
+ class VolumesRules(VolumesRulesOptional):
146
+ has_volume: bool # 是否存在卷,false=未分卷
147
+ chapter_selector: str # 选择 chapter 节点的 selector
148
+ chapter_steps: List[ChapterFieldRules] # 提取章节信息的步骤列表
149
+
150
+
148
151
  class BookInfoRules(TypedDict, total=False):
149
152
  book_name: FieldRules
150
153
  author: FieldRules
@@ -67,8 +67,7 @@ class CommonDownloader(BaseDownloader):
67
67
  cache_base = self.cache_dir / site / book_id
68
68
  info_path = raw_base / "book_info.json"
69
69
  chapter_dir = raw_base / "chapters"
70
- if save_html:
71
- chapters_html_dir = cache_base / "html"
70
+ chapters_html_dir = cache_base / "html"
72
71
 
73
72
  raw_base.mkdir(parents=True, exist_ok=True)
74
73
  chapter_dir.mkdir(parents=True, exist_ok=True)
@@ -87,8 +87,7 @@ class QidianDownloader(BaseDownloader):
87
87
  info_path = raw_base / "book_info.json"
88
88
  chapter_dir = raw_base / "chapters"
89
89
  encrypted_chapter_dir = raw_base / "encrypted_chapters"
90
- if save_html:
91
- chapters_html_dir = cache_base / "html"
90
+ chapters_html_dir = cache_base / "html"
92
91
 
93
92
  raw_base.mkdir(parents=True, exist_ok=True)
94
93
  chapter_dir.mkdir(parents=True, exist_ok=True)
@@ -14,7 +14,7 @@ based on the site name and parser mode specified in the configuration.
14
14
  To add support for new sites or modes, extend the `_site_map` accordingly.
15
15
  """
16
16
 
17
- from typing import Union
17
+ from typing import Union, cast
18
18
 
19
19
  from novel_downloader.config import DownloaderConfig, load_site_rules
20
20
  from novel_downloader.core.downloaders import (
@@ -137,13 +137,15 @@ def get_downloader(
137
137
  :raises TypeError: If the provided requester does not match the required protocol
138
138
  for the chosen mode (sync vs async).
139
139
  """
140
- mode = config.mode.lower()
141
- if mode == "async":
142
- if not isinstance(requester, AsyncRequesterProtocol):
143
- raise TypeError("Async mode requires an AsyncRequesterProtocol")
144
- return get_async_downloader(requester, parser, saver, site, config)
145
- if mode in ("browser", "session"):
146
- if not isinstance(requester, RequesterProtocol):
147
- raise TypeError("Sync mode requires a RequesterProtocol")
148
- return get_sync_downloader(requester, parser, saver, site, config)
149
- raise ValueError(f"Unknown mode '{config.mode}' for site '{site}'")
140
+ if requester.is_async():
141
+ if config.mode.lower() != "async":
142
+ raise TypeError("Requester is async, but config.mode is not 'async'")
143
+ async_requester = cast(AsyncRequesterProtocol, requester)
144
+ return get_async_downloader(async_requester, parser, saver, site, config)
145
+ else:
146
+ if config.mode.lower() not in ("browser", "session"):
147
+ raise TypeError(
148
+ "Requester is sync, but config.mode is not 'browser' or 'session'"
149
+ )
150
+ sync_requester = cast(RequesterProtocol, requester)
151
+ return get_sync_downloader(sync_requester, parser, saver, site, config)
@@ -9,7 +9,7 @@ for book info pages, individual chapters, managing request lifecycle,
9
9
  and optionally retrieving a user's authenticated bookcase — all in async style.
10
10
  """
11
11
 
12
- from typing import Optional, Protocol, runtime_checkable
12
+ from typing import Literal, Optional, Protocol, runtime_checkable
13
13
 
14
14
 
15
15
  @runtime_checkable
@@ -21,6 +21,9 @@ class AsyncRequesterProtocol(Protocol):
21
21
  and manage login/shutdown asynchronously.
22
22
  """
23
23
 
24
+ def is_async(self) -> Literal[True]:
25
+ ...
26
+
24
27
  async def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
25
28
  """
26
29
  Attempt to log in asynchronously.
@@ -9,7 +9,7 @@ for book info pages, individual chapters, managing request lifecycle,
9
9
  and optionally retrieving a user's authenticated bookcase.
10
10
  """
11
11
 
12
- from typing import Optional, Protocol, runtime_checkable
12
+ from typing import Literal, Optional, Protocol, runtime_checkable
13
13
 
14
14
 
15
15
  @runtime_checkable
@@ -20,6 +20,9 @@ class RequesterProtocol(Protocol):
20
20
  - a specific chapter page.
21
21
  """
22
22
 
23
+ def is_async(self) -> Literal[False]:
24
+ ...
25
+
23
26
  def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
24
27
  """
25
28
  Attempt to log in
@@ -45,14 +45,14 @@ class BaseParser(ParserProtocol, abc.ABC):
45
45
  self._base_cache_dir = Path(config.cache_dir)
46
46
 
47
47
  @abc.abstractmethod
48
- def parse_book_info(self, html: str) -> Dict[str, Any]:
48
+ def parse_book_info(self, html_str: str) -> Dict[str, Any]:
49
49
  """
50
50
  Parse a book info page and extract metadata and chapter structure.
51
51
 
52
52
  Depending on the site structure, the return dict may include a
53
53
  flat `chapters` list or nested `volumes` with chapter groups.
54
54
 
55
- :param html: Raw HTML of the book info page.
55
+ :param html_str: Raw HTML of the book info page.
56
56
  :return: Parsed metadata and chapter structure as a dictionary.
57
57
  """
58
58
  ...
@@ -62,7 +62,7 @@ class BaseParser(ParserProtocol, abc.ABC):
62
62
  """
63
63
  Parse a single chapter page and extract clean text or simplified HTML.
64
64
 
65
- :param html: Raw HTML of the chapter page.
65
+ :param html_str: Raw HTML of the chapter page.
66
66
  :param chapter_id: Identifier of the chapter being parsed.
67
67
  :return: Cleaned chapter content as plain text or minimal HTML.
68
68
  """
@@ -188,7 +188,7 @@ class HTMLExtractor:
188
188
  current = sep.join(current)
189
189
 
190
190
  elif t == "attr":
191
- name = step.get("attr")
191
+ name = step.get("attr") or ""
192
192
  if isinstance(current, list):
193
193
  current = [elem.get(name, "") for elem in current]
194
194
  elif isinstance(current, Tag):
@@ -216,9 +216,9 @@ class HTMLExtractor:
216
216
  """
217
217
  list_selector = volume_rule.get("list_selector")
218
218
  volume_selector = volume_rule.get("volume_selector")
219
- chapter_selector = volume_rule.get("chapter_selector")
220
219
  volume_name_steps = volume_rule.get("volume_name_steps")
221
- chapter_steps_list = volume_rule.get("chapter_steps")
220
+ chapter_selector = volume_rule["chapter_selector"]
221
+ chapter_steps_list = volume_rule["chapter_steps"]
222
222
 
223
223
  if not (
224
224
  list_selector and volume_selector and chapter_selector and volume_name_steps
@@ -241,6 +241,8 @@ class HTMLExtractor:
241
241
  for elem in list_area.find_all(
242
242
  [volume_selector, chapter_selector], recursive=True
243
243
  ):
244
+ if not isinstance(elem, Tag):
245
+ continue
244
246
  if elem.name == volume_selector:
245
247
  extractor = HTMLExtractor(str(elem))
246
248
  volume_name = extractor.extract_field(volume_name_steps)
@@ -257,9 +259,9 @@ class HTMLExtractor:
257
259
  return volumes
258
260
 
259
261
  def extract_volume_blocks(self, volume_rule: VolumesRules) -> List[Dict[str, Any]]:
260
- volume_selector = volume_rule["volume_selector"]
262
+ volume_selector = volume_rule.get("volume_selector")
263
+ volume_name_steps = volume_rule.get("volume_name_steps")
261
264
  chapter_selector = volume_rule["chapter_selector"]
262
- volume_name_steps = volume_rule["volume_name_steps"]
263
265
  chapter_steps_list = volume_rule["chapter_steps"]
264
266
  if not (volume_selector and volume_name_steps):
265
267
  raise ValueError(
@@ -229,7 +229,7 @@ def extract_paragraphs_recursively(
229
229
  if chapter_id:
230
230
  main_id = f"c-{chapter_id}"
231
231
  main_tag = soup.find("main", id=main_id)
232
- if not main_tag:
232
+ if not isinstance(main_tag, Tag):
233
233
  return []
234
234
  else:
235
235
  main_tag = soup
@@ -69,14 +69,14 @@ class QidianBrowserParser(BaseParser):
69
69
  self._font_debug_dir = self._base_cache_dir / "font_debug"
70
70
  self._font_debug_dir.mkdir(parents=True, exist_ok=True)
71
71
 
72
- def parse_book_info(self, html: str) -> Dict[str, Any]:
72
+ def parse_book_info(self, html_str: str) -> Dict[str, Any]:
73
73
  """
74
74
  Parse a book info page and extract metadata and chapter structure.
75
75
 
76
- :param html: Raw HTML of the book info page.
76
+ :param html_str: Raw HTML of the book info page.
77
77
  :return: Parsed metadata and chapter structure as a dictionary.
78
78
  """
79
- return parse_book_info(html)
79
+ return parse_book_info(html_str)
80
80
 
81
81
  def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
82
82
  """
@@ -245,7 +245,7 @@ def extract_paragraphs_recursively(
245
245
  if chapter_id > 0:
246
246
  main_id = f"c-{chapter_id}"
247
247
  main_tag = soup.find("main", id=main_id)
248
- if not main_tag:
248
+ if not isinstance(main_tag, Tag):
249
249
  return []
250
250
  else:
251
251
  main_tag = soup
@@ -72,14 +72,14 @@ class QidianSessionParser(BaseParser):
72
72
  self._font_debug_dir = self._base_cache_dir / "font_debug"
73
73
  self._font_debug_dir.mkdir(parents=True, exist_ok=True)
74
74
 
75
- def parse_book_info(self, html: str) -> Dict[str, Any]:
75
+ def parse_book_info(self, html_str: str) -> Dict[str, Any]:
76
76
  """
77
77
  Parse a book info page and extract metadata and chapter structure.
78
78
 
79
- :param html: Raw HTML of the book info page.
79
+ :param html_str: Raw HTML of the book info page.
80
80
  :return: Parsed metadata and chapter structure as a dictionary.
81
81
  """
82
- return parse_book_info(html)
82
+ return parse_book_info(html_str)
83
83
 
84
84
  def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
85
85
  """
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers.qidian_parser.shared.book_info_parser
5
+ -------------------------------------------------------------------
6
+
7
+ This module provides parsing of Qidian book info pages.
8
+
9
+ It extracts metadata such as title, author, cover URL, update
10
+ time, status, word count, summary, and volume-chapter structure.
11
+ """
12
+
13
+ import logging
14
+ import re
15
+ from typing import Any, Dict
16
+
17
+ from bs4.element import Tag
18
+
19
+ from .helpers import html_to_soup
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ def _chapter_url_to_id(url: str) -> str:
25
+ """
26
+ Extract chapterId as the last non-empty segment of the URL.
27
+ """
28
+ return url.rstrip("/").split("/")[-1]
29
+
30
+
31
+ def _get_volume_name(vol_div: Tag) -> str:
32
+ """
33
+ Extracts the volume title from a <div class="volume"> element
34
+ """
35
+ h3 = vol_div.select_one("h3")
36
+ if not h3:
37
+ return ""
38
+ for a in h3.find_all("a"):
39
+ a.decompose()
40
+ text: str = h3.get_text(strip=True)
41
+ return text.split(chr(183))[0].strip()
42
+
43
+
44
+ def safe_select_text(
45
+ soup: Tag,
46
+ selector: str,
47
+ *,
48
+ separator: str = "",
49
+ strip: bool = False,
50
+ default: str = "",
51
+ ) -> str:
52
+ """
53
+ Safely select the first element matching a CSS selector and return its text.
54
+
55
+ :param soup: A BeautifulSoup Tag or sub-tree to query.
56
+ :param selector: A CSS selector string.
57
+ :param separator: Separator to use between strings when joining.
58
+ :param strip: Whether to strip whitespace from the result.
59
+ :param default: Value to return if no element is found.
60
+ :return: The element's text, or `default` if not found.
61
+ """
62
+ tag = soup.select_one(selector)
63
+ return (
64
+ tag.get_text(separator=separator, strip=strip)
65
+ if isinstance(tag, Tag)
66
+ else default
67
+ )
68
+
69
+
70
+ def safe_select_attr(
71
+ soup: Tag,
72
+ selector: str,
73
+ attr: str,
74
+ *,
75
+ default: str = "",
76
+ ) -> str:
77
+ """
78
+ Safely select the first element matching a CSS selector and return one attributes.
79
+
80
+ :param soup: A BeautifulSoup Tag or sub-tree to query.
81
+ :param selector: A CSS selector string.
82
+ :param attr: The attribute name to retrieve from the selected element.
83
+ :param default: Value to return if no element or attribute is found.
84
+ :return: The attribute's value stripped of whitespace, or `default` if not found.
85
+ """
86
+ tag = soup.select_one(selector)
87
+ if isinstance(tag, Tag) and attr in tag.attrs:
88
+ value = tag.attrs[attr]
89
+ if isinstance(value, list):
90
+ return " ".join(value).strip()
91
+ elif isinstance(value, str):
92
+ return value.strip()
93
+ return default
94
+
95
+
96
+ def parse_book_info(html_str: str) -> Dict[str, Any]:
97
+ """
98
+ Extract metadata: title, author, cover_url, update_time, status,
99
+ word_count, summary, and volumes with chapters.
100
+
101
+ :param html_str: Raw HTML of the book info page.
102
+ :return: A dict containing book metadata.
103
+ """
104
+ info: Dict[str, Any] = {}
105
+ try:
106
+ soup = html_to_soup(html_str)
107
+ info["book_name"] = safe_select_text(soup, "em#bookName", strip=True)
108
+ info["author"] = safe_select_text(soup, "a.writer", strip=True)
109
+ info["cover_url"] = safe_select_attr(soup, "div.book-img img", "src")
110
+ info["update_time"] = (
111
+ safe_select_text(soup, "span.book-update-time", strip=True)
112
+ .replace("更新时间", "")
113
+ .strip()
114
+ )
115
+ info["serial_status"] = safe_select_text(soup, "span.blue", strip=True)
116
+
117
+ # Word count via regex fallback
118
+ match = re.search(r"<em>([\d.]+)</em>\s*<cite>(.*?)字</cite>", html_str)
119
+ info["word_count"] = (
120
+ f"{match.group(1)}{match.group(2)}字" if match else "Unknown"
121
+ )
122
+
123
+ info["summary"] = safe_select_text(
124
+ soup, "div.book-intro p", separator="\n", strip=True
125
+ )
126
+ # volumes
127
+ vols = []
128
+ for vol_div in soup.select("div.volume-wrap div.volume"):
129
+ name = _get_volume_name(vol_div)
130
+ chaps = []
131
+ for li in vol_div.select("li"):
132
+ a = li.select_one("a")
133
+ if not isinstance(a, Tag) or "href" not in a.attrs:
134
+ continue
135
+ href_val = a["href"]
136
+ if isinstance(href_val, list):
137
+ href = href_val[0].strip()
138
+ else:
139
+ href = str(href_val).strip()
140
+ chaps.append(
141
+ {
142
+ "title": a.get_text(strip=True),
143
+ "url": href,
144
+ "chapterId": _chapter_url_to_id(href),
145
+ }
146
+ )
147
+ vols.append({"volume_name": name, "chapters": chaps})
148
+ info["volumes"] = vols
149
+ except Exception as e:
150
+ logger.warning("[Parser] Error parsing book info: %s", e)
151
+ return info
@@ -16,7 +16,7 @@ import json
16
16
  import logging
17
17
  from typing import Any, Dict, Union
18
18
 
19
- from bs4 import BeautifulSoup
19
+ from bs4 import BeautifulSoup, Tag
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
@@ -103,7 +103,7 @@ def find_ssr_page_context(soup: BeautifulSoup) -> Dict[str, Any]:
103
103
  """
104
104
  try:
105
105
  tag = soup.find("script", id="vite-plugin-ssr_pageContext")
106
- if tag and tag.string:
106
+ if isinstance(tag, Tag) and tag.string:
107
107
  data: Dict[str, Any] = json.loads(tag.string.strip())
108
108
  return data
109
109
  except Exception as e:
@@ -13,7 +13,7 @@ cookie handling, and defines abstract methods for subclasses.
13
13
  import abc
14
14
  import asyncio
15
15
  import time
16
- from typing import Any, Dict, Optional, Union
16
+ from typing import Any, Dict, Literal, Optional, Union
17
17
 
18
18
  import aiohttp
19
19
  from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
@@ -58,6 +58,9 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
58
58
  _cookies (Dict[str, str]): Optional cookie jar for the session.
59
59
  """
60
60
 
61
    def is_async(self) -> Literal[True]:
        """Report that this requester operates asynchronously (async/await)."""
        return True
63
+
61
64
  def _init_session(
62
65
  self,
63
66
  config: RequesterConfig,
@@ -11,9 +11,10 @@ specialized purposes.
11
11
 
12
12
  import abc
13
13
  import logging
14
- from typing import Any, Dict, Optional
14
+ from typing import Any, Dict, Literal, Optional, cast
15
15
 
16
- from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
16
+ from DrissionPage import Chromium, ChromiumOptions
17
+ from DrissionPage._pages.mix_tab import MixTab
17
18
 
18
19
  from novel_downloader.config.models import RequesterConfig
19
20
  from novel_downloader.core.interfaces import RequesterProtocol
@@ -42,6 +43,9 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
42
43
  _page (ChromiumPage): The active browser tab.
43
44
  """
44
45
 
46
    def is_async(self) -> Literal[False]:
        """Report that this requester operates synchronously (blocking calls)."""
        return False
48
+
45
49
  def _init_browser(self, config: RequesterConfig) -> None:
46
50
  """
47
51
  Initialize the browser with specified options from RequesterConfig.
@@ -99,7 +103,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
99
103
  Set up the browser instance and open the default tab.
100
104
  """
101
105
  self._browser = Chromium(self._options)
102
- self._page = self._browser.get_tab()
106
+ self._page = cast(MixTab, self._browser.get_tab())
103
107
 
104
108
  def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
105
109
  """
@@ -151,7 +155,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
151
155
  )
152
156
 
153
157
  @property
154
- def page(self) -> ChromiumPage:
158
+ def page(self) -> Optional[MixTab]:
155
159
  """
156
160
  Return the current Chromium page object.
157
161
 
@@ -160,7 +164,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
160
164
  return self._page
161
165
 
162
166
  @property
163
- def browser(self) -> Chromium:
167
+ def browser(self) -> Optional[Chromium]:
164
168
  """
165
169
  Return the Chromium browser instance.
166
170
 
@@ -10,7 +10,7 @@ persistent session and supports retries, headers, and timeout configurations.
10
10
  """
11
11
 
12
12
  import abc
13
- from typing import Any, Dict, Optional, Union
13
+ from typing import Any, Dict, Literal, Optional, Union
14
14
 
15
15
  import requests
16
16
  from requests import Response, Session
@@ -31,6 +31,9 @@ class BaseSession(RequesterProtocol, abc.ABC):
31
31
  _timeout (float): Timeout for each request in seconds.
32
32
  """
33
33
 
34
    def is_async(self) -> Literal[False]:
        """Report that this requester operates synchronously (blocking calls)."""
        return False
36
+
34
37
  def _init_session(
35
38
  self, config: RequesterConfig, cookies: Optional[Dict[str, str]] = None
36
39
  ) -> None:
@@ -64,7 +64,7 @@ class CommonSession(BaseSession):
64
64
  with self.session.get(url, timeout=self.timeout) as response:
65
65
  response.raise_for_status()
66
66
  content = response.text
67
- sleep_with_random_delay(base)
67
+ sleep_with_random_delay(base, add_spread=1.0)
68
68
  return content
69
69
  except Exception as e:
70
70
  if attempt == self.retry_times:
@@ -94,7 +94,7 @@ class CommonSession(BaseSession):
94
94
  with self.session.get(url, timeout=self.timeout) as response:
95
95
  response.raise_for_status()
96
96
  content = response.text
97
- sleep_with_random_delay(base)
97
+ sleep_with_random_delay(base, add_spread=1.0)
98
98
  return content
99
99
  except Exception as e:
100
100
  if attempt == self.retry_times: