novel-downloader 1.2.2-py3-none-any.whl → 1.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. novel_downloader/__init__.py +1 -2
  2. novel_downloader/cli/__init__.py +0 -1
  3. novel_downloader/cli/clean.py +2 -10
  4. novel_downloader/cli/download.py +16 -22
  5. novel_downloader/cli/interactive.py +0 -1
  6. novel_downloader/cli/main.py +1 -3
  7. novel_downloader/cli/settings.py +8 -8
  8. novel_downloader/config/__init__.py +0 -1
  9. novel_downloader/config/adapter.py +32 -27
  10. novel_downloader/config/loader.py +116 -108
  11. novel_downloader/config/models.py +35 -29
  12. novel_downloader/config/site_rules.py +2 -4
  13. novel_downloader/core/__init__.py +0 -1
  14. novel_downloader/core/downloaders/__init__.py +4 -4
  15. novel_downloader/core/downloaders/base/__init__.py +14 -0
  16. novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
  17. novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
  18. novel_downloader/core/downloaders/biquge/__init__.py +12 -0
  19. novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
  20. novel_downloader/core/downloaders/common/__init__.py +14 -0
  21. novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
  22. novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
  23. novel_downloader/core/downloaders/qidian/__init__.py +10 -0
  24. novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
  25. novel_downloader/core/factory/__init__.py +4 -5
  26. novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
  27. novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
  28. novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
  29. novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
  30. novel_downloader/core/interfaces/__init__.py +8 -9
  31. novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
  32. novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
  33. novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
  34. novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
  35. novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
  36. novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
  37. novel_downloader/core/parsers/__init__.py +5 -4
  38. novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
  39. novel_downloader/core/parsers/biquge/__init__.py +10 -0
  40. novel_downloader/core/parsers/biquge/main_parser.py +126 -0
  41. novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
  42. novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
  43. novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
  44. novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
  45. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
  46. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
  47. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
  48. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
  49. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
  50. novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
  51. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
  52. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
  53. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
  54. novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
  55. novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
  56. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
  57. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
  58. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
  59. novel_downloader/core/requesters/__init__.py +9 -5
  60. novel_downloader/core/requesters/base/__init__.py +16 -0
  61. novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
  62. novel_downloader/core/requesters/base/browser.py +340 -0
  63. novel_downloader/core/requesters/base/session.py +364 -0
  64. novel_downloader/core/requesters/biquge/__init__.py +12 -0
  65. novel_downloader/core/requesters/biquge/session.py +90 -0
  66. novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
  67. novel_downloader/core/requesters/common/async_session.py +96 -0
  68. novel_downloader/core/requesters/common/session.py +113 -0
  69. novel_downloader/core/requesters/qidian/__init__.py +21 -0
  70. novel_downloader/core/requesters/qidian/broswer.py +307 -0
  71. novel_downloader/core/requesters/qidian/session.py +287 -0
  72. novel_downloader/core/savers/__init__.py +5 -3
  73. novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
  74. novel_downloader/core/savers/biquge.py +25 -0
  75. novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
  76. novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
  77. novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
  78. novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
  79. novel_downloader/core/savers/epub_utils/__init__.py +0 -1
  80. novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
  81. novel_downloader/core/savers/epub_utils/initializer.py +4 -5
  82. novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
  83. novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
  84. novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
  85. novel_downloader/locales/en.json +8 -4
  86. novel_downloader/locales/zh.json +5 -1
  87. novel_downloader/resources/config/settings.toml +88 -0
  88. novel_downloader/utils/cache.py +2 -2
  89. novel_downloader/utils/chapter_storage.py +340 -0
  90. novel_downloader/utils/constants.py +6 -4
  91. novel_downloader/utils/crypto_utils.py +3 -3
  92. novel_downloader/utils/file_utils/__init__.py +0 -1
  93. novel_downloader/utils/file_utils/io.py +12 -17
  94. novel_downloader/utils/file_utils/normalize.py +1 -3
  95. novel_downloader/utils/file_utils/sanitize.py +2 -9
  96. novel_downloader/utils/fontocr/__init__.py +0 -1
  97. novel_downloader/utils/fontocr/ocr_v1.py +19 -22
  98. novel_downloader/utils/fontocr/ocr_v2.py +147 -60
  99. novel_downloader/utils/hash_store.py +19 -20
  100. novel_downloader/utils/hash_utils.py +0 -1
  101. novel_downloader/utils/i18n.py +3 -4
  102. novel_downloader/utils/logger.py +5 -6
  103. novel_downloader/utils/model_loader.py +5 -8
  104. novel_downloader/utils/network.py +9 -10
  105. novel_downloader/utils/state.py +6 -7
  106. novel_downloader/utils/text_utils/__init__.py +0 -1
  107. novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
  108. novel_downloader/utils/text_utils/diff_display.py +0 -1
  109. novel_downloader/utils/text_utils/font_mapping.py +1 -4
  110. novel_downloader/utils/text_utils/text_cleaning.py +0 -1
  111. novel_downloader/utils/time_utils/__init__.py +0 -1
  112. novel_downloader/utils/time_utils/datetime_utils.py +8 -10
  113. novel_downloader/utils/time_utils/sleep_utils.py +1 -3
  114. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/METADATA +14 -17
  115. novel_downloader-1.3.1.dist-info/RECORD +127 -0
  116. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/WHEEL +1 -1
  117. novel_downloader/core/requesters/base_browser.py +0 -214
  118. novel_downloader/core/requesters/base_session.py +0 -246
  119. novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
  120. novel_downloader/core/requesters/common_requester/common_session.py +0 -126
  121. novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
  122. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
  123. novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
  124. novel_downloader/resources/config/settings.yaml +0 -76
  125. novel_downloader-1.2.2.dist-info/RECORD +0 -115
  126. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/entry_points.txt +0 -0
  127. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/licenses/LICENSE +0 -0
  128. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.1.dist-info}/top_level.txt +0 -0
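A change that recurs throughout the parser diffs below: `parse_chapter` implementations now return a `ChapterDict` from `novel_downloader.utils.chapter_storage` (or `None` on failure) instead of a bare `Dict[str, Any]` that could be empty, and site-specific metadata is nested under an "extra" key. The actual `ChapterDict` definition is not shown in this diff; the sketch below only illustrates the shape implied by the return statements in the hunks that follow, so the exact fields and types are an assumption.

# Illustrative sketch only -- the real ChapterDict lives in
# novel_downloader.utils.chapter_storage; the fields below are inferred
# from the return dicts visible in the hunks and may not match exactly.
from typing import Any, TypedDict


class ChapterDict(TypedDict):
    id: str
    title: str
    content: str
    extra: dict[str, Any]  # site metadata, e.g. "author_say", "seq", "volume"


def save_if_parsed(chapter: ChapterDict | None) -> bool:
    # 1.3.x parsers signal failure with None rather than an empty dict,
    # so callers test identity instead of dict truthiness.
    if chapter is None:
        return False
    print(chapter["title"], len(chapter["content"]))
    return True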
novel_downloader/core/parsers/{common_parser → common}/main_parser.py
@@ -1,18 +1,18 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.common_parser.main_parser
- -------------------------------------------------------
+ novel_downloader.core.parsers.common.main_parser
+ ------------------------------------------------

  This package provides parsing components for handling
  Common pages.
  """

- from typing import Any, Dict
+ from typing import Any

  from novel_downloader.config import ParserConfig, SiteRules
+ from novel_downloader.core.parsers.base import BaseParser
+ from novel_downloader.utils.chapter_storage import ChapterDict

- from ..base_parser import BaseParser
  from .helper import HTMLExtractor


@@ -35,7 +35,7 @@ class CommonParser(BaseParser):
  self._site = site
  self._site_rule = site_rule

- def parse_book_info(self, html_str: str) -> Dict[str, Any]:
+ def parse_book_info(self, html_str: str) -> dict[str, Any]:
  """
  Parse a book info page and extract metadata and chapter structure.

@@ -46,7 +46,11 @@ class CommonParser(BaseParser):
  rules = self._site_rule["book_info"]
  return extractor.extract_book_info(rules)

- def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
+ def parse_chapter(
+ self,
+ html_str: str,
+ chapter_id: str,
+ ) -> ChapterDict | None:
  """
  Parse a single chapter page and extract clean text or simplified HTML.

@@ -66,13 +70,15 @@ class CommonParser(BaseParser):
  title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
  content = extractor.extract_field(content_steps["steps"])
  if not content:
- return {}
+ return None

  return {
  "id": chapter_id,
  "title": title or "Untitled",
  "content": content,
- "site": self._site,
+ "extra": {
+ "site": self._site,
+ },
  }

  @property
novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py
@@ -1,8 +1,7 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser
- -------------------------------------------
+ novel_downloader.core.parsers.qidian
+ ------------------------------------

  This package provides parsing implementations for the Qidian platform.

novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py
@@ -1,8 +1,7 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser.browser
- ---------------------------------------------------
+ novel_downloader.core.parsers.qidian.browser
+ --------------------------------------------

  This package provides parsing components for handling Qidian
  pages that have been rendered by a browser engine.
novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py
@@ -1,8 +1,7 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser.browser.chapter_encrypted
- ---------------------------------------------------------------------
+ novel_downloader.core.parsers.qidian.browser.chapter_encrypted
+ --------------------------------------------------------------

  Support for parsing encrypted chapters from Qidian using font OCR mapping,
  CSS rules, and custom rendering logic.
@@ -19,11 +18,12 @@ from __future__ import annotations
  import json
  import logging
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
+ from typing import TYPE_CHECKING, Any

  import tinycss2
  from bs4 import BeautifulSoup, Tag

+ from novel_downloader.utils.chapter_storage import ChapterDict
  from novel_downloader.utils.network import download_font_file
  from novel_downloader.utils.text_utils import apply_font_mapping

@@ -43,7 +43,7 @@ def parse_encrypted_chapter(
  parser: QidianBrowserParser,
  soup: BeautifulSoup,
  chapter_id: str,
- ) -> Dict[str, Any]:
+ ) -> ChapterDict | None:
  """
  Extract and return the formatted textual content of an encrypted chapter.

@@ -61,15 +61,15 @@
  """
  try:
  if not (parser._decode_font and parser._font_ocr):
- return {}
+ return None
  ssr_data = find_ssr_page_context(soup)
  chapter_info = extract_chapter_info(ssr_data)
  if not chapter_info:
  logger.warning(
  "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
  )
- return {}
- debug_base_dir: Optional[Path] = None
+ return None
+ debug_base_dir: Path | None = None
  if parser._font_debug_dir:
  debug_base_dir = parser._font_debug_dir / chapter_id
  debug_base_dir.mkdir(parents=True, exist_ok=True)
@@ -85,10 +85,7 @@
  update_timestamp = chapter_info.get("updateTimestamp", 0)
  modify_time = chapter_info.get("modifyTime", 0)
  word_count = chapter_info.get("wordsCount", 0)
- vip = bool(chapter_info.get("vipStatus", 0))
- is_buy = bool(chapter_info.get("isBuy", 0))
  seq = chapter_info.get("seq", None)
- order = chapter_info.get("chapterOrder", None)
  volume = chapter_info.get("extra", {}).get("volumeName", "")

  # extract + save font
@@ -133,7 +130,7 @@
  logger.warning(
  f"[Parser] No end_number found after parsing chapter '{chapter_id}'"
  )
- return {}
+ return None

  paragraphs_str, refl_list = render_paragraphs(
  main_paragraphs, paragraphs_rules, end_number
@@ -143,7 +140,7 @@
  paragraphs_str_path.write_text(paragraphs_str, encoding="utf-8")

  # Run OCR + fallback mapping
- char_set = set(c for c in paragraphs_str if c not in {" ", "\n", "\u3000"})
+ char_set = {c for c in paragraphs_str if c not in {" ", "\n", "\u3000"}}
  refl_set = set(refl_list)
  char_set = char_set - refl_set
  if debug_base_dir:
@@ -174,33 +171,31 @@
  final_paragraphs_str = "\n\n".join(
  line.strip() for line in original_text.splitlines() if line.strip()
  )
- chapter_info = {
+ return {
  "id": str(chapter_id),
  "title": title,
  "content": final_paragraphs_str,
- "author_say": author_say.strip() if author_say else "",
- "updated_at": update_time,
- "update_timestamp": update_timestamp,
- "modify_time": modify_time,
- "word_count": word_count,
- "vip": vip,
- "purchased": is_buy,
- "order": order,
- "seq": seq,
- "volume": volume,
+ "extra": {
+ "author_say": author_say.strip() if author_say else "",
+ "updated_at": update_time,
+ "update_timestamp": update_timestamp,
+ "modify_time": modify_time,
+ "word_count": word_count,
+ "seq": seq,
+ "volume": volume,
+ },
  }
- return chapter_info

  except Exception as e:
  logger.warning(
  "[Parser] parse error for encrypted chapter '%s': %s", chapter_id, e
  )
- return {}
+ return None


  def extract_paragraphs_recursively(
  soup: BeautifulSoup, chapter_id: str = ""
- ) -> List[Dict[str, Any]]:
+ ) -> list[dict[str, Any]]:
  """
  Extracts paragraph elements under <main id="c-{chapter_id}"> from HTML
  and converts them to a nested data structure for further processing.
@@ -211,7 +206,7 @@ def extract_paragraphs_recursively(
  :return list: List of parsed <p> paragraph data.
  """

- def parse_element(elem: Any) -> Union[Dict[str, Any], None]:
+ def parse_element(elem: Any) -> dict[str, Any] | None:
  if not isinstance(elem, Tag):
  return None
  result = {"tag": elem.name, "attrs": dict(elem.attrs), "data": []}
@@ -243,7 +238,7 @@ def extract_paragraphs_recursively(
  return result


- def parse_rule(css_str: str) -> Dict[str, Any]:
+ def parse_rule(css_str: str) -> dict[str, Any]:
  """
  Parse a CSS string and extract style rules for rendering.

@@ -258,7 +253,7 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
  :return: Dict with "rules" and "orders" for rendering.
  """

- rules: Dict[str, Any] = {}
+ rules: dict[str, Any] = {}
  orders = []

  stylesheet = tinycss2.parse_stylesheet(
@@ -322,7 +317,7 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
  return {"rules": rules, "orders": orders}


- def parse_paragraph_names(rules: Dict[str, Any]) -> Set[str]:
+ def parse_paragraph_names(rules: dict[str, Any]) -> set[str]:
  """
  Extract all paragraph selector names from parsed rules, excluding "sy".
  """
@@ -335,16 +330,16 @@ def parse_paragraph_names(rules: Dict[str, Any]) -> Set[str]:


  def parse_end_number(
- main_paragraphs: List[Dict[str, Any]], paragraph_names: Set[str]
- ) -> Optional[int]:
+ main_paragraphs: list[dict[str, Any]], paragraph_names: set[str]
+ ) -> int | None:
  """
  Find the most frequent numeric suffix from tag names
  matched by given paragraph prefixes.
  """
- end_numbers: Dict[int, int] = {}
+ end_numbers: dict[int, int] = {}
  sorted_names = sorted(paragraph_names, key=len, reverse=True)

- def rec_parse(item: Union[List[Any], Dict[str, Any]]) -> None:
+ def rec_parse(item: list[Any] | dict[str, Any]) -> None:
  if isinstance(item, list):
  for element in item:
  rec_parse(element)
@@ -359,7 +354,7 @@ def parse_end_number(
  end_numbers[num] = end_numbers.get(num, 0) + 1
  break
  for val in item.values():
- if isinstance(val, (list, dict)):
+ if isinstance(val, (list | dict)):
  rec_parse(val)

  rec_parse(main_paragraphs)
@@ -381,10 +376,10 @@ def parse_end_number(


  def render_paragraphs(
- main_paragraphs: List[Dict[str, Any]],
- rules: Dict[str, Any],
+ main_paragraphs: list[dict[str, Any]],
+ rules: dict[str, Any],
  end_number: int,
- ) -> Tuple[str, List[str]]:
+ ) -> tuple[str, list[str]]:
  """
  Applies the parsed CSS rules to the paragraph structure and
  reconstructs the visible text.
@@ -403,11 +398,11 @@ def render_paragraphs(
  - A reconstructed paragraph string with line breaks.
  - A list of mirrored (reflected) characters for later OCR processing.
  """
- orders: List[Tuple[str, str]] = rules.get("orders", [])
+ orders: list[tuple[str, str]] = rules.get("orders", [])
  rules = rules.get("rules", {})
- refl_list: List[str] = []
+ refl_list: list[str] = []

- def apply_rule(data: Dict[str, Any], rule: Dict[str, Any]) -> str:
+ def apply_rule(data: dict[str, Any], rule: dict[str, Any]) -> str:
  if rule.get("delete-all", False):
  return ""

@@ -418,10 +413,7 @@ def render_paragraphs(
  curr_str += first_data

  if rule.get("delete-first", False):
- if len(curr_str) <= 1:
- curr_str = ""
- else:
- curr_str = curr_str[1:]
+ curr_str = "" if len(curr_str) <= 1 else curr_str[1:]

  curr_str += rule.get("append-end-char", "")

@@ -480,7 +472,7 @@ def render_paragraphs(
  logger.debug(f"[parser] not find p_class_str: {class_list}")
  continue
  # 普通标签处理,根据 orders 顺序匹配
- for ord_selector, ord_id in orders:
+ for ord_selector, _ in orders:
  tag_name = f"{ord_selector}{end_number}"
  if data.get("tag") != tag_name:
  continue
@@ -489,7 +481,7 @@ def render_paragraphs(
  ordered_cache[ord_selector] = apply_rule(data, curr_rule)
  break
  # 最后按 orders 顺序拼接
- for ord_selector, ord_id in orders:
+ for ord_selector, _ in orders:
  if ord_selector in ordered_cache:
  paragraphs_str += ordered_cache[ord_selector]
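The parse_end_number hunks above modernize the type annotations but keep the algorithm: walk the nested paragraph structure and return the numeric suffix that appears most often among tag names starting with a known paragraph prefix. A standalone, simplified sketch of that suffix-frequency idea (the names and flat input here are illustrative, not the library's actual code):

# Simplified illustration of the suffix-frequency idea; not the actual
# parse_end_number implementation from chapter_encrypted.py.
from collections import Counter


def most_common_suffix(tag_names: list[str], prefixes: list[str]) -> int | None:
    counts: Counter[int] = Counter()
    # Longer prefixes are tried first so "abcd12" is not claimed by "abc".
    for name in tag_names:
        for prefix in sorted(prefixes, key=len, reverse=True):
            suffix = name[len(prefix):]
            if name.startswith(prefix) and suffix.isdigit():
                counts[int(suffix)] += 1
                break
    return counts.most_common(1)[0][0] if counts else None


print(most_common_suffix(["abcd12", "efg12", "abcd7"], ["abcd", "efg"]))  # 12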
 
novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py
@@ -1,18 +1,18 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser.browser.chapter_normal
- ------------------------------------------------------------------
+ novel_downloader.core.parsers.qidian.browser.chapter_normal
+ -----------------------------------------------------------

  Parser logic for extracting readable text from Qidian chapters
  that use plain (non-encrypted) browser-rendered HTML.
  """

  import logging
- from typing import Any, Dict

  from bs4 import BeautifulSoup

+ from novel_downloader.utils.chapter_storage import ChapterDict
+
  from ..shared import (
  extract_chapter_info,
  find_ssr_page_context,
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
  def parse_normal_chapter(
  soup: BeautifulSoup,
  chapter_id: str,
- ) -> Dict[str, Any]:
+ ) -> ChapterDict | None:
  """
  Extract and format the chapter text from a normal Qidian page.
  Returns empty string if VIP/encrypted.
@@ -44,7 +44,7 @@
  main = soup.select_one("div#app div#reader-content main")
  if not main:
  logger.warning("[Parser] Main content not found for chapter")
- return {}
+ return None

  ssr_data = find_ssr_page_context(soup)
  chapter_info = extract_chapter_info(ssr_data)
@@ -52,7 +52,7 @@
  logger.warning(
  "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
  )
- return {}
+ return None

  title = chapter_info.get("chapterName", "Untitled")
  chapter_id = chapter_info.get("chapterId", "")
@@ -61,10 +61,7 @@
  update_timestamp = chapter_info.get("updateTimestamp", 0)
  modify_time = chapter_info.get("modifyTime", 0)
  word_count = chapter_info.get("wordsCount", 0)
- vip = bool(chapter_info.get("vipStatus", 0))
- is_buy = bool(chapter_info.get("isBuy", 0))
  seq = chapter_info.get("seq", None)
- order = chapter_info.get("chapterOrder", None)
  volume = chapter_info.get("extra", {}).get("volumeName", "")

  # remove review spans
@@ -78,20 +75,19 @@
  "id": str(chapter_id),
  "title": title,
  "content": chapter_text,
- "author_say": author_say.strip() if author_say else "",
- "updated_at": update_time,
- "update_timestamp": update_timestamp,
- "modify_time": modify_time,
- "word_count": word_count,
- "vip": vip,
- "purchased": is_buy,
- "order": order,
- "seq": seq,
- "volume": volume,
+ "extra": {
+ "author_say": author_say.strip() if author_say else "",
+ "updated_at": update_time,
+ "update_timestamp": update_timestamp,
+ "modify_time": modify_time,
+ "word_count": word_count,
+ "seq": seq,
+ "volume": volume,
+ },
  }

  except Exception as e:
  logger.warning(
  "[Parser] parse error for normal chapter '%s': %s", chapter_id, e
  )
- return {}
+ return None
novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py
@@ -1,8 +1,7 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser.browser.chapter_router
- ------------------------------------------------------------------
+ novel_downloader.core.parsers.qidian.browser.chapter_router
+ -----------------------------------------------------------

  Routing logic for selecting the correct chapter parser for Qidian browser pages.

@@ -13,7 +12,9 @@ routes the parsing task to either the encrypted or normal chapter parser.
  from __future__ import annotations

  import logging
- from typing import TYPE_CHECKING, Any, Dict
+ from typing import TYPE_CHECKING
+
+ from novel_downloader.utils.chapter_storage import ChapterDict

  from ..shared import (
  can_view_chapter,
@@ -32,7 +33,7 @@ def parse_chapter(
  parser: QidianBrowserParser,
  html_str: str,
  chapter_id: str,
- ) -> Dict[str, Any]:
+ ) -> ChapterDict | None:
  """
  Extract and return the formatted textual content of chapter.

@@ -48,11 +49,11 @@
  logger.warning(
  "[Parser] Chapter '%s' is not purchased or inaccessible.", chapter_id
  )
- return {}
+ return None

  if is_encrypted(soup):
  if not parser._decode_font:
- return {}
+ return None
  try:
  from .chapter_encrypted import parse_encrypted_chapter

@@ -62,9 +63,9 @@
  "[Parser] Encrypted chapter '%s' requires extra dependencies.",
  chapter_id,
  )
- return {}
+ return None

  return parse_normal_chapter(soup, chapter_id)
  except Exception as e:
  logger.warning("[Parser] parse error for chapter '%s': %s", chapter_id, e)
- return {}
+ return None
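The chapter_router hunks above also show how optional features degrade: the encrypted-chapter parser is imported lazily inside the function, and a missing optional dependency, like any parse failure, now yields None instead of an empty dict. A generic sketch of that lazy-import fallback pattern, with a made-up module name standing in for the optional dependency:

# Hypothetical illustration of the lazy-import fallback seen in
# chapter_router.py; "some_optional_ocr" is a made-up module name.
import logging

logger = logging.getLogger(__name__)


def parse_encrypted(html: str) -> str | None:
    try:
        # Imported only when an encrypted page is actually encountered,
        # so a base install without the OCR extras still works.
        from some_optional_ocr import decode  # type: ignore[import-not-found]
    except ImportError:
        logger.warning("Encrypted chapter requires extra dependencies.")
        return None
    try:
        return decode(html)
    except Exception as exc:
        logger.warning("parse error: %s", exc)
        return None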
novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py
@@ -1,8 +1,7 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser.browser.main_parser
- ---------------------------------------------------------------
+ novel_downloader.core.parsers.qidian.browser.main_parser
+ --------------------------------------------------------

  Main parser class for handling Qidian chapters rendered via a browser environment.

@@ -13,10 +12,11 @@ content extracted from dynamically rendered Qidian HTML pages.
  from __future__ import annotations

  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Dict, Optional
+ from typing import TYPE_CHECKING, Any

  from novel_downloader.config.models import ParserConfig
- from novel_downloader.core.parsers.base_parser import BaseParser
+ from novel_downloader.core.parsers.base import BaseParser
+ from novel_downloader.utils.chapter_storage import ChapterDict

  from ..shared import (
  is_encrypted,
@@ -47,9 +47,9 @@ class QidianBrowserParser(BaseParser):

  self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
  self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
- self._font_debug_dir: Optional[Path] = None
+ self._font_debug_dir: Path | None = None

- self._font_ocr: Optional[FontOCR] = None
+ self._font_ocr: FontOCR | None = None
  if self._decode_font:
  from novel_downloader.utils.fontocr import FontOCR

@@ -66,10 +66,10 @@
  vec_weight=config.vec_weight,
  font_debug=config.save_font_debug,
  )
- self._font_debug_dir = self._base_cache_dir / "font_debug"
+ self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
  self._font_debug_dir.mkdir(parents=True, exist_ok=True)

- def parse_book_info(self, html_str: str) -> Dict[str, Any]:
+ def parse_book_info(self, html_str: str) -> dict[str, Any]:
  """
  Parse a book info page and extract metadata and chapter structure.

@@ -78,7 +78,11 @@
  """
  return parse_book_info(html_str)

- def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
+ def parse_chapter(
+ self,
+ html_str: str,
+ chapter_id: str,
+ ) -> ChapterDict | None:
  """
  :param html: Raw HTML of the chapter page.
  :param chapter_id: Identifier of the chapter being parsed.
novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py
@@ -1,8 +1,7 @@
  #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
  """
- novel_downloader.core.parsers.qidian_parser.session
- ---------------------------------------------------------------
+ novel_downloader.core.parsers.qidian.session
+ --------------------------------------------

  This package provides parsing components for handling Qidian
  pages that have been rendered by a session.