novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165):
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.4.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -11,16 +11,14 @@ from __future__ import annotations
11
11
 
12
12
  import json
13
13
  import logging
14
- from pathlib import Path
15
14
  from typing import TYPE_CHECKING, Any
16
15
 
17
16
  import tinycss2
18
17
  from lxml import html
19
18
 
20
19
  from novel_downloader.models import ChapterDict
21
- from novel_downloader.utils.network import download_font_file
22
- from novel_downloader.utils.text_utils import (
23
- apply_font_mapping,
20
+ from novel_downloader.utils import (
21
+ download,
24
22
  truncate_half_lines,
25
23
  )
26
24
 
@@ -70,10 +68,9 @@ def parse_encrypted_chapter(
70
68
  )
71
69
  return None
72
70
 
73
- debug_base_dir: Path | None = None
74
- if parser._font_debug_dir:
75
- debug_base_dir = parser._font_debug_dir / chapter_id
76
- debug_base_dir.mkdir(parents=True, exist_ok=True)
71
+ debug_dir = parser._debug_dir / "font_debug" / "qidian" / chapter_id
72
+ if parser.save_font_debug:
73
+ debug_dir.mkdir(parents=True, exist_ok=True)
77
74
 
78
75
  css_str = chapter_info["css"]
79
76
  randomFont_str = chapter_info["randomFont"]
@@ -98,8 +95,10 @@ def parse_encrypted_chapter(
98
95
  rand_path.parent.mkdir(parents=True, exist_ok=True)
99
96
  rand_path.write_bytes(bytes(rf["data"]))
100
97
 
101
- fixed_path = download_font_file(
102
- url=fixedFontWoff2_url, target_folder=parser._fixed_font_dir
98
+ fixed_path = download(
99
+ url=fixedFontWoff2_url,
100
+ target_dir=parser._fixed_font_dir,
101
+ stream=True,
103
102
  )
104
103
  if fixed_path is None:
105
104
  raise ValueError("fixed_path is None: failed to download font")
@@ -125,16 +124,16 @@ def parse_encrypted_chapter(
125
124
  return None
126
125
  main_paragraphs = extract_paragraphs_recursively(raw_html, chapter_id)
127
126
 
128
- if debug_base_dir:
129
- main_paragraphs_path = debug_base_dir / "main_paragraphs_debug.json"
127
+ if parser.save_font_debug:
128
+ main_paragraphs_path = debug_dir / "main_paragraphs_debug.json"
130
129
  main_paragraphs_path.write_text(
131
130
  json.dumps(main_paragraphs, ensure_ascii=False, indent=2),
132
131
  encoding="utf-8",
133
132
  )
134
133
 
135
134
  paragraphs_rules = parse_rule(css_str)
136
- if debug_base_dir:
137
- paragraphs_rules_path = debug_base_dir / "paragraphs_rules_debug.json"
135
+ if parser.save_font_debug:
136
+ paragraphs_rules_path = debug_dir / "paragraphs_rules_debug.json"
138
137
  paragraphs_rules_path.write_text(
139
138
  json.dumps(paragraphs_rules, ensure_ascii=False, indent=2),
140
139
  encoding="utf-8",
@@ -146,16 +145,16 @@ def parse_encrypted_chapter(
146
145
  paragraphs_rules,
147
146
  end_number,
148
147
  )
149
- if debug_base_dir:
150
- paragraphs_str_path = debug_base_dir / f"{chapter_id}_debug.txt"
148
+ if parser.save_font_debug:
149
+ paragraphs_str_path = debug_dir / f"{chapter_id}_debug.txt"
151
150
  paragraphs_str_path.write_text(paragraphs_str, encoding="utf-8")
152
151
 
153
152
  # Run OCR + fallback mapping
154
153
  char_set = {c for c in paragraphs_str if c not in {" ", "\n", "\u3000"}}
155
154
  refl_set = set(refl_list)
156
155
  char_set = char_set - refl_set
157
- if debug_base_dir:
158
- char_sets_path = debug_base_dir / "char_set_debug.txt"
156
+ if parser.save_font_debug:
157
+ char_sets_path = debug_dir / "char_set_debug.txt"
159
158
  temp = f"char_set:\n{char_set}\n\nrefl_set:\n{refl_set}"
160
159
  char_sets_path.write_text(
161
160
  temp,
@@ -169,15 +168,18 @@ def parse_encrypted_chapter(
169
168
  refl_set=refl_set,
170
169
  chapter_id=chapter_id,
171
170
  )
172
- if debug_base_dir:
173
- mapping_json_path = debug_base_dir / "font_mapping.json"
171
+ if parser.save_font_debug:
172
+ mapping_json_path = debug_dir / "font_mapping.json"
174
173
  mapping_json_path.write_text(
175
174
  json.dumps(mapping_result, ensure_ascii=False, indent=2),
176
175
  encoding="utf-8",
177
176
  )
178
177
 
179
178
  # Reconstruct final readable text
180
- original_text = apply_font_mapping(paragraphs_str, mapping_result)
179
+ original_text = parser._font_ocr.apply_font_mapping(
180
+ text=paragraphs_str,
181
+ font_map=mapping_result,
182
+ )
181
183
 
182
184
  final_paragraphs_str = "\n\n".join(
183
185
  line.strip() for line in original_text.splitlines() if line.strip()
@@ -15,7 +15,7 @@ from typing import TYPE_CHECKING
15
15
  from lxml import html
16
16
 
17
17
  from novel_downloader.models import ChapterDict
18
- from novel_downloader.utils.text_utils import truncate_half_lines
18
+ from novel_downloader.utils import truncate_half_lines
19
19
 
20
20
  from .utils import (
21
21
  extract_chapter_info,
@@ -13,9 +13,10 @@ from pathlib import Path
13
13
  from typing import TYPE_CHECKING, Any
14
14
 
15
15
  from novel_downloader.core.parsers.base import BaseParser
16
+ from novel_downloader.core.parsers.registry import register_parser
16
17
  from novel_downloader.models import ChapterDict, ParserConfig
18
+ from novel_downloader.utils import find_cookie_value
17
19
  from novel_downloader.utils.constants import DATA_DIR
18
- from novel_downloader.utils.cookies import find_cookie_value
19
20
 
20
21
  from .book_info_parser import parse_book_info
21
22
  from .chapter_router import parse_chapter
@@ -27,6 +28,10 @@ if TYPE_CHECKING:
27
28
  from novel_downloader.utils.fontocr import FontOCR
28
29
 
29
30
 
31
+ @register_parser(
32
+ site_keys=["qidian", "qd"],
33
+ backends=["session", "browser"],
34
+ )
30
35
  class QidianParser(BaseParser):
31
36
  """
32
37
  Parser for Qidian site.
@@ -47,11 +52,10 @@ class QidianParser(BaseParser):
47
52
  # Extract and store parser flags from config
48
53
  self._use_truncation = config.use_truncation
49
54
  self._decode_font: bool = config.decode_font
50
- self._save_font_debug: bool = config.save_font_debug
51
55
 
52
56
  self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
53
57
  self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
54
- self._font_debug_dir: Path | None = None
58
+ self._debug_dir: Path = Path.cwd() / "debug"
55
59
 
56
60
  state_files = [
57
61
  DATA_DIR / "qidian" / "browser_state.cookies",
@@ -80,8 +84,6 @@ class QidianParser(BaseParser):
80
84
  vec_weight=config.vec_weight,
81
85
  font_debug=config.save_font_debug,
82
86
  )
83
- self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
84
- self._font_debug_dir.mkdir(parents=True, exist_ok=True)
85
87
 
86
88
  def parse_book_info(
87
89
  self,
@@ -121,19 +123,6 @@ class QidianParser(BaseParser):
121
123
  """
122
124
  return is_encrypted(html_str)
123
125
 
124
- def _init_cache_folders(self) -> None:
125
- """
126
- Prepare cache folders for plain/encrypted HTML and font debug data.
127
- Folders are only created if corresponding debug/save flags are enabled.
128
- """
129
- base = self._base_cache_dir
130
-
131
- # Font debug folder
132
- if self._save_font_debug and self.book_id:
133
- self._font_debug_dir = base / self.book_id / "font_debug"
134
- self._font_debug_dir.mkdir(parents=True, exist_ok=True)
135
- else:
136
- self._font_debug_dir = None
137
-
138
- def _on_book_id_set(self) -> None:
139
- self._init_cache_folders()
126
+ @property
127
+ def save_font_debug(self) -> bool:
128
+ return self._config.save_font_debug
@@ -5,17 +5,6 @@ novel_downloader.core.parsers.qidian.utils
5
5
 
6
6
  """
7
7
 
8
- from .helpers import (
9
- can_view_chapter,
10
- extract_chapter_info,
11
- find_ssr_page_context,
12
- is_duplicated,
13
- is_encrypted,
14
- is_restricted_page,
15
- vip_status,
16
- )
17
- from .node_decryptor import QidianNodeDecryptor, get_decryptor
18
-
19
8
  __all__ = [
20
9
  "find_ssr_page_context",
21
10
  "extract_chapter_info",
@@ -27,3 +16,14 @@ __all__ = [
27
16
  "QidianNodeDecryptor",
28
17
  "get_decryptor",
29
18
  ]
19
+
20
+ from .helpers import (
21
+ can_view_chapter,
22
+ extract_chapter_info,
23
+ find_ssr_page_context,
24
+ is_duplicated,
25
+ is_encrypted,
26
+ is_restricted_page,
27
+ vip_status,
28
+ )
29
+ from .node_decryptor import QidianNodeDecryptor, get_decryptor
@@ -9,6 +9,11 @@ GitHub releases.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ __all__ = [
13
+ "ensure_decryptor",
14
+ "RELEASE_VERSION",
15
+ ]
16
+
12
17
  import hashlib
13
18
  import platform
14
19
  import stat
@@ -137,9 +142,3 @@ def _make_executable(p: Path) -> None:
137
142
  p.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
138
143
  except PermissionError:
139
144
  pass
140
-
141
-
142
- __all__ = [
143
- "ensure_decryptor",
144
- "RELEASE_VERSION",
145
- ]
@@ -80,10 +80,10 @@ class QidianNodeDecryptor:
80
80
 
81
81
  # 3) Download the Fock JS module from Qidian CDN if missing
82
82
  if not self.QIDIAN_FOCK_JS_PATH.exists():
83
- from novel_downloader.utils.network import download_js_file
83
+ from novel_downloader.utils.network import download
84
84
 
85
85
  try:
86
- download_js_file(
86
+ download(
87
87
  self.QIDIAN_FOCK_JS_URL,
88
88
  self.script_dir,
89
89
  on_exist="overwrite",
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.registry
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ __all__ = ["register_parser", "get_parser"]
9
+
10
+ from collections.abc import Callable, Sequence
11
+ from typing import TypeVar
12
+
13
+ from novel_downloader.core.interfaces import ParserProtocol
14
+ from novel_downloader.models import ParserConfig
15
+
16
+ ParserBuilder = Callable[[ParserConfig], ParserProtocol]
17
+
18
+ P = TypeVar("P", bound=ParserProtocol)
19
+ _PARSER_MAP: dict[str, dict[str, ParserBuilder]] = {}
20
+
21
+
22
def register_parser(
    site_keys: Sequence[str],
    backends: Sequence[str],
) -> Callable[[type[P]], type[P]]:
    """
    Class decorator that registers a parser under one or more site keys.

    Site keys are lowercased before insertion; the decorated class is
    stored once per (site, backend) pair in the module-level registry.

    :param site_keys: Sequence of site identifiers
    :param backends: Sequence of backend types
    :return: A class decorator that populates _PARSER_MAP.
    """

    def decorator(parser_cls: type[P]) -> type[P]:
        for key in site_keys:
            # One bucket per site; shared by every backend listed.
            backend_bucket = _PARSER_MAP.setdefault(key.lower(), {})
            for backend_name in backends:
                backend_bucket[backend_name] = parser_cls
        return parser_cls

    return decorator
43
+
44
+
45
def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
    """
    Instantiate and return the parser registered for *site*.

    Lookup happens in two stages: first by lowercased site key, then by
    the backend mode taken from ``config.mode``.

    :param site: Site name (e.g., 'qidian')
    :param config: Configuration for the parser
    :return: An instance of a parser class
    :raises ValueError: if the site or its backend mode is not registered
    """
    normalized_site = site.lower()
    try:
        backend_map = _PARSER_MAP[normalized_site]
    except KeyError as err:
        raise ValueError(f"Unsupported site: {site!r}") from err

    mode = config.mode
    try:
        parser_cls = backend_map[mode]
    except KeyError as err:
        raise ValueError(
            f"Unsupported parser mode {mode!r} for site {site!r}. "
            f"Available modes: {list(backend_map)}"
        ) from err

    return parser_cls(config)
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.sfacg.main_parser
4
- -----------------------------------------------
3
+ novel_downloader.core.parsers.sfacg
4
+ -----------------------------------
5
5
 
6
6
  """
7
7
 
@@ -10,9 +10,14 @@ from typing import Any
10
10
  from lxml import html
11
11
 
12
12
  from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
13
14
  from novel_downloader.models import ChapterDict
14
15
 
15
16
 
17
+ @register_parser(
18
+ site_keys=["sfacg"],
19
+ backends=["session", "browser"],
20
+ )
16
21
  class SfacgParser(BaseParser):
17
22
  """ """
18
23
 
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.yamibo.main_parser
4
- ------------------------------------------------
3
+ novel_downloader.core.parsers.yamibo
4
+ ------------------------------------
5
5
 
6
6
  """
7
7
 
@@ -10,9 +10,14 @@ from typing import Any
10
10
  from lxml import html
11
11
 
12
12
  from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
13
14
  from novel_downloader.models import ChapterDict
14
15
 
15
16
 
17
+ @register_parser(
18
+ site_keys=["yamibo"],
19
+ backends=["session", "browser"],
20
+ )
16
21
  class YamiboParser(BaseParser):
17
22
  """ """
18
23
 
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers
4
+ -------------------------------
5
+
6
+ """
7
+
8
+ __all__ = [
9
+ "search",
10
+ "BiqugeSearcher",
11
+ "EsjzoneSearcher",
12
+ "QianbiSearcher",
13
+ "QidianSearcher",
14
+ ]
15
+
16
+ from .biquge import BiqugeSearcher
17
+ from .esjzone import EsjzoneSearcher
18
+ from .qianbi import QianbiSearcher
19
+ from .qidian import QidianSearcher
20
+ from .registry import search
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.base
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import abc
9
+ from typing import Any
10
+ from urllib.parse import quote_plus
11
+
12
+ import requests
13
+
14
+ from novel_downloader.core.interfaces import SearcherProtocol
15
+ from novel_downloader.models import SearchResult
16
+ from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
17
+
18
+
19
class BaseSearcher(abc.ABC, SearcherProtocol):
    """
    Common plumbing for site searchers: a shared HTTP session plus GET/POST
    helpers with default headers, timeouts, and error raising. Subclasses
    provide the site-specific fetch and parse steps.
    """

    site_name: str  # site identifier, set by each concrete searcher
    _session = requests.Session()  # one session shared by all searchers
    _DEFAULT_TIMEOUT: tuple[int, int] = (5, 10)  # (connect, read) seconds

    @classmethod
    def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
        """Fetch the search page for *keyword* and parse it into results."""
        page = cls._fetch_html(keyword)
        return cls._parse_html(page, limit)

    @classmethod
    @abc.abstractmethod
    def _fetch_html(cls, keyword: str) -> str:
        """Get raw HTML from search API or page"""

    @classmethod
    @abc.abstractmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """Parse HTML into standard search result list"""

    @classmethod
    def _http_get(
        cls,
        url: str,
        *,
        params: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        timeout: tuple[int, int] | None = None,
        **kwargs: Any,
    ) -> requests.Response:
        """
        Helper for GET requests with default headers, timeout, and error-raising.
        """
        # Caller-supplied headers override the defaults key by key.
        merged_headers = dict(DEFAULT_USER_HEADERS)
        if headers:
            merged_headers.update(headers)
        response = cls._session.get(
            url,
            params=params,
            headers=merged_headers,
            timeout=timeout or cls._DEFAULT_TIMEOUT,
            **kwargs,
        )
        response.raise_for_status()
        return response

    @classmethod
    def _http_post(
        cls,
        url: str,
        *,
        data: dict[str, str] | str | None = None,
        headers: dict[str, str] | None = None,
        timeout: tuple[int, int] | None = None,
        **kwargs: Any,
    ) -> requests.Response:
        """
        Helper for POST requests with default headers, timeout, and error-raising.
        """
        merged_headers = dict(DEFAULT_USER_HEADERS)
        if headers:
            merged_headers.update(headers)
        response = cls._session.post(
            url,
            data=data,
            headers=merged_headers,
            timeout=timeout or cls._DEFAULT_TIMEOUT,
            **kwargs,
        )
        response.raise_for_status()
        return response

    @staticmethod
    def _quote(q: str) -> str:
        """URL-encode a query string safely."""
        return quote_plus(q)
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.biquge
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["biquge", "bqg"],
)
class BiqugeSearcher(BaseSearcher):
    """Keyword search against Biquge's article-search endpoint."""

    site_name = "biquge"
    priority = 5
    SEARCH_URL = "http://www.b520.cc/modules/article/search.php"

    @classmethod
    def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from Biquge's search page.

        :param keyword: The search term to query on Biquge.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        try:
            response = cls._http_get(cls.SEARCH_URL, params={"searchkey": keyword})
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
                exc_info=True,
            )
            return ""
        return response.text

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Biquge search results into list of SearchResult.

        :param html_str: Raw HTML string from Biquge search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        doc = html.fromstring(html_str)
        # position()>1 skips the table's header row.
        rows = doc.xpath('//table[@class="grid"]//tr[position()>1]')
        results: list[SearchResult] = []

        for rank, row in enumerate(rows):
            if limit is not None and rank >= limit:
                break
            # First cell links to the book; href leads with the book id.
            anchor = row.xpath(".//td[1]/a")[0]
            href = anchor.get("href", "").strip("/")
            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=href.split("/")[0] if href else "",
                    title=anchor.text_content().strip(),
                    author=row.xpath(".//td[3]")[0].text_content().strip(),
                    # Later rows rank lower: offset base priority by position.
                    priority=cls.priority + rank,
                )
            )
        return results
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.esjzone
4
+ ---------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["esjzone"],
)
class EsjzoneSearcher(BaseSearcher):
    """Keyword search via Esjzone's tag listing pages."""

    site_name = "esjzone"
    priority = 3
    SEARCH_URL = "https://www.esjzone.cc/tags/{query}/"

    @classmethod
    def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from Esjzone's search page.

        :param keyword: The search term to query on Esjzone.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            response = cls._http_get(url)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
                exc_info=True,
            )
            return ""
        return response.text

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Esjzone search results into list of SearchResult.

        :param html_str: Raw HTML string from Esjzone search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts.
        """
        doc = html.fromstring(html_str)
        cards = doc.xpath('//div[contains(@class,"card-body")]')
        results: list[SearchResult] = []

        for rank, card in enumerate(cards):
            if limit is not None and rank >= limit:
                break
            title_link = card.xpath('.//h5[@class="card-title"]/a')[0]
            # href format: /detail/<book_id>.html
            raw_href = title_link.get("href", "")
            book_id = raw_href.strip("/").replace("detail/", "").replace(".html", "")
            author_el = card.xpath('.//div[@class="card-author"]/a')[0]
            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    title=title_link.text_content().strip(),
                    author=author_el.text_content().strip(),
                    # Later cards rank lower: offset base priority by position.
                    priority=cls.priority + rank,
                )
            )
        return results