novel-downloader 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. novel_downloader/__init__.py +1 -2
  2. novel_downloader/cli/__init__.py +0 -1
  3. novel_downloader/cli/clean.py +2 -10
  4. novel_downloader/cli/download.py +16 -22
  5. novel_downloader/cli/interactive.py +0 -1
  6. novel_downloader/cli/main.py +1 -3
  7. novel_downloader/cli/settings.py +8 -8
  8. novel_downloader/config/__init__.py +0 -1
  9. novel_downloader/config/adapter.py +32 -27
  10. novel_downloader/config/loader.py +116 -108
  11. novel_downloader/config/models.py +35 -29
  12. novel_downloader/config/site_rules.py +2 -4
  13. novel_downloader/core/__init__.py +0 -1
  14. novel_downloader/core/downloaders/__init__.py +4 -4
  15. novel_downloader/core/downloaders/base/__init__.py +14 -0
  16. novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
  17. novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
  18. novel_downloader/core/downloaders/biquge/__init__.py +12 -0
  19. novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
  20. novel_downloader/core/downloaders/common/__init__.py +14 -0
  21. novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
  22. novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
  23. novel_downloader/core/downloaders/qidian/__init__.py +10 -0
  24. novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
  25. novel_downloader/core/factory/__init__.py +4 -5
  26. novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
  27. novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
  28. novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
  29. novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
  30. novel_downloader/core/interfaces/__init__.py +8 -9
  31. novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
  32. novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
  33. novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
  34. novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
  35. novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
  36. novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
  37. novel_downloader/core/parsers/__init__.py +5 -4
  38. novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
  39. novel_downloader/core/parsers/biquge/__init__.py +10 -0
  40. novel_downloader/core/parsers/biquge/main_parser.py +126 -0
  41. novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
  42. novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
  43. novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
  44. novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
  45. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
  46. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
  47. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
  48. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
  49. novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
  50. novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
  51. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
  52. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
  53. novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
  54. novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
  55. novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
  56. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
  57. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
  58. novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
  59. novel_downloader/core/requesters/__init__.py +9 -5
  60. novel_downloader/core/requesters/base/__init__.py +16 -0
  61. novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
  62. novel_downloader/core/requesters/base/browser.py +340 -0
  63. novel_downloader/core/requesters/base/session.py +364 -0
  64. novel_downloader/core/requesters/biquge/__init__.py +12 -0
  65. novel_downloader/core/requesters/biquge/session.py +90 -0
  66. novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
  67. novel_downloader/core/requesters/common/async_session.py +96 -0
  68. novel_downloader/core/requesters/common/session.py +113 -0
  69. novel_downloader/core/requesters/qidian/__init__.py +21 -0
  70. novel_downloader/core/requesters/qidian/broswer.py +306 -0
  71. novel_downloader/core/requesters/qidian/session.py +287 -0
  72. novel_downloader/core/savers/__init__.py +5 -3
  73. novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
  74. novel_downloader/core/savers/biquge.py +25 -0
  75. novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
  76. novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
  77. novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
  78. novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
  79. novel_downloader/core/savers/epub_utils/__init__.py +0 -1
  80. novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
  81. novel_downloader/core/savers/epub_utils/initializer.py +4 -5
  82. novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
  83. novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
  84. novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
  85. novel_downloader/locales/en.json +8 -4
  86. novel_downloader/locales/zh.json +5 -1
  87. novel_downloader/resources/config/settings.toml +88 -0
  88. novel_downloader/utils/cache.py +2 -2
  89. novel_downloader/utils/chapter_storage.py +340 -0
  90. novel_downloader/utils/constants.py +6 -4
  91. novel_downloader/utils/crypto_utils.py +3 -3
  92. novel_downloader/utils/file_utils/__init__.py +0 -1
  93. novel_downloader/utils/file_utils/io.py +12 -17
  94. novel_downloader/utils/file_utils/normalize.py +1 -3
  95. novel_downloader/utils/file_utils/sanitize.py +2 -9
  96. novel_downloader/utils/fontocr/__init__.py +0 -1
  97. novel_downloader/utils/fontocr/ocr_v1.py +19 -22
  98. novel_downloader/utils/fontocr/ocr_v2.py +147 -60
  99. novel_downloader/utils/hash_store.py +19 -20
  100. novel_downloader/utils/hash_utils.py +0 -1
  101. novel_downloader/utils/i18n.py +3 -4
  102. novel_downloader/utils/logger.py +5 -6
  103. novel_downloader/utils/model_loader.py +5 -8
  104. novel_downloader/utils/network.py +9 -10
  105. novel_downloader/utils/state.py +6 -7
  106. novel_downloader/utils/text_utils/__init__.py +0 -1
  107. novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
  108. novel_downloader/utils/text_utils/diff_display.py +0 -1
  109. novel_downloader/utils/text_utils/font_mapping.py +1 -4
  110. novel_downloader/utils/text_utils/text_cleaning.py +0 -1
  111. novel_downloader/utils/time_utils/__init__.py +0 -1
  112. novel_downloader/utils/time_utils/datetime_utils.py +8 -10
  113. novel_downloader/utils/time_utils/sleep_utils.py +1 -3
  114. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
  115. novel_downloader-1.3.0.dist-info/RECORD +127 -0
  116. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
  117. novel_downloader/core/requesters/base_browser.py +0 -214
  118. novel_downloader/core/requesters/base_session.py +0 -246
  119. novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
  120. novel_downloader/core/requesters/common_requester/common_session.py +0 -126
  121. novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
  122. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
  123. novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
  124. novel_downloader/resources/config/settings.yaml +0 -76
  125. novel_downloader-1.2.2.dist-info/RECORD +0 -115
  126. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
  127. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
  128. {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,133 +1,160 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
3
  novel_downloader.config.loader
5
4
  --------------------------------
6
5
 
7
- Provides functionality to load YAML configuration files into Python
6
+ Provides functionality to load Toml configuration files into Python
8
7
  dictionaries, with robust error handling and fallback support.
9
-
10
- This is typically used to load user-supplied or internal default config files.
11
8
  """
12
9
 
13
10
  import json
14
11
  import logging
15
- from importlib.abc import Traversable
16
- from importlib.resources import as_file
17
12
  from pathlib import Path
18
- from typing import Any, Dict, Optional, Union
19
-
20
- import yaml
13
+ from typing import Any
21
14
 
22
15
  from novel_downloader.utils.cache import cached_load_config
23
- from novel_downloader.utils.constants import (
24
- BASE_CONFIG_PATH,
25
- SETTING_FILE,
26
- )
16
+ from novel_downloader.utils.constants import SETTING_FILE
27
17
 
28
18
  logger = logging.getLogger(__name__)
29
19
 
30
20
 
31
- def resolve_config_path(
32
- config_path: Optional[Union[str, Path]]
33
- ) -> Optional[Union[Path, Traversable]]:
21
+ def resolve_file_path(
22
+ user_path: str | Path | None,
23
+ local_filename: str | list[str],
24
+ fallback_path: Path,
25
+ ) -> Path | None:
34
26
  """
35
- Resolve which configuration file to use, in this priority order:
27
+ Resolve the file path to use based on a prioritized lookup order.
36
28
 
37
- 1. User-specified path (the `config_path` argument).
38
- 2. `./settings.yaml` in the current working directory.
39
- 3. The global settings file (`SETTING_FILE`).
40
- 4. The internal default (`BASE_CONFIG_PATH`).
29
+ Priority:
30
+ 1. A user-specified path (if provided and exists)
31
+ 2. A file in the current working directory with the given name
32
+ 3. A globally registered fallback path
41
33
 
42
- Returns a Path to the first existing file, or None if none is found.
34
+ :param user_path: Optional user-specified file path.
35
+ :param local_filename: File name to check in the current working directory.
36
+ :param fallback_path: Fallback path used if other options are not available.
37
+ :return: A valid Path object if found, otherwise None.
43
38
  """
44
- # 1. Try the user-provided path
45
- if config_path:
46
- path = Path(config_path).expanduser().resolve()
39
+ if user_path:
40
+ path = Path(user_path).expanduser().resolve()
47
41
  if path.is_file():
48
42
  return path
49
- logger.warning("[config] Specified config file not found: %s", path)
43
+ logger.warning("[config] Specified file not found: %s", path)
50
44
 
51
- # 2. Try ./settings.yaml in the current working directory
52
- local_path = Path.cwd() / "settings.yaml"
53
- if local_path.is_file():
54
- logger.debug("[config] Using local settings.yaml at %s", local_path)
55
- return local_path
45
+ filenames = [local_filename] if isinstance(local_filename, str) else local_filename
46
+ for name in filenames:
47
+ local_path = Path.cwd() / name
48
+ if local_path.is_file():
49
+ logger.debug("[config] Using local file: %s", local_path)
50
+ return local_path
56
51
 
57
- # 3. Try the globally registered settings file
58
- if SETTING_FILE.is_file():
59
- logger.debug("[config] Using global settings file at %s", SETTING_FILE)
60
- return SETTING_FILE
52
+ if fallback_path.is_file():
53
+ logger.debug("[config] Using fallback file: %s", fallback_path)
54
+ return fallback_path
61
55
 
62
- # 4. Fallback to the internal default configuration
63
- try:
64
- logger.debug(
65
- "[config] Falling back to internal base config at %s", BASE_CONFIG_PATH
66
- )
67
- return BASE_CONFIG_PATH
68
- except Exception as e:
69
- logger.error("[config] Failed to load internal base config: %s", e)
70
- return None
56
+ logger.warning("[config] No file found at any location for: %s", local_filename)
57
+ return None
71
58
 
72
59
 
73
- @cached_load_config
74
- def load_config(config_path: Optional[Union[str, Path]]) -> Dict[str, Any]:
60
+ def _validate_dict(data: Any, path: Path, format: str) -> dict[str, Any]:
75
61
  """
76
- Load configuration data from a YAML file.
62
+ Validate that the parsed config is a dictionary.
77
63
 
78
- :param config_path: Optional path to the YAML configuration file.
79
- :return: Parsed configuration as a dict.
64
+ :param data: The loaded content to validate.
65
+ :param path: Path to the original config file (used for logging).
66
+ :param format: Format name ('json', 'toml', etc.) for log context.
67
+ :return: The original data if valid, otherwise an empty dict.
80
68
  """
81
- path = resolve_config_path(config_path)
82
- if not path or not path.is_file():
83
- logger.warning("[config] No valid config file found, using empty config.")
69
+ if not isinstance(data, dict):
70
+ logger.warning(
71
+ "[config] %s content is not a dictionary: %s",
72
+ format.upper(),
73
+ path,
74
+ )
84
75
  return {}
76
+ return data
85
77
 
86
- with as_file(path) as real_path:
87
- try:
88
- content = real_path.read_text(encoding="utf-8")
89
- ext = real_path.suffix.lower()
90
- except Exception as e:
91
- logger.error("[config] Failed to read config file '%s': %s", path, e)
92
- return {}
93
78
 
94
- data: Any = None
79
+ def _load_by_extension(path: Path) -> dict[str, Any]:
80
+ """
81
+ Load a configuration file by its file extension.
95
82
 
83
+ Supports `.toml`, `.json`, and `.yaml`/`.yml` formats.
84
+
85
+ :param path: Path to the configuration file.
86
+ :return: Parsed configuration as a dictionary.
87
+ :raises ValueError: If the file extension is unsupported.
88
+ """
89
+ ext = path.suffix.lower()
96
90
  if ext == ".json":
91
+ with path.open("r", encoding="utf-8") as f:
92
+ data = json.load(f)
93
+ return _validate_dict(data, path, "json")
94
+
95
+ elif ext == ".toml":
96
+ import tomllib
97
+
98
+ with path.open("rb") as f:
99
+ data = tomllib.load(f)
100
+ return _validate_dict(data, path, "toml")
101
+
102
+ elif ext in {".yaml", ".yml"}:
97
103
  try:
98
- data = json.loads(content)
99
- except json.JSONDecodeError as e:
100
- logger.error("[config] JSON parse error in '%s': %s", path, e)
101
- return {}
104
+ import yaml
105
+ except ImportError as err:
106
+ raise ImportError(
107
+ "YAML config support requires PyYAML. "
108
+ "Install it via: pip install PyYAML"
109
+ ) from err
110
+ with path.open("r", encoding="utf-8") as f:
111
+ data = yaml.safe_load(f)
112
+ return _validate_dict(data, path, "yaml")
113
+
102
114
  else:
103
- try:
104
- data = yaml.safe_load(content)
105
- except yaml.YAMLError as e:
106
- logger.error("[config] YAML parse error in '%s': %s", path, e)
107
- return {}
115
+ raise ValueError(f"Unsupported config file extension: {ext}")
108
116
 
109
- if data is None:
110
- return {}
111
- if not isinstance(data, dict):
112
- logger.warning(
113
- "[config] Expected dict in config file '%s', got %s",
114
- path,
115
- type(data).__name__,
116
- )
117
- return {}
118
117
 
119
- return data
118
+ @cached_load_config
119
+ def load_config(
120
+ config_path: str | Path | None = None,
121
+ ) -> dict[str, Any]:
122
+ """
123
+ Load configuration data from a YAML file.
124
+
125
+ :param config_path: Optional path to the YAML configuration file.
126
+ :return: Parsed configuration as a dict.
127
+ """
128
+ path = resolve_file_path(
129
+ user_path=config_path,
130
+ local_filename=[
131
+ "settings.toml",
132
+ "settings.yaml",
133
+ "settings.yml",
134
+ "settings.json",
135
+ ],
136
+ fallback_path=SETTING_FILE,
137
+ )
138
+
139
+ if not path or not path.is_file():
140
+ raise FileNotFoundError("No valid config file found.")
141
+
142
+ try:
143
+ return _load_by_extension(path)
144
+ except Exception as e:
145
+ logger.warning("[config] Failed to load config file: %s", e)
146
+ return {}
120
147
 
121
148
 
122
149
  def save_config_file(
123
- source_path: Union[str, Path],
124
- output_path: Union[str, Path] = SETTING_FILE,
150
+ source_path: str | Path,
151
+ output_path: str | Path = SETTING_FILE,
125
152
  ) -> None:
126
153
  """
127
- Validate a YAML/JSON config file, load it into a dict,
154
+ Validate a TOML/YAML/JSON config file, load it into a dict,
128
155
  and then dump it as JSON to the internal SETTING_FILE.
129
156
 
130
- :param source_path: The user-provided YAML file path.
157
+ :param source_path: The user-provided TOML file path.
131
158
  :param output_path: Destination path to save the config (default: SETTING_FILE).
132
159
  """
133
160
  source = Path(source_path).expanduser().resolve()
@@ -136,33 +163,14 @@ def save_config_file(
136
163
  if not source.is_file():
137
164
  raise FileNotFoundError(f"Source file not found: {source}")
138
165
 
139
- ext = source.suffix.lower()
140
-
141
- if ext in {".yaml", ".yml"}:
142
- logger.debug("[config] Loading YAML for conversion: %s", source)
143
- try:
144
- with source.open("r", encoding="utf-8") as f:
145
- data = yaml.safe_load(f)
146
- except yaml.YAMLError as e:
147
- logger.error("[config] Invalid YAML format: %s", e)
148
- raise ValueError(f"Invalid YAML file: {source}") from e
149
-
150
- elif ext == ".json":
151
- logger.debug("[config] Loading JSON for saving: %s", source)
152
- try:
153
- with source.open("r", encoding="utf-8") as f:
154
- data = json.load(f)
155
- except json.JSONDecodeError as e:
156
- logger.error("[config] Invalid JSON format: %s", e)
157
- raise ValueError(f"Invalid JSON file: {source}") from e
158
-
159
- else:
160
- raise ValueError(f"Source file must be .yaml, .yml, or .json: {source}")
161
-
162
- if not isinstance(data, dict):
163
- raise ValueError(f"Config root must be a JSON/YAML object: {source}")
166
+ try:
167
+ data = _load_by_extension(source)
168
+ except (ValueError, ImportError) as e:
169
+ logger.error("[config] Failed to load config file: %s", e)
170
+ raise ValueError(f"Invalid config file: {source}") from e
164
171
 
165
172
  output.parent.mkdir(parents=True, exist_ok=True)
173
+
166
174
  try:
167
175
  with output.open("w", encoding="utf-8") as f:
168
176
  json.dump(data, f, indent=2, ensure_ascii=False)
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
3
  novel_downloader.config.models
5
4
  ------------------------------
@@ -18,15 +17,17 @@ strongly typed Python objects for safer and cleaner access.
18
17
  """
19
18
 
20
19
  from dataclasses import dataclass
21
- from typing import Any, Dict, List, Literal, Optional, TypedDict
20
+ from typing import Any, Literal, TypedDict
21
+
22
+ ModeType = Literal["browser", "session", "async"]
23
+ StorageBackend = Literal["json", "sqlite"]
22
24
 
23
25
 
24
26
  # === Requesters ===
25
27
  @dataclass
26
28
  class RequesterConfig:
27
- wait_time: float = 5.0
28
29
  retry_times: int = 3
29
- retry_interval: float = 5.0
30
+ backoff_factor: float = 2.0
30
31
  timeout: float = 30.0
31
32
  headless: bool = True
32
33
  user_data_folder: str = ""
@@ -34,8 +35,9 @@ class RequesterConfig:
34
35
  auto_close: bool = True
35
36
  disable_images: bool = True
36
37
  mute_audio: bool = True
37
- mode: str = "session" # browser / session / async
38
- max_rps: Optional[float] = None # Maximum requests per second
38
+ mode: ModeType = "session"
39
+ max_connections: int = 10
40
+ max_rps: float | None = None # Maximum requests per second
39
41
 
40
42
 
41
43
  # === Downloaders ===
@@ -50,7 +52,9 @@ class DownloaderConfig:
50
52
  skip_existing: bool = True
51
53
  login_required: bool = False
52
54
  save_html: bool = False
53
- mode: str = "session" # browser / session / async
55
+ mode: ModeType = "session"
56
+ storage_backend: StorageBackend = "json"
57
+ storage_batch_size: int = 1
54
58
 
55
59
 
56
60
  # === Parsers ===
@@ -64,11 +68,11 @@ class ParserConfig:
64
68
  ocr_version: str = "v1.0"
65
69
  batch_size: int = 32
66
70
  gpu_mem: int = 500
67
- gpu_id: Optional[int] = None
71
+ gpu_id: int | None = None
68
72
  ocr_weight: float = 0.6
69
73
  vec_weight: float = 0.4
70
74
  save_font_debug: bool = False
71
- mode: str = "session" # browser / session
75
+ mode: ModeType = "session"
72
76
 
73
77
 
74
78
  # === Savers ===
@@ -76,6 +80,7 @@ class ParserConfig:
76
80
  class SaverConfig:
77
81
  raw_data_dir: str = "./raw_data"
78
82
  output_dir: str = "./downloads"
83
+ storage_backend: StorageBackend = "json"
79
84
  clean_text: bool = True
80
85
  make_txt: bool = True
81
86
  make_epub: bool = False
@@ -85,6 +90,7 @@ class SaverConfig:
85
90
  filename_template: str = "{title}_{author}"
86
91
  include_cover: bool = True
87
92
  include_toc: bool = False
93
+ include_picture: bool = False
88
94
 
89
95
 
90
96
  class RuleStep(TypedDict, total=False):
@@ -105,39 +111,39 @@ class RuleStep(TypedDict, total=False):
105
111
  ]
106
112
 
107
113
  # —— BeautifulSoup 相关 —— #
108
- selector: Optional[str] # CSS 选择器, 用于 select/select_one/exclude
109
- name: Optional[str] # 标签名称, 用于 find/find_all
110
- attrs: Optional[Dict[str, Any]] # 属性过滤, 用于 find/find_all
111
- limit: Optional[int] # find_all 的最大匹配数
112
- attr: Optional[str] # 从元素获取属性值 (select/select_one/select_all)
114
+ selector: str | None # CSS 选择器, 用于 select/select_one/exclude
115
+ name: str | None # 标签名称, 用于 find/find_all
116
+ attrs: dict[str, Any] | None # 属性过滤, 用于 find/find_all
117
+ limit: int | None # find_all 的最大匹配数
118
+ attr: str | None # 从元素获取属性值 (select/select_one/select_all)
113
119
 
114
120
  # —— 正则相关 —— #
115
- pattern: Optional[str] # 正则表达式
116
- flags: Optional[int] # re.I, re.M 等
117
- group: Optional[int] # 匹配结果中的第几个分组 (默认 0)
118
- template: Optional[str] # 自定义组合, 比如 "$1$2字"
121
+ pattern: str | None # 正则表达式
122
+ flags: int | None # re.I, re.M 等
123
+ group: int | None # 匹配结果中的第几个分组 (默认 0)
124
+ template: str | None # 自定义组合, 比如 "$1$2字"
119
125
 
120
126
  # —— 文本处理 —— #
121
- chars: Optional[str] # strip 要去除的字符集
122
- old: Optional[str] # replace 中要被替换的子串
123
- new: Optional[str] # replace 中新的子串
124
- count: Optional[int] # replace 中的最大替换次数
125
- sep: Optional[str] # split/join 的分隔符
126
- index: Optional[int] # split/select_all/select 之后取第几个元素
127
+ chars: str | None # strip 要去除的字符集
128
+ old: str | None # replace 中要被替换的子串
129
+ new: str | None # replace 中新的子串
130
+ count: int | None # replace 中的最大替换次数
131
+ sep: str | None # split/join 的分隔符
132
+ index: int | None # split/select_all/select 之后取第几个元素
127
133
 
128
134
 
129
135
  class FieldRules(TypedDict):
130
- steps: List[RuleStep]
136
+ steps: list[RuleStep]
131
137
 
132
138
 
133
139
  class ChapterFieldRules(TypedDict):
134
140
  key: str
135
- steps: List[RuleStep]
141
+ steps: list[RuleStep]
136
142
 
137
143
 
138
144
  class VolumesRulesOptional(TypedDict, total=False):
139
145
  volume_selector: str # 有卷时选择 volume 块的 selector
140
- volume_name_steps: List[RuleStep]
146
+ volume_name_steps: list[RuleStep]
141
147
  volume_mode: str # Optional: "normal" (default) or "mixed"
142
148
  list_selector: str # Optional: If "mixed" mode, parent container selector
143
149
 
@@ -145,7 +151,7 @@ class VolumesRulesOptional(TypedDict, total=False):
145
151
  class VolumesRules(VolumesRulesOptional):
146
152
  has_volume: bool # 是否存在卷,false=未分卷
147
153
  chapter_selector: str # 选择 chapter 节点的 selector
148
- chapter_steps: List[ChapterFieldRules] # 提取章节信息的步骤列表
154
+ chapter_steps: list[ChapterFieldRules] # 提取章节信息的步骤列表
149
155
 
150
156
 
151
157
  class BookInfoRules(TypedDict, total=False):
@@ -175,4 +181,4 @@ class SiteRules(TypedDict):
175
181
  chapter: ChapterRules
176
182
 
177
183
 
178
- SiteRulesDict = Dict[str, SiteRules]
184
+ SiteRulesDict = dict[str, SiteRules]
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
3
  novel_downloader.config.site_rules
5
4
  ----------------------------------
@@ -14,7 +13,6 @@ This module provides functionality to:
14
13
  import json
15
14
  import logging
16
15
  from pathlib import Path
17
- from typing import Union
18
16
 
19
17
  from novel_downloader.utils.cache import cached_load_config
20
18
  from novel_downloader.utils.constants import SITE_RULES_FILE
@@ -26,7 +24,7 @@ logger = logging.getLogger(__name__)
26
24
 
27
25
 
28
26
  def save_rules_as_json(
29
- source_path: Union[str, Path], output_path: Union[str, Path] = SITE_RULES_FILE
27
+ source_path: str | Path, output_path: str | Path = SITE_RULES_FILE
30
28
  ) -> None:
31
29
  """
32
30
  Load rules from source_path (toml, yaml, or json) and save as JSON.
@@ -78,7 +76,7 @@ def save_rules_as_json(
78
76
 
79
77
 
80
78
  @cached_load_config
81
- def load_site_rules(json_path: Union[str, Path] = SITE_RULES_FILE) -> SiteRulesDict:
79
+ def load_site_rules(json_path: str | Path = SITE_RULES_FILE) -> SiteRulesDict:
82
80
  """
83
81
  Loads site scraping rules from a JSON file and caches the result for future access.
84
82
 
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
3
  novel_downloader.core
5
4
  ---------------------
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
3
  novel_downloader.core.downloaders
5
4
  ---------------------------------
@@ -11,11 +10,12 @@ Each downloader is responsible for orchestrating the full lifecycle
11
10
  of retrieving, parsing, and saving novel content for a given source.
12
11
  """
13
12
 
14
- from .common_asynb_downloader import CommonAsyncDownloader
15
- from .common_downloader import CommonDownloader
16
- from .qidian_downloader import QidianDownloader
13
+ from .biquge import BiqugeDownloader
14
+ from .common import CommonAsyncDownloader, CommonDownloader
15
+ from .qidian import QidianDownloader
17
16
 
18
17
  __all__ = [
18
+ "BiqugeDownloader",
19
19
  "CommonAsyncDownloader",
20
20
  "CommonDownloader",
21
21
  "QidianDownloader",
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.downloaders.base
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ from .base_async import BaseAsyncDownloader
9
+ from .base_sync import BaseDownloader
10
+
11
+ __all__ = [
12
+ "BaseAsyncDownloader",
13
+ "BaseDownloader",
14
+ ]
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
  """
4
- novel_downloader.core.downloaders.base_async_downloader
5
- -------------------------------------------------------
3
+ novel_downloader.core.downloaders.base.base_async
4
+ -------------------------------------------------
6
5
 
7
6
  Defines the abstract base class `BaseAsyncDownloader`, which provides a
8
7
  common interface and reusable logic for all downloader implementations.
@@ -11,19 +10,14 @@ common interface and reusable logic for all downloader implementations.
11
10
  import abc
12
11
  import logging
13
12
  from pathlib import Path
14
- from typing import List
15
13
 
16
14
  from novel_downloader.config import DownloaderConfig
17
15
  from novel_downloader.core.interfaces import (
16
+ AsyncDownloaderProtocol,
18
17
  AsyncRequesterProtocol,
19
18
  ParserProtocol,
20
19
  SaverProtocol,
21
20
  )
22
- from novel_downloader.core.interfaces.async_downloader_protocol import (
23
- AsyncDownloaderProtocol,
24
- )
25
-
26
- logger = logging.getLogger(__name__)
27
21
 
28
22
 
29
23
  class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
@@ -53,6 +47,51 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
53
47
  self._raw_data_dir.mkdir(parents=True, exist_ok=True)
54
48
  self._cache_dir.mkdir(parents=True, exist_ok=True)
55
49
 
50
+ self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
51
+
52
+ async def download(self, book_ids: list[str]) -> None:
53
+ """
54
+ The general batch download process:
55
+ 1. Iterate over all book IDs
56
+ 2. For each ID, call `download_one()`
57
+
58
+ :param book_ids: A list of book identifiers to download.
59
+ """
60
+ await self.prepare()
61
+
62
+ # 2) batch download
63
+ for idx, book_id in enumerate(book_ids, start=1):
64
+ self.logger.debug(
65
+ "[%s] Starting download for %r (%s/%s)",
66
+ self.__class__.__name__,
67
+ book_id,
68
+ idx,
69
+ len(book_ids),
70
+ )
71
+ try:
72
+ await self.download_one(book_id)
73
+ except Exception as e:
74
+ self._handle_download_exception(book_id, e)
75
+
76
+ @abc.abstractmethod
77
+ async def download_one(self, book_id: str) -> None:
78
+ """
79
+ The full download logic for a single book.
80
+
81
+ Subclasses must implement this method.
82
+
83
+ :param book_id: The identifier of the book to download.
84
+ """
85
+ ...
86
+
87
+ async def prepare(self) -> None:
88
+ """
89
+ Optional hook called before downloading each book.
90
+
91
+ Subclasses can override this method to perform pre-download setup.
92
+ """
93
+ return
94
+
56
95
  @property
57
96
  def requester(self) -> AsyncRequesterProtocol:
58
97
  return self._requester
@@ -97,49 +136,6 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
97
136
  def request_interval(self) -> float:
98
137
  return self._config.request_interval
99
138
 
100
- async def prepare(self) -> None:
101
- """
102
- Optional hook called before downloading each book.
103
-
104
- Subclasses can override this method to perform pre-download setup.
105
- """
106
- return
107
-
108
- async def download(self, book_ids: List[str]) -> None:
109
- """
110
- The general batch download process:
111
- 1. Iterate over all book IDs
112
- 2. For each ID, call `download_one()`
113
-
114
- :param book_ids: A list of book identifiers to download.
115
- """
116
- await self.prepare()
117
-
118
- # 2) batch download
119
- for idx, book_id in enumerate(book_ids, start=1):
120
- logger.debug(
121
- "[%s] Starting download for %r (%s/%s)",
122
- self.__class__.__name__,
123
- book_id,
124
- idx,
125
- len(book_ids),
126
- )
127
- try:
128
- await self.download_one(book_id)
129
- except Exception as e:
130
- self._handle_download_exception(book_id, e)
131
-
132
- @abc.abstractmethod
133
- async def download_one(self, book_id: str) -> None:
134
- """
135
- The full download logic for a single book.
136
-
137
- Subclasses must implement this method.
138
-
139
- :param book_id: The identifier of the book to download.
140
- """
141
- ...
142
-
143
139
  def _handle_download_exception(self, book_id: str, error: Exception) -> None:
144
140
  """
145
141
  Handle download errors in a consistent way.
@@ -149,7 +145,7 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
149
145
  :param book_id: The ID of the book that failed.
150
146
  :param error: The exception raised during download.
151
147
  """
152
- logger.warning(
148
+ self.logger.warning(
153
149
  "[%s] Failed to download %r: %s",
154
150
  self.__class__.__name__,
155
151
  book_id,