novel-downloader 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +16 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +32 -27
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +35 -29
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +33 -21
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +79 -62
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +25 -26
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +23 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +31 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +18 -9
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +13 -13
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +40 -48
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +36 -44
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +14 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/book_info_parser.py +5 -6
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +7 -8
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +177 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +306 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +23 -51
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +8 -4
- novel_downloader/locales/zh.json +5 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +6 -4
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +8 -10
- novel_downloader/utils/time_utils/sleep_utils.py +1 -3
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
- novel_downloader-1.3.0.dist-info/RECORD +127 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
- novel_downloader/core/requesters/base_browser.py +0 -214
- novel_downloader/core/requesters/base_session.py +0 -246
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -396
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.2.dist-info/RECORD +0 -115
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.2.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,133 +1,160 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.config.loader
|
5
4
|
--------------------------------
|
6
5
|
|
7
|
-
Provides functionality to load
|
6
|
+
Provides functionality to load TOML, JSON, and YAML configuration files into Python
|
8
7
|
dictionaries, with robust error handling and fallback support.
|
9
|
-
|
10
|
-
This is typically used to load user-supplied or internal default config files.
|
11
8
|
"""
|
12
9
|
|
13
10
|
import json
|
14
11
|
import logging
|
15
|
-
from importlib.abc import Traversable
|
16
|
-
from importlib.resources import as_file
|
17
12
|
from pathlib import Path
|
18
|
-
from typing import Any
|
19
|
-
|
20
|
-
import yaml
|
13
|
+
from typing import Any
|
21
14
|
|
22
15
|
from novel_downloader.utils.cache import cached_load_config
|
23
|
-
from novel_downloader.utils.constants import
|
24
|
-
BASE_CONFIG_PATH,
|
25
|
-
SETTING_FILE,
|
26
|
-
)
|
16
|
+
from novel_downloader.utils.constants import SETTING_FILE
|
27
17
|
|
28
18
|
logger = logging.getLogger(__name__)
|
29
19
|
|
30
20
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
21
|
+
def resolve_file_path(
|
22
|
+
user_path: str | Path | None,
|
23
|
+
local_filename: str | list[str],
|
24
|
+
fallback_path: Path,
|
25
|
+
) -> Path | None:
|
34
26
|
"""
|
35
|
-
Resolve
|
27
|
+
Resolve the file path to use based on a prioritized lookup order.
|
36
28
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
29
|
+
Priority:
|
30
|
+
1. A user-specified path (if provided and exists)
|
31
|
+
2. A file in the current working directory with the given name
|
32
|
+
3. A globally registered fallback path
|
41
33
|
|
42
|
-
|
34
|
+
:param user_path: Optional user-specified file path.
|
35
|
+
:param local_filename: File name to check in the current working directory.
|
36
|
+
:param fallback_path: Fallback path used if other options are not available.
|
37
|
+
:return: A valid Path object if found, otherwise None.
|
43
38
|
"""
|
44
|
-
|
45
|
-
|
46
|
-
path = Path(config_path).expanduser().resolve()
|
39
|
+
if user_path:
|
40
|
+
path = Path(user_path).expanduser().resolve()
|
47
41
|
if path.is_file():
|
48
42
|
return path
|
49
|
-
logger.warning("[config] Specified
|
43
|
+
logger.warning("[config] Specified file not found: %s", path)
|
50
44
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
45
|
+
filenames = [local_filename] if isinstance(local_filename, str) else local_filename
|
46
|
+
for name in filenames:
|
47
|
+
local_path = Path.cwd() / name
|
48
|
+
if local_path.is_file():
|
49
|
+
logger.debug("[config] Using local file: %s", local_path)
|
50
|
+
return local_path
|
56
51
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
return SETTING_FILE
|
52
|
+
if fallback_path.is_file():
|
53
|
+
logger.debug("[config] Using fallback file: %s", fallback_path)
|
54
|
+
return fallback_path
|
61
55
|
|
62
|
-
|
63
|
-
|
64
|
-
logger.debug(
|
65
|
-
"[config] Falling back to internal base config at %s", BASE_CONFIG_PATH
|
66
|
-
)
|
67
|
-
return BASE_CONFIG_PATH
|
68
|
-
except Exception as e:
|
69
|
-
logger.error("[config] Failed to load internal base config: %s", e)
|
70
|
-
return None
|
56
|
+
logger.warning("[config] No file found at any location for: %s", local_filename)
|
57
|
+
return None
|
71
58
|
|
72
59
|
|
73
|
-
|
74
|
-
def load_config(config_path: Optional[Union[str, Path]]) -> Dict[str, Any]:
|
60
|
+
def _validate_dict(data: Any, path: Path, format: str) -> dict[str, Any]:
|
75
61
|
"""
|
76
|
-
|
62
|
+
Validate that the parsed config is a dictionary.
|
77
63
|
|
78
|
-
:param
|
79
|
-
:
|
64
|
+
:param data: The loaded content to validate.
|
65
|
+
:param path: Path to the original config file (used for logging).
|
66
|
+
:param format: Format name ('json', 'toml', etc.) for log context.
|
67
|
+
:return: The original data if valid, otherwise an empty dict.
|
80
68
|
"""
|
81
|
-
|
82
|
-
|
83
|
-
|
69
|
+
if not isinstance(data, dict):
|
70
|
+
logger.warning(
|
71
|
+
"[config] %s content is not a dictionary: %s",
|
72
|
+
format.upper(),
|
73
|
+
path,
|
74
|
+
)
|
84
75
|
return {}
|
76
|
+
return data
|
85
77
|
|
86
|
-
with as_file(path) as real_path:
|
87
|
-
try:
|
88
|
-
content = real_path.read_text(encoding="utf-8")
|
89
|
-
ext = real_path.suffix.lower()
|
90
|
-
except Exception as e:
|
91
|
-
logger.error("[config] Failed to read config file '%s': %s", path, e)
|
92
|
-
return {}
|
93
78
|
|
94
|
-
|
79
|
+
def _load_by_extension(path: Path) -> dict[str, Any]:
|
80
|
+
"""
|
81
|
+
Load a configuration file by its file extension.
|
95
82
|
|
83
|
+
Supports `.toml`, `.json`, and `.yaml`/`.yml` formats.
|
84
|
+
|
85
|
+
:param path: Path to the configuration file.
|
86
|
+
:return: Parsed configuration as a dictionary.
|
87
|
+
:raises ValueError: If the file extension is unsupported.
|
88
|
+
"""
|
89
|
+
ext = path.suffix.lower()
|
96
90
|
if ext == ".json":
|
91
|
+
with path.open("r", encoding="utf-8") as f:
|
92
|
+
data = json.load(f)
|
93
|
+
return _validate_dict(data, path, "json")
|
94
|
+
|
95
|
+
elif ext == ".toml":
|
96
|
+
import tomllib
|
97
|
+
|
98
|
+
with path.open("rb") as f:
|
99
|
+
data = tomllib.load(f)
|
100
|
+
return _validate_dict(data, path, "toml")
|
101
|
+
|
102
|
+
elif ext in {".yaml", ".yml"}:
|
97
103
|
try:
|
98
|
-
|
99
|
-
except
|
100
|
-
|
101
|
-
|
104
|
+
import yaml
|
105
|
+
except ImportError as err:
|
106
|
+
raise ImportError(
|
107
|
+
"YAML config support requires PyYAML. "
|
108
|
+
"Install it via: pip install PyYAML"
|
109
|
+
) from err
|
110
|
+
with path.open("r", encoding="utf-8") as f:
|
111
|
+
data = yaml.safe_load(f)
|
112
|
+
return _validate_dict(data, path, "yaml")
|
113
|
+
|
102
114
|
else:
|
103
|
-
|
104
|
-
data = yaml.safe_load(content)
|
105
|
-
except yaml.YAMLError as e:
|
106
|
-
logger.error("[config] YAML parse error in '%s': %s", path, e)
|
107
|
-
return {}
|
115
|
+
raise ValueError(f"Unsupported config file extension: {ext}")
|
108
116
|
|
109
|
-
if data is None:
|
110
|
-
return {}
|
111
|
-
if not isinstance(data, dict):
|
112
|
-
logger.warning(
|
113
|
-
"[config] Expected dict in config file '%s', got %s",
|
114
|
-
path,
|
115
|
-
type(data).__name__,
|
116
|
-
)
|
117
|
-
return {}
|
118
117
|
|
119
|
-
|
118
|
+
@cached_load_config
|
119
|
+
def load_config(
|
120
|
+
config_path: str | Path | None = None,
|
121
|
+
) -> dict[str, Any]:
|
122
|
+
"""
|
123
|
+
Load configuration data from a TOML, YAML, or JSON file.
|
124
|
+
|
125
|
+
:param config_path: Optional path to the TOML, YAML, or JSON configuration file.
|
126
|
+
:return: Parsed configuration as a dict.
|
127
|
+
"""
|
128
|
+
path = resolve_file_path(
|
129
|
+
user_path=config_path,
|
130
|
+
local_filename=[
|
131
|
+
"settings.toml",
|
132
|
+
"settings.yaml",
|
133
|
+
"settings.yml",
|
134
|
+
"settings.json",
|
135
|
+
],
|
136
|
+
fallback_path=SETTING_FILE,
|
137
|
+
)
|
138
|
+
|
139
|
+
if not path or not path.is_file():
|
140
|
+
raise FileNotFoundError("No valid config file found.")
|
141
|
+
|
142
|
+
try:
|
143
|
+
return _load_by_extension(path)
|
144
|
+
except Exception as e:
|
145
|
+
logger.warning("[config] Failed to load config file: %s", e)
|
146
|
+
return {}
|
120
147
|
|
121
148
|
|
122
149
|
def save_config_file(
|
123
|
-
source_path:
|
124
|
-
output_path:
|
150
|
+
source_path: str | Path,
|
151
|
+
output_path: str | Path = SETTING_FILE,
|
125
152
|
) -> None:
|
126
153
|
"""
|
127
|
-
Validate a YAML/JSON config file, load it into a dict,
|
154
|
+
Validate a TOML/YAML/JSON config file, load it into a dict,
|
128
155
|
and then dump it as JSON to the internal SETTING_FILE.
|
129
156
|
|
130
|
-
:param source_path: The user-provided
|
157
|
+
:param source_path: The user-provided TOML, YAML, or JSON file path.
|
131
158
|
:param output_path: Destination path to save the config (default: SETTING_FILE).
|
132
159
|
"""
|
133
160
|
source = Path(source_path).expanduser().resolve()
|
@@ -136,33 +163,14 @@ def save_config_file(
|
|
136
163
|
if not source.is_file():
|
137
164
|
raise FileNotFoundError(f"Source file not found: {source}")
|
138
165
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
logger.
|
143
|
-
|
144
|
-
with source.open("r", encoding="utf-8") as f:
|
145
|
-
data = yaml.safe_load(f)
|
146
|
-
except yaml.YAMLError as e:
|
147
|
-
logger.error("[config] Invalid YAML format: %s", e)
|
148
|
-
raise ValueError(f"Invalid YAML file: {source}") from e
|
149
|
-
|
150
|
-
elif ext == ".json":
|
151
|
-
logger.debug("[config] Loading JSON for saving: %s", source)
|
152
|
-
try:
|
153
|
-
with source.open("r", encoding="utf-8") as f:
|
154
|
-
data = json.load(f)
|
155
|
-
except json.JSONDecodeError as e:
|
156
|
-
logger.error("[config] Invalid JSON format: %s", e)
|
157
|
-
raise ValueError(f"Invalid JSON file: {source}") from e
|
158
|
-
|
159
|
-
else:
|
160
|
-
raise ValueError(f"Source file must be .yaml, .yml, or .json: {source}")
|
161
|
-
|
162
|
-
if not isinstance(data, dict):
|
163
|
-
raise ValueError(f"Config root must be a JSON/YAML object: {source}")
|
166
|
+
try:
|
167
|
+
data = _load_by_extension(source)
|
168
|
+
except (ValueError, ImportError) as e:
|
169
|
+
logger.error("[config] Failed to load config file: %s", e)
|
170
|
+
raise ValueError(f"Invalid config file: {source}") from e
|
164
171
|
|
165
172
|
output.parent.mkdir(parents=True, exist_ok=True)
|
173
|
+
|
166
174
|
try:
|
167
175
|
with output.open("w", encoding="utf-8") as f:
|
168
176
|
json.dump(data, f, indent=2, ensure_ascii=False)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.config.models
|
5
4
|
------------------------------
|
@@ -18,15 +17,17 @@ strongly typed Python objects for safer and cleaner access.
|
|
18
17
|
"""
|
19
18
|
|
20
19
|
from dataclasses import dataclass
|
21
|
-
from typing import Any,
|
20
|
+
from typing import Any, Literal, TypedDict
|
21
|
+
|
22
|
+
ModeType = Literal["browser", "session", "async"]
|
23
|
+
StorageBackend = Literal["json", "sqlite"]
|
22
24
|
|
23
25
|
|
24
26
|
# === Requesters ===
|
25
27
|
@dataclass
|
26
28
|
class RequesterConfig:
|
27
|
-
wait_time: float = 5.0
|
28
29
|
retry_times: int = 3
|
29
|
-
|
30
|
+
backoff_factor: float = 2.0
|
30
31
|
timeout: float = 30.0
|
31
32
|
headless: bool = True
|
32
33
|
user_data_folder: str = ""
|
@@ -34,8 +35,9 @@ class RequesterConfig:
|
|
34
35
|
auto_close: bool = True
|
35
36
|
disable_images: bool = True
|
36
37
|
mute_audio: bool = True
|
37
|
-
mode:
|
38
|
-
|
38
|
+
mode: ModeType = "session"
|
39
|
+
max_connections: int = 10
|
40
|
+
max_rps: float | None = None # Maximum requests per second
|
39
41
|
|
40
42
|
|
41
43
|
# === Downloaders ===
|
@@ -50,7 +52,9 @@ class DownloaderConfig:
|
|
50
52
|
skip_existing: bool = True
|
51
53
|
login_required: bool = False
|
52
54
|
save_html: bool = False
|
53
|
-
mode:
|
55
|
+
mode: ModeType = "session"
|
56
|
+
storage_backend: StorageBackend = "json"
|
57
|
+
storage_batch_size: int = 1
|
54
58
|
|
55
59
|
|
56
60
|
# === Parsers ===
|
@@ -64,11 +68,11 @@ class ParserConfig:
|
|
64
68
|
ocr_version: str = "v1.0"
|
65
69
|
batch_size: int = 32
|
66
70
|
gpu_mem: int = 500
|
67
|
-
gpu_id:
|
71
|
+
gpu_id: int | None = None
|
68
72
|
ocr_weight: float = 0.6
|
69
73
|
vec_weight: float = 0.4
|
70
74
|
save_font_debug: bool = False
|
71
|
-
mode:
|
75
|
+
mode: ModeType = "session"
|
72
76
|
|
73
77
|
|
74
78
|
# === Savers ===
|
@@ -76,6 +80,7 @@ class ParserConfig:
|
|
76
80
|
class SaverConfig:
|
77
81
|
raw_data_dir: str = "./raw_data"
|
78
82
|
output_dir: str = "./downloads"
|
83
|
+
storage_backend: StorageBackend = "json"
|
79
84
|
clean_text: bool = True
|
80
85
|
make_txt: bool = True
|
81
86
|
make_epub: bool = False
|
@@ -85,6 +90,7 @@ class SaverConfig:
|
|
85
90
|
filename_template: str = "{title}_{author}"
|
86
91
|
include_cover: bool = True
|
87
92
|
include_toc: bool = False
|
93
|
+
include_picture: bool = False
|
88
94
|
|
89
95
|
|
90
96
|
class RuleStep(TypedDict, total=False):
|
@@ -105,39 +111,39 @@ class RuleStep(TypedDict, total=False):
|
|
105
111
|
]
|
106
112
|
|
107
113
|
# —— BeautifulSoup 相关 —— #
|
108
|
-
selector:
|
109
|
-
name:
|
110
|
-
attrs:
|
111
|
-
limit:
|
112
|
-
attr:
|
114
|
+
selector: str | None # CSS 选择器, 用于 select/select_one/exclude
|
115
|
+
name: str | None # 标签名称, 用于 find/find_all
|
116
|
+
attrs: dict[str, Any] | None # 属性过滤, 用于 find/find_all
|
117
|
+
limit: int | None # find_all 的最大匹配数
|
118
|
+
attr: str | None # 从元素获取属性值 (select/select_one/select_all)
|
113
119
|
|
114
120
|
# —— 正则相关 —— #
|
115
|
-
pattern:
|
116
|
-
flags:
|
117
|
-
group:
|
118
|
-
template:
|
121
|
+
pattern: str | None # 正则表达式
|
122
|
+
flags: int | None # re.I, re.M 等
|
123
|
+
group: int | None # 匹配结果中的第几个分组 (默认 0)
|
124
|
+
template: str | None # 自定义组合, 比如 "$1$2字"
|
119
125
|
|
120
126
|
# —— 文本处理 —— #
|
121
|
-
chars:
|
122
|
-
old:
|
123
|
-
new:
|
124
|
-
count:
|
125
|
-
sep:
|
126
|
-
index:
|
127
|
+
chars: str | None # strip 要去除的字符集
|
128
|
+
old: str | None # replace 中要被替换的子串
|
129
|
+
new: str | None # replace 中新的子串
|
130
|
+
count: int | None # replace 中的最大替换次数
|
131
|
+
sep: str | None # split/join 的分隔符
|
132
|
+
index: int | None # split/select_all/select 之后取第几个元素
|
127
133
|
|
128
134
|
|
129
135
|
class FieldRules(TypedDict):
|
130
|
-
steps:
|
136
|
+
steps: list[RuleStep]
|
131
137
|
|
132
138
|
|
133
139
|
class ChapterFieldRules(TypedDict):
|
134
140
|
key: str
|
135
|
-
steps:
|
141
|
+
steps: list[RuleStep]
|
136
142
|
|
137
143
|
|
138
144
|
class VolumesRulesOptional(TypedDict, total=False):
|
139
145
|
volume_selector: str # 有卷时选择 volume 块的 selector
|
140
|
-
volume_name_steps:
|
146
|
+
volume_name_steps: list[RuleStep]
|
141
147
|
volume_mode: str # Optional: "normal" (default) or "mixed"
|
142
148
|
list_selector: str # Optional: If "mixed" mode, parent container selector
|
143
149
|
|
@@ -145,7 +151,7 @@ class VolumesRulesOptional(TypedDict, total=False):
|
|
145
151
|
class VolumesRules(VolumesRulesOptional):
|
146
152
|
has_volume: bool # 是否存在卷,false=未分卷
|
147
153
|
chapter_selector: str # 选择 chapter 节点的 selector
|
148
|
-
chapter_steps:
|
154
|
+
chapter_steps: list[ChapterFieldRules] # 提取章节信息的步骤列表
|
149
155
|
|
150
156
|
|
151
157
|
class BookInfoRules(TypedDict, total=False):
|
@@ -175,4 +181,4 @@ class SiteRules(TypedDict):
|
|
175
181
|
chapter: ChapterRules
|
176
182
|
|
177
183
|
|
178
|
-
SiteRulesDict =
|
184
|
+
SiteRulesDict = dict[str, SiteRules]
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.config.site_rules
|
5
4
|
----------------------------------
|
@@ -14,7 +13,6 @@ This module provides functionality to:
|
|
14
13
|
import json
|
15
14
|
import logging
|
16
15
|
from pathlib import Path
|
17
|
-
from typing import Union
|
18
16
|
|
19
17
|
from novel_downloader.utils.cache import cached_load_config
|
20
18
|
from novel_downloader.utils.constants import SITE_RULES_FILE
|
@@ -26,7 +24,7 @@ logger = logging.getLogger(__name__)
|
|
26
24
|
|
27
25
|
|
28
26
|
def save_rules_as_json(
|
29
|
-
source_path:
|
27
|
+
source_path: str | Path, output_path: str | Path = SITE_RULES_FILE
|
30
28
|
) -> None:
|
31
29
|
"""
|
32
30
|
Load rules from source_path (toml, yaml, or json) and save as JSON.
|
@@ -78,7 +76,7 @@ def save_rules_as_json(
|
|
78
76
|
|
79
77
|
|
80
78
|
@cached_load_config
|
81
|
-
def load_site_rules(json_path:
|
79
|
+
def load_site_rules(json_path: str | Path = SITE_RULES_FILE) -> SiteRulesDict:
|
82
80
|
"""
|
83
81
|
Loads site scraping rules from a JSON file and caches the result for future access.
|
84
82
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
3
|
novel_downloader.core.downloaders
|
5
4
|
---------------------------------
|
@@ -11,11 +10,12 @@ Each downloader is responsible for orchestrating the full lifecycle
|
|
11
10
|
of retrieving, parsing, and saving novel content for a given source.
|
12
11
|
"""
|
13
12
|
|
14
|
-
from .
|
15
|
-
from .
|
16
|
-
from .
|
13
|
+
from .biquge import BiqugeDownloader
|
14
|
+
from .common import CommonAsyncDownloader, CommonDownloader
|
15
|
+
from .qidian import QidianDownloader
|
17
16
|
|
18
17
|
__all__ = [
|
18
|
+
"BiqugeDownloader",
|
19
19
|
"CommonAsyncDownloader",
|
20
20
|
"CommonDownloader",
|
21
21
|
"QidianDownloader",
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.base
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from .base_async import BaseAsyncDownloader
|
9
|
+
from .base_sync import BaseDownloader
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"BaseAsyncDownloader",
|
13
|
+
"BaseDownloader",
|
14
|
+
]
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.downloaders.
|
5
|
-
|
3
|
+
novel_downloader.core.downloaders.base.base_async
|
4
|
+
-------------------------------------------------
|
6
5
|
|
7
6
|
Defines the abstract base class `BaseAsyncDownloader`, which provides a
|
8
7
|
common interface and reusable logic for all downloader implementations.
|
@@ -11,19 +10,14 @@ common interface and reusable logic for all downloader implementations.
|
|
11
10
|
import abc
|
12
11
|
import logging
|
13
12
|
from pathlib import Path
|
14
|
-
from typing import List
|
15
13
|
|
16
14
|
from novel_downloader.config import DownloaderConfig
|
17
15
|
from novel_downloader.core.interfaces import (
|
16
|
+
AsyncDownloaderProtocol,
|
18
17
|
AsyncRequesterProtocol,
|
19
18
|
ParserProtocol,
|
20
19
|
SaverProtocol,
|
21
20
|
)
|
22
|
-
from novel_downloader.core.interfaces.async_downloader_protocol import (
|
23
|
-
AsyncDownloaderProtocol,
|
24
|
-
)
|
25
|
-
|
26
|
-
logger = logging.getLogger(__name__)
|
27
21
|
|
28
22
|
|
29
23
|
class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
|
@@ -53,6 +47,51 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
|
|
53
47
|
self._raw_data_dir.mkdir(parents=True, exist_ok=True)
|
54
48
|
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
55
49
|
|
50
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
51
|
+
|
52
|
+
async def download(self, book_ids: list[str]) -> None:
|
53
|
+
"""
|
54
|
+
The general batch download process:
|
55
|
+
1. Iterate over all book IDs
|
56
|
+
2. For each ID, call `download_one()`
|
57
|
+
|
58
|
+
:param book_ids: A list of book identifiers to download.
|
59
|
+
"""
|
60
|
+
await self.prepare()
|
61
|
+
|
62
|
+
# 2) batch download
|
63
|
+
for idx, book_id in enumerate(book_ids, start=1):
|
64
|
+
self.logger.debug(
|
65
|
+
"[%s] Starting download for %r (%s/%s)",
|
66
|
+
self.__class__.__name__,
|
67
|
+
book_id,
|
68
|
+
idx,
|
69
|
+
len(book_ids),
|
70
|
+
)
|
71
|
+
try:
|
72
|
+
await self.download_one(book_id)
|
73
|
+
except Exception as e:
|
74
|
+
self._handle_download_exception(book_id, e)
|
75
|
+
|
76
|
+
@abc.abstractmethod
|
77
|
+
async def download_one(self, book_id: str) -> None:
|
78
|
+
"""
|
79
|
+
The full download logic for a single book.
|
80
|
+
|
81
|
+
Subclasses must implement this method.
|
82
|
+
|
83
|
+
:param book_id: The identifier of the book to download.
|
84
|
+
"""
|
85
|
+
...
|
86
|
+
|
87
|
+
async def prepare(self) -> None:
|
88
|
+
"""
|
89
|
+
Optional hook called once before the batch download begins.
|
90
|
+
|
91
|
+
Subclasses can override this method to perform pre-download setup.
|
92
|
+
"""
|
93
|
+
return
|
94
|
+
|
56
95
|
@property
|
57
96
|
def requester(self) -> AsyncRequesterProtocol:
|
58
97
|
return self._requester
|
@@ -97,49 +136,6 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
|
|
97
136
|
def request_interval(self) -> float:
|
98
137
|
return self._config.request_interval
|
99
138
|
|
100
|
-
async def prepare(self) -> None:
|
101
|
-
"""
|
102
|
-
Optional hook called before downloading each book.
|
103
|
-
|
104
|
-
Subclasses can override this method to perform pre-download setup.
|
105
|
-
"""
|
106
|
-
return
|
107
|
-
|
108
|
-
async def download(self, book_ids: List[str]) -> None:
|
109
|
-
"""
|
110
|
-
The general batch download process:
|
111
|
-
1. Iterate over all book IDs
|
112
|
-
2. For each ID, call `download_one()`
|
113
|
-
|
114
|
-
:param book_ids: A list of book identifiers to download.
|
115
|
-
"""
|
116
|
-
await self.prepare()
|
117
|
-
|
118
|
-
# 2) batch download
|
119
|
-
for idx, book_id in enumerate(book_ids, start=1):
|
120
|
-
logger.debug(
|
121
|
-
"[%s] Starting download for %r (%s/%s)",
|
122
|
-
self.__class__.__name__,
|
123
|
-
book_id,
|
124
|
-
idx,
|
125
|
-
len(book_ids),
|
126
|
-
)
|
127
|
-
try:
|
128
|
-
await self.download_one(book_id)
|
129
|
-
except Exception as e:
|
130
|
-
self._handle_download_exception(book_id, e)
|
131
|
-
|
132
|
-
@abc.abstractmethod
|
133
|
-
async def download_one(self, book_id: str) -> None:
|
134
|
-
"""
|
135
|
-
The full download logic for a single book.
|
136
|
-
|
137
|
-
Subclasses must implement this method.
|
138
|
-
|
139
|
-
:param book_id: The identifier of the book to download.
|
140
|
-
"""
|
141
|
-
...
|
142
|
-
|
143
139
|
def _handle_download_exception(self, book_id: str, error: Exception) -> None:
|
144
140
|
"""
|
145
141
|
Handle download errors in a consistent way.
|
@@ -149,7 +145,7 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
|
|
149
145
|
:param book_id: The ID of the book that failed.
|
150
146
|
:param error: The exception raised during download.
|
151
147
|
"""
|
152
|
-
logger.warning(
|
148
|
+
self.logger.warning(
|
153
149
|
"[%s] Failed to download %r: %s",
|
154
150
|
self.__class__.__name__,
|
155
151
|
book_id,
|