novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,8 @@ Defines ConfigAdapter, which maps a raw configuration dictionary and
|
|
7
7
|
site name into structured dataclass-based config models.
|
8
8
|
"""
|
9
9
|
|
10
|
-
|
10
|
+
import json
|
11
|
+
from typing import Any, TypeVar, cast
|
11
12
|
|
12
13
|
from novel_downloader.models import (
|
13
14
|
BookConfig,
|
@@ -15,150 +16,104 @@ from novel_downloader.models import (
|
|
15
16
|
ExporterConfig,
|
16
17
|
FetcherConfig,
|
17
18
|
ParserConfig,
|
19
|
+
TextCleanerConfig,
|
18
20
|
)
|
19
|
-
from novel_downloader.utils.constants import SUPPORTED_SITES
|
20
21
|
|
21
|
-
|
22
|
+
T = TypeVar("T")
|
22
23
|
|
23
24
|
|
24
25
|
class ConfigAdapter:
|
25
26
|
"""
|
26
|
-
Adapter to map a raw
|
27
|
+
Adapter to map a raw configuration dictionary and site name
|
28
|
+
into structured dataclass configuration models.
|
27
29
|
"""
|
28
30
|
|
29
31
|
def __init__(self, config: dict[str, Any], site: str):
|
30
32
|
"""
|
31
|
-
|
32
|
-
|
33
|
+
Initialize the adapter.
|
34
|
+
|
35
|
+
:param config: The fully loaded configuration dictionary.
|
36
|
+
:param site: The current site name (e.g. "qidian").
|
33
37
|
"""
|
34
38
|
self._config = config
|
35
39
|
self._site = site
|
36
|
-
|
37
|
-
|
38
|
-
self._supported_sites = set(site_rules.keys()) | SUPPORTED_SITES
|
39
|
-
|
40
|
-
@property
|
41
|
-
def site(self) -> str:
|
42
|
-
return self._site
|
43
|
-
|
44
|
-
@site.setter
|
45
|
-
def site(self, value: str) -> None:
|
46
|
-
self._site = value
|
47
|
-
|
48
|
-
def _get_site_cfg(self, site: str | None = None) -> dict[str, Any]:
|
49
|
-
"""
|
50
|
-
获取指定站点的配置 (默认为当前适配站点)
|
51
|
-
|
52
|
-
1. 如果有 site-specific 配置, 优先返回它
|
53
|
-
2. 否则, 如果该站点在支持站点中, 尝试返回 'common' 配置
|
54
|
-
3. 否则返回空 dict
|
55
|
-
"""
|
56
|
-
site = site or self._site
|
57
|
-
sites_cfg = self._config.get("sites", {}) or {}
|
58
|
-
|
59
|
-
if site in sites_cfg:
|
60
|
-
return sites_cfg[site] or {}
|
61
|
-
|
62
|
-
if site in self._supported_sites:
|
63
|
-
return sites_cfg.get("common", {}) or {}
|
64
|
-
|
65
|
-
return {}
|
40
|
+
self._site_cfg: dict[str, Any] = self._get_site_cfg()
|
41
|
+
self._gen_cfg: dict[str, Any] = config.get("general") or {}
|
66
42
|
|
67
43
|
def get_fetcher_config(self) -> FetcherConfig:
|
68
44
|
"""
|
69
|
-
|
70
|
-
|
45
|
+
Build a FetcherConfig from the raw configuration.
|
46
|
+
|
47
|
+
:return: A FetcherConfig instance with all fields populated.
|
71
48
|
"""
|
72
|
-
gen = self._config.get("general", {})
|
73
|
-
req = self._config.get("requests", {})
|
74
|
-
site_cfg = self._get_site_cfg()
|
75
49
|
return FetcherConfig(
|
76
|
-
request_interval=
|
77
|
-
retry_times=
|
78
|
-
backoff_factor=
|
79
|
-
timeout=
|
80
|
-
max_connections=
|
81
|
-
max_rps=
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
user_agent=req.get("user_agent", None),
|
87
|
-
headers=req.get("headers", None),
|
88
|
-
browser_type=req.get("browser_type", "chromium"),
|
89
|
-
verify_ssl=req.get("verify_ssl", True),
|
50
|
+
request_interval=self._get_gen_cfg("request_interval", 2.0),
|
51
|
+
retry_times=self._get_gen_cfg("retry_times", 3),
|
52
|
+
backoff_factor=self._get_gen_cfg("backoff_factor", 2.0),
|
53
|
+
timeout=self._get_gen_cfg("timeout", 30.0),
|
54
|
+
max_connections=self._get_gen_cfg("max_connections", 10),
|
55
|
+
max_rps=self._get_gen_cfg("max_rps", 1000.0),
|
56
|
+
user_agent=self._get_gen_cfg("user_agent", None),
|
57
|
+
headers=self._get_gen_cfg("headers", None),
|
58
|
+
verify_ssl=self._get_gen_cfg("verify_ssl", True),
|
59
|
+
locale_style=self._get_gen_cfg("locale_style", "simplified"),
|
90
60
|
)
|
91
61
|
|
92
62
|
def get_downloader_config(self) -> DownloaderConfig:
|
93
63
|
"""
|
94
|
-
|
95
|
-
|
64
|
+
Build a DownloaderConfig using both general and site-specific settings.
|
65
|
+
|
66
|
+
:return: A DownloaderConfig instance with all fields populated.
|
96
67
|
"""
|
97
68
|
gen = self._config.get("general", {})
|
98
|
-
req = self._config.get("requests", {})
|
99
69
|
debug = gen.get("debug", {})
|
100
|
-
site_cfg = self._get_site_cfg()
|
101
70
|
return DownloaderConfig(
|
102
|
-
request_interval=
|
103
|
-
retry_times=
|
104
|
-
backoff_factor=
|
71
|
+
request_interval=self._get_gen_cfg("request_interval", 2.0),
|
72
|
+
retry_times=self._get_gen_cfg("retry_times", 3),
|
73
|
+
backoff_factor=self._get_gen_cfg("backoff_factor", 2.0),
|
74
|
+
workers=self._get_gen_cfg("workers", 2),
|
75
|
+
skip_existing=self._get_gen_cfg("skip_existing", True),
|
76
|
+
login_required=self._site_cfg.get("login_required", False),
|
77
|
+
save_html=debug.get("save_html", False),
|
105
78
|
raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
|
106
79
|
cache_dir=gen.get("cache_dir", "./novel_cache"),
|
107
|
-
download_workers=gen.get("download_workers", 2),
|
108
|
-
parser_workers=gen.get("parser_workers", 2),
|
109
|
-
skip_existing=gen.get("skip_existing", True),
|
110
|
-
login_required=site_cfg.get("login_required", False),
|
111
|
-
save_html=debug.get("save_html", False),
|
112
|
-
mode=site_cfg.get("mode", "session"),
|
113
|
-
storage_backend=gen.get("storage_backend", "json"),
|
114
80
|
storage_batch_size=gen.get("storage_batch_size", 1),
|
115
|
-
username=site_cfg.get("username", ""),
|
116
|
-
password=site_cfg.get("password", ""),
|
117
|
-
cookies=site_cfg.get("cookies", ""),
|
118
81
|
)
|
119
82
|
|
120
83
|
def get_parser_config(self) -> ParserConfig:
|
121
84
|
"""
|
122
|
-
|
123
|
-
|
85
|
+
Build a ParserConfig from general, OCR, and site-specific settings.
|
86
|
+
|
87
|
+
:return: A ParserConfig instance with all fields populated.
|
124
88
|
"""
|
125
89
|
gen = self._config.get("general", {})
|
126
90
|
font_ocr = gen.get("font_ocr", {})
|
127
|
-
site_cfg = self._get_site_cfg()
|
128
91
|
return ParserConfig(
|
129
92
|
cache_dir=gen.get("cache_dir", "./novel_cache"),
|
130
|
-
use_truncation=
|
93
|
+
use_truncation=self._site_cfg.get("use_truncation", True),
|
131
94
|
decode_font=font_ocr.get("decode_font", False),
|
132
|
-
use_freq=font_ocr.get("use_freq", False),
|
133
|
-
use_ocr=font_ocr.get("use_ocr", True),
|
134
|
-
use_vec=font_ocr.get("use_vec", False),
|
135
|
-
ocr_version=font_ocr.get("ocr_version", "v1.0"),
|
136
95
|
save_font_debug=font_ocr.get("save_font_debug", False),
|
137
96
|
batch_size=font_ocr.get("batch_size", 32),
|
138
|
-
gpu_mem=font_ocr.get("gpu_mem", 500),
|
139
|
-
gpu_id=font_ocr.get("gpu_id", None),
|
140
|
-
ocr_weight=font_ocr.get("ocr_weight", 0.6),
|
141
|
-
vec_weight=font_ocr.get("vec_weight", 0.4),
|
142
|
-
mode=site_cfg.get("mode", "session"),
|
143
97
|
)
|
144
98
|
|
145
99
|
def get_exporter_config(self) -> ExporterConfig:
|
146
100
|
"""
|
147
|
-
|
148
|
-
|
101
|
+
Build an ExporterConfig from output and general settings.
|
102
|
+
|
103
|
+
:return: An ExporterConfig instance with all fields populated.
|
149
104
|
"""
|
150
105
|
gen = self._config.get("general", {})
|
151
106
|
out = self._config.get("output", {})
|
107
|
+
cln = self._config.get("cleaner", {})
|
152
108
|
fmt = out.get("formats", {})
|
153
109
|
naming = out.get("naming", {})
|
154
110
|
epub_opts = out.get("epub", {})
|
155
|
-
|
111
|
+
cleaner_cfg = self._dict_to_cleaner_cfg(cln)
|
156
112
|
return ExporterConfig(
|
157
113
|
cache_dir=gen.get("cache_dir", "./novel_cache"),
|
158
114
|
raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
|
159
115
|
output_dir=gen.get("output_dir", "./downloads"),
|
160
|
-
|
161
|
-
clean_text=out.get("clean_text", True),
|
116
|
+
clean_text=cln.get("clean_text", True),
|
162
117
|
make_txt=fmt.get("make_txt", True),
|
163
118
|
make_epub=fmt.get("make_epub", False),
|
164
119
|
make_md=fmt.get("make_md", False),
|
@@ -166,14 +121,37 @@ class ConfigAdapter:
|
|
166
121
|
append_timestamp=naming.get("append_timestamp", True),
|
167
122
|
filename_template=naming.get("filename_template", "{title}_{author}"),
|
168
123
|
include_cover=epub_opts.get("include_cover", True),
|
169
|
-
|
170
|
-
|
171
|
-
|
124
|
+
include_picture=epub_opts.get("include_picture", True),
|
125
|
+
split_mode=self._site_cfg.get("split_mode", "book"),
|
126
|
+
cleaner_cfg=cleaner_cfg,
|
172
127
|
)
|
173
128
|
|
129
|
+
def get_login_config(self) -> dict[str, str]:
|
130
|
+
"""
|
131
|
+
Return the subset of login fields present in current site config:
|
132
|
+
* `username`
|
133
|
+
* `password`
|
134
|
+
* `cookies`
|
135
|
+
"""
|
136
|
+
out: dict[str, str] = {}
|
137
|
+
for key in ("username", "password", "cookies"):
|
138
|
+
val = self._site_cfg.get(key, "")
|
139
|
+
val = val.strip()
|
140
|
+
if val:
|
141
|
+
out[key] = val
|
142
|
+
return out
|
143
|
+
|
174
144
|
def get_book_ids(self) -> list[BookConfig]:
|
175
145
|
"""
|
176
|
-
|
146
|
+
Extract the list of target books from the site configuration.
|
147
|
+
|
148
|
+
The site config may specify book_ids as:
|
149
|
+
* a single string or integer
|
150
|
+
* a dict with book_id and optional start_id, end_id, ignore_ids
|
151
|
+
* a list of the above types
|
152
|
+
|
153
|
+
:return: A list of BookConfig dicts.
|
154
|
+
:raises ValueError: if the raw book_ids is neither a str/int, dict, nor list.
|
177
155
|
"""
|
178
156
|
site_cfg = self._get_site_cfg()
|
179
157
|
raw = site_cfg.get("book_ids", [])
|
@@ -182,7 +160,7 @@ class ConfigAdapter:
|
|
182
160
|
return [{"book_id": str(raw)}]
|
183
161
|
|
184
162
|
if isinstance(raw, dict):
|
185
|
-
return [self.
|
163
|
+
return [self._dict_to_book_cfg(raw)]
|
186
164
|
|
187
165
|
if not isinstance(raw, list):
|
188
166
|
raise ValueError(
|
@@ -195,17 +173,68 @@ class ConfigAdapter:
|
|
195
173
|
if isinstance(item, str | int):
|
196
174
|
result.append({"book_id": str(item)})
|
197
175
|
elif isinstance(item, dict):
|
198
|
-
result.append(self.
|
176
|
+
result.append(self._dict_to_book_cfg(item))
|
199
177
|
except ValueError:
|
200
178
|
continue
|
201
179
|
|
202
180
|
return result
|
203
181
|
|
182
|
+
def get_log_level(self) -> str:
|
183
|
+
"""
|
184
|
+
Retrieve the logging level from [general.debug].
|
185
|
+
|
186
|
+
:return: The configured log level ("DEBUG", "INFO", "WARNING", "ERROR").
|
187
|
+
"""
|
188
|
+
debug_cfg = self._config.get("general", {}).get("debug", {})
|
189
|
+
return debug_cfg.get("log_level") or "INFO"
|
190
|
+
|
191
|
+
@property
|
192
|
+
def site(self) -> str:
|
193
|
+
"""
|
194
|
+
Get the current site name.
|
195
|
+
"""
|
196
|
+
return self._site
|
197
|
+
|
198
|
+
@site.setter
|
199
|
+
def site(self, value: str) -> None:
|
200
|
+
"""
|
201
|
+
Set a new site name for configuration lookups.
|
202
|
+
|
203
|
+
:param value: The new site key in config["sites"] to use.
|
204
|
+
"""
|
205
|
+
self._site = value
|
206
|
+
self._site_cfg = self._get_site_cfg()
|
207
|
+
|
208
|
+
def _get_gen_cfg(self, key: str, default: T) -> T:
|
209
|
+
return self._site_cfg.get(key) or self._gen_cfg.get(key) or default
|
210
|
+
|
211
|
+
def _get_site_cfg(self) -> dict[str, Any]:
|
212
|
+
"""
|
213
|
+
Retrieve the configuration for a specific site.
|
214
|
+
|
215
|
+
Lookup order:
|
216
|
+
1. If there is a site-specific entry under config["sites"], return that.
|
217
|
+
2. Otherwise, if a "common" entry exists under config["sites"], return that.
|
218
|
+
3. If neither is present, return an empty dict.
|
219
|
+
|
220
|
+
:param site: Optional override of the site name; defaults to self._site.
|
221
|
+
:return: The site-specific or common configuration dict.
|
222
|
+
"""
|
223
|
+
sites_cfg = self._config.get("sites") or {}
|
224
|
+
|
225
|
+
if self._site in sites_cfg:
|
226
|
+
return sites_cfg[self._site] or {}
|
227
|
+
|
228
|
+
return sites_cfg.get("common") or {}
|
229
|
+
|
204
230
|
@staticmethod
|
205
|
-
def
|
231
|
+
def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
|
206
232
|
"""
|
207
|
-
|
208
|
-
|
233
|
+
Convert a dictionary to a BookConfig with normalized types.
|
234
|
+
|
235
|
+
:param data: A dict that must contain at least "book_id".
|
236
|
+
:return: A BookConfig dict with all values cast to strings or lists of strings.
|
237
|
+
:raises ValueError: if the "book_id" field is missing.
|
209
238
|
"""
|
210
239
|
if "book_id" not in data:
|
211
240
|
raise ValueError("Missing required field 'book_id'")
|
@@ -222,3 +251,70 @@ class ConfigAdapter:
|
|
222
251
|
result["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
|
223
252
|
|
224
253
|
return result
|
254
|
+
|
255
|
+
@classmethod
|
256
|
+
def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
|
257
|
+
"""
|
258
|
+
Convert a nested dict of title/content rules into a TextCleanerConfig.
|
259
|
+
|
260
|
+
:param cfg: configuration dictionary
|
261
|
+
:return: fully constructed TextCleanerConfig
|
262
|
+
"""
|
263
|
+
# Title rules
|
264
|
+
title_section = cfg.get("title", {})
|
265
|
+
title_remove = title_section.get("remove_patterns", [])
|
266
|
+
title_repl = title_section.get("replace", {})
|
267
|
+
|
268
|
+
title_ext = title_section.get("external", {})
|
269
|
+
if title_ext.get("enabled", False):
|
270
|
+
title_ext_rm_p = title_ext.get("remove_patterns", "")
|
271
|
+
title_ext_rp_p = title_ext.get("replace", "")
|
272
|
+
|
273
|
+
title_remove_ext = cls._load_str_list(title_ext_rm_p)
|
274
|
+
title_remove += title_remove_ext
|
275
|
+
|
276
|
+
title_repl_ext = cls._load_str_dict(title_ext_rp_p)
|
277
|
+
title_repl = {**title_repl, **title_repl_ext}
|
278
|
+
|
279
|
+
# Content rules
|
280
|
+
content_section = cfg.get("content", {})
|
281
|
+
content_remove = content_section.get("remove_patterns", [])
|
282
|
+
content_repl = content_section.get("replace", {})
|
283
|
+
|
284
|
+
content_ext = content_section.get("external", {})
|
285
|
+
|
286
|
+
if content_ext.get("enabled", False):
|
287
|
+
content_ext_rm_p = content_ext.get("remove_patterns", "")
|
288
|
+
content_ext_rp_p = content_ext.get("replace", "")
|
289
|
+
|
290
|
+
content_remove_ext = cls._load_str_list(content_ext_rm_p)
|
291
|
+
content_remove += content_remove_ext
|
292
|
+
|
293
|
+
content_repl_ext = cls._load_str_dict(content_ext_rp_p)
|
294
|
+
content_repl = {**content_repl, **content_repl_ext}
|
295
|
+
|
296
|
+
return TextCleanerConfig(
|
297
|
+
remove_invisible=cfg.get("remove_invisible", True),
|
298
|
+
title_remove_patterns=title_remove,
|
299
|
+
title_replacements=title_repl,
|
300
|
+
content_remove_patterns=content_remove,
|
301
|
+
content_replacements=content_repl,
|
302
|
+
)
|
303
|
+
|
304
|
+
@staticmethod
|
305
|
+
def _load_str_list(path: str) -> list[str]:
|
306
|
+
try:
|
307
|
+
with open(path, encoding="utf-8") as f:
|
308
|
+
parsed = json.load(f)
|
309
|
+
return cast(list[str], parsed)
|
310
|
+
except Exception:
|
311
|
+
return []
|
312
|
+
|
313
|
+
@staticmethod
|
314
|
+
def _load_str_dict(path: str) -> dict[str, str]:
|
315
|
+
try:
|
316
|
+
with open(path, encoding="utf-8") as f:
|
317
|
+
parsed = json.load(f)
|
318
|
+
return cast(dict[str, str], parsed)
|
319
|
+
except Exception:
|
320
|
+
return {}
|
@@ -1,24 +1,23 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.config.
|
4
|
-
|
3
|
+
novel_downloader.config.file_io
|
4
|
+
-------------------------------
|
5
5
|
|
6
|
-
Provides functionality to load Toml configuration files into Python
|
7
|
-
dictionaries, with robust error handling and fallback support.
|
6
|
+
Provides functionality to load Toml configuration files into Python dict
|
8
7
|
"""
|
9
8
|
|
10
9
|
import json
|
11
10
|
import logging
|
12
11
|
from pathlib import Path
|
13
|
-
from typing import Any
|
12
|
+
from typing import Any, TypeVar
|
14
13
|
|
15
|
-
from novel_downloader.utils.cache import cached_load_config
|
16
14
|
from novel_downloader.utils.constants import SETTING_FILE
|
17
15
|
|
16
|
+
T = TypeVar("T")
|
18
17
|
logger = logging.getLogger(__name__)
|
19
18
|
|
20
19
|
|
21
|
-
def
|
20
|
+
def _resolve_file_path(
|
22
21
|
user_path: str | Path | None,
|
23
22
|
local_filename: str | list[str],
|
24
23
|
fallback_path: Path,
|
@@ -27,9 +26,9 @@ def resolve_file_path(
|
|
27
26
|
Resolve the file path to use based on a prioritized lookup order.
|
28
27
|
|
29
28
|
Priority:
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
1. A user-specified path (if provided and exists)
|
30
|
+
2. A file in the current working directory with the given name
|
31
|
+
3. A globally registered fallback path
|
33
32
|
|
34
33
|
:param user_path: Optional user-specified file path.
|
35
34
|
:param local_filename: File name to check in the current working directory.
|
@@ -115,7 +114,6 @@ def _load_by_extension(path: Path) -> dict[str, Any]:
|
|
115
114
|
raise ValueError(f"Unsupported config file extension: {ext}")
|
116
115
|
|
117
116
|
|
118
|
-
@cached_load_config
|
119
117
|
def load_config(
|
120
118
|
config_path: str | Path | None = None,
|
121
119
|
) -> dict[str, Any]:
|
@@ -123,9 +121,9 @@ def load_config(
|
|
123
121
|
Load configuration data from a Toml file.
|
124
122
|
|
125
123
|
:param config_path: Optional path to the Toml configuration file.
|
126
|
-
:return:
|
124
|
+
:return: Parsed configuration as a dict.
|
127
125
|
"""
|
128
|
-
path =
|
126
|
+
path = _resolve_file_path(
|
129
127
|
user_path=config_path,
|
130
128
|
local_filename=[
|
131
129
|
"settings.toml",
|
@@ -146,6 +144,46 @@ def load_config(
|
|
146
144
|
return {}
|
147
145
|
|
148
146
|
|
147
|
+
def get_config_value(keys: list[str], default: T) -> T:
    """
    Look up a nested value in the loaded configuration.

    The configuration is descended one key at a time. The lookup gives up and
    returns ``default`` as soon as an intermediate value is not a dict, when
    the final key is absent, or when the value found is not an instance of
    ``type(default)``.

    :param keys: Key path to follow, outermost key first.
    :param default: Fallback value; its type is also used to validate the
                    value that was found.
    :return: The configured value, or ``default``.
    """
    node = load_config()
    if not keys:
        return default

    *parents, leaf = keys

    # Walk every key except the last one, which is read separately below.
    for name in parents:
        if not isinstance(node, dict):
            return default
        node = node.get(name, {})

    if not isinstance(node, dict):
        return default

    found = node.get(leaf, default)
    if isinstance(found, type(default)):
        return found
    return default
|
160
|
+
|
161
|
+
|
162
|
+
def save_config(
    config: dict[str, Any],
    output_path: str | Path = SETTING_FILE,
) -> None:
    """
    Save configuration data to disk in JSON format.

    The data is serialized to a string before the destination is opened, so a
    serialization error (e.g. a non-JSON-serializable value) cannot truncate
    or corrupt an already existing settings file.

    :param config: Dictionary containing configuration data to save.
    :param output_path: Destination path to save the config (default: SETTING_FILE).
    :raises Exception: If serializing the data, creating the parent directory,
                       or writing to the file fails. The error is logged and
                       then re-raised unchanged.
    """
    output = Path(output_path).expanduser().resolve()

    try:
        # Serialize first: opening the file with "w" truncates it immediately,
        # so streaming ``json.dump`` into it would destroy the previous
        # content if encoding failed midway.
        payload = json.dumps(config, indent=2, ensure_ascii=False)
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(payload, encoding="utf-8")
    except Exception as e:
        logger.error("[config] Failed to write config JSON '%s': %s", output, e)
        raise

    logger.info("[config] Configuration successfully saved to JSON: %s", output)
|
185
|
+
|
186
|
+
|
149
187
|
def save_config_file(
|
150
188
|
source_path: str | Path,
|
151
189
|
output_path: str | Path = SETTING_FILE,
|
@@ -156,9 +194,9 @@ def save_config_file(
|
|
156
194
|
|
157
195
|
:param source_path: The user-provided TOML file path.
|
158
196
|
:param output_path: Destination path to save the config (default: SETTING_FILE).
|
197
|
+
:raises Exception: If writing to the file fails.
|
159
198
|
"""
|
160
199
|
source = Path(source_path).expanduser().resolve()
|
161
|
-
output = Path(output_path).expanduser().resolve()
|
162
200
|
|
163
201
|
if not source.is_file():
|
164
202
|
raise FileNotFoundError(f"Source file not found: {source}")
|
@@ -169,17 +207,5 @@ def save_config_file(
|
|
169
207
|
logger.error("[config] Failed to load config file: %s", e)
|
170
208
|
raise ValueError(f"Invalid config file: {source}") from e
|
171
209
|
|
172
|
-
|
173
|
-
|
174
|
-
try:
|
175
|
-
with output.open("w", encoding="utf-8") as f:
|
176
|
-
json.dump(data, f, indent=2, ensure_ascii=False)
|
177
|
-
except Exception as e:
|
178
|
-
logger.error("[config] Failed to write config JSON '%s': %s", output, e)
|
179
|
-
raise
|
180
|
-
|
181
|
-
logger.info("[config] Configuration successfully saved to JSON: %s", output)
|
210
|
+
save_config(data, output_path)
|
182
211
|
return
|
183
|
-
|
184
|
-
|
185
|
-
__all__ = ["load_config"]
|
@@ -12,28 +12,29 @@ downloading and processing online novel content, including:
|
|
12
12
|
- Parser: Extracts structured data from HTML or SSR content.
|
13
13
|
- Fetcher: Sends HTTP requests and manages sessions, including login if required.
|
14
14
|
- Exporter: Responsible for exporting downloaded data into various output formats.
|
15
|
+
- search: Provides unified search functionality across supported novel sites.
|
15
16
|
"""
|
16
17
|
|
17
|
-
from .factory import (
|
18
|
-
get_downloader,
|
19
|
-
get_exporter,
|
20
|
-
get_fetcher,
|
21
|
-
get_parser,
|
22
|
-
)
|
23
|
-
from .interfaces import (
|
24
|
-
DownloaderProtocol,
|
25
|
-
ExporterProtocol,
|
26
|
-
FetcherProtocol,
|
27
|
-
ParserProtocol,
|
28
|
-
)
|
29
|
-
|
30
18
|
__all__ = [
|
31
19
|
"get_downloader",
|
32
20
|
"get_exporter",
|
33
21
|
"get_fetcher",
|
34
22
|
"get_parser",
|
23
|
+
"search",
|
35
24
|
"DownloaderProtocol",
|
36
25
|
"ExporterProtocol",
|
37
26
|
"FetcherProtocol",
|
38
27
|
"ParserProtocol",
|
39
28
|
]
|
29
|
+
|
30
|
+
from .downloaders import get_downloader
|
31
|
+
from .exporters import get_exporter
|
32
|
+
from .fetchers import get_fetcher
|
33
|
+
from .interfaces import (
|
34
|
+
DownloaderProtocol,
|
35
|
+
ExporterProtocol,
|
36
|
+
FetcherProtocol,
|
37
|
+
ParserProtocol,
|
38
|
+
)
|
39
|
+
from .parsers import get_parser
|
40
|
+
from .searchers import search
|