novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
novel_downloader/utils/network.py
CHANGED
@@ -6,295 +6,154 @@ novel_downloader.utils.network
 Utilities for handling HTTP requests and downloading remote resources.
 """
 
+__all__ = ["download"]
+
 import logging
-import random
-import time
 from pathlib import Path
 from typing import Literal
 from urllib.parse import unquote, urlparse
 
 import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
-from .constants import DEFAULT_HEADERS
-from .file_utils
+from .constants import DEFAULT_HEADERS
+from .file_utils import sanitize_filename
+from .file_utils.io import _get_non_conflicting_path, write_file
 
 logger = logging.getLogger(__name__)
-
 _DEFAULT_CHUNK_SIZE = 8192  # 8KB per chunk for streaming downloads
 
 
-def
-    url: str,
-    *,
-    retries: int = 3,
-    timeout: int = 10,
-    backoff: float = 0.5,
-    headers: dict[str, str] | None = None,
-    stream: bool = False,
-) -> requests.Response | None:
+def _normalize_url(url: str) -> str:
     """
-
-
-    :param url: URL to request.
-    :param retries: Number of retry attempts.
-    :param timeout: Timeout in seconds per request.
-    :param backoff: Base backoff delay between retries.
-    :param headers: Optional HTTP headers.
-    :param stream: Whether to stream the response.
-    :return: Response object if successful, else None.
+    Ensure URL has scheme, defaulting to https:// if missing.
     """
-
-
-
-
-
-            response.raise_for_status()
-            return response
-        except requests.RequestException as e:
-            logger.warning("[http] Attempt %s/%s failed: %s", attempt, retries, e)
-            if attempt < retries:
-                sleep_time = backoff * (2 ** (attempt - 1)) + random.uniform(0, 0.1)
-                time.sleep(sleep_time)
-        except Exception as e:
-            logger.error("[http] Unexpected error: %s", e)
-            break
-
-    logger.error("[http] Failed after %s attempts: %s", retries, url)
-    return None
-
+    if url.startswith("//"):
+        return "https:" + url
+    if not url.startswith(("http://", "https://")):
+        return "https://" + url
+    return url
 
-def image_url_to_filename(url: str) -> str:
-    """
-    Parse and sanitize a image filename from a URL.
-    If no filename or suffix exists, fallback to default name and extension.
 
-
-    :
-
+def _build_filepath(
+    folder: Path,
+    url: str,
+    filename: str | None,
+    default_suffix: str,
+    on_exist: Literal["overwrite", "skip", "rename"],
+) -> Path:
     parsed_url = urlparse(url)
-
-
-
-
-
-
-
-
-
-
+    url_path = Path(unquote(parsed_url.path))
+
+    raw_name = filename or url_path.name or "unnamed"
+    name = sanitize_filename(raw_name)
+    suffix = default_suffix or url_path.suffix
+    if suffix and not suffix.startswith("."):
+        suffix = "." + suffix
+
+    file_path = folder / name
+    if not file_path.suffix and suffix:
+        file_path = file_path.with_suffix(suffix)
+
+    if on_exist == "rename":
+        file_path = _get_non_conflicting_path(file_path)
+    return file_path
+
+
+def _make_session(
+    retries: int,
+    backoff: float,
+    headers: dict[str, str] | None,
+) -> requests.Session:
+    session = requests.Session()
+    session.headers.update(headers or DEFAULT_HEADERS)
+
+    retry = Retry(
+        total=retries,
+        backoff_factor=backoff,
+        status_forcelist=[429, 500, 502, 503, 504],
+        allowed_methods={"GET", "HEAD", "OPTIONS"},
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount("https://", adapter)
+    session.mount("http://", adapter)
+    return session
 
 
-def
+def download(
     url: str,
-
-
+    target_dir: str | Path | None = None,
+    filename: str | None = None,
     *,
     timeout: int = 10,
     retries: int = 3,
     backoff: float = 0.5,
     headers: dict[str, str] | None = None,
+    stream: bool = False,
     on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
+    default_suffix: str = "",
+    chunk_size: int = _DEFAULT_CHUNK_SIZE,
 ) -> Path | None:
     """
-    Download
-
-
-    :param
-    :param
-    :param
-    :param
-    :param
-    :param
-    :param
-    :
+    Download a URL to disk, with retries, optional rename/skip, and cleanup on failure.
+
+    :param url: the file URL.
+    :param target_dir: directory to save into.
+    :param filename: override the basename (else from URL path).
+    :param timeout: per-request timeout.
+    :param retries: GET retry count.
+    :param backoff: exponential backoff base.
+    :param headers: optional headers.
+    :param stream: Whether to stream the response.
+    :param on_exist: if 'skip', return filepath; if 'rename', auto-rename.
+    :param default_suffix: used if no suffix in URL or filename.
+    :param chunk_size: streaming chunk size.
+    :return: path to the downloaded file.
     """
-
-    if url.startswith("//"):
-        url = "https:" + url
-    elif not url.startswith("http"):
-        url = "https://" + url
+    url = _normalize_url(url)
 
-    folder = Path(
+    folder = Path(target_dir) if target_dir else Path.cwd()
     folder.mkdir(parents=True, exist_ok=True)
 
-
-
-    if not Path(name).suffix:
-        # infer ext from URL-derived name
-        name += Path(image_url_to_filename(url)).suffix
-    else:
-        name = image_url_to_filename(url)
-    save_path = folder / name
-
-    # Handle existing file
-    if save_path.exists():
-        if on_exist == "skip":
-            logger.debug("Skipping download; file exists: %s", save_path)
-            return save_path
-        if on_exist == "rename":
-            save_path = _get_non_conflicting_path(save_path)
-
-    # Proceed with download
-    resp = http_get_with_retry(
-        url,
-        retries=retries,
-        timeout=timeout,
-        backoff=backoff,
-        headers=headers or DEFAULT_HEADERS,
-        stream=False,
-    )
-
-    if not (resp and resp.ok):
-        logger.warning(
-            "Failed to download %s (status=%s)",
-            url,
-            getattr(resp, "status_code", None),
-        )
-        return None
-
-    # Write to disk
-    try:
-        _write_file(
-            content=resp.content,
-            filepath=save_path,
-            mode="wb",
-            on_exist=on_exist,
-        )
-        return save_path
-    except Exception:
-        logger.exception("Error saving image to %s", save_path)
-        return None
-
-
-def download_font_file(
-    url: str,
-    target_folder: str | Path,
-    *,
-    timeout: int = 10,
-    retries: int = 3,
-    backoff: float = 0.5,
-    on_exist: Literal["overwrite", "skip", "rename"] = "skip",
-) -> Path | None:
-    """
-    Download a font file from a URL and save it locally with retry and overwrite control
-
-    :param url: Fully-qualified font file URL.
-    :param target_folder: Local folder to save the font file.
-    :param timeout: Timeout for each request (in seconds).
-    :param retries: Number of retry attempts.
-    :param backoff: Base backoff time between retries (in seconds).
-    :param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
-    :return: Path to the saved font file, or None if failed.
-    """
-    # Validate and parse URL
-    parsed = urlparse(url)
-    if not parsed.scheme or not parsed.netloc:
-        logger.warning("[font] Invalid URL: %s", url)
-        return None
-
-    # Determine filename
-    filename = Path(unquote(parsed.path)).name
-    if not filename:
-        logger.warning("[font] Could not extract filename from URL: %s", url)
-        return None
-
-    # Resolve save path
-    target_folder = Path(target_folder)
-    target_folder.mkdir(parents=True, exist_ok=True)
-    font_path = target_folder / filename
-
-    # If skip and file exists -> return immediately
-    if on_exist == "skip" and font_path.exists():
-        logger.debug("[font] File exists, skipping download: %s", font_path)
-        return font_path
-
-    # Retry download with exponential backoff
-    response = http_get_with_retry(
+    save_path = _build_filepath(
+        folder,
         url,
-
-
-
-        headers=DEFAULT_HEADERS,
-        stream=True,
+        filename,
+        default_suffix,
+        on_exist,
     )
 
-
-
-
-            font_path = _get_non_conflicting_path(font_path)
-
-        with open(font_path, "wb") as f:
-            for chunk in response.iter_content(chunk_size=_DEFAULT_CHUNK_SIZE):
-                if chunk:
-                    f.write(chunk)
-
-        logger.debug("[font] Font saved to: %s", font_path)
-        return font_path
-
-    except Exception as e:
-        logger.error("[font] Error writing font to disk: %s", e)
-
-    return None
-
-
-def download_js_file(
-    url: str,
-    target_folder: str | Path,
-    *,
-    timeout: int = 10,
-    retries: int = 3,
-    backoff: float = 0.5,
-    on_exist: Literal["overwrite", "skip", "rename"] = "skip",
-) -> Path | None:
-    """
-    Download a JavaScript (.js) file from a URL and save it locally.
-
-    :param url: Fully-qualified JS file URL.
-    :param target_folder: Local folder to save the JS file.
-    :param timeout: Timeout for each request (in seconds).
-    :param retries: Number of retry attempts.
-    :param backoff: Base backoff time between retries (in seconds).
-    :param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
-    :return: Path to the saved JS file, or None if failed.
-    """
-    parsed = urlparse(url)
-    if not parsed.scheme or not parsed.netloc:
-        logger.warning("[js] Invalid URL: %s", url)
-        return None
-
-    # Determine filename
-    filename = Path(unquote(parsed.path)).name
-    if not filename.endswith(".js"):
-        filename += ".js"
-
-    target_folder = Path(target_folder)
-    target_folder.mkdir(parents=True, exist_ok=True)
-    save_path = target_folder / filename
-
-    if on_exist == "skip" and save_path.exists():
-        logger.debug("[js] File exists, skipping download: %s", save_path)
+    # Handle existing file
+    if save_path.exists() and on_exist == "skip":
+        logger.debug("Skipping download; file exists: %s", save_path)
         return save_path
 
-
-        url,
-        retries=retries,
-        timeout=timeout,
-        backoff=backoff,
-        headers=DEFAULT_HEADERS,
-        stream=False,
-    )
-
-    if response and response.ok:
-        content = response.content
-
-        if on_exist == "rename":
-            save_path = _get_non_conflicting_path(save_path)
-
+    with _make_session(retries, backoff, headers) as session:
         try:
-
-
-            return save_path
+            resp = session.get(url, timeout=timeout, stream=stream)
+            resp.raise_for_status()
         except Exception as e:
-            logger.
-
+            logger.warning("[download] request failed: %s", e)
+            return None
+
+        # Write to disk
+        if stream:
+            try:
+                with open(save_path, "wb") as f:
+                    for chunk in resp.iter_content(chunk_size=chunk_size):
+                        if chunk:
+                            f.write(chunk)
+                return save_path
+            except Exception as e:
+                logger.warning("[download] write failed: %s", e)
+                save_path.unlink(missing_ok=True)
+                return None
+        else:
+            return write_file(
+                content=resp.content,
+                filepath=save_path,
+                write_mode="wb",
+                on_exist=on_exist,
+            )
    return None
novel_downloader/utils/state.py
CHANGED
@@ -2,23 +2,22 @@
 """
 novel_downloader.utils.state
 ----------------------------
-State management for user preferences and runtime flags.
 
-
-- general: global preferences (e.g. language)
-- sites: per-site flags & data (e.g. manual_login, cookies)
+State management for user preferences and runtime flags.
 """
+
+__all__ = ["StateManager", "state_mgr"]
+
 import json
 from pathlib import Path
 from typing import Any
 
-from .constants import STATE_FILE
+from novel_downloader.utils.constants import STATE_FILE
 
 
 class StateManager:
     """
     Manages persistent state for user preferences and runtime flags.
-    Stores data in JSON at STATE_FILE.
     """
 
     def __init__(self, path: Path = STATE_FILE) -> None:
@@ -49,26 +48,6 @@ class StateManager:
         content = json.dumps(self._data, ensure_ascii=False, indent=2)
         self._path.write_text(content, encoding="utf-8")
 
-    def _parse_cookie_string(self, cookie_str: str) -> dict[str, str]:
-        """
-        Parse a Cookie header string into a dict.
-
-        :param cookie_str: e.g. 'k1=v1; k2=v2; k3'
-        :return: mapping cookie names to values (missing '=' yields empty string)
-        :rtype: Dict[str, str]
-        """
-        cookies: dict[str, str] = {}
-        for item in cookie_str.split(";"):
-            item = item.strip()
-            if not item:
-                continue
-            if "=" in item:
-                k, v = item.split("=", 1)
-                cookies[k.strip()] = v.strip()
-            else:
-                cookies[item] = ""
-        return cookies
-
     def get_language(self) -> str:
         """
         Load the user's language preference, defaulting to 'zh'.
@@ -87,69 +66,5 @@ class StateManager:
         self._data.setdefault("general", {})["lang"] = lang
         self._save()
 
-    def get_manual_login_flag(self, site: str) -> bool:
-        """
-        Retrieve the manual login requirement flag for a specific site.
-
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        :return: True if manual login is required (defaults to True)
-        """
-        val = self._data.get("sites", {}).get(site, {}).get("manual_login", True)
-        return bool(val)
-
-    def set_manual_login_flag(self, site: str, flag: bool) -> None:
-        """
-        Set the 'manual_login' flag for a specific site.
-
-        :param flag: True if the site requires manual login.
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        """
-        sites = self._data.setdefault("sites", {})
-        site_data = sites.setdefault(site, {})
-        site_data["manual_login"] = flag
-        self._save()
-
-    def get_cookies(self, site: str) -> dict[str, str]:
-        """
-        Retrieve the persisted cookies for a specific site.
-
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        :return: A dict mapping cookie names to values. Returns empty dict if not set.
-        """
-        cookies = self._data.get("sites", {}).get(site, {}).get("cookies", {})
-        return {str(k): str(v) for k, v in cookies.items()}
-
-    def set_cookies(self, site: str, cookies: str | dict[str, str]) -> None:
-        """
-        Persist (overwrite) the cookies for a specific site.
-
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        :param cookies: Either a dict mapping cookie names to values,
-                        or a string (JSON or 'k=v; k2=v2') to be parsed.
-        :raises TypeError: if cookies is neither str nor dict
-        """
-        # 1) normalize to dict
-        if isinstance(cookies, dict):
-            cookies_dict = cookies
-        elif isinstance(cookies, str):
-            # try JSON first
-            try:
-                parsed = json.loads(cookies)
-                if isinstance(parsed, dict):
-                    cookies_dict = parsed  # OK!
-                else:
-                    raise ValueError
-            except Exception:
-                # fallback to "k=v; k2=v2" format
-                cookies_dict = self._parse_cookie_string(cookies)
-        else:
-            raise TypeError("`cookies` must be a dict or a str")
-
-        # 2) persist
-        sites = self._data.setdefault("sites", {})
-        site_data = sites.setdefault(site, {})
-        site_data["cookies"] = {str(k): str(v) for k, v in cookies_dict.items()}
-        self._save()
-
 
 state_mgr = StateManager()
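
As the hunks above show, the 2.0.0 StateManager keeps only general preferences; the per-site cookie and manual_login helpers are removed. A minimal sketch of what remains, using only the names visible in the diff:

    from novel_downloader.utils.state import state_mgr

    # Only general preferences survive in 2.0.0; site cookies and
    # manual_login flags are no longer stored here.
    lang = state_mgr.get_language()  # defaults to "zh"
    print(lang)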
novel_downloader/utils/text_utils/__init__.py
CHANGED
@@ -3,31 +3,26 @@
 novel_downloader.utils.text_utils
 ---------------------------------
 
-Utility modules for text formatting,
-
-Submodules:
-- font_mapping: Replace obfuscated characters using font maps
-- chapter_formatting: Build structured chapter strings from raw content
-- text_cleaning: Remove promo text and check for spam lines
-- diff_display: Generate inline diffs with aligned character markers
+Utility modules for text formatting, cleaning, and diff display.
 """
 
-from .chapter_formatting import format_chapter
-from .diff_display import diff_inline_display
-from .font_mapping import apply_font_mapping
-from .text_cleaning import (
-    clean_chapter_title,
-    content_prefix,
-    is_promotional_line,
-    truncate_half_lines,
-)
-
 __all__ = [
-    "
-    "
-    "clean_chapter_title",
-    "is_promotional_line",
+    "TextCleaner",
+    "get_cleaner",
     "content_prefix",
     "truncate_half_lines",
+    "chinese_to_arabic",
+    "arabic_to_chinese",
     "diff_inline_display",
 ]
+
+from .diff_display import diff_inline_display
+from .numeric_conversion import (
+    arabic_to_chinese,
+    chinese_to_arabic,
+)
+from .text_cleaner import TextCleaner, get_cleaner
+from .truncate_utils import (
+    content_prefix,
+    truncate_half_lines,
+)
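
The reorganized text_utils package therefore exposes a new public surface. A hedged import sketch based only on the __all__ list above; the conversion call is an assumption about behavior implied by the function name, since the implementation of numeric_conversion is not shown in this diff:

    from novel_downloader.utils.text_utils import (
        arabic_to_chinese,
        chinese_to_arabic,
        get_cleaner,
    )

    # Assumed semantics: convert a Chinese numeral string to an int.
    print(chinese_to_arabic("一百二十三"))  # expected 123 (assumption)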
novel_downloader/utils/text_utils/diff_display.py
CHANGED
@@ -6,6 +6,8 @@ novel_downloader.utils.text_utils.diff_display
 Generate inline character-level diff between two strings with visual markers.
 """
 
+__all__ = ["diff_inline_display"]
+
 import difflib
 import unicodedata
 
@@ -18,10 +20,10 @@ def _char_width_space(
 
     Fullwidth (F) or Wide (W) characters map to `asian_char`, else `normal_char`.
 
-    :param c:
+    :param c: A single character.
     :param normal_char: Replacement for narrow chars (default U+0020).
-    :param asian_char:
-    :return:
+    :param asian_char: Replacement for wide chars (default U+3000).
+    :return: The appropriate space character.
     """
     return asian_char if unicodedata.east_asian_width(c) in ("F", "W") else normal_char
 
@@ -33,7 +35,7 @@ def diff_inline_display(old_str: str, new_str: str) -> str:
 
     :param old_str: Original string (prefixed '-' will be trimmed).
     :param new_str: Modified string (prefixed '+' will be trimmed).
-    :return:
+    :return: A multiline diff display with aligned markers.
     """
     space_1 = " "
     space_2 = "\u3000"
@@ -67,8 +69,3 @@ def diff_inline_display(old_str: str, new_str: str) -> str:
         marker_s2 += "".join(_char_width_space(c, mark_1, mark_2) for c in s2_seg)
     output_str = f"-{s1}\n {marker_s1}\n+{s2}\n {marker_s2}"
     return output_str
-
-
-__all__ = [
-    "diff_inline_display",
-]
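
A short usage sketch of the function whose docstring is completed above; the two strings are arbitrary sample inputs:

    from novel_downloader.utils.text_utils.diff_display import diff_inline_display

    # Prints the old and new strings with per-character alignment markers
    # underneath, as described in the docstring.
    print(diff_inline_display("第十章 夜雨", "第十一章 夜雨"))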