novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -2
- novel_downloader/cli/config.py +1 -83
- novel_downloader/cli/download.py +4 -5
- novel_downloader/cli/export.py +4 -1
- novel_downloader/cli/main.py +2 -0
- novel_downloader/cli/search.py +123 -0
- novel_downloader/config/__init__.py +3 -10
- novel_downloader/config/adapter.py +190 -54
- novel_downloader/config/loader.py +2 -3
- novel_downloader/core/__init__.py +13 -13
- novel_downloader/core/downloaders/__init__.py +10 -11
- novel_downloader/core/downloaders/base.py +152 -26
- novel_downloader/core/downloaders/biquge.py +5 -1
- novel_downloader/core/downloaders/common.py +157 -378
- novel_downloader/core/downloaders/esjzone.py +5 -1
- novel_downloader/core/downloaders/linovelib.py +5 -1
- novel_downloader/core/downloaders/qianbi.py +291 -4
- novel_downloader/core/downloaders/qidian.py +199 -285
- novel_downloader/core/downloaders/registry.py +67 -0
- novel_downloader/core/downloaders/sfacg.py +5 -1
- novel_downloader/core/downloaders/yamibo.py +5 -1
- novel_downloader/core/exporters/__init__.py +10 -11
- novel_downloader/core/exporters/base.py +87 -7
- novel_downloader/core/exporters/biquge.py +5 -8
- novel_downloader/core/exporters/common/__init__.py +2 -2
- novel_downloader/core/exporters/common/epub.py +82 -166
- novel_downloader/core/exporters/common/main_exporter.py +0 -60
- novel_downloader/core/exporters/common/txt.py +82 -83
- novel_downloader/core/exporters/epub_util.py +157 -1330
- novel_downloader/core/exporters/esjzone.py +5 -8
- novel_downloader/core/exporters/linovelib/__init__.py +2 -2
- novel_downloader/core/exporters/linovelib/epub.py +157 -212
- novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
- novel_downloader/core/exporters/linovelib/txt.py +67 -63
- novel_downloader/core/exporters/qianbi.py +5 -8
- novel_downloader/core/exporters/qidian.py +14 -4
- novel_downloader/core/exporters/registry.py +53 -0
- novel_downloader/core/exporters/sfacg.py +5 -8
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/exporters/yamibo.py +5 -8
- novel_downloader/core/fetchers/__init__.py +19 -24
- novel_downloader/core/fetchers/base/__init__.py +3 -3
- novel_downloader/core/fetchers/base/browser.py +23 -4
- novel_downloader/core/fetchers/base/session.py +30 -5
- novel_downloader/core/fetchers/biquge/__init__.py +3 -3
- novel_downloader/core/fetchers/biquge/browser.py +5 -0
- novel_downloader/core/fetchers/biquge/session.py +6 -1
- novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
- novel_downloader/core/fetchers/esjzone/browser.py +5 -0
- novel_downloader/core/fetchers/esjzone/session.py +6 -1
- novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
- novel_downloader/core/fetchers/linovelib/browser.py +6 -1
- novel_downloader/core/fetchers/linovelib/session.py +6 -1
- novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
- novel_downloader/core/fetchers/qianbi/browser.py +5 -0
- novel_downloader/core/fetchers/qianbi/session.py +5 -0
- novel_downloader/core/fetchers/qidian/__init__.py +3 -3
- novel_downloader/core/fetchers/qidian/browser.py +12 -4
- novel_downloader/core/fetchers/qidian/session.py +11 -3
- novel_downloader/core/fetchers/registry.py +71 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
- novel_downloader/core/fetchers/sfacg/browser.py +5 -0
- novel_downloader/core/fetchers/sfacg/session.py +5 -0
- novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
- novel_downloader/core/fetchers/yamibo/browser.py +5 -0
- novel_downloader/core/fetchers/yamibo/session.py +6 -1
- novel_downloader/core/interfaces/__init__.py +7 -5
- novel_downloader/core/interfaces/searcher.py +18 -0
- novel_downloader/core/parsers/__init__.py +10 -11
- novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
- novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
- novel_downloader/core/parsers/qidian/main_parser.py +10 -21
- novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/registry.py +68 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
- novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
- novel_downloader/core/searchers/__init__.py +20 -0
- novel_downloader/core/searchers/base.py +92 -0
- novel_downloader/core/searchers/biquge.py +83 -0
- novel_downloader/core/searchers/esjzone.py +84 -0
- novel_downloader/core/searchers/qianbi.py +131 -0
- novel_downloader/core/searchers/qidian.py +87 -0
- novel_downloader/core/searchers/registry.py +63 -0
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +12 -4
- novel_downloader/models/__init__.py +4 -30
- novel_downloader/models/config.py +12 -6
- novel_downloader/models/search.py +16 -0
- novel_downloader/models/types.py +0 -2
- novel_downloader/resources/config/settings.toml +31 -4
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/utils/__init__.py +52 -0
- novel_downloader/utils/chapter_storage.py +244 -224
- novel_downloader/utils/constants.py +1 -21
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +77 -0
- novel_downloader/utils/epub/documents.py +403 -0
- novel_downloader/utils/epub/models.py +134 -0
- novel_downloader/utils/epub/utils.py +212 -0
- novel_downloader/utils/file_utils/__init__.py +10 -14
- novel_downloader/utils/file_utils/io.py +20 -51
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -3
- novel_downloader/utils/fontocr/__init__.py +5 -5
- novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
- novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +13 -1
- novel_downloader/utils/fontocr/ocr_v2.py +13 -1
- novel_downloader/utils/fontocr/ocr_v3.py +744 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +2 -0
- novel_downloader/utils/network.py +110 -251
- novel_downloader/utils/state.py +1 -0
- novel_downloader/utils/text_utils/__init__.py +18 -17
- novel_downloader/utils/text_utils/diff_display.py +4 -5
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +3 -3
- novel_downloader/utils/time_utils/datetime_utils.py +4 -5
- novel_downloader/utils/time_utils/sleep_utils.py +2 -3
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
- novel_downloader-1.5.0.dist-info/RECORD +164 -0
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/common/browser.py +0 -79
- novel_downloader/core/fetchers/common/session.py +0 -79
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.4.dist-info/RECORD +0 -165
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
novel_downloader/utils/i18n.py
CHANGED
novel_downloader/utils/logger.py
CHANGED
@@ -7,6 +7,8 @@ Provides a configurable logging setup for Python applications.
|
|
7
7
|
Log files are rotated daily and named with the given logger name and current date.
|
8
8
|
"""
|
9
9
|
|
10
|
+
__all__ = ["setup_logging"]
|
11
|
+
|
10
12
|
import logging
|
11
13
|
from datetime import datetime
|
12
14
|
from logging.handlers import TimedRotatingFileHandler
|
@@ -6,295 +6,154 @@ novel_downloader.utils.network
|
|
6
6
|
Utilities for handling HTTP requests and downloading remote resources.
|
7
7
|
"""
|
8
8
|
|
9
|
+
__all__ = ["download"]
|
10
|
+
|
9
11
|
import logging
|
10
|
-
import random
|
11
|
-
import time
|
12
12
|
from pathlib import Path
|
13
13
|
from typing import Literal
|
14
14
|
from urllib.parse import unquote, urlparse
|
15
15
|
|
16
16
|
import requests
|
17
|
+
from requests.adapters import HTTPAdapter
|
18
|
+
from urllib3.util.retry import Retry
|
17
19
|
|
18
|
-
from .constants import DEFAULT_HEADERS
|
20
|
+
from .constants import DEFAULT_HEADERS
|
21
|
+
from .file_utils import sanitize_filename
|
19
22
|
from .file_utils.io import _get_non_conflicting_path, _write_file
|
20
23
|
|
21
24
|
logger = logging.getLogger(__name__)
|
22
|
-
|
23
25
|
_DEFAULT_CHUNK_SIZE = 8192 # 8KB per chunk for streaming downloads
|
24
26
|
|
25
27
|
|
26
|
-
def
|
27
|
-
url: str,
|
28
|
-
*,
|
29
|
-
retries: int = 3,
|
30
|
-
timeout: int = 10,
|
31
|
-
backoff: float = 0.5,
|
32
|
-
headers: dict[str, str] | None = None,
|
33
|
-
stream: bool = False,
|
34
|
-
) -> requests.Response | None:
|
28
|
+
def _normalize_url(url: str) -> str:
|
35
29
|
"""
|
36
|
-
|
37
|
-
|
38
|
-
:param url: URL to request.
|
39
|
-
:param retries: Number of retry attempts.
|
40
|
-
:param timeout: Timeout in seconds per request.
|
41
|
-
:param backoff: Base backoff delay between retries.
|
42
|
-
:param headers: Optional HTTP headers.
|
43
|
-
:param stream: Whether to stream the response.
|
44
|
-
:return: Response object if successful, else None.
|
30
|
+
Ensure URL has scheme, defaulting to https:// if missing.
|
45
31
|
"""
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
response.raise_for_status()
|
52
|
-
return response
|
53
|
-
except requests.RequestException as e:
|
54
|
-
logger.warning("[http] Attempt %s/%s failed: %s", attempt, retries, e)
|
55
|
-
if attempt < retries:
|
56
|
-
sleep_time = backoff * (2 ** (attempt - 1)) + random.uniform(0, 0.1)
|
57
|
-
time.sleep(sleep_time)
|
58
|
-
except Exception as e:
|
59
|
-
logger.error("[http] Unexpected error: %s", e)
|
60
|
-
break
|
61
|
-
|
62
|
-
logger.error("[http] Failed after %s attempts: %s", retries, url)
|
63
|
-
return None
|
64
|
-
|
32
|
+
if url.startswith("//"):
|
33
|
+
return "https:" + url
|
34
|
+
if not url.startswith(("http://", "https://")):
|
35
|
+
return "https://" + url
|
36
|
+
return url
|
65
37
|
|
66
|
-
def image_url_to_filename(url: str) -> str:
|
67
|
-
"""
|
68
|
-
Parse and sanitize a image filename from a URL.
|
69
|
-
If no filename or suffix exists, fallback to default name and extension.
|
70
38
|
|
71
|
-
|
72
|
-
:
|
73
|
-
|
39
|
+
def _build_filepath(
|
40
|
+
folder: Path,
|
41
|
+
url: str,
|
42
|
+
filename: str | None,
|
43
|
+
default_suffix: str,
|
44
|
+
on_exist: Literal["overwrite", "skip", "rename"],
|
45
|
+
) -> Path:
|
74
46
|
parsed_url = urlparse(url)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
47
|
+
url_path = Path(unquote(parsed_url.path))
|
48
|
+
|
49
|
+
raw_name = filename or url_path.name or "unnamed"
|
50
|
+
name = sanitize_filename(raw_name)
|
51
|
+
suffix = default_suffix or url_path.suffix
|
52
|
+
if suffix and not suffix.startswith("."):
|
53
|
+
suffix = "." + suffix
|
54
|
+
|
55
|
+
file_path = folder / name
|
56
|
+
if not file_path.suffix and suffix:
|
57
|
+
file_path = file_path.with_suffix(suffix)
|
58
|
+
|
59
|
+
if on_exist == "rename":
|
60
|
+
file_path = _get_non_conflicting_path(file_path)
|
61
|
+
return file_path
|
62
|
+
|
63
|
+
|
64
|
+
def _make_session(
|
65
|
+
retries: int,
|
66
|
+
backoff: float,
|
67
|
+
headers: dict[str, str] | None,
|
68
|
+
) -> requests.Session:
|
69
|
+
session = requests.Session()
|
70
|
+
session.headers.update(headers or DEFAULT_HEADERS)
|
71
|
+
|
72
|
+
retry = Retry(
|
73
|
+
total=retries,
|
74
|
+
backoff_factor=backoff,
|
75
|
+
status_forcelist=[429, 500, 502, 503, 504],
|
76
|
+
allowed_methods={"GET", "HEAD", "OPTIONS"},
|
77
|
+
)
|
78
|
+
adapter = HTTPAdapter(max_retries=retry)
|
79
|
+
session.mount("https://", adapter)
|
80
|
+
session.mount("http://", adapter)
|
81
|
+
return session
|
85
82
|
|
86
83
|
|
87
|
-
def
|
84
|
+
def download(
|
88
85
|
url: str,
|
89
|
-
|
90
|
-
|
86
|
+
target_dir: str | Path | None = None,
|
87
|
+
filename: str | None = None,
|
91
88
|
*,
|
92
89
|
timeout: int = 10,
|
93
90
|
retries: int = 3,
|
94
91
|
backoff: float = 0.5,
|
95
92
|
headers: dict[str, str] | None = None,
|
93
|
+
stream: bool = False,
|
96
94
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
95
|
+
default_suffix: str = "",
|
96
|
+
chunk_size: int = _DEFAULT_CHUNK_SIZE,
|
97
97
|
) -> Path | None:
|
98
98
|
"""
|
99
|
-
Download
|
100
|
-
|
101
|
-
|
102
|
-
:param
|
103
|
-
:param
|
104
|
-
:param
|
105
|
-
:param
|
106
|
-
:param
|
107
|
-
:param
|
108
|
-
:param
|
109
|
-
:
|
99
|
+
Download a URL to disk, with retries, optional rename/skip, and cleanup on failure.
|
100
|
+
|
101
|
+
:param url: the file URL.
|
102
|
+
:param target_dir: directory to save into.
|
103
|
+
:param filename: override the basename (else from URL path).
|
104
|
+
:param timeout: per-request timeout.
|
105
|
+
:param retries: GET retry count.
|
106
|
+
:param backoff: exponential backoff base.
|
107
|
+
:param headers: optional headers.
|
108
|
+
:param stream: Whether to stream the response.
|
109
|
+
:param on_exist: if 'skip', return filepath; if 'rename', auto-rename.
|
110
|
+
:param default_suffix: used if no suffix in URL or filename.
|
111
|
+
:param chunk_size: streaming chunk size.
|
112
|
+
:return: path to the downloaded file.
|
110
113
|
"""
|
111
|
-
|
112
|
-
if url.startswith("//"):
|
113
|
-
url = "https:" + url
|
114
|
-
elif not url.startswith("http"):
|
115
|
-
url = "https://" + url
|
114
|
+
url = _normalize_url(url)
|
116
115
|
|
117
|
-
folder = Path(
|
116
|
+
folder = Path(target_dir) if target_dir else Path.cwd()
|
118
117
|
folder.mkdir(parents=True, exist_ok=True)
|
119
118
|
|
120
|
-
|
121
|
-
|
122
|
-
if not Path(name).suffix:
|
123
|
-
# infer ext from URL-derived name
|
124
|
-
name += Path(image_url_to_filename(url)).suffix
|
125
|
-
else:
|
126
|
-
name = image_url_to_filename(url)
|
127
|
-
save_path = folder / name
|
128
|
-
|
129
|
-
# Handle existing file
|
130
|
-
if save_path.exists():
|
131
|
-
if on_exist == "skip":
|
132
|
-
logger.debug("Skipping download; file exists: %s", save_path)
|
133
|
-
return save_path
|
134
|
-
if on_exist == "rename":
|
135
|
-
save_path = _get_non_conflicting_path(save_path)
|
136
|
-
|
137
|
-
# Proceed with download
|
138
|
-
resp = http_get_with_retry(
|
139
|
-
url,
|
140
|
-
retries=retries,
|
141
|
-
timeout=timeout,
|
142
|
-
backoff=backoff,
|
143
|
-
headers=headers or DEFAULT_HEADERS,
|
144
|
-
stream=False,
|
145
|
-
)
|
146
|
-
|
147
|
-
if not (resp and resp.ok):
|
148
|
-
logger.warning(
|
149
|
-
"Failed to download %s (status=%s)",
|
150
|
-
url,
|
151
|
-
getattr(resp, "status_code", None),
|
152
|
-
)
|
153
|
-
return None
|
154
|
-
|
155
|
-
# Write to disk
|
156
|
-
try:
|
157
|
-
_write_file(
|
158
|
-
content=resp.content,
|
159
|
-
filepath=save_path,
|
160
|
-
mode="wb",
|
161
|
-
on_exist=on_exist,
|
162
|
-
)
|
163
|
-
return save_path
|
164
|
-
except Exception:
|
165
|
-
logger.exception("Error saving image to %s", save_path)
|
166
|
-
return None
|
167
|
-
|
168
|
-
|
169
|
-
def download_font_file(
|
170
|
-
url: str,
|
171
|
-
target_folder: str | Path,
|
172
|
-
*,
|
173
|
-
timeout: int = 10,
|
174
|
-
retries: int = 3,
|
175
|
-
backoff: float = 0.5,
|
176
|
-
on_exist: Literal["overwrite", "skip", "rename"] = "skip",
|
177
|
-
) -> Path | None:
|
178
|
-
"""
|
179
|
-
Download a font file from a URL and save it locally with retry and overwrite control
|
180
|
-
|
181
|
-
:param url: Fully-qualified font file URL.
|
182
|
-
:param target_folder: Local folder to save the font file.
|
183
|
-
:param timeout: Timeout for each request (in seconds).
|
184
|
-
:param retries: Number of retry attempts.
|
185
|
-
:param backoff: Base backoff time between retries (in seconds).
|
186
|
-
:param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
|
187
|
-
:return: Path to the saved font file, or None if failed.
|
188
|
-
"""
|
189
|
-
# Validate and parse URL
|
190
|
-
parsed = urlparse(url)
|
191
|
-
if not parsed.scheme or not parsed.netloc:
|
192
|
-
logger.warning("[font] Invalid URL: %s", url)
|
193
|
-
return None
|
194
|
-
|
195
|
-
# Determine filename
|
196
|
-
filename = Path(unquote(parsed.path)).name
|
197
|
-
if not filename:
|
198
|
-
logger.warning("[font] Could not extract filename from URL: %s", url)
|
199
|
-
return None
|
200
|
-
|
201
|
-
# Resolve save path
|
202
|
-
target_folder = Path(target_folder)
|
203
|
-
target_folder.mkdir(parents=True, exist_ok=True)
|
204
|
-
font_path = target_folder / filename
|
205
|
-
|
206
|
-
# If skip and file exists -> return immediately
|
207
|
-
if on_exist == "skip" and font_path.exists():
|
208
|
-
logger.debug("[font] File exists, skipping download: %s", font_path)
|
209
|
-
return font_path
|
210
|
-
|
211
|
-
# Retry download with exponential backoff
|
212
|
-
response = http_get_with_retry(
|
119
|
+
save_path = _build_filepath(
|
120
|
+
folder,
|
213
121
|
url,
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
headers=DEFAULT_HEADERS,
|
218
|
-
stream=True,
|
122
|
+
filename,
|
123
|
+
default_suffix,
|
124
|
+
on_exist,
|
219
125
|
)
|
220
126
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
font_path = _get_non_conflicting_path(font_path)
|
225
|
-
|
226
|
-
with open(font_path, "wb") as f:
|
227
|
-
for chunk in response.iter_content(chunk_size=_DEFAULT_CHUNK_SIZE):
|
228
|
-
if chunk:
|
229
|
-
f.write(chunk)
|
230
|
-
|
231
|
-
logger.debug("[font] Font saved to: %s", font_path)
|
232
|
-
return font_path
|
233
|
-
|
234
|
-
except Exception as e:
|
235
|
-
logger.error("[font] Error writing font to disk: %s", e)
|
236
|
-
|
237
|
-
return None
|
238
|
-
|
239
|
-
|
240
|
-
def download_js_file(
|
241
|
-
url: str,
|
242
|
-
target_folder: str | Path,
|
243
|
-
*,
|
244
|
-
timeout: int = 10,
|
245
|
-
retries: int = 3,
|
246
|
-
backoff: float = 0.5,
|
247
|
-
on_exist: Literal["overwrite", "skip", "rename"] = "skip",
|
248
|
-
) -> Path | None:
|
249
|
-
"""
|
250
|
-
Download a JavaScript (.js) file from a URL and save it locally.
|
251
|
-
|
252
|
-
:param url: Fully-qualified JS file URL.
|
253
|
-
:param target_folder: Local folder to save the JS file.
|
254
|
-
:param timeout: Timeout for each request (in seconds).
|
255
|
-
:param retries: Number of retry attempts.
|
256
|
-
:param backoff: Base backoff time between retries (in seconds).
|
257
|
-
:param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
|
258
|
-
:return: Path to the saved JS file, or None if failed.
|
259
|
-
"""
|
260
|
-
parsed = urlparse(url)
|
261
|
-
if not parsed.scheme or not parsed.netloc:
|
262
|
-
logger.warning("[js] Invalid URL: %s", url)
|
263
|
-
return None
|
264
|
-
|
265
|
-
# Determine filename
|
266
|
-
filename = Path(unquote(parsed.path)).name
|
267
|
-
if not filename.endswith(".js"):
|
268
|
-
filename += ".js"
|
269
|
-
|
270
|
-
target_folder = Path(target_folder)
|
271
|
-
target_folder.mkdir(parents=True, exist_ok=True)
|
272
|
-
save_path = target_folder / filename
|
273
|
-
|
274
|
-
if on_exist == "skip" and save_path.exists():
|
275
|
-
logger.debug("[js] File exists, skipping download: %s", save_path)
|
127
|
+
# Handle existing file
|
128
|
+
if save_path.exists() and on_exist == "skip":
|
129
|
+
logger.debug("Skipping download; file exists: %s", save_path)
|
276
130
|
return save_path
|
277
131
|
|
278
|
-
|
279
|
-
url,
|
280
|
-
retries=retries,
|
281
|
-
timeout=timeout,
|
282
|
-
backoff=backoff,
|
283
|
-
headers=DEFAULT_HEADERS,
|
284
|
-
stream=False,
|
285
|
-
)
|
286
|
-
|
287
|
-
if response and response.ok:
|
288
|
-
content = response.content
|
289
|
-
|
290
|
-
if on_exist == "rename":
|
291
|
-
save_path = _get_non_conflicting_path(save_path)
|
292
|
-
|
132
|
+
with _make_session(retries, backoff, headers) as session:
|
293
133
|
try:
|
294
|
-
|
295
|
-
|
296
|
-
return save_path
|
134
|
+
resp = session.get(url, timeout=timeout, stream=stream)
|
135
|
+
resp.raise_for_status()
|
297
136
|
except Exception as e:
|
298
|
-
logger.
|
299
|
-
|
137
|
+
logger.warning("[download] request failed: %s", e)
|
138
|
+
return None
|
139
|
+
|
140
|
+
# Write to disk
|
141
|
+
if stream:
|
142
|
+
try:
|
143
|
+
with open(save_path, "wb") as f:
|
144
|
+
for chunk in resp.iter_content(chunk_size=chunk_size):
|
145
|
+
if chunk:
|
146
|
+
f.write(chunk)
|
147
|
+
return save_path
|
148
|
+
except Exception as e:
|
149
|
+
logger.warning("[download] write failed: %s", e)
|
150
|
+
save_path.unlink(missing_ok=True)
|
151
|
+
return None
|
152
|
+
else:
|
153
|
+
return _write_file(
|
154
|
+
content=resp.content,
|
155
|
+
filepath=save_path,
|
156
|
+
write_mode="wb",
|
157
|
+
on_exist=on_exist,
|
158
|
+
)
|
300
159
|
return None
|
novel_downloader/utils/state.py
CHANGED
@@ -6,28 +6,29 @@ novel_downloader.utils.text_utils
|
|
6
6
|
Utility modules for text formatting, font mapping, cleaning, and diff display.
|
7
7
|
|
8
8
|
Submodules:
|
9
|
-
- font_mapping: Replace obfuscated characters using font maps
|
10
|
-
- chapter_formatting: Build structured chapter strings from raw content
|
11
|
-
- text_cleaning: Remove promo text and check for spam lines
|
12
9
|
- diff_display: Generate inline diffs with aligned character markers
|
10
|
+
- numeric_conversion: Convert between Chinese and Arabic numerals
|
11
|
+
- text_cleaner: Text cleaning and normalization utilities
|
12
|
+
- truncate_utils: Text truncation and content prefix generation
|
13
13
|
"""
|
14
14
|
|
15
|
-
from .chapter_formatting import format_chapter
|
16
|
-
from .diff_display import diff_inline_display
|
17
|
-
from .font_mapping import apply_font_mapping
|
18
|
-
from .text_cleaning import (
|
19
|
-
clean_chapter_title,
|
20
|
-
content_prefix,
|
21
|
-
is_promotional_line,
|
22
|
-
truncate_half_lines,
|
23
|
-
)
|
24
|
-
|
25
15
|
__all__ = [
|
26
|
-
"
|
27
|
-
"
|
28
|
-
"clean_chapter_title",
|
29
|
-
"is_promotional_line",
|
16
|
+
"TextCleaner",
|
17
|
+
"get_cleaner",
|
30
18
|
"content_prefix",
|
31
19
|
"truncate_half_lines",
|
20
|
+
"chinese_to_arabic",
|
21
|
+
"arabic_to_chinese",
|
32
22
|
"diff_inline_display",
|
33
23
|
]
|
24
|
+
|
25
|
+
from .diff_display import diff_inline_display
|
26
|
+
from .numeric_conversion import (
|
27
|
+
arabic_to_chinese,
|
28
|
+
chinese_to_arabic,
|
29
|
+
)
|
30
|
+
from .text_cleaner import TextCleaner, get_cleaner
|
31
|
+
from .truncate_utils import (
|
32
|
+
content_prefix,
|
33
|
+
truncate_half_lines,
|
34
|
+
)
|
@@ -6,6 +6,10 @@ novel_downloader.utils.text_utils.diff_display
|
|
6
6
|
Generate inline character-level diff between two strings with visual markers.
|
7
7
|
"""
|
8
8
|
|
9
|
+
__all__ = [
|
10
|
+
"diff_inline_display",
|
11
|
+
]
|
12
|
+
|
9
13
|
import difflib
|
10
14
|
import unicodedata
|
11
15
|
|
@@ -67,8 +71,3 @@ def diff_inline_display(old_str: str, new_str: str) -> str:
|
|
67
71
|
marker_s2 += "".join(_char_width_space(c, mark_1, mark_2) for c in s2_seg)
|
68
72
|
output_str = f"-{s1}\n {marker_s1}\n+{s2}\n {marker_s2}"
|
69
73
|
return output_str
|
70
|
-
|
71
|
-
|
72
|
-
__all__ = [
|
73
|
-
"diff_inline_display",
|
74
|
-
]
|