novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -44
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
- novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
- novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +15 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/file_utils/io.py +1 -1
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +53 -39
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +3 -3
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -218
- novel_downloader/core/downloaders/common/common_sync.py +0 -210
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/epub_utils/__init__.py +0 -26
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.2.dist-info/RECORD +0 -165
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.utils.cookies
|
4
|
+
------------------------------
|
5
|
+
|
6
|
+
Utility for normalizing cookie input from user configuration.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import json
|
10
|
+
from collections.abc import Mapping
|
11
|
+
from email.utils import parsedate_to_datetime
|
12
|
+
from http.cookies import SimpleCookie
|
13
|
+
from pathlib import Path
|
14
|
+
|
15
|
+
|
16
|
+
def resolve_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
|
17
|
+
"""
|
18
|
+
Parse cookies from a string or dictionary into a standard dictionary.
|
19
|
+
|
20
|
+
Supports input like:
|
21
|
+
- "key1=value1; key2=value2"
|
22
|
+
- {"key1": "value1", "key2": "value2"}
|
23
|
+
|
24
|
+
:param cookies: Cookie string or dict-like object (e.g., from config)
|
25
|
+
:return: A normalized cookie dictionary (key -> value)
|
26
|
+
:raises TypeError: If the input is neither string nor dict-like
|
27
|
+
"""
|
28
|
+
if isinstance(cookies, str):
|
29
|
+
filtered = "; ".join(pair for pair in cookies.split(";") if "=" in pair)
|
30
|
+
parsed = SimpleCookie()
|
31
|
+
parsed.load(filtered)
|
32
|
+
return {k: v.value for k, v in parsed.items()}
|
33
|
+
elif isinstance(cookies, Mapping):
|
34
|
+
return {str(k).strip(): str(v).strip() for k, v in cookies.items()}
|
35
|
+
raise TypeError("Unsupported cookie format: must be str or dict-like")
|
36
|
+
|
37
|
+
|
38
|
+
def parse_cookie_expires(value: str | None) -> int:
|
39
|
+
if not value:
|
40
|
+
return -1
|
41
|
+
try:
|
42
|
+
return int(value)
|
43
|
+
except (ValueError, TypeError):
|
44
|
+
try:
|
45
|
+
dt = parsedate_to_datetime(value)
|
46
|
+
return int(dt.timestamp())
|
47
|
+
except Exception:
|
48
|
+
return -1
|
49
|
+
|
50
|
+
|
51
|
+
def find_cookie_value(state_files: list[Path], key: str) -> str:
    """
    Return the value of the first cookie named ``key`` found in the files.

    Each file is expected to be JSON with a top-level object holding a
    ``"cookies"`` list, whose entries are objects with ``"name"`` and
    ``"value"`` fields (presumably a browser storage-state dump - confirm
    against the code that writes these files).

    Files are searched in the order given. A file that is missing,
    unreadable, not valid JSON, or not shaped as described is skipped, and
    so is any cookie entry that is not an object or whose value is not a
    string.

    :param state_files: Candidate state files, in priority order.
    :param key: Cookie name to look up.
    :return: The cookie value, or ``""`` when no file contains it.
    """
    for state_file in state_files:
        try:
            with state_file.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception:
            # Deliberate best-effort: an unusable file must not abort the search.
            continue

        if not isinstance(data, dict):
            continue  # valid JSON, but not an object (e.g. a list)

        cookies = data.get("cookies")
        if not isinstance(cookies, list):
            continue  # missing, null or otherwise malformed "cookies"

        for cookie in cookies:
            if not isinstance(cookie, dict) or cookie.get("name") != key:
                continue
            value = cookie.get("value")
            if isinstance(value, str):
                return value
    return ""
|
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env python3
|
1
2
|
"""
|
2
3
|
novel_downloader.utils.crypto_utils
|
3
4
|
-----------------------------------
|
@@ -8,11 +9,6 @@ Generic cryptographic utilities
|
|
8
9
|
from __future__ import annotations
|
9
10
|
|
10
11
|
import base64
|
11
|
-
import hashlib
|
12
|
-
import json
|
13
|
-
import random
|
14
|
-
import time
|
15
|
-
from typing import Any
|
16
12
|
|
17
13
|
|
18
14
|
def rc4_crypt(
|
@@ -73,72 +69,3 @@ def rc4_crypt(
|
|
73
69
|
return plain_bytes.decode(encoding, errors="replace")
|
74
70
|
|
75
71
|
raise ValueError("Mode must be 'encrypt' or 'decrypt'.")
|
76
|
-
|
77
|
-
|
78
|
-
def _get_key() -> str:
|
79
|
-
encoded = "Lj1qYxMuaXBjMg=="
|
80
|
-
decoded = base64.b64decode(encoded)
|
81
|
-
key = "".join([chr(b ^ 0x5A) for b in decoded])
|
82
|
-
return key
|
83
|
-
|
84
|
-
|
85
|
-
def _d(b64str: str) -> str:
|
86
|
-
return base64.b64decode(b64str).decode()
|
87
|
-
|
88
|
-
|
89
|
-
def patch_qd_payload_token(
|
90
|
-
enc_token: str,
|
91
|
-
new_uri: str,
|
92
|
-
*,
|
93
|
-
key: str = "",
|
94
|
-
) -> str:
|
95
|
-
"""
|
96
|
-
Patch a timestamp-bearing token with fresh timing and checksum info.
|
97
|
-
|
98
|
-
:param enc_token: Encrypted token string from a live request.
|
99
|
-
:type enc_token: str
|
100
|
-
:param new_uri: URI used in checksum generation.
|
101
|
-
:type new_uri: str
|
102
|
-
:param key: RC4 key extracted from front-end JavaScript (optional).
|
103
|
-
:type key: str, optional
|
104
|
-
|
105
|
-
:return: Updated token with new timing and checksum values.
|
106
|
-
:rtype: str
|
107
|
-
"""
|
108
|
-
if not key:
|
109
|
-
key = _get_key()
|
110
|
-
|
111
|
-
# Step 1 - decrypt --------------------------------------------------
|
112
|
-
decrypted_json: str = rc4_crypt(key, enc_token, mode="decrypt")
|
113
|
-
payload: dict[str, Any] = json.loads(decrypted_json)
|
114
|
-
|
115
|
-
# Step 2 - rebuild timing fields -----------------------------------
|
116
|
-
loadts = int(time.time() * 1000) # ms since epoch
|
117
|
-
# Simulate the JS duration: N(600, 150) pushed into [300, 1000]
|
118
|
-
duration = max(300, min(1000, int(random.normalvariate(600, 150))))
|
119
|
-
timestamp = loadts + duration
|
120
|
-
|
121
|
-
# Step 3 - recalculate ------------------------------------
|
122
|
-
fp_key = _d("ZmluZ2VycHJpbnQ=")
|
123
|
-
ab_key = _d("YWJub3JtYWw=")
|
124
|
-
ck_key = _d("Y2hlY2tzdW0=")
|
125
|
-
lt_key = _d("bG9hZHRz")
|
126
|
-
ts_key = _d("dGltZXN0YW1w")
|
127
|
-
|
128
|
-
fp_val = payload.get(fp_key, "")
|
129
|
-
ab_val = payload.get(ab_key, "0" * 32)
|
130
|
-
comb = f"{new_uri}{loadts}{fp_val}"
|
131
|
-
ck_val = hashlib.md5(comb.encode("utf-8")).hexdigest()
|
132
|
-
|
133
|
-
new_payload = {
|
134
|
-
lt_key: loadts,
|
135
|
-
ts_key: timestamp,
|
136
|
-
fp_key: fp_val,
|
137
|
-
ab_key: ab_val,
|
138
|
-
ck_key: ck_val,
|
139
|
-
}
|
140
|
-
|
141
|
-
# Step 4 - encrypt and return --------------------------------------
|
142
|
-
return rc4_crypt(
|
143
|
-
key, json.dumps(new_payload, separators=(",", ":")), mode="encrypt"
|
144
|
-
)
|
@@ -103,7 +103,7 @@ def _write_file(
|
|
103
103
|
tmp.write(content_to_write)
|
104
104
|
tmp_path = Path(tmp.name)
|
105
105
|
tmp_path.replace(path)
|
106
|
-
logger.
|
106
|
+
logger.debug("[file] '%s' written successfully", path)
|
107
107
|
return True
|
108
108
|
except Exception as exc:
|
109
109
|
logger.warning("[file] Error writing %r: %s", path, exc)
|
@@ -18,6 +18,7 @@ import paddle
|
|
18
18
|
from fontTools.ttLib import TTFont
|
19
19
|
from paddleocr import PaddleOCR
|
20
20
|
from PIL import Image, ImageDraw, ImageFont
|
21
|
+
from PIL.Image import Transpose
|
21
22
|
|
22
23
|
from novel_downloader.utils.constants import (
|
23
24
|
REC_CHAR_MODEL_FILES,
|
@@ -142,7 +143,7 @@ class FontOCRV1:
|
|
142
143
|
y = (size - h) // 2 - bbox[1]
|
143
144
|
draw.text((x, y), char, fill=0, font=render_font)
|
144
145
|
if is_reflect:
|
145
|
-
img = img.transpose(
|
146
|
+
img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
|
146
147
|
|
147
148
|
img_np = np.array(img)
|
148
149
|
if np.unique(img_np).size == 1:
|
@@ -570,7 +570,7 @@ class FontOCRV2:
|
|
570
570
|
else:
|
571
571
|
ocr_fallback = raw_ocr
|
572
572
|
|
573
|
-
# Vec
|
573
|
+
# Vec-embedding scores
|
574
574
|
raw_vec: list[tuple[str, float]] | list[list[tuple[str, float]]] = (
|
575
575
|
self.match_text_by_embedding(fallback_imgs, top_k=top_k)
|
576
576
|
if (self.use_vec and fallback_imgs)
|
@@ -624,7 +624,7 @@ class FontOCRV2:
|
|
624
624
|
else:
|
625
625
|
fused_batch.append(next(fallback_iter))
|
626
626
|
|
627
|
-
# Unwrap single
|
627
|
+
# Unwrap single-image case
|
628
628
|
return fused_batch[0] if single else fused_batch
|
629
629
|
|
630
630
|
def _chunked(self, seq: list[T], size: int) -> Generator[list[T], None, None]:
|
@@ -13,7 +13,6 @@ import logging
|
|
13
13
|
from collections.abc import Callable
|
14
14
|
from pathlib import Path
|
15
15
|
|
16
|
-
import numpy as np
|
17
16
|
from PIL import Image
|
18
17
|
|
19
18
|
from .constants import HASH_STORE_FILE
|
@@ -100,25 +99,21 @@ class ImageHashStore:
|
|
100
99
|
"""Load store from disk and rebuild BK-Tree index."""
|
101
100
|
if not self._path.exists():
|
102
101
|
self._hash.clear()
|
103
|
-
logger.
|
102
|
+
logger.debug(
|
104
103
|
"[ImageHashStore] No file found at %s, starting empty.", self._path
|
105
104
|
)
|
106
105
|
return
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
else:
|
112
|
-
txt = self._path.read_text(encoding="utf-8")
|
113
|
-
obj = json.loads(txt) or {}
|
114
|
-
self._hash = {lbl: set(obj.get(lbl, [])) for lbl in obj}
|
106
|
+
|
107
|
+
txt = self._path.read_text(encoding="utf-8")
|
108
|
+
obj = json.loads(txt) or {}
|
109
|
+
self._hash = {lbl: set(obj.get(lbl, [])) for lbl in obj}
|
115
110
|
|
116
111
|
# rebuild reverse map and BK-Tree
|
117
112
|
self._hash_to_labels.clear()
|
118
113
|
for lbl, hs in self._hash.items():
|
119
114
|
for h in hs:
|
120
115
|
self._hash_to_labels.setdefault(h, []).append(lbl)
|
121
|
-
logger.
|
116
|
+
logger.debug(
|
122
117
|
"[ImageHashStore] Loaded hash store from %s with %d hashes",
|
123
118
|
self._path,
|
124
119
|
sum(len(v) for v in self._hash.values()),
|
@@ -134,7 +129,7 @@ class ImageHashStore:
|
|
134
129
|
self._bk_root = _BKNode(h)
|
135
130
|
else:
|
136
131
|
self._bk_root.add(h, self._hd)
|
137
|
-
logger.
|
132
|
+
logger.debug(
|
138
133
|
"[ImageHashStore] BK-tree index built with %d unique hashes",
|
139
134
|
len(self._hash_to_labels),
|
140
135
|
)
|
@@ -143,12 +138,9 @@ class ImageHashStore:
|
|
143
138
|
"""Persist current store to disk."""
|
144
139
|
self._path.parent.mkdir(parents=True, exist_ok=True)
|
145
140
|
data = {lbl: list(s) for lbl, s in self._hash.items()}
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
txt = json.dumps(data, ensure_ascii=False, indent=2)
|
150
|
-
self._path.write_text(txt, encoding="utf-8")
|
151
|
-
logger.info("[ImageHashStore] Saved hash store to %s", self._path)
|
141
|
+
txt = json.dumps(data, ensure_ascii=False, indent=2)
|
142
|
+
self._path.write_text(txt, encoding="utf-8")
|
143
|
+
logger.debug("[ImageHashStore] Saved hash store to %s", self._path)
|
152
144
|
|
153
145
|
def _maybe_save(self) -> None:
|
154
146
|
if self._auto:
|
@@ -16,6 +16,7 @@ Provides:
|
|
16
16
|
"""
|
17
17
|
|
18
18
|
import numpy as np
|
19
|
+
from numpy.typing import NDArray
|
19
20
|
from PIL import Image
|
20
21
|
from scipy.fft import dct as dct_1d
|
21
22
|
|
@@ -24,7 +25,7 @@ HASH_SIZE = 10 # default is 8
|
|
24
25
|
HASH_DISTANCE_THRESHOLD = 5
|
25
26
|
|
26
27
|
|
27
|
-
def hash_to_int(hash_array: np.
|
28
|
+
def hash_to_int(hash_array: NDArray[np.bool_]) -> int:
|
28
29
|
"""
|
29
30
|
Convert a boolean hash array to an integer.
|
30
31
|
|
@@ -60,7 +61,7 @@ def fast_hamming_distance(hash_1: int, hash_2: int) -> int:
|
|
60
61
|
return count
|
61
62
|
|
62
63
|
|
63
|
-
def _threshold_and_pack(dct_low: np.
|
64
|
+
def _threshold_and_pack(dct_low: NDArray[np.float64]) -> int:
|
64
65
|
"""
|
65
66
|
Convert a low-frequency DCT matrix into a binary hash.
|
66
67
|
|
novel_downloader/utils/logger.py
CHANGED
@@ -11,11 +11,10 @@ import logging
|
|
11
11
|
from datetime import datetime
|
12
12
|
from logging.handlers import TimedRotatingFileHandler
|
13
13
|
from pathlib import Path
|
14
|
-
from typing import Literal
|
15
14
|
|
16
|
-
from .
|
15
|
+
from novel_downloader.models import LogLevel
|
17
16
|
|
18
|
-
|
17
|
+
from .constants import LOGGER_DIR, LOGGER_NAME
|
19
18
|
|
20
19
|
LOG_LEVELS: dict[LogLevel, int] = {
|
21
20
|
"DEBUG": logging.DEBUG,
|
@@ -16,7 +16,7 @@ from urllib.parse import unquote, urlparse
|
|
16
16
|
import requests
|
17
17
|
|
18
18
|
from .constants import DEFAULT_HEADERS, DEFAULT_IMAGE_SUFFIX
|
19
|
-
from .file_utils.io import _get_non_conflicting_path, _write_file
|
19
|
+
from .file_utils.io import _get_non_conflicting_path, _write_file
|
20
20
|
|
21
21
|
logger = logging.getLogger(__name__)
|
22
22
|
|
@@ -84,28 +84,29 @@ def image_url_to_filename(url: str) -> str:
|
|
84
84
|
return filename
|
85
85
|
|
86
86
|
|
87
|
-
def
|
87
|
+
def download_image(
|
88
88
|
url: str,
|
89
89
|
target_folder: str | Path | None = None,
|
90
|
+
target_name: str | None = None,
|
90
91
|
*,
|
91
92
|
timeout: int = 10,
|
92
93
|
retries: int = 3,
|
93
94
|
backoff: float = 0.5,
|
95
|
+
headers: dict[str, str] | None = None,
|
94
96
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
95
|
-
) ->
|
97
|
+
) -> Path | None:
|
96
98
|
"""
|
97
|
-
Download an image from
|
98
|
-
|
99
|
-
If on_exist='skip' and the file already exists, it will be read from disk
|
100
|
-
instead of being downloaded again.
|
99
|
+
Download an image from `url` and save it to `target_folder`, returning the Path.
|
100
|
+
Can override the filename via `target_name`.
|
101
101
|
|
102
102
|
:param url: Image URL. Can start with 'http', '//', or without protocol.
|
103
|
-
:param target_folder:
|
103
|
+
:param target_folder: Directory to save into (defaults to cwd).
|
104
|
+
:param target_name: Optional filename (with or without extension).
|
104
105
|
:param timeout: Request timeout in seconds.
|
105
106
|
:param retries: Number of retry attempts.
|
106
107
|
:param backoff: Base delay between retries (exponential backoff).
|
107
108
|
:param on_exist: What to do if file exists: 'overwrite', 'skip', or 'rename'.
|
108
|
-
:return:
|
109
|
+
:return: Path to the saved image, or `None` on any failure.
|
109
110
|
"""
|
110
111
|
# Normalize URL
|
111
112
|
if url.startswith("//"):
|
@@ -113,42 +114,55 @@ def download_image_as_bytes(
|
|
113
114
|
elif not url.startswith("http"):
|
114
115
|
url = "https://" + url
|
115
116
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
117
|
+
folder = Path(target_folder) if target_folder else Path.cwd()
|
118
|
+
folder.mkdir(parents=True, exist_ok=True)
|
119
|
+
|
120
|
+
if target_name:
|
121
|
+
name = target_name
|
122
|
+
if not Path(name).suffix:
|
123
|
+
# infer ext from URL-derived name
|
124
|
+
name += Path(image_url_to_filename(url)).suffix
|
125
|
+
else:
|
126
|
+
name = image_url_to_filename(url)
|
127
|
+
save_path = folder / name
|
128
|
+
|
129
|
+
# Handle existing file
|
130
|
+
if save_path.exists():
|
131
|
+
if on_exist == "skip":
|
132
|
+
logger.debug("Skipping download; file exists: %s", save_path)
|
133
|
+
return save_path
|
134
|
+
if on_exist == "rename":
|
135
|
+
save_path = _get_non_conflicting_path(save_path)
|
128
136
|
|
129
137
|
# Proceed with download
|
130
|
-
|
138
|
+
resp = http_get_with_retry(
|
131
139
|
url,
|
132
140
|
retries=retries,
|
133
141
|
timeout=timeout,
|
134
142
|
backoff=backoff,
|
135
|
-
headers=DEFAULT_HEADERS,
|
143
|
+
headers=headers or DEFAULT_HEADERS,
|
136
144
|
stream=False,
|
137
145
|
)
|
138
146
|
|
139
|
-
if
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
mode="wb",
|
147
|
-
on_exist=on_exist,
|
148
|
-
)
|
149
|
-
|
150
|
-
return content
|
147
|
+
if not (resp and resp.ok):
|
148
|
+
logger.warning(
|
149
|
+
"Failed to download %s (status=%s)",
|
150
|
+
url,
|
151
|
+
getattr(resp, "status_code", None),
|
152
|
+
)
|
153
|
+
return None
|
151
154
|
|
155
|
+
# Write to disk
|
156
|
+
try:
|
157
|
+
_write_file(
|
158
|
+
content=resp.content,
|
159
|
+
filepath=save_path,
|
160
|
+
mode="wb",
|
161
|
+
on_exist=on_exist,
|
162
|
+
)
|
163
|
+
return save_path
|
164
|
+
except Exception:
|
165
|
+
logger.exception("Error saving image to %s", save_path)
|
152
166
|
return None
|
153
167
|
|
154
168
|
|
@@ -191,7 +205,7 @@ def download_font_file(
|
|
191
205
|
|
192
206
|
# If skip and file exists -> return immediately
|
193
207
|
if on_exist == "skip" and font_path.exists():
|
194
|
-
logger.
|
208
|
+
logger.debug("[font] File exists, skipping download: %s", font_path)
|
195
209
|
return font_path
|
196
210
|
|
197
211
|
# Retry download with exponential backoff
|
@@ -214,7 +228,7 @@ def download_font_file(
|
|
214
228
|
if chunk:
|
215
229
|
f.write(chunk)
|
216
230
|
|
217
|
-
logger.
|
231
|
+
logger.debug("[font] Font saved to: %s", font_path)
|
218
232
|
return font_path
|
219
233
|
|
220
234
|
except Exception as e:
|
@@ -258,7 +272,7 @@ def download_js_file(
|
|
258
272
|
save_path = target_folder / filename
|
259
273
|
|
260
274
|
if on_exist == "skip" and save_path.exists():
|
261
|
-
logger.
|
275
|
+
logger.debug("[js] File exists, skipping download: %s", save_path)
|
262
276
|
return save_path
|
263
277
|
|
264
278
|
response = http_get_with_retry(
|
@@ -278,7 +292,7 @@ def download_js_file(
|
|
278
292
|
|
279
293
|
try:
|
280
294
|
_write_file(content=content, filepath=save_path, mode="wb")
|
281
|
-
logger.
|
295
|
+
logger.debug("[js] JS file saved to: %s", save_path)
|
282
296
|
return save_path
|
283
297
|
except Exception as e:
|
284
298
|
logger.error("[js] Error writing JS to disk: %s", e)
|
@@ -6,13 +6,17 @@ novel_downloader.utils.text_utils.chapter_formatting
|
|
6
6
|
Format chapter content with title, paragraph blocks, and optional author notes.
|
7
7
|
"""
|
8
8
|
|
9
|
+
import re
|
10
|
+
|
11
|
+
_IMG_TAG_RE = re.compile(r"<img[^>]*>")
|
12
|
+
|
9
13
|
|
10
14
|
def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -> str:
|
11
15
|
"""
|
12
16
|
Build a formatted chapter string with title, paragraphs, and optional author note.
|
13
17
|
|
14
18
|
:param title: The chapter title.
|
15
|
-
:param paragraphs: Raw multi
|
19
|
+
:param paragraphs: Raw multi-line string; lines are treated as paragraphs.
|
16
20
|
:param author_say: Optional author comment to append at the end.
|
17
21
|
:return: A single string where title, paragraphs, and author note
|
18
22
|
are separated by blank lines.
|
@@ -20,6 +24,7 @@ def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -
|
|
20
24
|
parts: list[str] = [title.strip()]
|
21
25
|
|
22
26
|
# add each nonempty paragraph line
|
27
|
+
paragraphs = _IMG_TAG_RE.sub("", paragraphs)
|
23
28
|
for ln in paragraphs.splitlines():
|
24
29
|
line = ln.strip()
|
25
30
|
if line:
|
@@ -18,7 +18,7 @@ def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
|
|
18
18
|
|
19
19
|
:param text: The input string, possibly containing obfuscated font chars.
|
20
20
|
:param font_map: A dict mapping obfuscated chars to real chars.
|
21
|
-
:return: The de
|
21
|
+
:return: The de-obfuscated text.
|
22
22
|
"""
|
23
23
|
return "".join(font_map.get(ch, ch) for ch in text)
|
24
24
|
|
@@ -37,7 +37,7 @@ def clean_chapter_title(title: str) -> str:
|
|
37
37
|
|
38
38
|
def is_promotional_line(line: str) -> bool:
|
39
39
|
"""
|
40
|
-
Check if a line of text likely contains promotional or ad
|
40
|
+
Check if a line of text likely contains promotional or ad-like content.
|
41
41
|
|
42
42
|
:param line: A single line of text.
|
43
43
|
:return: True if it contains promo keywords or a '###k' vote count pattern.
|
@@ -106,9 +106,9 @@ def calculate_time_difference(
|
|
106
106
|
"""
|
107
107
|
Calculate the difference between two datetime values.
|
108
108
|
|
109
|
-
:param from_time_str: Date
|
109
|
+
:param from_time_str: Date-time string "YYYY-MM-DD HH:MM:SS" for the start.
|
110
110
|
:param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
|
111
|
-
:param to_time_str: Optional date
|
111
|
+
:param to_time_str: Optional date-time string for the end; if None, uses now().
|
112
112
|
:param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.
|
113
113
|
:return: Tuple (days, hours, minutes, seconds).
|
114
114
|
"""
|
@@ -138,7 +138,7 @@ def calculate_time_difference(
|
|
138
138
|
|
139
139
|
except Exception as e:
|
140
140
|
logger.warning("[time] Failed to calculate time difference: %s", e)
|
141
|
-
return
|
141
|
+
return 0, 0, 0, 0
|
142
142
|
|
143
143
|
|
144
144
|
__all__ = [
|
@@ -56,7 +56,7 @@ def sleep_with_random_delay(
|
|
56
56
|
if max_sleep is not None:
|
57
57
|
duration = min(duration, max_sleep)
|
58
58
|
|
59
|
-
logger.
|
59
|
+
logger.debug("[time] Sleeping for %.2f seconds", duration)
|
60
60
|
time.sleep(duration)
|
61
61
|
return
|
62
62
|
|
@@ -82,7 +82,7 @@ async def async_sleep_with_random_delay(
|
|
82
82
|
:param mul_spread: Maximum multiplier factor for base; drawn from [1.0, mul_spread].
|
83
83
|
:param max_sleep: Optional upper limit for the final sleep duration.
|
84
84
|
"""
|
85
|
-
if base < 0 or add_spread < 0 or mul_spread < 0:
|
85
|
+
if base < 0 or add_spread < 0 or mul_spread < 1.0:
|
86
86
|
logger.warning(
|
87
87
|
"[async sleep] Invalid parameters: base=%s, add_spread=%s, mul_spread=%s",
|
88
88
|
base,
|
@@ -98,7 +98,7 @@ async def async_sleep_with_random_delay(
|
|
98
98
|
if max_sleep is not None:
|
99
99
|
duration = min(duration, max_sleep)
|
100
100
|
|
101
|
-
logger.
|
101
|
+
logger.debug("[async time] Sleeping for %.2f seconds", duration)
|
102
102
|
await asyncio.sleep(duration)
|
103
103
|
|
104
104
|
|