novel-downloader 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -39
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
- novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
- novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +11 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +2 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +69 -35
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -210
- novel_downloader/core/downloaders/common/common_sync.py +0 -202
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.3.dist-info/RECORD +0 -166
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.utils.cookies
|
4
|
+
------------------------------
|
5
|
+
|
6
|
+
Utility for normalizing cookie input from user configuration.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import json
|
10
|
+
from collections.abc import Mapping
|
11
|
+
from email.utils import parsedate_to_datetime
|
12
|
+
from http.cookies import SimpleCookie
|
13
|
+
from pathlib import Path
|
14
|
+
|
15
|
+
|
16
|
+
def resolve_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
|
17
|
+
"""
|
18
|
+
Parse cookies from a string or dictionary into a standard dictionary.
|
19
|
+
|
20
|
+
Supports input like:
|
21
|
+
- "key1=value1; key2=value2"
|
22
|
+
- {"key1": "value1", "key2": "value2"}
|
23
|
+
|
24
|
+
:param cookies: Cookie string or dict-like object (e.g., from config)
|
25
|
+
:return: A normalized cookie dictionary (key -> value)
|
26
|
+
:raises TypeError: If the input is neither string nor dict-like
|
27
|
+
"""
|
28
|
+
if isinstance(cookies, str):
|
29
|
+
filtered = "; ".join(pair for pair in cookies.split(";") if "=" in pair)
|
30
|
+
parsed = SimpleCookie()
|
31
|
+
parsed.load(filtered)
|
32
|
+
return {k: v.value for k, v in parsed.items()}
|
33
|
+
elif isinstance(cookies, Mapping):
|
34
|
+
return {str(k).strip(): str(v).strip() for k, v in cookies.items()}
|
35
|
+
raise TypeError("Unsupported cookie format: must be str or dict-like")
|
36
|
+
|
37
|
+
|
38
|
+
def parse_cookie_expires(value: str | None) -> int:
|
39
|
+
if not value:
|
40
|
+
return -1
|
41
|
+
try:
|
42
|
+
return int(value)
|
43
|
+
except (ValueError, TypeError):
|
44
|
+
try:
|
45
|
+
dt = parsedate_to_datetime(value)
|
46
|
+
return int(dt.timestamp())
|
47
|
+
except Exception:
|
48
|
+
return -1
|
49
|
+
|
50
|
+
|
51
|
+
def find_cookie_value(state_files: list[Path], key: str) -> str:
    """
    Return the value of the first cookie named ``key`` found in the files.

    Each file is read as UTF-8 JSON and is expected to hold a top-level
    object with a ``"cookies"`` list whose entries are objects carrying
    ``"name"`` and ``"value"``. Files are scanned in the given order. A file
    that is missing, unreadable, not valid JSON, or not of that shape is
    skipped, as is any entry whose value is not a string.

    :param state_files: Candidate state files, in lookup priority order.
    :param key: Cookie name to search for.
    :return: The matching cookie value, or "" if none of the files has one.
    """
    for state_file in state_files:
        try:
            with state_file.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # Covers missing/unreadable files, bad encoding and invalid JSON.
            continue

        # Valid JSON can still have the wrong shape (a list at top level,
        # "cookies": null, ...); skip such files instead of raising.
        cookies = data.get("cookies") if isinstance(data, dict) else None
        if not isinstance(cookies, list):
            continue

        for cookie in cookies:
            if not isinstance(cookie, dict) or cookie.get("name") != key:
                continue
            value = cookie.get("value")
            if isinstance(value, str):
                return value
    return ""
|
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env python3
|
1
2
|
"""
|
2
3
|
novel_downloader.utils.crypto_utils
|
3
4
|
-----------------------------------
|
@@ -8,11 +9,6 @@ Generic cryptographic utilities
|
|
8
9
|
from __future__ import annotations
|
9
10
|
|
10
11
|
import base64
|
11
|
-
import hashlib
|
12
|
-
import json
|
13
|
-
import random
|
14
|
-
import time
|
15
|
-
from typing import Any
|
16
12
|
|
17
13
|
|
18
14
|
def rc4_crypt(
|
@@ -73,72 +69,3 @@ def rc4_crypt(
|
|
73
69
|
return plain_bytes.decode(encoding, errors="replace")
|
74
70
|
|
75
71
|
raise ValueError("Mode must be 'encrypt' or 'decrypt'.")
|
76
|
-
|
77
|
-
|
78
|
-
def _get_key() -> str:
|
79
|
-
encoded = "Lj1qYxMuaXBjMg=="
|
80
|
-
decoded = base64.b64decode(encoded)
|
81
|
-
key = "".join([chr(b ^ 0x5A) for b in decoded])
|
82
|
-
return key
|
83
|
-
|
84
|
-
|
85
|
-
def _d(b64str: str) -> str:
|
86
|
-
return base64.b64decode(b64str).decode()
|
87
|
-
|
88
|
-
|
89
|
-
def patch_qd_payload_token(
|
90
|
-
enc_token: str,
|
91
|
-
new_uri: str,
|
92
|
-
*,
|
93
|
-
key: str = "",
|
94
|
-
) -> str:
|
95
|
-
"""
|
96
|
-
Patch a timestamp-bearing token with fresh timing and checksum info.
|
97
|
-
|
98
|
-
:param enc_token: Encrypted token string from a live request.
|
99
|
-
:type enc_token: str
|
100
|
-
:param new_uri: URI used in checksum generation.
|
101
|
-
:type new_uri: str
|
102
|
-
:param key: RC4 key extracted from front-end JavaScript (optional).
|
103
|
-
:type key: str, optional
|
104
|
-
|
105
|
-
:return: Updated token with new timing and checksum values.
|
106
|
-
:rtype: str
|
107
|
-
"""
|
108
|
-
if not key:
|
109
|
-
key = _get_key()
|
110
|
-
|
111
|
-
# Step 1 - decrypt --------------------------------------------------
|
112
|
-
decrypted_json: str = rc4_crypt(key, enc_token, mode="decrypt")
|
113
|
-
payload: dict[str, Any] = json.loads(decrypted_json)
|
114
|
-
|
115
|
-
# Step 2 - rebuild timing fields -----------------------------------
|
116
|
-
loadts = int(time.time() * 1000) # ms since epoch
|
117
|
-
# Simulate the JS duration: N(600, 150) pushed into [300, 1000]
|
118
|
-
duration = max(300, min(1000, int(random.normalvariate(600, 150))))
|
119
|
-
timestamp = loadts + duration
|
120
|
-
|
121
|
-
# Step 3 - recalculate ------------------------------------
|
122
|
-
fp_key = _d("ZmluZ2VycHJpbnQ=")
|
123
|
-
ab_key = _d("YWJub3JtYWw=")
|
124
|
-
ck_key = _d("Y2hlY2tzdW0=")
|
125
|
-
lt_key = _d("bG9hZHRz")
|
126
|
-
ts_key = _d("dGltZXN0YW1w")
|
127
|
-
|
128
|
-
fp_val = payload.get(fp_key, "")
|
129
|
-
ab_val = payload.get(ab_key, "0" * 32)
|
130
|
-
comb = f"{new_uri}{loadts}{fp_val}"
|
131
|
-
ck_val = hashlib.md5(comb.encode("utf-8")).hexdigest()
|
132
|
-
|
133
|
-
new_payload = {
|
134
|
-
lt_key: loadts,
|
135
|
-
ts_key: timestamp,
|
136
|
-
fp_key: fp_val,
|
137
|
-
ab_key: ab_val,
|
138
|
-
ck_key: ck_val,
|
139
|
-
}
|
140
|
-
|
141
|
-
# Step 4 - encrypt and return --------------------------------------
|
142
|
-
return rc4_crypt(
|
143
|
-
key, json.dumps(new_payload, separators=(",", ":")), mode="encrypt"
|
144
|
-
)
|
@@ -18,6 +18,7 @@ import paddle
|
|
18
18
|
from fontTools.ttLib import TTFont
|
19
19
|
from paddleocr import PaddleOCR
|
20
20
|
from PIL import Image, ImageDraw, ImageFont
|
21
|
+
from PIL.Image import Transpose
|
21
22
|
|
22
23
|
from novel_downloader.utils.constants import (
|
23
24
|
REC_CHAR_MODEL_FILES,
|
@@ -142,7 +143,7 @@ class FontOCRV1:
|
|
142
143
|
y = (size - h) // 2 - bbox[1]
|
143
144
|
draw.text((x, y), char, fill=0, font=render_font)
|
144
145
|
if is_reflect:
|
145
|
-
img = img.transpose(
|
146
|
+
img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
|
146
147
|
|
147
148
|
img_np = np.array(img)
|
148
149
|
if np.unique(img_np).size == 1:
|
@@ -570,7 +570,7 @@ class FontOCRV2:
|
|
570
570
|
else:
|
571
571
|
ocr_fallback = raw_ocr
|
572
572
|
|
573
|
-
# Vec
|
573
|
+
# Vec-embedding scores
|
574
574
|
raw_vec: list[tuple[str, float]] | list[list[tuple[str, float]]] = (
|
575
575
|
self.match_text_by_embedding(fallback_imgs, top_k=top_k)
|
576
576
|
if (self.use_vec and fallback_imgs)
|
@@ -624,7 +624,7 @@ class FontOCRV2:
|
|
624
624
|
else:
|
625
625
|
fused_batch.append(next(fallback_iter))
|
626
626
|
|
627
|
-
# Unwrap single
|
627
|
+
# Unwrap single-image case
|
628
628
|
return fused_batch[0] if single else fused_batch
|
629
629
|
|
630
630
|
def _chunked(self, seq: list[T], size: int) -> Generator[list[T], None, None]:
|
@@ -13,7 +13,6 @@ import logging
|
|
13
13
|
from collections.abc import Callable
|
14
14
|
from pathlib import Path
|
15
15
|
|
16
|
-
import numpy as np
|
17
16
|
from PIL import Image
|
18
17
|
|
19
18
|
from .constants import HASH_STORE_FILE
|
@@ -100,25 +99,21 @@ class ImageHashStore:
|
|
100
99
|
"""Load store from disk and rebuild BK-Tree index."""
|
101
100
|
if not self._path.exists():
|
102
101
|
self._hash.clear()
|
103
|
-
logger.
|
102
|
+
logger.debug(
|
104
103
|
"[ImageHashStore] No file found at %s, starting empty.", self._path
|
105
104
|
)
|
106
105
|
return
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
else:
|
112
|
-
txt = self._path.read_text(encoding="utf-8")
|
113
|
-
obj = json.loads(txt) or {}
|
114
|
-
self._hash = {lbl: set(obj.get(lbl, [])) for lbl in obj}
|
106
|
+
|
107
|
+
txt = self._path.read_text(encoding="utf-8")
|
108
|
+
obj = json.loads(txt) or {}
|
109
|
+
self._hash = {lbl: set(obj.get(lbl, [])) for lbl in obj}
|
115
110
|
|
116
111
|
# rebuild reverse map and BK-Tree
|
117
112
|
self._hash_to_labels.clear()
|
118
113
|
for lbl, hs in self._hash.items():
|
119
114
|
for h in hs:
|
120
115
|
self._hash_to_labels.setdefault(h, []).append(lbl)
|
121
|
-
logger.
|
116
|
+
logger.debug(
|
122
117
|
"[ImageHashStore] Loaded hash store from %s with %d hashes",
|
123
118
|
self._path,
|
124
119
|
sum(len(v) for v in self._hash.values()),
|
@@ -134,7 +129,7 @@ class ImageHashStore:
|
|
134
129
|
self._bk_root = _BKNode(h)
|
135
130
|
else:
|
136
131
|
self._bk_root.add(h, self._hd)
|
137
|
-
logger.
|
132
|
+
logger.debug(
|
138
133
|
"[ImageHashStore] BK-tree index built with %d unique hashes",
|
139
134
|
len(self._hash_to_labels),
|
140
135
|
)
|
@@ -143,12 +138,9 @@ class ImageHashStore:
|
|
143
138
|
"""Persist current store to disk."""
|
144
139
|
self._path.parent.mkdir(parents=True, exist_ok=True)
|
145
140
|
data = {lbl: list(s) for lbl, s in self._hash.items()}
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
txt = json.dumps(data, ensure_ascii=False, indent=2)
|
150
|
-
self._path.write_text(txt, encoding="utf-8")
|
151
|
-
logger.info("[ImageHashStore] Saved hash store to %s", self._path)
|
141
|
+
txt = json.dumps(data, ensure_ascii=False, indent=2)
|
142
|
+
self._path.write_text(txt, encoding="utf-8")
|
143
|
+
logger.debug("[ImageHashStore] Saved hash store to %s", self._path)
|
152
144
|
|
153
145
|
def _maybe_save(self) -> None:
|
154
146
|
if self._auto:
|
@@ -16,6 +16,7 @@ Provides:
|
|
16
16
|
"""
|
17
17
|
|
18
18
|
import numpy as np
|
19
|
+
from numpy.typing import NDArray
|
19
20
|
from PIL import Image
|
20
21
|
from scipy.fft import dct as dct_1d
|
21
22
|
|
@@ -24,7 +25,7 @@ HASH_SIZE = 10 # default is 8
|
|
24
25
|
HASH_DISTANCE_THRESHOLD = 5
|
25
26
|
|
26
27
|
|
27
|
-
def hash_to_int(hash_array: np.
|
28
|
+
def hash_to_int(hash_array: NDArray[np.bool_]) -> int:
|
28
29
|
"""
|
29
30
|
Convert a boolean hash array to an integer.
|
30
31
|
|
@@ -60,7 +61,7 @@ def fast_hamming_distance(hash_1: int, hash_2: int) -> int:
|
|
60
61
|
return count
|
61
62
|
|
62
63
|
|
63
|
-
def _threshold_and_pack(dct_low: np.
|
64
|
+
def _threshold_and_pack(dct_low: NDArray[np.float64]) -> int:
|
64
65
|
"""
|
65
66
|
Convert a low-frequency DCT matrix into a binary hash.
|
66
67
|
|
novel_downloader/utils/logger.py
CHANGED
@@ -11,11 +11,10 @@ import logging
|
|
11
11
|
from datetime import datetime
|
12
12
|
from logging.handlers import TimedRotatingFileHandler
|
13
13
|
from pathlib import Path
|
14
|
-
from typing import Literal
|
15
14
|
|
16
|
-
from .
|
15
|
+
from novel_downloader.models import LogLevel
|
17
16
|
|
18
|
-
|
17
|
+
from .constants import LOGGER_DIR, LOGGER_NAME
|
19
18
|
|
20
19
|
LOG_LEVELS: dict[LogLevel, int] = {
|
21
20
|
"DEBUG": logging.DEBUG,
|
@@ -92,6 +92,7 @@ def download_image(
|
|
92
92
|
timeout: int = 10,
|
93
93
|
retries: int = 3,
|
94
94
|
backoff: float = 0.5,
|
95
|
+
headers: dict[str, str] | None = None,
|
95
96
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
96
97
|
) -> Path | None:
|
97
98
|
"""
|
@@ -139,7 +140,7 @@ def download_image(
|
|
139
140
|
retries=retries,
|
140
141
|
timeout=timeout,
|
141
142
|
backoff=backoff,
|
142
|
-
headers=DEFAULT_HEADERS,
|
143
|
+
headers=headers or DEFAULT_HEADERS,
|
143
144
|
stream=False,
|
144
145
|
)
|
145
146
|
|
@@ -6,13 +6,17 @@ novel_downloader.utils.text_utils.chapter_formatting
|
|
6
6
|
Format chapter content with title, paragraph blocks, and optional author notes.
|
7
7
|
"""
|
8
8
|
|
9
|
+
import re
|
10
|
+
|
11
|
+
_IMG_TAG_RE = re.compile(r"<img[^>]*>")
|
12
|
+
|
9
13
|
|
10
14
|
def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -> str:
|
11
15
|
"""
|
12
16
|
Build a formatted chapter string with title, paragraphs, and optional author note.
|
13
17
|
|
14
18
|
:param title: The chapter title.
|
15
|
-
:param paragraphs: Raw multi
|
19
|
+
:param paragraphs: Raw multi-line string; lines are treated as paragraphs.
|
16
20
|
:param author_say: Optional author comment to append at the end.
|
17
21
|
:return: A single string where title, paragraphs, and author note
|
18
22
|
are separated by blank lines.
|
@@ -20,6 +24,7 @@ def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -
|
|
20
24
|
parts: list[str] = [title.strip()]
|
21
25
|
|
22
26
|
# add each nonempty paragraph line
|
27
|
+
paragraphs = _IMG_TAG_RE.sub("", paragraphs)
|
23
28
|
for ln in paragraphs.splitlines():
|
24
29
|
line = ln.strip()
|
25
30
|
if line:
|
@@ -18,7 +18,7 @@ def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
|
|
18
18
|
|
19
19
|
:param text: The input string, possibly containing obfuscated font chars.
|
20
20
|
:param font_map: A dict mapping obfuscated chars to real chars.
|
21
|
-
:return: The de
|
21
|
+
:return: The de-obfuscated text.
|
22
22
|
"""
|
23
23
|
return "".join(font_map.get(ch, ch) for ch in text)
|
24
24
|
|
@@ -37,7 +37,7 @@ def clean_chapter_title(title: str) -> str:
|
|
37
37
|
|
38
38
|
def is_promotional_line(line: str) -> bool:
|
39
39
|
"""
|
40
|
-
Check if a line of text likely contains promotional or ad
|
40
|
+
Check if a line of text likely contains promotional or ad-like content.
|
41
41
|
|
42
42
|
:param line: A single line of text.
|
43
43
|
:return: True if it contains promo keywords or a '###k' vote count pattern.
|
@@ -106,9 +106,9 @@ def calculate_time_difference(
|
|
106
106
|
"""
|
107
107
|
Calculate the difference between two datetime values.
|
108
108
|
|
109
|
-
:param from_time_str: Date
|
109
|
+
:param from_time_str: Date-time string "YYYY-MM-DD HH:MM:SS" for the start.
|
110
110
|
:param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
|
111
|
-
:param to_time_str: Optional date
|
111
|
+
:param to_time_str: Optional date-time string for the end; if None, uses now().
|
112
112
|
:param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.
|
113
113
|
:return: Tuple (days, hours, minutes, seconds).
|
114
114
|
"""
|
@@ -138,7 +138,7 @@ def calculate_time_difference(
|
|
138
138
|
|
139
139
|
except Exception as e:
|
140
140
|
logger.warning("[time] Failed to calculate time difference: %s", e)
|
141
|
-
return
|
141
|
+
return 0, 0, 0, 0
|
142
142
|
|
143
143
|
|
144
144
|
__all__ = [
|
@@ -82,7 +82,7 @@ async def async_sleep_with_random_delay(
|
|
82
82
|
:param mul_spread: Maximum multiplier factor for base; drawn from [1.0, mul_spread].
|
83
83
|
:param max_sleep: Optional upper limit for the final sleep duration.
|
84
84
|
"""
|
85
|
-
if base < 0 or add_spread < 0 or mul_spread < 0:
|
85
|
+
if base < 0 or add_spread < 0 or mul_spread < 1.0:
|
86
86
|
logger.warning(
|
87
87
|
"[async sleep] Invalid parameters: base=%s, add_spread=%s, mul_spread=%s",
|
88
88
|
base,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: novel-downloader
|
3
|
-
Version: 1.3.3
|
3
|
+
Version: 1.4.0
|
4
4
|
Summary: A command-line tool for downloading Chinese web novels from Qidian and similar platforms.
|
5
5
|
Author-email: Saudade Z <saudadez217@gmail.com>
|
6
6
|
License: MIT License
|
@@ -34,19 +34,18 @@ Classifier: License :: OSI Approved :: MIT License
|
|
34
34
|
Classifier: Natural Language :: Chinese (Simplified)
|
35
35
|
Classifier: Topic :: Utilities
|
36
36
|
Classifier: Programming Language :: Python :: 3
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
37
38
|
Classifier: Programming Language :: Python :: 3.12
|
38
39
|
Classifier: Programming Language :: Python :: 3.13
|
39
|
-
Requires-Python: >=3.
|
40
|
+
Requires-Python: >=3.11
|
40
41
|
Description-Content-Type: text/markdown
|
41
42
|
License-File: LICENSE
|
43
|
+
Requires-Dist: textual
|
42
44
|
Requires-Dist: requests
|
43
45
|
Requires-Dist: aiohttp
|
44
|
-
Requires-Dist:
|
45
|
-
Requires-Dist: DrissionPage
|
46
|
-
Requires-Dist: opencv-python
|
46
|
+
Requires-Dist: playwright
|
47
47
|
Requires-Dist: lxml
|
48
48
|
Requires-Dist: platformdirs
|
49
|
-
Requires-Dist: click
|
50
49
|
Requires-Dist: ebooklib
|
51
50
|
Provides-Extra: dev
|
52
51
|
Requires-Dist: black; extra == "dev"
|
@@ -55,6 +54,8 @@ Requires-Dist: ruff; extra == "dev"
|
|
55
54
|
Requires-Dist: pytest; extra == "dev"
|
56
55
|
Requires-Dist: pytest-cov; extra == "dev"
|
57
56
|
Requires-Dist: pytest-mock; extra == "dev"
|
57
|
+
Requires-Dist: types-requests; extra == "dev"
|
58
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
58
59
|
Requires-Dist: pre-commit; extra == "dev"
|
59
60
|
Requires-Dist: commitizen; extra == "dev"
|
60
61
|
Provides-Extra: font-recovery
|
@@ -63,32 +64,19 @@ Requires-Dist: numpy; extra == "font-recovery"
|
|
63
64
|
Requires-Dist: tinycss2; extra == "font-recovery"
|
64
65
|
Requires-Dist: fonttools; extra == "font-recovery"
|
65
66
|
Requires-Dist: brotli; extra == "font-recovery"
|
67
|
+
Requires-Dist: opencv-python; extra == "font-recovery"
|
66
68
|
Requires-Dist: pillow; extra == "font-recovery"
|
67
69
|
Requires-Dist: huggingface_hub; extra == "font-recovery"
|
68
70
|
Dynamic: license-file
|
69
71
|
|
70
72
|
# novel-downloader
|
71
73
|
|
72
|
-
一个基于 [
|
74
|
+
一个基于 [playwright](https://playwright.dev/) 和 [aiohttp](https://github.com/aio-libs/aiohttp) 的小说下载工具/库。
|
73
75
|
|
74
|
-
|
75
|
-
|
76
|
-
## 项目简介
|
77
|
-
|
78
|
-
**novel-downloader** 支持多种小说网站的章节抓取与合并导出,
|
79
|
-
- **轻量化抓取**: 绝大多数站点仅依赖 `requests` 实现 HTTP 请求, 无需额外浏览器驱动
|
80
|
-
- 对于起点中文网 (Qidian), 可在配置中选择:
|
81
|
-
- `mode: session` : 纯 Requests 模式
|
82
|
-
- `mode: browser` : 基于 `DrissionPage` 驱动 Chrome 的浏览器模式 (可处理更复杂的 JS/加密)。
|
83
|
-
- **自动登录** (可选)
|
84
|
-
- 配置 `login_required: true` 后自动检测并重用历史 Cookie
|
85
|
-
- 首次登录或 Cookie 失效时:
|
86
|
-
- **browser** 模式: 在程序打开的浏览器窗口登录, 登录后回车继续
|
87
|
-
- **session** 模式: 根据提示粘贴浏览器中已登录的 Cookie (参考 [复制 Cookies](https://github.com/BowenZ217/novel-downloader/blob/main/docs/copy-cookies.md))
|
76
|
+
> 本项目开发环境为 Python 3.12, 需确保运行环境为 Python 3.11 及以上版本
|
88
77
|
|
89
78
|
## 功能特性
|
90
79
|
|
91
|
-
- 抓取起点中文网免费及已订阅章节内容
|
92
80
|
- 支持断点续爬, 自动续传未完成任务
|
93
81
|
- 自动整合所有章节并导出为:
|
94
82
|
- TXT
|
@@ -101,25 +89,71 @@ Dynamic: license-file
|
|
101
89
|
|
102
90
|
## 快速开始
|
103
91
|
|
92
|
+
### 安装
|
93
|
+
|
94
|
+
使用 `pip` 安装:
|
95
|
+
|
104
96
|
```bash
|
105
|
-
# 克隆 + 安装
|
106
97
|
pip install novel-downloader
|
98
|
+
```
|
99
|
+
|
100
|
+
如需使用浏览器模式 (即 `mode: browser`), 请确保已安装 Playwright 依赖:
|
107
101
|
|
108
|
-
|
109
|
-
|
102
|
+
```bash
|
103
|
+
playwright install
|
104
|
+
```
|
110
105
|
|
106
|
+
如需启用字体解密功能 (`decode_font`, 用于处理起点中文网对近一个月更新章节所采用的字体混淆技术), 请使用扩展安装方式:
|
107
|
+
|
108
|
+
```bash
|
109
|
+
pip install novel-downloader[font-recovery]
|
110
|
+
```
|
111
|
+
|
112
|
+
---
|
113
|
+
|
114
|
+
### CLI 模式
|
115
|
+
|
116
|
+
```bash
|
111
117
|
# 初始化默认配置 (生成 settings.toml)
|
112
|
-
novel-cli
|
118
|
+
novel-cli config init
|
113
119
|
|
114
120
|
# 编辑 ./settings.toml 完成 site/book_ids 等
|
115
|
-
# 可查看 docs/
|
121
|
+
# 可查看 docs/3-settings-schema.md
|
116
122
|
|
117
|
-
#
|
123
|
+
# 执行下载任务
|
118
124
|
novel-cli download 123456
|
119
125
|
```
|
120
126
|
|
121
|
-
- 详细可见: [支持站点列表](https://github.com/BowenZ217/novel-downloader/blob/main/docs/
|
122
|
-
- 更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/
|
127
|
+
- 详细可见: [支持站点列表](https://github.com/BowenZ217/novel-downloader/blob/main/docs/4-supported-sites.md)
|
128
|
+
- 更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/6-cli-usage-examples.md)
|
129
|
+
|
130
|
+
---
|
131
|
+
|
132
|
+
### TUI 模式 (终端用户界面)
|
133
|
+
|
134
|
+
**注意**: TUI 模式仍在开发中, 目前尚未实现登录和修改设置等功能。建议优先使用稳定的 CLI 模式。
|
135
|
+
|
136
|
+
```bash
|
137
|
+
# 初始化默认配置 (生成 settings.toml)
|
138
|
+
novel-cli config init
|
139
|
+
|
140
|
+
# 编辑 ./settings.toml 修改网络配置
|
141
|
+
# 可查看 docs/3-settings-schema.md
|
142
|
+
|
143
|
+
# 启动 TUI 界面
|
144
|
+
novel-tui
|
145
|
+
```
|
146
|
+
|
147
|
+
- 详细可见: [支持站点列表](https://github.com/BowenZ217/novel-downloader/blob/main/docs/4-supported-sites.md)
|
148
|
+
- 更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-tui-usage-examples.md)
|
149
|
+
|
150
|
+
---
|
151
|
+
|
152
|
+
### GUI 模式 (图形界面)
|
153
|
+
|
154
|
+
尚未实现
|
155
|
+
|
156
|
+
---
|
123
157
|
|
124
158
|
## 从 GitHub 安装 (开发版)
|
125
159
|
|
@@ -139,11 +173,11 @@ pip install .
|
|
139
173
|
|
140
174
|
- [项目简介](#项目简介)
|
141
175
|
- [安装](https://github.com/BowenZ217/novel-downloader/blob/main/docs/1-installation.md)
|
142
|
-
- [
|
143
|
-
- [
|
144
|
-
- [
|
145
|
-
- [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-usage-examples.md)
|
146
|
-
- [
|
176
|
+
- [配置](https://github.com/BowenZ217/novel-downloader/blob/main/docs/2-configuration.md)
|
177
|
+
- [settings.toml 配置说明](https://github.com/BowenZ217/novel-downloader/blob/main/docs/3-settings-schema.md)
|
178
|
+
- [支持站点列表](https://github.com/BowenZ217/novel-downloader/blob/main/docs/4-supported-sites.md)
|
179
|
+
- [TUI 使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-tui-usage-examples.md)
|
180
|
+
- [CLI 使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/6-cli-usage-examples.md)
|
147
181
|
- [复制 Cookies](https://github.com/BowenZ217/novel-downloader/blob/main/docs/copy-cookies.md)
|
148
182
|
- [文件保存](https://github.com/BowenZ217/novel-downloader/blob/main/docs/file-saving.md)
|
149
183
|
- [TODO](https://github.com/BowenZ217/novel-downloader/blob/main/docs/todo.md)
|