novel-downloader 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +132 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +153 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +173 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +22 -0
- novel_downloader/core/downloaders/base_async_downloader.py +157 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
- novel_downloader/core/downloaders/common_downloader.py +191 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +33 -0
- novel_downloader/core/factory/downloader_factory.py +149 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +106 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +32 -0
- novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +31 -0
- novel_downloader/core/requesters/base_async_session.py +297 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +18 -0
- novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +73 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.0.dist-info/METADATA +157 -0
- novel_downloader-1.1.0.dist-info/RECORD +115 -0
- novel_downloader-1.1.0.dist-info/WHEEL +5 -0
- novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,288 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.utils.hash_store
|
5
|
+
---------------------------------
|
6
|
+
|
7
|
+
Manage a small collection of image perceptual hashes and their labels.
|
8
|
+
Supports loading/saving to .json or .npy, and basic CRUD + search.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import heapq
|
12
|
+
import json
|
13
|
+
import logging
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Callable, Dict, List, Optional, Set, Tuple, Union
|
16
|
+
|
17
|
+
import numpy as np
|
18
|
+
from PIL import Image
|
19
|
+
|
20
|
+
from .constants import HASH_STORE_FILE
|
21
|
+
from .hash_utils import HASH_DISTANCE_THRESHOLD, fast_hamming_distance, phash
|
22
|
+
|
23
|
+
# Module-level logger named after this module; used for all store messages.
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
class _BKNode:
|
27
|
+
"""
|
28
|
+
A node in a Burkhard-Keller tree (BK-Tree) for distance search.
|
29
|
+
Stores one value and a dict of children keyed by distance.
|
30
|
+
"""
|
31
|
+
|
32
|
+
__slots__ = ("value", "children")
|
33
|
+
|
34
|
+
def __init__(self, value: int):
|
35
|
+
self.value = value
|
36
|
+
self.children: Dict[int, _BKNode] = {}
|
37
|
+
|
38
|
+
def add(self, h: int, dist_fn: Callable[[int, int], int]) -> None:
|
39
|
+
d = dist_fn(h, self.value)
|
40
|
+
child = self.children.get(d)
|
41
|
+
if child is not None:
|
42
|
+
child.add(h, dist_fn)
|
43
|
+
else:
|
44
|
+
self.children[d] = _BKNode(h)
|
45
|
+
|
46
|
+
def query(
|
47
|
+
self,
|
48
|
+
target: int,
|
49
|
+
threshold: int,
|
50
|
+
dist_fn: Callable[[int, int], int],
|
51
|
+
) -> List[Tuple[int, int]]:
|
52
|
+
"""
|
53
|
+
Recursively collect (value, dist) pairs within threshold.
|
54
|
+
"""
|
55
|
+
d0 = dist_fn(target, self.value)
|
56
|
+
matches: List[Tuple[int, int]] = []
|
57
|
+
if d0 <= threshold:
|
58
|
+
matches.append((self.value, d0))
|
59
|
+
# Only children whose edge-dist \in [d0-threshold, d0+threshold]
|
60
|
+
lower, upper = d0 - threshold, d0 + threshold
|
61
|
+
for edge, child in self.children.items():
|
62
|
+
if lower <= edge <= upper:
|
63
|
+
matches.extend(child.query(target, threshold, dist_fn))
|
64
|
+
return matches
|
65
|
+
|
66
|
+
|
67
|
+
class ImageHashStore:
    """
    Store and manage image hashes grouped by label, with a BK-Tree index.

    Three structures are kept in sync: ``_hash`` (label -> set of hashes),
    ``_hash_to_labels`` (hash -> labels, for reverse lookup) and a BK-Tree
    over the distinct hashes, which :meth:`query` searches.

    :param path: file path for persistence (".json" or ".npy")
    :param auto_save: if True, every modification automatically calls save()
    :param hash_func: function to compute hash from PIL.Image
    :param ham_dist: function to compute Hamming distance between two hashes
    :param threshold: default Hamming distance cutoff used by query()
    """

    def __init__(
        self,
        path: Union[str, Path] = HASH_STORE_FILE,
        auto_save: bool = False,
        hash_func: Callable[[Image.Image], int] = phash,
        ham_dist: Callable[[int, int], int] = fast_hamming_distance,
        threshold: int = HASH_DISTANCE_THRESHOLD,
    ) -> None:
        self._path = Path(path)
        self._auto = auto_save
        self._hf = hash_func
        self._hd = ham_dist
        self._th = threshold

        # label -> set of hashes
        self._hash: Dict[str, Set[int]] = {}
        # hash -> list of labels (for reverse lookup)
        self._hash_to_labels: Dict[int, List[str]] = {}
        # root of BK-Tree (or None if empty)
        self._bk_root: Optional[_BKNode] = None

        self.load()

    def load(self) -> None:
        """Load store from disk and rebuild the reverse map and BK-Tree index."""
        if not self._path.exists():
            # Reset every structure, not only the label map, so a reload
            # after the file disappeared cannot leave stale query results.
            self._hash.clear()
            self._hash_to_labels.clear()
            self._bk_root = None
            logger.info(
                "[ImageHashStore] No file found at %s, starting empty.", self._path
            )
            return

        if self._path.suffix == ".npy":
            # NOTE(security): allow_pickle=True unpickles the file, which can
            # execute arbitrary code. Only load .npy stores you trust.
            arr = np.load(self._path, allow_pickle=True).item()
            self._hash = {lbl: set(v) for lbl, v in arr.items()}
        else:
            txt = self._path.read_text(encoding="utf-8")
            obj = json.loads(txt) or {}
            self._hash = {lbl: set(hs) for lbl, hs in obj.items()}

        self._rebuild_reverse_map()
        logger.info(
            "[ImageHashStore] Loaded hash store from %s with %d hashes",
            self._path,
            sum(len(v) for v in self._hash.values()),
        )

        self._build_index()

    def _rebuild_reverse_map(self) -> None:
        """Recompute the hash -> labels map from the label -> hashes map."""
        self._hash_to_labels.clear()
        for lbl, hs in self._hash.items():
            for h in hs:
                self._hash_to_labels.setdefault(h, []).append(lbl)

    def _build_index(self) -> None:
        """Construct a BK-Tree over all stored hashes."""
        self._bk_root = None
        for h in self._hash_to_labels:
            if self._bk_root is None:
                self._bk_root = _BKNode(h)
            else:
                self._bk_root.add(h, self._hd)
        logger.info(
            "[ImageHashStore] BK-tree index built with %d unique hashes",
            len(self._hash_to_labels),
        )

    def _unlink(self, h: int, label: str) -> bool:
        """
        Drop ``label`` from the reverse-map entry of hash ``h``.

        :return: True if ``h`` has no labels left and was removed from the map.
        """
        labels = self._hash_to_labels.get(h)
        if labels is None:
            return False
        if label in labels:
            labels.remove(label)
        if labels:
            return False
        del self._hash_to_labels[h]
        return True

    def save(self) -> None:
        """Persist current store to disk (".npy" via numpy, otherwise JSON)."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        data = {lbl: list(s) for lbl, s in self._hash.items()}
        if self._path.suffix == ".npy":
            np.save(self._path, data)
        else:
            txt = json.dumps(data, ensure_ascii=False, indent=2)
            self._path.write_text(txt, encoding="utf-8")
        logger.info("[ImageHashStore] Saved hash store to %s", self._path)

    def _maybe_save(self) -> None:
        """Call save() when auto-save was requested at construction."""
        if self._auto:
            self.save()

    def add_image(self, img_path: Union[str, Path], label: str) -> int:
        """
        Compute hash for the given image and add it under `label`.
        Updates BK-Tree index incrementally.

        :param img_path: path of the image file to hash
        :param label: label to store the hash under
        :return: the computed hash
        """
        # The context manager releases the file handle; convert() returns an
        # independent in-memory image, so it stays usable afterwards.
        with Image.open(img_path) as raw:
            img = raw.convert("L")
        h = self._hf(img)
        self._hash.setdefault(label, set()).add(h)

        is_new_hash = h not in self._hash_to_labels
        labels = self._hash_to_labels.setdefault(h, [])
        if label not in labels:
            # avoid listing the same label twice when an image is re-added
            labels.append(label)

        # insert into BK-Tree (only hashes the tree has not seen yet)
        if self._bk_root is None:
            self._bk_root = _BKNode(h)
        elif is_new_hash:
            self._bk_root.add(h, self._hd)
        logger.debug("[ImageHashStore] Added hash %d under label '%s'", h, label)
        self._maybe_save()
        return h

    def add_from_map(self, map_path: Union[str, Path]) -> None:
        """
        Load a JSON file of the form { "image_path": "label", ... }
        and add each entry.

        Image paths are resolved relative to the directory of the map file.
        An entry that fails to load is logged and skipped.
        """
        map_path = Path(map_path)
        text = map_path.read_text(encoding="utf-8")
        mapping = json.loads(text)
        for rel_img_path, lbl in mapping.items():
            img_path = (map_path.parent / rel_img_path).resolve()
            try:
                self.add_image(img_path, lbl)
            except Exception as e:
                # best-effort import: one bad image must not abort the batch
                logger.warning(
                    "[ImageHashStore] Failed to add image '%s': %s", img_path, str(e)
                )
                continue

    def labels(self) -> List[str]:
        """Return a sorted list of all labels in the store."""
        return sorted(self._hash.keys())

    def hashes(self, label: str) -> Set[int]:
        """Return the set of hashes for a given `label` (empty set if none)."""
        return set(self._hash.get(label, ()))

    def remove_label(self, label: str) -> None:
        """
        Remove all hashes associated with `label`.

        The reverse map is updated as well, and the BK-Tree is rebuilt when a
        hash loses its last label, so query() stops returning the label.
        """
        removed = self._hash.pop(label, None)
        if removed is None:
            return

        needs_reindex = False
        for h in removed:
            if self._unlink(h, label):
                needs_reindex = True
        if needs_reindex:
            self._build_index()

        logger.debug("[ImageHashStore] Removed label '%s'", label)
        self._maybe_save()

    def remove_hash(self, label: str, this: Union[int, str, Path]) -> bool:
        """
        Remove a specific hash under `label`.
        `this` can be:
        - an integer hash
        - a str or Path (image file) -> will compute its hash then remove
        Returns True if something was removed.
        """
        if label not in self._hash:
            return False

        if isinstance(this, (str, Path)):
            try:
                with Image.open(this) as raw:
                    img = raw.convert("L")
                h = self._hf(img)
            except Exception as e:
                logger.warning(
                    "[ImageHashStore] Could not open image '%s': %s", this, str(e)
                )
                return False
        else:
            h = int(this)

        if h not in self._hash[label]:
            return False

        self._hash[label].remove(h)
        # keep reverse map and index consistent with the label map
        if self._unlink(h, label):
            self._build_index()
        logger.debug("[ImageHashStore] Removed hash %d from label '%s'", h, label)
        self._maybe_save()
        return True

    def query(
        self,
        target: Union[int, str, Path, Image.Image],
        k: int = 1,
        threshold: Optional[int] = None,
    ) -> List[Tuple[str, float]]:
        """
        Find up to `k` distinct labels whose stored hashes are most similar
        to `target` within `threshold`. Returns a list of (label, score),
        sorted by descending score. Each label appears at most once.

        The score is ``1 - dist / threshold``: 1.0 for an exact match and 0.0
        for a match exactly at the cutoff.

        :param target: Image path / int hash / PIL.Image
        :param k: number of labels to return (default=1)
        :param threshold: Hamming distance cutoff (default=self._th)
        """
        if threshold is None:
            threshold = self._th

        # compute target hash
        if isinstance(target, Image.Image):
            thash = self._hf(target.convert("L"))
        elif isinstance(target, (str, Path)):
            with Image.open(target) as raw:
                img = raw.convert("L")
            thash = self._hf(img)
        else:
            thash = int(target)

        if self._bk_root is None:
            return []

        # find all (hash,dist) within threshold
        matches = self._bk_root.query(thash, threshold, self._hd)

        # collapse to one best score per label
        best_per_label: Dict[str, float] = {}
        h2l = self._hash_to_labels
        for h, dist in matches:
            # With a zero threshold only exact matches get here; score them
            # 1.0 instead of dividing by zero.
            score = 1.0 - dist / threshold if threshold > 0 else 1.0
            for lbl in h2l.get(h, ()):
                prev = best_per_label.get(lbl)
                if prev is None or score > prev:
                    best_per_label[lbl] = score

        # Highest score = most similar, so take the k largest (descending).
        return heapq.nlargest(k, best_per_label.items(), key=lambda x: x[1])
|
286
|
+
|
287
|
+
|
288
|
+
# Shared module-level store built with the default arguments. The constructor
# calls load(), so the persisted hash file (if any) is read at import time.
img_hash_store = ImageHashStore()
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.utils.hash_utils
|
5
|
+
---------------------------------
|
6
|
+
|
7
|
+
Utilities for image perceptual hashing and comparison.
|
8
|
+
|
9
|
+
Implements a perceptual hash (pHash) based on DCT, following the method
|
10
|
+
described in:
|
11
|
+
https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
|
12
|
+
|
13
|
+
Provides:
|
14
|
+
- pHash computation via DCT and median thresholding
|
15
|
+
- Integer hash representation
|
16
|
+
- Fast Hamming distance between hashes
|
17
|
+
"""
|
18
|
+
|
19
|
+
import numpy as np
|
20
|
+
from PIL import Image
|
21
|
+
from scipy.fft import dct as dct_1d
|
22
|
+
|
23
|
+
# Resampling filter passed to Image.resize() in phash().
ANTIALIAS = Image.Resampling.LANCZOS
|
24
|
+
# Default side length N of the N x N low-frequency DCT block kept by phash(),
# so a hash carries N * N bits.
HASH_SIZE = 10  # default is 8
|
25
|
+
# Default maximum Hamming distance for a match; hash_store imports it as the
# default `threshold` of ImageHashStore.
HASH_DISTANCE_THRESHOLD = 5
|
26
|
+
|
27
|
+
|
28
|
+
def hash_to_int(hash_array: np.ndarray) -> int:
    """
    Pack a sequence of hash bits into a single integer.

    Bits are consumed in iteration order, so the first element becomes the
    most significant bit. An empty array yields 0.

    :param hash_array: A binary array (dtype=bool) from a hash function.
    :type hash_array: np.ndarray
    :return: Integer representation of the binary hash.
    :rtype: int
    """
    packed = 0
    for flag in hash_array:
        # make room for the next bit, then set it
        packed <<= 1
        packed |= int(flag)
    return packed
|
41
|
+
|
42
|
+
|
43
|
+
def fast_hamming_distance(hash_1: int, hash_2: int) -> int:
    """
    Compute the Hamming distance between two integer-based image hashes.

    XORs the hashes and counts the set bits of the result. Counting is done
    on the binary string representation, which stays fast for wide hashes.

    Hashes are expected to be non-negative. If the XOR is negative, the set
    bits of its magnitude are counted; the previous bit-clearing loop never
    terminated in that case.

    :param hash_1: First image hash (as integer).
    :type hash_1: int
    :param hash_2: Second image hash (as integer).
    :type hash_2: int
    :return: Number of differing bits between the two hashes.
    :rtype: int
    """
    return bin(hash_1 ^ hash_2).count("1")
|
62
|
+
|
63
|
+
|
64
|
+
def _threshold_and_pack(dct_low: np.ndarray) -> int:
    """
    Turn a block of low-frequency DCT coefficients into an integer hash.

    Each coefficient strictly greater than the block's median becomes a 1 bit
    and every other coefficient a 0 bit. The flattened bit mask is then packed
    by hash_to_int.
    """
    above_median = dct_low > np.median(dct_low)
    bits = above_median.flatten()
    return hash_to_int(bits)
|
74
|
+
|
75
|
+
|
76
|
+
def phash(
    image: Image.Image, hash_size: int = HASH_SIZE, highfreq_factor: int = 4
) -> int:
    """
    Compute the perceptual hash (pHash) of an image.

    The image is converted to grayscale and resized to a square of side
    ``hash_size * highfreq_factor``. A 2-D DCT is applied (one 1-D pass per
    axis), the top-left ``hash_size`` x ``hash_size`` coefficients are kept,
    and each is compared to their median to form the bits.

    :param image: The input image.
    :type image: PIL.Image.Image
    :param hash_size: Size of the resulting hash (NxN).
    :type hash_size: int
    :param highfreq_factor: Multiplier for the image resize to preserve detail.
    :type highfreq_factor: int
    :return: Integer representation of the perceptual hash.
    :rtype: int
    :raises ValueError: if ``hash_size`` is smaller than 2.
    """
    if hash_size < 2:
        raise ValueError("Hash size must be greater than or equal to 2")

    side = hash_size * highfreq_factor
    gray = image.convert("L")
    resized = gray.resize((side, side), ANTIALIAS)
    samples = np.asarray(resized)

    # 2-D DCT as two 1-D passes: along axis 0 first, then along axis 1.
    first_pass = dct_1d(samples, axis=0, norm="ortho")
    coeffs = dct_1d(first_pass, axis=1, norm="ortho")

    return _threshold_and_pack(coeffs[:hash_size, :hash_size])
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.utils.i18n
|
5
|
+
---------------------------
|
6
|
+
|
7
|
+
Multilingual text dictionary and utility for CLI and interactive mode.
|
8
|
+
"""
|
9
|
+
|
10
|
+
import json
|
11
|
+
from typing import Any, Dict
|
12
|
+
|
13
|
+
from novel_downloader.utils.constants import LOCALES_DIR
|
14
|
+
from novel_downloader.utils.state import state_mgr
|
15
|
+
|
16
|
+
# language code (locale file stem) -> {message key: localized template}
_TRANSLATIONS: Dict[str, Dict[str, str]] = {}

# Load every "<lang>.json" under LOCALES_DIR. Loading is best-effort: a locale
# that cannot be read or parsed is skipped, and t() falls back to "en" or to
# the key itself.
for locale_path in LOCALES_DIR.glob("*.json"):
    lang = locale_path.stem
    try:
        with open(locale_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: unreadable file. ValueError covers both invalid JSON
        # (json.JSONDecodeError) and bad encoding (UnicodeDecodeError).
        continue
    # t() calls .get() on each catalogue, so only keep JSON objects.
    if isinstance(data, dict):
        _TRANSLATIONS[lang] = data
|
25
|
+
|
26
|
+
|
27
|
+
def t(key: str, **kwargs: Any) -> str:
    """
    Retrieve a localized string by key and language.

    The language comes from ``state_mgr.get_language()`` ("zh" when unset).
    Lookup order is: that language, then "en", then the key itself.

    :param key: The string key.
    :param kwargs: Optional formatting arguments.
    :return: The translated and formatted string, or the key if not found.
             If formatting fails, the unformatted text is returned.
    """
    lang = state_mgr.get_language() or "zh"
    txt = (
        _TRANSLATIONS.get(lang, {}).get(key)
        or _TRANSLATIONS.get("en", {}).get(key)
        or key
    )
    try:
        return txt.format(**kwargs)
    except (KeyError, IndexError, ValueError):
        # A placeholder missing from kwargs, or stray braces in the template
        # or the fallback key, must not crash a user-facing message.
        return txt
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.utils.logger
|
5
|
+
-----------------------------
|
6
|
+
|
7
|
+
Provides a configurable logging setup for Python applications.
|
8
|
+
Log files are rotated daily and named with the given logger name and current date.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import logging
|
12
|
+
from datetime import datetime
|
13
|
+
from logging.handlers import TimedRotatingFileHandler
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Dict, Literal, Optional, Union
|
16
|
+
|
17
|
+
from .constants import LOGGER_DIR, LOGGER_NAME
|
18
|
+
|
19
|
+
# Names accepted for the console log level.
LogLevel = Literal["DEBUG", "INFO", "WARNING", "ERROR"]

# Level name -> numeric level from the logging module.
LOG_LEVELS: Dict[LogLevel, int] = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
}


def setup_logging(
    log_filename_prefix: Optional[str] = None,
    log_level: Optional[LogLevel] = None,
    log_dir: Optional[Union[str, Path]] = None,
) -> logging.Logger:
    """
    Configure the root logger for both console and rotating file output.

    Handlers already attached to the root logger are removed and closed, so
    repeated calls neither duplicate output nor leak open log files.

    :param log_filename_prefix: Prefix for the log file name.
                                If empty or None, LOGGER_NAME is used.
    :param log_level: Minimum log level to show in console:
                      "DEBUG", "INFO", "WARNING", or "ERROR".
                      Defaults to "INFO" if not specified.
    :param log_dir: Directory where log files will be saved.
                    Defaults to LOGGER_DIR if not specified.
    :return: The configured root logger.
    :raises ValueError: if ``log_level`` is not one of the supported names.
    """
    # Silence fontTools' post-table logger below ERROR and stop it from
    # reaching the root handlers.
    ft_logger = logging.getLogger("fontTools.ttLib.tables._p_o_s_t")
    ft_logger.setLevel(logging.ERROR)
    ft_logger.propagate = False

    # Determine console level (default INFO)
    level_str: LogLevel = log_level or "INFO"
    console_level = LOG_LEVELS.get(level_str)
    if console_level is None:
        raise ValueError(
            f"Invalid log level: {level_str}. Must be one of {list(LOG_LEVELS.keys())}"
        )

    # Resolve log file path
    log_path = Path(log_dir) if log_dir else LOGGER_DIR
    log_path.mkdir(parents=True, exist_ok=True)

    # Resolve log file name
    if not log_filename_prefix:
        log_filename_prefix = LOGGER_NAME
    date_str = datetime.now().strftime("%Y-%m-%d")
    log_filename = log_path / f"{log_filename_prefix}_{date_str}.log"

    # Create or retrieve logger
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)  # Capture everything, filter by handlers

    # Remove existing handlers to avoid duplicate logs, and close them so a
    # previously opened log file is released.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # File handler: rotates at midnight, keeps 7 days of logs
    file_handler = TimedRotatingFileHandler(
        filename=str(log_filename),
        when="midnight",
        interval=1,
        backupCount=7,
        encoding="utf-8",
        utc=False,
    )
    file_formatter = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    file_handler.setFormatter(file_formatter)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)

    # Console handler
    console_handler = logging.StreamHandler()
    console_formatter = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
    )
    console_handler.setFormatter(console_formatter)
    console_handler.setLevel(console_level)
    logger.addHandler(console_handler)

    print(f"Logging to {log_path}")

    return logger
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.utils.model_loader
|
5
|
+
-----------------------------------
|
6
|
+
|
7
|
+
Utility functions for managing pre-trained model downloads.
|
8
|
+
|
9
|
+
Currently supports:
|
10
|
+
- Character recognition model for single Chinese character inference
|
11
|
+
"""
|
12
|
+
|
13
|
+
from pathlib import Path
|
14
|
+
|
15
|
+
from huggingface_hub import hf_hub_download
|
16
|
+
from huggingface_hub.utils import LocalEntryNotFoundError
|
17
|
+
|
18
|
+
from novel_downloader.utils.constants import (
|
19
|
+
MODEL_CACHE_DIR,
|
20
|
+
REC_CHAR_MODEL_FILES,
|
21
|
+
REC_CHAR_MODEL_REPO,
|
22
|
+
REC_CHAR_VECTOR_FILES,
|
23
|
+
)
|
24
|
+
|
25
|
+
|
26
|
+
def get_rec_chinese_char_model_dir(version: str = "v1.0") -> Path:
    """
    Ensure model files are downloaded, return the directory path.

    Every file in REC_CHAR_MODEL_FILES is requested from REC_CHAR_MODEL_REPO
    via hf_hub_download into ``MODEL_CACHE_DIR / "rec_chinese_char"``.

    :param version: repository revision passed to hf_hub_download.
    :return: the directory the files were downloaded into.
    :raises RuntimeError: if hf_hub_download raises LocalEntryNotFoundError
                          for a file. The original error is chained as the
                          cause.
    """
    model_dir = MODEL_CACHE_DIR / "rec_chinese_char"
    model_dir.mkdir(parents=True, exist_ok=True)

    for fname in REC_CHAR_MODEL_FILES:
        try:
            hf_hub_download(
                repo_id=REC_CHAR_MODEL_REPO,
                filename=fname,
                revision=version,
                local_dir=model_dir,
                local_dir_use_symlinks=False,
            )
        except LocalEntryNotFoundError as err:
            # chain explicitly so the hub error shows as the direct cause
            raise RuntimeError(
                f"[model] Missing model file '{fname}' and no internet connection."
            ) from err
    return model_dir
|
48
|
+
|
49
|
+
|
50
|
+
def get_rec_char_vector_dir(version: str = "v1.0") -> Path:
    """
    Ensure vector files are downloaded, return the directory path.

    Every file in REC_CHAR_VECTOR_FILES is requested from REC_CHAR_MODEL_REPO
    via hf_hub_download into ``MODEL_CACHE_DIR / "rec_chinese_char"``, the
    same directory the model files use.
    NOTE(review): no 'vector' subfolder is created here; one would only exist
    if the file names in REC_CHAR_VECTOR_FILES contain it. Confirm.

    :param version: repository revision passed to hf_hub_download.
    :return: the directory the files were downloaded into.
    :raises RuntimeError: if hf_hub_download raises LocalEntryNotFoundError
                          for a file. The original error is chained as the
                          cause.
    """
    vector_dir = MODEL_CACHE_DIR / "rec_chinese_char"
    vector_dir.mkdir(parents=True, exist_ok=True)

    for fname in REC_CHAR_VECTOR_FILES:
        try:
            hf_hub_download(
                repo_id=REC_CHAR_MODEL_REPO,
                filename=fname,
                revision=version,
                local_dir=vector_dir,
                local_dir_use_symlinks=False,
            )
        except LocalEntryNotFoundError as err:
            # chain explicitly so the hub error shows as the direct cause
            raise RuntimeError(
                f"[vector] Missing vector file '{fname}' and no internet connection."
            ) from err

    return vector_dir
|