novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.crypto_utils
4
- -----------------------------------
5
-
6
- Generic cryptographic utilities
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import base64
12
-
13
-
14
def rc4_crypt(
    key: str,
    data: str,
    *,
    mode: str = "encrypt",
    encoding: str = "utf-8",
) -> str:
    """
    Encrypt or decrypt data using RC4 and Base64.

    :param key: RC4 key (will be encoded using the specified encoding).
    :type key: str
    :param data: Plain-text (for 'encrypt') or Base64 cipher-text (for 'decrypt').
    :type data: str
    :param mode: Operation mode, either 'encrypt' or 'decrypt'. Defaults to 'encrypt'.
    :type mode: str, optional
    :param encoding: Character encoding for key and returned string. Defaults 'utf-8'.
    :type encoding: str, optional

    :return: Base64 cipher-text (for encryption) or decoded plain-text (for decryption).
    :rtype: str

    :raises ValueError: If mode is not 'encrypt' or 'decrypt', or if the key
        encodes to zero bytes.
    """
    # Validate arguments up front so a bad call fails before any work is done.
    if mode not in ("encrypt", "decrypt"):
        raise ValueError("Mode must be 'encrypt' or 'decrypt'.")

    key_bytes = key.encode(encoding)
    if not key_bytes:
        # The key schedule indexes the key modulo its length; an empty key
        # would otherwise surface as an opaque ZeroDivisionError.
        raise ValueError("RC4 key must not be empty.")

    def _rc4(key_bytes: bytes, data_bytes: bytes) -> bytes:
        # Key-Scheduling Algorithm (KSA)
        S = list(range(256))
        key_len = len(key_bytes)
        j = 0
        for i in range(256):
            j = (j + S[i] + key_bytes[i % key_len]) % 256
            S[i], S[j] = S[j], S[i]

        # Pseudo-Random Generation Algorithm (PRGA)
        i = j = 0
        out = bytearray(len(data_bytes))
        for pos, byte in enumerate(data_bytes):
            i = (i + 1) % 256
            j = (j + S[i]) % 256
            S[i], S[j] = S[j], S[i]
            out[pos] = byte ^ S[(S[i] + S[j]) % 256]

        return bytes(out)

    if mode == "encrypt":
        cipher_bytes = _rc4(key_bytes, data.encode(encoding))
        return base64.b64encode(cipher_bytes).decode(encoding)

    # mode == "decrypt": undecodable bytes are replaced rather than raising.
    plain_bytes = _rc4(key_bytes, base64.b64decode(data))
    return plain_bytes.decode(encoding, errors="replace")
@@ -1,280 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.fontocr.hash_store
4
- -----------------------------------------
5
-
6
- Manage a small collection of image perceptual hashes and their labels.
7
- Supports loading/saving to .json or .npy, and basic CRUD + search.
8
- """
9
-
10
- import heapq
11
- import json
12
- import logging
13
- from collections.abc import Callable
14
- from pathlib import Path
15
-
16
- from PIL import Image
17
-
18
- from ..constants import DATA_DIR
19
- from .hash_utils import HASH_DISTANCE_THRESHOLD, fast_hamming_distance, phash
20
-
21
- logger = logging.getLogger(__name__)
22
- HASH_STORE_FILE = DATA_DIR / "image_hashes.json"
23
-
24
-
25
- class _BKNode:
26
- """
27
- A node in a Burkhard-Keller tree (BK-Tree) for distance search.
28
- Stores one value and a dict of children keyed by distance.
29
- """
30
-
31
- __slots__ = ("value", "children")
32
-
33
- def __init__(self, value: int):
34
- self.value = value
35
- self.children: dict[int, _BKNode] = {}
36
-
37
- def add(self, h: int, dist_fn: Callable[[int, int], int]) -> None:
38
- d = dist_fn(h, self.value)
39
- child = self.children.get(d)
40
- if child is not None:
41
- child.add(h, dist_fn)
42
- else:
43
- self.children[d] = _BKNode(h)
44
-
45
- def query(
46
- self,
47
- target: int,
48
- threshold: int,
49
- dist_fn: Callable[[int, int], int],
50
- ) -> list[tuple[int, int]]:
51
- """
52
- Recursively collect (value, dist) pairs within threshold.
53
- """
54
- d0 = dist_fn(target, self.value)
55
- matches: list[tuple[int, int]] = []
56
- if d0 <= threshold:
57
- matches.append((self.value, d0))
58
- # Only children whose edge-dist \in [d0-threshold, d0+threshold]
59
- lower, upper = d0 - threshold, d0 + threshold
60
- for edge, child in self.children.items():
61
- if lower <= edge <= upper:
62
- matches.extend(child.query(target, threshold, dist_fn))
63
- return matches
64
-
65
-
66
class ImageHashStore:
    """
    Store and manage image hashes grouped by label, with a BK-Tree index.

    The store is persisted as a JSON object mapping each label to a list of
    integer hashes.

    :param path: file path of the JSON store
    :param auto_save: if True, every modification automatically calls save()
    :param hash_func: function to compute hash from PIL.Image
    :param ham_dist: function to compute Hamming distance between two hashes
    :param threshold: default Hamming distance cutoff used by query()
    """

    def __init__(
        self,
        path: str | Path = HASH_STORE_FILE,
        auto_save: bool = False,
        hash_func: Callable[[Image.Image], int] = phash,
        ham_dist: Callable[[int, int], int] = fast_hamming_distance,
        threshold: int = HASH_DISTANCE_THRESHOLD,
    ) -> None:
        self._path = Path(path)
        self._auto = auto_save
        self._hf = hash_func
        self._hd = ham_dist
        self._th = threshold

        # label -> set of hashes
        self._hash: dict[str, set[int]] = {}
        # hash -> list of labels (for reverse lookup)
        self._hash_to_labels: dict[int, list[str]] = {}
        # root of BK-Tree (or None if empty)
        self._bk_root: _BKNode | None = None

        self.load()

    def load(self) -> None:
        """Load store from disk and rebuild the reverse map and BK-Tree index."""
        if not self._path.exists():
            self._hash.clear()
            # Drop the derived indexes too, so a reload never serves stale hits.
            self._hash_to_labels.clear()
            self._bk_root = None
            logger.debug(
                "[ImageHashStore] No file found at %s, starting empty.", self._path
            )
            return

        txt = self._path.read_text(encoding="utf-8")
        obj = json.loads(txt) or {}
        self._hash = {lbl: set(hs) for lbl, hs in obj.items()}

        self._rebuild_reverse_map()
        logger.debug(
            "[ImageHashStore] Loaded hash store from %s with %d hashes",
            self._path,
            sum(len(v) for v in self._hash.values()),
        )

        self._build_index()

    def _rebuild_reverse_map(self) -> None:
        """Recompute the hash -> labels map from the label -> hashes map."""
        self._hash_to_labels.clear()
        for lbl, hs in self._hash.items():
            for h in hs:
                self._hash_to_labels.setdefault(h, []).append(lbl)

    def _build_index(self) -> None:
        """Construct a BK-Tree over all stored hashes."""
        self._bk_root = None
        for h in self._hash_to_labels:
            if self._bk_root is None:
                self._bk_root = _BKNode(h)
            else:
                self._bk_root.add(h, self._hd)
        logger.debug(
            "[ImageHashStore] BK-tree index built with %d unique hashes",
            len(self._hash_to_labels),
        )

    def _reindex(self) -> None:
        """Rebuild every derived index after hashes or labels were removed."""
        self._rebuild_reverse_map()
        self._build_index()

    def _hash_image_file(self, img_path: str | Path) -> int:
        """Open an image file, hash its grayscale version, and close the file."""
        with Image.open(img_path) as src:
            return self._hf(src.convert("L"))

    def save(self) -> None:
        """Persist current store to disk."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        data = {lbl: list(s) for lbl, s in self._hash.items()}
        txt = json.dumps(data, ensure_ascii=False, indent=2)
        self._path.write_text(txt, encoding="utf-8")
        logger.debug("[ImageHashStore] Saved hash store to %s", self._path)

    def _maybe_save(self) -> None:
        """Save only when the store was created with auto_save=True."""
        if self._auto:
            self.save()

    def add_image(self, img_path: str | Path, label: str) -> int:
        """
        Compute hash for the given image and add it under `label`.
        Updates BK-Tree index incrementally.

        :param img_path: path of the image file to hash
        :param label: label to file the hash under
        :return: the computed hash
        """
        h = self._hash_image_file(img_path)
        self._hash.setdefault(label, set()).add(h)

        is_new_hash = h not in self._hash_to_labels
        labels = self._hash_to_labels.setdefault(h, [])
        if label not in labels:
            # Re-adding the same image must not list the label twice.
            labels.append(label)

        # insert into BK-Tree (each distinct hash is indexed once)
        if is_new_hash:
            if self._bk_root is None:
                self._bk_root = _BKNode(h)
            else:
                self._bk_root.add(h, self._hd)
        logger.debug("[ImageHashStore] Added hash %d under label '%s'", h, label)
        self._maybe_save()
        return h

    def add_from_map(self, map_path: str | Path) -> None:
        """
        Load a JSON file of the form { "image_path": "label", ... }
        and add each entry.

        Image paths are resolved relative to the map file's directory.
        Entries that fail to load are logged and skipped.
        """
        map_path = Path(map_path)
        text = map_path.read_text(encoding="utf-8")
        mapping = json.loads(text)
        for rel_img_path, lbl in mapping.items():
            img_path = (map_path.parent / rel_img_path).resolve()
            try:
                self.add_image(img_path, lbl)
            except Exception as e:
                # Best-effort import: one unreadable image must not abort the batch.
                logger.warning(
                    "[ImageHashStore] Failed to add image '%s': %s", img_path, str(e)
                )
                continue

    def labels(self) -> list[str]:
        """Return a sorted list of all labels in the store."""
        return sorted(self._hash.keys())

    def hashes(self, label: str) -> set[int]:
        """Return the set of hashes for a given `label` (empty set if none)."""
        return set(self._hash.get(label, ()))

    def remove_label(self, label: str) -> None:
        """Remove all hashes associated with `label`."""
        if label in self._hash:
            del self._hash[label]
            # Keep the reverse map and BK-Tree in sync so that query() no
            # longer reports the removed label.
            self._reindex()
            logger.debug("[ImageHashStore] Removed label '%s'", label)
            self._maybe_save()

    def remove_hash(self, label: str, this: int | str | Path) -> bool:
        """
        Remove a specific hash under `label`.
        `this` can be:
        - an integer hash
        - a str or Path (image file) -> will compute its hash then remove
        Returns True if something was removed.
        """
        if label not in self._hash:
            return False

        if isinstance(this, (str, Path)):
            try:
                h = self._hash_image_file(this)
            except Exception as e:
                logger.warning(
                    "[ImageHashStore] Could not open image '%s': %s", this, str(e)
                )
                return False
        else:
            h = int(this)

        if h in self._hash[label]:
            self._hash[label].remove(h)
            # Keep the reverse map and BK-Tree in sync with the removal.
            self._reindex()
            logger.debug("[ImageHashStore] Removed hash %d from label '%s'", h, label)
            self._maybe_save()
            return True
        return False

    def query(
        self,
        target: int | str | Path | Image.Image,
        k: int = 1,
        threshold: int | None = None,
    ) -> list[tuple[str, float]]:
        """
        Find up to `k` distinct labels whose stored hashes are most similar
        to `target` within `threshold`. Returns a list of (label, score),
        sorted by descending score. Each label appears at most once.

        The score is ``1.0 - dist / threshold``; with a threshold of 0 only
        exact matches qualify and they score 1.0.

        :param target: Image path / int hash / PIL.Image
        :param k: number of labels to return (default=1)
        :param threshold: Hamming distance cutoff (default=self._th)
        """
        if threshold is None:
            threshold = self._th

        # compute target hash
        if isinstance(target, Image.Image):
            thash = self._hf(target.convert("L"))
        elif isinstance(target, (str, Path)):
            thash = self._hash_image_file(target)
        else:
            thash = int(target)

        if self._bk_root is None:
            return []

        # find all (hash,dist) within threshold
        matches = self._bk_root.query(thash, threshold, self._hd)

        # collapse to one best score per label
        best_per_label: dict[str, float] = {}
        h2l = self._hash_to_labels
        for h, dist in matches:
            # threshold == 0 admits only dist == 0; avoid dividing by zero.
            score = 1.0 - dist / threshold if threshold > 0 else 1.0
            for lbl in h2l.get(h, ()):
                prev = best_per_label.get(lbl)
                if prev is None or score > prev:
                    best_per_label[lbl] = score

        # Highest score first: the most similar labels are the ones wanted.
        return heapq.nlargest(k, best_per_label.items(), key=lambda x: x[1])
278
-
279
-
280
- img_hash_store = ImageHashStore()
@@ -1,103 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.fontocr.hash_utils
4
- -----------------------------------------
5
-
6
- Utilities for image perceptual hashing and comparison.
7
-
8
- Implements a perceptual hash (pHash) based on DCT, following the method
9
- described in:
10
- https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
11
-
12
- Provides:
13
- - pHash computation via DCT and median thresholding
14
- - Integer hash representation
15
- - Fast Hamming distance between hashes
16
- """
17
-
18
- import numpy as np
19
- from numpy.typing import NDArray
20
- from PIL import Image
21
- from scipy.fft import dct as dct_1d
22
-
23
- ANTIALIAS = Image.Resampling.LANCZOS
24
- HASH_SIZE = 10 # default is 8
25
- HASH_DISTANCE_THRESHOLD = 5
26
-
27
-
28
def hash_to_int(hash_array: NDArray[np.bool_]) -> int:
    """
    Pack a sequence of bits into a single integer, first bit most significant.

    :param hash_array: A binary array (dtype=bool) from a hash function.
    :type hash_array: np.ndarray
    :return: Integer whose binary digits are the elements of ``hash_array``.
    :rtype: int
    """
    packed = 0
    for flag in hash_array:
        # Make room for the next bit, then drop it into the lowest position.
        packed <<= 1
        packed |= int(flag)
    return packed
41
-
42
-
43
def fast_hamming_distance(hash_1: int, hash_2: int) -> int:
    """
    Compute the Hamming distance between two integer-based image hashes.

    Uses bitwise XOR and a population count of the result.

    :param hash_1: First image hash (as integer).
    :type hash_1: int
    :param hash_2: Second image hash (as integer).
    :type hash_2: int
    :return: Number of differing bits between the two hashes.
    :rtype: int
    :raises ValueError: If the hashes have opposite signs, in which case the
        XOR is negative and the number of differing bits is unbounded.
    """
    diff = hash_1 ^ hash_2
    if diff < 0:
        # A negative XOR has infinitely many set bits in two's complement;
        # a clear-lowest-bit loop would never terminate on it.
        raise ValueError("Hamming distance is undefined for hashes of opposite sign")
    return bin(diff).count("1")
62
-
63
-
64
def _threshold_and_pack(dct_low: NDArray[np.float64]) -> int:
    """
    Turn a low-frequency DCT matrix into an integer hash.

    Every coefficient strictly greater than the matrix median becomes a
    1-bit, all others a 0-bit; the flattened bit mask is then packed into
    an integer by ``hash_to_int``.
    """
    above_median = dct_low > np.median(dct_low)
    bits = above_median.flatten()
    return hash_to_int(bits)
74
-
75
-
76
def phash(
    image: Image.Image, hash_size: int = HASH_SIZE, highfreq_factor: int = 4
) -> int:
    """
    Compute the perceptual hash (pHash) of an image.

    The image is converted to grayscale and resized to a square of
    ``hash_size * highfreq_factor`` pixels, a 2-D Discrete Cosine Transform
    is applied (one 1-D pass per axis), and the top-left
    ``hash_size x hash_size`` block of coefficients is thresholded against
    its median to produce the fingerprint.

    :param image: The input image.
    :type image: PIL.Image.Image
    :param hash_size: Size of the resulting hash (NxN).
    :type hash_size: int
    :param highfreq_factor: Multiplier for the image resize to preserve detail.
    :type highfreq_factor: int
    :return: Integer representation of the perceptual hash.
    :rtype: int
    :raises ValueError: If ``hash_size`` is smaller than 2.
    """
    if hash_size < 2:
        raise ValueError("Hash size must be greater than or equal to 2")

    side = hash_size * highfreq_factor
    gray = image.convert("L").resize((side, side), ANTIALIAS)

    # 2-D DCT built from two 1-D passes: down the columns, then along the rows.
    coeffs = dct_1d(np.asarray(gray), axis=0, norm="ortho")
    coeffs = dct_1d(coeffs, axis=1, norm="ortho")

    low_freq = coeffs[:hash_size, :hash_size]
    return _threshold_and_pack(low_freq)
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.fontocr.model_loader
4
- -------------------------------------------
5
-
6
- Utility functions for managing pre-trained model downloads.
7
-
8
- Currently supports:
9
- - Character recognition model for single Chinese character inference
10
- """
11
-
12
- from pathlib import Path
13
-
14
- from huggingface_hub import hf_hub_download
15
- from huggingface_hub.errors import LocalEntryNotFoundError
16
-
17
- from novel_downloader.utils.constants import (
18
- MODEL_CACHE_DIR,
19
- REC_CHAR_MODEL_FILES,
20
- REC_CHAR_MODEL_REPO,
21
- REC_CHAR_VECTOR_FILES,
22
- )
23
-
24
-
25
def _ensure_rec_char_files(filenames, version: str, kind: str) -> Path:
    """
    Download each file of the character-recognition repo into the local cache.

    :param filenames: iterable of file names inside ``REC_CHAR_MODEL_REPO``
    :param version: repository revision passed to ``hf_hub_download``
    :param kind: short tag (``"model"`` or ``"vector"``) used in error messages
    :return: the ``rec_chinese_char`` directory under ``MODEL_CACHE_DIR``
    :raises RuntimeError: if ``hf_hub_download`` raises
        ``LocalEntryNotFoundError`` for a file
    """
    target_dir = MODEL_CACHE_DIR / "rec_chinese_char"
    target_dir.mkdir(parents=True, exist_ok=True)

    for fname in filenames:
        try:
            hf_hub_download(
                repo_id=REC_CHAR_MODEL_REPO,
                filename=fname,
                revision=version,
                local_dir=target_dir,
            )
        except LocalEntryNotFoundError as err:
            raise RuntimeError(
                f"[{kind}] Missing {kind} file '{fname}' and no internet connection."
            ) from err
    return target_dir


def get_rec_chinese_char_model_dir(version: str = "v1.0") -> Path:
    """
    Ensure model files are downloaded, return the directory path.

    :param version: repository revision to fetch (default ``"v1.0"``)
    :return: directory the model files were requested into
    :raises RuntimeError: if a model file is missing and cannot be downloaded
    """
    return _ensure_rec_char_files(REC_CHAR_MODEL_FILES, version, "model")


def get_rec_char_vector_dir(version: str = "v1.0") -> Path:
    """
    Ensure vector files are downloaded, return the directory path.

    The files are requested into the same ``rec_chinese_char`` directory as
    the model files, and that directory is what is returned.
    NOTE(review): an earlier docstring promised a 'vector' subfolder; this
    function never creates one, so any subfolder would have to come from the
    names in ``REC_CHAR_VECTOR_FILES`` -- confirm against callers.

    :param version: repository revision to fetch (default ``"v1.0"``)
    :return: directory the vector files were requested into
    :raises RuntimeError: if a vector file is missing and cannot be downloaded
    """
    return _ensure_rec_char_files(REC_CHAR_VECTOR_FILES, version, "vector")