novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,279 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.hash_store
4
- ---------------------------------
5
-
6
- Manage a small collection of image perceptual hashes and their labels.
7
- Supports loading/saving to .json or .npy, and basic CRUD + search.
8
- """
9
-
10
- import heapq
11
- import json
12
- import logging
13
- from collections.abc import Callable
14
- from pathlib import Path
15
-
16
- from PIL import Image
17
-
18
- from .constants import HASH_STORE_FILE
19
- from .hash_utils import HASH_DISTANCE_THRESHOLD, fast_hamming_distance, phash
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class _BKNode:
25
- """
26
- A node in a Burkhard-Keller tree (BK-Tree) for distance search.
27
- Stores one value and a dict of children keyed by distance.
28
- """
29
-
30
- __slots__ = ("value", "children")
31
-
32
- def __init__(self, value: int):
33
- self.value = value
34
- self.children: dict[int, _BKNode] = {}
35
-
36
- def add(self, h: int, dist_fn: Callable[[int, int], int]) -> None:
37
- d = dist_fn(h, self.value)
38
- child = self.children.get(d)
39
- if child is not None:
40
- child.add(h, dist_fn)
41
- else:
42
- self.children[d] = _BKNode(h)
43
-
44
- def query(
45
- self,
46
- target: int,
47
- threshold: int,
48
- dist_fn: Callable[[int, int], int],
49
- ) -> list[tuple[int, int]]:
50
- """
51
- Recursively collect (value, dist) pairs within threshold.
52
- """
53
- d0 = dist_fn(target, self.value)
54
- matches: list[tuple[int, int]] = []
55
- if d0 <= threshold:
56
- matches.append((self.value, d0))
57
- # Only children whose edge-dist \in [d0-threshold, d0+threshold]
58
- lower, upper = d0 - threshold, d0 + threshold
59
- for edge, child in self.children.items():
60
- if lower <= edge <= upper:
61
- matches.extend(child.query(target, threshold, dist_fn))
62
- return matches
63
-
64
-
65
class ImageHashStore:
    """
    Store and manage image hashes grouped by label, with a BK-Tree index.

    Invariant: the BK-Tree holds exactly the hashes that are keys of the
    reverse (hash -> labels) map.

    :param path: JSON file used for persistence
    :param auto_save: if True, every modification automatically calls save()
    :param hash_func: function to compute hash from PIL.Image
    :param ham_dist: function to compute Hamming distance between two hashes
    :param threshold: default Hamming distance cutoff used by query()
    """

    def __init__(
        self,
        path: str | Path = HASH_STORE_FILE,
        auto_save: bool = False,
        hash_func: Callable[[Image.Image], int] = phash,
        ham_dist: Callable[[int, int], int] = fast_hamming_distance,
        threshold: int = HASH_DISTANCE_THRESHOLD,
    ) -> None:
        self._path = Path(path)
        self._auto = auto_save
        self._hf = hash_func
        self._hd = ham_dist
        self._th = threshold

        # label -> set of hashes
        self._hash: dict[str, set[int]] = {}
        # hash -> list of labels (for reverse lookup)
        self._hash_to_labels: dict[int, list[str]] = {}
        # root of BK-Tree (or None if empty)
        self._bk_root: _BKNode | None = None

        self.load()

    def load(self) -> None:
        """Load store from disk and rebuild the reverse map and BK-Tree index."""
        if not self._path.exists():
            # Drop every derived structure too, so a reload after the file
            # disappeared cannot keep answering from stale data.
            self._hash.clear()
            self._hash_to_labels.clear()
            self._bk_root = None
            logger.debug(
                "[ImageHashStore] No file found at %s, starting empty.", self._path
            )
            return

        txt = self._path.read_text(encoding="utf-8")
        obj = json.loads(txt) or {}
        self._hash = {lbl: set(hs or ()) for lbl, hs in obj.items()}

        # rebuild reverse map and BK-Tree
        self._hash_to_labels.clear()
        for lbl, hs in self._hash.items():
            for h in hs:
                self._hash_to_labels.setdefault(h, []).append(lbl)
        logger.debug(
            "[ImageHashStore] Loaded hash store from %s with %d hashes",
            self._path,
            sum(len(v) for v in self._hash.values()),
        )

        self._build_index()

    def _build_index(self) -> None:
        """Construct a BK-Tree over all hashes present in the reverse map."""
        self._bk_root = None
        for h in self._hash_to_labels:
            if self._bk_root is None:
                self._bk_root = _BKNode(h)
            else:
                self._bk_root.add(h, self._hd)
        logger.debug(
            "[ImageHashStore] BK-tree index built with %d unique hashes",
            len(self._hash_to_labels),
        )

    def save(self) -> None:
        """Persist current store to disk as JSON (label -> list of hashes)."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        data = {lbl: list(s) for lbl, s in self._hash.items()}
        txt = json.dumps(data, ensure_ascii=False, indent=2)
        self._path.write_text(txt, encoding="utf-8")
        logger.debug("[ImageHashStore] Saved hash store to %s", self._path)

    def _maybe_save(self) -> None:
        """Call save() when the store was created with auto_save=True."""
        if self._auto:
            self.save()

    def _hash_of_file(self, img_path: str | Path) -> int:
        """Open an image file, hash its grayscale form, and close the file."""
        with Image.open(img_path) as raw:
            return self._hf(raw.convert("L"))

    def _unlink(self, h: int, label: str) -> bool:
        """
        Remove ``label`` from the reverse-map entry of ``h``.

        :return: True if ``h`` no longer belongs to any label and was
                 dropped from the reverse map (the index is then stale).
        """
        labels = self._hash_to_labels.get(h)
        if labels is None:
            return False
        remaining = [lbl for lbl in labels if lbl != label]
        if remaining:
            self._hash_to_labels[h] = remaining
            return False
        del self._hash_to_labels[h]
        return True

    def add_image(self, img_path: str | Path, label: str) -> int:
        """
        Compute hash for the given image and add it under `label`.
        Updates BK-Tree index incrementally.

        :param img_path: path of the image file to hash
        :param label: label to file the hash under
        :return: the computed hash
        """
        h = self._hash_of_file(img_path)
        self._hash.setdefault(label, set()).add(h)

        labels = self._hash_to_labels.get(h)
        if labels is None:
            self._hash_to_labels[h] = [label]
            # Hash is new to the store: insert into BK-Tree
            if self._bk_root is None:
                self._bk_root = _BKNode(h)
            else:
                self._bk_root.add(h, self._hd)
        elif label not in labels:
            # Known hash, new label: the tree already contains it
            labels.append(label)

        logger.debug("[ImageHashStore] Added hash %d under label '%s'", h, label)
        self._maybe_save()
        return h

    def add_from_map(self, map_path: str | Path) -> None:
        """
        Load a JSON file of the form { "image_path": "label", ... }
        and add each entry. Image paths are resolved relative to the
        directory containing the map file; entries that fail are logged
        and skipped.
        """
        map_path = Path(map_path)
        text = map_path.read_text(encoding="utf-8")
        mapping = json.loads(text)
        for rel_img_path, lbl in mapping.items():
            img_path = (map_path.parent / rel_img_path).resolve()
            try:
                self.add_image(img_path, lbl)
            except Exception as e:
                # Best effort: one unreadable image must not abort the batch
                logger.warning(
                    "[ImageHashStore] Failed to add image '%s': %s", img_path, str(e)
                )
                continue

    def labels(self) -> list[str]:
        """Return a sorted list of all labels in the store."""
        return sorted(self._hash.keys())

    def hashes(self, label: str) -> set[int]:
        """Return a copy of the set of hashes for `label` (empty set if none)."""
        return set(self._hash.get(label, ()))

    def remove_label(self, label: str) -> None:
        """Remove `label` and all its hashes from the store and the index."""
        removed = self._hash.pop(label, None)
        if removed is None:
            return
        # Evaluate every unlink (list, not generator) before testing the result
        orphaned = [self._unlink(h, label) for h in removed]
        if any(orphaned):
            self._build_index()
        logger.debug("[ImageHashStore] Removed label '%s'", label)
        self._maybe_save()

    def remove_hash(self, label: str, this: int | str | Path) -> bool:
        """
        Remove a specific hash under `label`.
        `this` can be:
        - an integer hash
        - a str / Path (image file) -> will compute its hash then remove
        Returns True if something was removed.
        """
        if label not in self._hash:
            return False

        if isinstance(this, (str, Path)):
            try:
                h = self._hash_of_file(this)
            except Exception as e:
                logger.warning(
                    "[ImageHashStore] Could not open image '%s': %s", this, str(e)
                )
                return False
        else:
            h = int(this)

        if h not in self._hash[label]:
            return False

        self._hash[label].remove(h)
        if self._unlink(h, label):
            # No label references this hash any more: drop it from the index
            self._build_index()
        logger.debug("[ImageHashStore] Removed hash %d from label '%s'", h, label)
        self._maybe_save()
        return True

    def query(
        self,
        target: int | str | Path | Image.Image,
        k: int = 1,
        threshold: int | None = None,
    ) -> list[tuple[str, float]]:
        """
        Find up to `k` distinct labels whose stored hashes are most similar
        to `target` within `threshold`. Returns a list of (label, score),
        sorted by descending score. Each label appears at most once.

        The score is ``1 - dist / threshold`` (1.0 for an exact match); with
        ``threshold == 0`` only exact matches qualify and they score 1.0.

        :param target: Image path / int hash / PIL.Image
        :param k: number of labels to return (default=1)
        :param threshold: Hamming distance cutoff (default=self._th)
        """
        if threshold is None:
            threshold = self._th

        # compute target hash
        if isinstance(target, Image.Image):
            thash = self._hf(target.convert("L"))
        elif isinstance(target, (str, Path)):
            thash = self._hash_of_file(target)
        else:
            thash = int(target)

        if self._bk_root is None:
            return []

        # find all (hash, dist) within threshold
        matches = self._bk_root.query(thash, threshold, self._hd)

        # collapse to one best score per label
        best_per_label: dict[str, float] = {}
        h2l = self._hash_to_labels
        for h, dist in matches:
            # threshold == 0 admits only dist == 0; avoid dividing by zero
            score = 1.0 - dist / threshold if threshold > 0 else 1.0
            for lbl in h2l.get(h, ()):
                prev = best_per_label.get(lbl)
                if prev is None or score > prev:
                    best_per_label[lbl] = score

        # Highest score first: the most similar labels
        return heapq.nlargest(k, best_per_label.items(), key=lambda x: x[1])


# Module-level store built with the default settings; constructing it
# loads the default hash-store file at import time.
img_hash_store = ImageHashStore()
@@ -1,103 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.hash_utils
4
- ---------------------------------
5
-
6
- Utilities for image perceptual hashing and comparison.
7
-
8
- Implements a perceptual hash (pHash) based on DCT, following the method
9
- described in:
10
- https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
11
-
12
- Provides:
13
- - pHash computation via DCT and median thresholding
14
- - Integer hash representation
15
- - Fast Hamming distance between hashes
16
- """
17
-
18
- import numpy as np
19
- from numpy.typing import NDArray
20
- from PIL import Image
21
- from scipy.fft import dct as dct_1d
22
-
23
- ANTIALIAS = Image.Resampling.LANCZOS
24
- HASH_SIZE = 10 # default is 8
25
- HASH_DISTANCE_THRESHOLD = 5
26
-
27
-
28
def hash_to_int(hash_array: NDArray[np.bool_]) -> int:
    """
    Pack a sequence of bits into a single integer.

    The first element becomes the most significant bit; an empty
    sequence yields 0.

    :param hash_array: A binary array (dtype=bool) from a hash function.
    :type hash_array: np.ndarray
    :return: Integer representation of the binary hash.
    :rtype: int
    """
    packed = 0
    for flag in hash_array:
        # make room for the next bit, then set it
        packed <<= 1
        packed |= int(flag)
    return packed
41
-
42
-
43
- def fast_hamming_distance(hash_1: int, hash_2: int) -> int:
44
- """
45
- Compute the Hamming distance between two integer-based image hashes.
46
-
47
- Uses bitwise XOR and bit count for fast comparison.
48
-
49
- :param hash_1: First image hash (as integer).
50
- :type hash_1: int
51
- :param hash_2: Second image hash (as integer).
52
- :type hash_2: int
53
- :return: Number of differing bits between the two hashes.
54
- :rtype: int
55
- """
56
- x = hash_1 ^ hash_2
57
- count = 0
58
- while x:
59
- x &= x - 1
60
- count += 1
61
- return count
62
-
63
-
64
def _threshold_and_pack(dct_low: NDArray[np.float64]) -> int:
    """
    Turn a low-frequency DCT matrix into an integer hash.

    Each coefficient strictly above the matrix median contributes a 1 bit,
    every other coefficient a 0 bit; the flattened bit mask is then
    packed with ``hash_to_int``.
    """
    above_median = dct_low > np.median(dct_low)
    return hash_to_int(above_median.flatten())
74
-
75
-
76
def phash(
    image: Image.Image, hash_size: int = HASH_SIZE, highfreq_factor: int = 4
) -> int:
    """
    Compute the perceptual hash (pHash) of an image.

    The image is converted to grayscale, resized to a square of side
    ``hash_size * highfreq_factor``, and transformed with a 2-D DCT
    (one orthonormal 1-D DCT per axis). The top-left
    ``hash_size x hash_size`` corner of the result is thresholded
    against its median to produce the fingerprint.

    :param image: The input image.
    :type image: PIL.Image.Image
    :param hash_size: Size of the resulting hash (NxN).
    :type hash_size: int
    :param highfreq_factor: Multiplier for the image resize to preserve detail.
    :type highfreq_factor: int
    :return: Integer representation of the perceptual hash.
    :rtype: int
    :raises ValueError: If ``hash_size`` is smaller than 2.
    """
    if hash_size < 2:
        raise ValueError("Hash size must be greater than or equal to 2")

    side = hash_size * highfreq_factor
    grayscale = image.convert("L")
    shrunk = grayscale.resize((side, side), ANTIALIAS)

    # 2-D DCT as two 1-D passes: down the columns first, then along the rows
    column_pass = dct_1d(np.asarray(shrunk), axis=0, norm="ortho")
    spectrum = dct_1d(column_pass, axis=1, norm="ortho")

    return _threshold_and_pack(spectrum[:hash_size, :hash_size])
@@ -1,46 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.text_utils.chapter_formatting
4
- ----------------------------------------------------
5
-
6
- Format chapter content with title, paragraph blocks, and optional author notes.
7
- """
8
-
9
- import re
10
-
11
- _IMG_TAG_RE = re.compile(r"<img[^>]*>")
12
-
13
-
14
- def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -> str:
15
- """
16
- Build a formatted chapter string with title, paragraphs, and optional author note.
17
-
18
- :param title: The chapter title.
19
- :param paragraphs: Raw multi-line string; lines are treated as paragraphs.
20
- :param author_say: Optional author comment to append at the end.
21
- :return: A single string where title, paragraphs, and author note
22
- are separated by blank lines.
23
- """
24
- parts: list[str] = [title.strip()]
25
-
26
- # add each nonempty paragraph line
27
- paragraphs = _IMG_TAG_RE.sub("", paragraphs)
28
- for ln in paragraphs.splitlines():
29
- line = ln.strip()
30
- if line:
31
- parts.append(line)
32
-
33
- # add author_say lines if present
34
- if author_say:
35
- author_lines = [ln.strip() for ln in author_say.splitlines() if ln.strip()]
36
- if author_lines:
37
- parts.append("---")
38
- parts.append("作者说:")
39
- parts.extend(author_lines)
40
-
41
- return "\n\n".join(parts)
42
-
43
-
44
- __all__ = [
45
- "format_chapter",
46
- ]
@@ -1,28 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.text_utils.font_mapping
4
- ----------------------------------------------
5
-
6
- Utility for decoding obfuscated text by applying character-level font mapping.
7
-
8
- This is commonly used to reverse font-based obfuscation in scraped content,
9
- where characters are visually disguised via custom font glyphs but can be
10
- recovered using a known mapping.
11
- """
12
-
13
-
14
def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
    """
    Translate `text` character by character through `font_map`.

    Characters without an entry in the map are copied through unchanged.

    :param text: The input string, possibly containing obfuscated font chars.
    :param font_map: A dict mapping obfuscated chars to real chars.
    :return: The de-obfuscated text.
    """
    lookup = font_map.get
    decoded = [lookup(glyph, glyph) for glyph in text]
    return "".join(decoded)
24
-
25
-
26
- __all__ = [
27
- "apply_font_mapping",
28
- ]
@@ -1,107 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.utils.text_utils.text_cleaning
4
- -----------------------------------------------
5
-
6
- Tools for detecting and removing promotional or ad-like content from text.
7
- """
8
-
9
- import math
10
- import re
11
-
12
- from novel_downloader.utils.file_utils.io import load_blacklisted_words
13
-
14
- # --- Constants & Precompiled Patterns ---
15
-
16
- _BLACKLISTED_WORDS = load_blacklisted_words()
17
-
18
# A parenthesised span; group 1 is the text between the brackets.
# Accepts ASCII parentheses as well as the full-width forms U+FF08 / U+FF09.
_BRACKET_PATTERN = re.compile(r"[(\uff08](.*?)[)\uff09]")
_K_PROMO_PATTERN = re.compile(r"\b\d{1,4}k\b", re.IGNORECASE)


def clean_chapter_title(title: str) -> str:
    """
    Remove bracketed promotional content from a chapter title.

    If any blacklisted word appears inside parentheses (Chinese or English),
    the entire bracketed section is stripped. Bracketed sections without a
    blacklisted word are left untouched.

    :param title: Original title, possibly containing ad text in brackets.
    :return: Title with offending bracketed sections removed and
             surrounding whitespace stripped.
    """

    def _drop_if_promotional(match: re.Match[str]) -> str:
        # Keep the bracketed text verbatim unless it holds a blacklisted word
        inner = match.group(1)
        if any(bw in inner for bw in _BLACKLISTED_WORDS):
            return ""
        return match.group(0)

    return _BRACKET_PATTERN.sub(_drop_if_promotional, title).strip()
37
-
38
-
39
def is_promotional_line(line: str) -> bool:
    """
    Decide whether a line of text looks promotional or ad-like.

    The line is lower-cased, then tested for any blacklisted word and
    for the '###k' vote-count pattern.

    :param line: A single line of text.
    :return: True if it contains promo keywords or a '###k' vote count pattern.
    """
    lowered = line.lower()
    for keyword in _BLACKLISTED_WORDS:
        if keyword in lowered:
            return True
    return _K_PROMO_PATTERN.search(lowered) is not None
52
-
53
-
54
- def content_prefix(
55
- text: str,
56
- n: int,
57
- ignore_chars: set[str] | None = None,
58
- ) -> str:
59
- """
60
- Return the prefix of `text` containing the first `n` non-ignored characters.
61
-
62
- :param text: The full input string.
63
- :param n: Number of content characters to include.
64
- :param ignore_chars: Characters to ignore when counting content.
65
- :return: Truncated string preserving original whitespace and line breaks.
66
- """
67
- ignore = ignore_chars or set()
68
- cnt = 0
69
-
70
- for i, ch in enumerate(text):
71
- if ch not in ignore:
72
- cnt += 1
73
- if cnt >= n:
74
- return text[: i + 1]
75
-
76
- return text
77
-
78
-
79
def truncate_half_lines(text: str) -> str:
    """
    Keep the leading part of `text` that holds half of its non-empty lines.

    The number of non-empty lines to keep is rounded up. Blank lines that
    precede the last kept line are retained, and the kept lines are
    joined with "\\n".

    :param text: Full input text
    :return: Truncated text with first half of lines
    """
    rows = text.splitlines()
    content_positions = [idx for idx, row in enumerate(rows) if row.strip()]
    if not content_positions:
        # Nothing but blank lines: there is no cut-off point
        return "\n".join(rows)

    quota = math.ceil(len(content_positions) / 2)
    last_kept = content_positions[quota - 1]
    return "\n".join(rows[: last_kept + 1])
100
-
101
-
102
- __all__ = [
103
- "clean_chapter_title",
104
- "is_promotional_line",
105
- "content_prefix",
106
- "truncate_half_lines",
107
- ]