novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/clean.py +97 -78
  3. novel_downloader/cli/config.py +177 -0
  4. novel_downloader/cli/download.py +132 -87
  5. novel_downloader/cli/export.py +77 -0
  6. novel_downloader/cli/main.py +21 -28
  7. novel_downloader/config/__init__.py +1 -25
  8. novel_downloader/config/adapter.py +32 -31
  9. novel_downloader/config/loader.py +3 -3
  10. novel_downloader/config/site_rules.py +1 -2
  11. novel_downloader/core/__init__.py +3 -6
  12. novel_downloader/core/downloaders/__init__.py +10 -13
  13. novel_downloader/core/downloaders/base.py +233 -0
  14. novel_downloader/core/downloaders/biquge.py +27 -0
  15. novel_downloader/core/downloaders/common.py +414 -0
  16. novel_downloader/core/downloaders/esjzone.py +27 -0
  17. novel_downloader/core/downloaders/linovelib.py +27 -0
  18. novel_downloader/core/downloaders/qianbi.py +27 -0
  19. novel_downloader/core/downloaders/qidian.py +352 -0
  20. novel_downloader/core/downloaders/sfacg.py +27 -0
  21. novel_downloader/core/downloaders/yamibo.py +27 -0
  22. novel_downloader/core/exporters/__init__.py +37 -0
  23. novel_downloader/core/{savers → exporters}/base.py +73 -44
  24. novel_downloader/core/exporters/biquge.py +25 -0
  25. novel_downloader/core/exporters/common/__init__.py +12 -0
  26. novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
  27. novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
  28. novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
  29. novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
  30. novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
  31. novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
  32. novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
  33. novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
  34. novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
  35. novel_downloader/core/exporters/esjzone.py +25 -0
  36. novel_downloader/core/exporters/linovelib/__init__.py +10 -0
  37. novel_downloader/core/exporters/linovelib/epub.py +449 -0
  38. novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
  39. novel_downloader/core/exporters/linovelib/txt.py +129 -0
  40. novel_downloader/core/exporters/qianbi.py +25 -0
  41. novel_downloader/core/{savers → exporters}/qidian.py +8 -8
  42. novel_downloader/core/exporters/sfacg.py +25 -0
  43. novel_downloader/core/exporters/yamibo.py +25 -0
  44. novel_downloader/core/factory/__init__.py +5 -17
  45. novel_downloader/core/factory/downloader.py +24 -126
  46. novel_downloader/core/factory/exporter.py +58 -0
  47. novel_downloader/core/factory/fetcher.py +96 -0
  48. novel_downloader/core/factory/parser.py +17 -12
  49. novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
  50. novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
  51. novel_downloader/core/fetchers/base/browser.py +383 -0
  52. novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
  53. novel_downloader/core/fetchers/base/session.py +419 -0
  54. novel_downloader/core/fetchers/biquge/__init__.py +14 -0
  55. novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
  56. novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
  57. novel_downloader/core/fetchers/common/__init__.py +14 -0
  58. novel_downloader/core/fetchers/common/browser.py +79 -0
  59. novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
  60. novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
  61. novel_downloader/core/fetchers/esjzone/browser.py +202 -0
  62. novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
  63. novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
  64. novel_downloader/core/fetchers/linovelib/browser.py +178 -0
  65. novel_downloader/core/fetchers/linovelib/session.py +178 -0
  66. novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
  67. novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
  68. novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
  69. novel_downloader/core/fetchers/qidian/__init__.py +14 -0
  70. novel_downloader/core/fetchers/qidian/browser.py +266 -0
  71. novel_downloader/core/fetchers/qidian/session.py +326 -0
  72. novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
  73. novel_downloader/core/fetchers/sfacg/browser.py +189 -0
  74. novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
  75. novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
  76. novel_downloader/core/fetchers/yamibo/browser.py +229 -0
  77. novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
  78. novel_downloader/core/interfaces/__init__.py +8 -12
  79. novel_downloader/core/interfaces/downloader.py +54 -0
  80. novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
  81. novel_downloader/core/interfaces/fetcher.py +162 -0
  82. novel_downloader/core/interfaces/parser.py +6 -7
  83. novel_downloader/core/parsers/__init__.py +5 -6
  84. novel_downloader/core/parsers/base.py +9 -13
  85. novel_downloader/core/parsers/biquge/main_parser.py +12 -13
  86. novel_downloader/core/parsers/common/helper.py +3 -3
  87. novel_downloader/core/parsers/common/main_parser.py +39 -34
  88. novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
  89. novel_downloader/core/parsers/linovelib/__init__.py +10 -0
  90. novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
  91. novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
  92. novel_downloader/core/parsers/qidian/__init__.py +2 -11
  93. novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
  94. novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
  95. novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
  96. novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
  97. novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
  98. novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
  99. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
  100. novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
  101. novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
  102. novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
  103. novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
  104. novel_downloader/locales/en.json +18 -2
  105. novel_downloader/locales/zh.json +18 -2
  106. novel_downloader/models/__init__.py +64 -0
  107. novel_downloader/models/browser.py +21 -0
  108. novel_downloader/models/chapter.py +25 -0
  109. novel_downloader/models/config.py +100 -0
  110. novel_downloader/models/login.py +20 -0
  111. novel_downloader/models/site_rules.py +99 -0
  112. novel_downloader/models/tasks.py +33 -0
  113. novel_downloader/models/types.py +15 -0
  114. novel_downloader/resources/config/settings.toml +31 -25
  115. novel_downloader/resources/json/linovelib_font_map.json +3573 -0
  116. novel_downloader/tui/__init__.py +7 -0
  117. novel_downloader/tui/app.py +32 -0
  118. novel_downloader/tui/main.py +17 -0
  119. novel_downloader/tui/screens/__init__.py +14 -0
  120. novel_downloader/tui/screens/home.py +191 -0
  121. novel_downloader/tui/screens/login.py +74 -0
  122. novel_downloader/tui/styles/home_layout.tcss +79 -0
  123. novel_downloader/tui/widgets/richlog_handler.py +24 -0
  124. novel_downloader/utils/__init__.py +6 -0
  125. novel_downloader/utils/chapter_storage.py +25 -38
  126. novel_downloader/utils/constants.py +15 -5
  127. novel_downloader/utils/cookies.py +66 -0
  128. novel_downloader/utils/crypto_utils.py +1 -74
  129. novel_downloader/utils/file_utils/io.py +1 -1
  130. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  131. novel_downloader/utils/fontocr/ocr_v2.py +2 -2
  132. novel_downloader/utils/hash_store.py +10 -18
  133. novel_downloader/utils/hash_utils.py +3 -2
  134. novel_downloader/utils/logger.py +2 -3
  135. novel_downloader/utils/network.py +53 -39
  136. novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
  137. novel_downloader/utils/text_utils/font_mapping.py +1 -1
  138. novel_downloader/utils/text_utils/text_cleaning.py +1 -1
  139. novel_downloader/utils/time_utils/datetime_utils.py +3 -3
  140. novel_downloader/utils/time_utils/sleep_utils.py +3 -3
  141. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
  142. novel_downloader-1.4.0.dist-info/RECORD +170 -0
  143. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
  144. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
  145. novel_downloader/cli/interactive.py +0 -66
  146. novel_downloader/cli/settings.py +0 -177
  147. novel_downloader/config/models.py +0 -187
  148. novel_downloader/core/downloaders/base/__init__.py +0 -14
  149. novel_downloader/core/downloaders/base/base_async.py +0 -153
  150. novel_downloader/core/downloaders/base/base_sync.py +0 -208
  151. novel_downloader/core/downloaders/biquge/__init__.py +0 -14
  152. novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
  153. novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
  154. novel_downloader/core/downloaders/common/__init__.py +0 -14
  155. novel_downloader/core/downloaders/common/common_async.py +0 -218
  156. novel_downloader/core/downloaders/common/common_sync.py +0 -210
  157. novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
  158. novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
  159. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
  160. novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
  161. novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
  162. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
  163. novel_downloader/core/downloaders/qidian/__init__.py +0 -10
  164. novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
  165. novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
  166. novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
  167. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
  168. novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
  169. novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
  170. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
  171. novel_downloader/core/factory/requester.py +0 -144
  172. novel_downloader/core/factory/saver.py +0 -56
  173. novel_downloader/core/interfaces/async_downloader.py +0 -36
  174. novel_downloader/core/interfaces/async_requester.py +0 -84
  175. novel_downloader/core/interfaces/sync_downloader.py +0 -36
  176. novel_downloader/core/interfaces/sync_requester.py +0 -82
  177. novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
  178. novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
  179. novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
  180. novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
  181. novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
  182. novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
  183. novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
  184. novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
  185. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
  186. novel_downloader/core/requesters/base/async_session.py +0 -410
  187. novel_downloader/core/requesters/base/browser.py +0 -337
  188. novel_downloader/core/requesters/base/session.py +0 -378
  189. novel_downloader/core/requesters/biquge/__init__.py +0 -14
  190. novel_downloader/core/requesters/common/__init__.py +0 -17
  191. novel_downloader/core/requesters/common/session.py +0 -113
  192. novel_downloader/core/requesters/esjzone/__init__.py +0 -13
  193. novel_downloader/core/requesters/esjzone/session.py +0 -235
  194. novel_downloader/core/requesters/qianbi/__init__.py +0 -13
  195. novel_downloader/core/requesters/qidian/__init__.py +0 -21
  196. novel_downloader/core/requesters/qidian/broswer.py +0 -307
  197. novel_downloader/core/requesters/qidian/session.py +0 -290
  198. novel_downloader/core/requesters/sfacg/__init__.py +0 -13
  199. novel_downloader/core/requesters/sfacg/session.py +0 -242
  200. novel_downloader/core/requesters/yamibo/__init__.py +0 -13
  201. novel_downloader/core/requesters/yamibo/session.py +0 -237
  202. novel_downloader/core/savers/__init__.py +0 -34
  203. novel_downloader/core/savers/biquge.py +0 -25
  204. novel_downloader/core/savers/common/__init__.py +0 -12
  205. novel_downloader/core/savers/epub_utils/__init__.py +0 -26
  206. novel_downloader/core/savers/esjzone.py +0 -25
  207. novel_downloader/core/savers/qianbi.py +0 -25
  208. novel_downloader/core/savers/sfacg.py +0 -25
  209. novel_downloader/core/savers/yamibo.py +0 -25
  210. novel_downloader/resources/config/rules.toml +0 -196
  211. novel_downloader-1.3.2.dist-info/RECORD +0 -165
  212. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
  213. {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.cookies
4
+ ------------------------------
5
+
6
+ Utility for normalizing cookie input from user configuration.
7
+ """
8
+
9
+ import json
10
+ from collections.abc import Mapping
11
+ from email.utils import parsedate_to_datetime
12
+ from http.cookies import SimpleCookie
13
+ from pathlib import Path
14
+
15
+
16
+ def resolve_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
17
+ """
18
+ Parse cookies from a string or dictionary into a standard dictionary.
19
+
20
+ Supports input like:
21
+ - "key1=value1; key2=value2"
22
+ - {"key1": "value1", "key2": "value2"}
23
+
24
+ :param cookies: Cookie string or dict-like object (e.g., from config)
25
+ :return: A normalized cookie dictionary (key -> value)
26
+ :raises TypeError: If the input is neither string nor dict-like
27
+ """
28
+ if isinstance(cookies, str):
29
+ filtered = "; ".join(pair for pair in cookies.split(";") if "=" in pair)
30
+ parsed = SimpleCookie()
31
+ parsed.load(filtered)
32
+ return {k: v.value for k, v in parsed.items()}
33
+ elif isinstance(cookies, Mapping):
34
+ return {str(k).strip(): str(v).strip() for k, v in cookies.items()}
35
+ raise TypeError("Unsupported cookie format: must be str or dict-like")
36
+
37
+
38
+ def parse_cookie_expires(value: str | None) -> int:
39
+ if not value:
40
+ return -1
41
+ try:
42
+ return int(value)
43
+ except (ValueError, TypeError):
44
+ try:
45
+ dt = parsedate_to_datetime(value)
46
+ return int(dt.timestamp())
47
+ except Exception:
48
+ return -1
49
+
50
+
51
def find_cookie_value(state_files: list[Path], key: str) -> str:
    """
    Look up a cookie value by name in a list of JSON state files.

    Each file is read as UTF-8 JSON and is expected to hold an object with a
    ``"cookies"`` list whose entries are objects with ``"name"`` and
    ``"value"`` keys. Files are searched in the given order and the first
    matching entry whose value is a string is returned.

    The lookup is best-effort: files that are missing, unreadable, not valid
    JSON, or not shaped as described are skipped instead of raising.

    :param state_files: Paths of the state files to search, in priority order.
    :param key: Cookie name to look for.
    :return: The cookie value, or an empty string when nothing matches.
    """
    for state_file in state_files:
        try:
            with state_file.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError
            continue

        if not isinstance(data, dict):
            continue
        cookies = data.get("cookies")
        if not isinstance(cookies, list):
            continue

        for cookie in cookies:
            if not isinstance(cookie, dict) or cookie.get("name") != key:
                continue
            value = cookie.get("value")
            if isinstance(value, str):
                return value
    return ""
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env python3
1
2
  """
2
3
  novel_downloader.utils.crypto_utils
3
4
  -----------------------------------
@@ -8,11 +9,6 @@ Generic cryptographic utilities
8
9
  from __future__ import annotations
9
10
 
10
11
  import base64
11
- import hashlib
12
- import json
13
- import random
14
- import time
15
- from typing import Any
16
12
 
17
13
 
18
14
  def rc4_crypt(
@@ -73,72 +69,3 @@ def rc4_crypt(
73
69
  return plain_bytes.decode(encoding, errors="replace")
74
70
 
75
71
  raise ValueError("Mode must be 'encrypt' or 'decrypt'.")
76
-
77
-
78
- def _get_key() -> str:
79
- encoded = "Lj1qYxMuaXBjMg=="
80
- decoded = base64.b64decode(encoded)
81
- key = "".join([chr(b ^ 0x5A) for b in decoded])
82
- return key
83
-
84
-
85
- def _d(b64str: str) -> str:
86
- return base64.b64decode(b64str).decode()
87
-
88
-
89
- def patch_qd_payload_token(
90
- enc_token: str,
91
- new_uri: str,
92
- *,
93
- key: str = "",
94
- ) -> str:
95
- """
96
- Patch a timestamp-bearing token with fresh timing and checksum info.
97
-
98
- :param enc_token: Encrypted token string from a live request.
99
- :type enc_token: str
100
- :param new_uri: URI used in checksum generation.
101
- :type new_uri: str
102
- :param key: RC4 key extracted from front-end JavaScript (optional).
103
- :type key: str, optional
104
-
105
- :return: Updated token with new timing and checksum values.
106
- :rtype: str
107
- """
108
- if not key:
109
- key = _get_key()
110
-
111
- # Step 1 - decrypt --------------------------------------------------
112
- decrypted_json: str = rc4_crypt(key, enc_token, mode="decrypt")
113
- payload: dict[str, Any] = json.loads(decrypted_json)
114
-
115
- # Step 2 - rebuild timing fields -----------------------------------
116
- loadts = int(time.time() * 1000) # ms since epoch
117
- # Simulate the JS duration: N(600, 150) pushed into [300, 1000]
118
- duration = max(300, min(1000, int(random.normalvariate(600, 150))))
119
- timestamp = loadts + duration
120
-
121
- # Step 3 - recalculate ------------------------------------
122
- fp_key = _d("ZmluZ2VycHJpbnQ=")
123
- ab_key = _d("YWJub3JtYWw=")
124
- ck_key = _d("Y2hlY2tzdW0=")
125
- lt_key = _d("bG9hZHRz")
126
- ts_key = _d("dGltZXN0YW1w")
127
-
128
- fp_val = payload.get(fp_key, "")
129
- ab_val = payload.get(ab_key, "0" * 32)
130
- comb = f"{new_uri}{loadts}{fp_val}"
131
- ck_val = hashlib.md5(comb.encode("utf-8")).hexdigest()
132
-
133
- new_payload = {
134
- lt_key: loadts,
135
- ts_key: timestamp,
136
- fp_key: fp_val,
137
- ab_key: ab_val,
138
- ck_key: ck_val,
139
- }
140
-
141
- # Step 4 - encrypt and return --------------------------------------
142
- return rc4_crypt(
143
- key, json.dumps(new_payload, separators=(",", ":")), mode="encrypt"
144
- )
@@ -103,7 +103,7 @@ def _write_file(
103
103
  tmp.write(content_to_write)
104
104
  tmp_path = Path(tmp.name)
105
105
  tmp_path.replace(path)
106
- logger.info("[file] '%s' written successfully", path)
106
+ logger.debug("[file] '%s' written successfully", path)
107
107
  return True
108
108
  except Exception as exc:
109
109
  logger.warning("[file] Error writing %r: %s", path, exc)
@@ -18,6 +18,7 @@ import paddle
18
18
  from fontTools.ttLib import TTFont
19
19
  from paddleocr import PaddleOCR
20
20
  from PIL import Image, ImageDraw, ImageFont
21
+ from PIL.Image import Transpose
21
22
 
22
23
  from novel_downloader.utils.constants import (
23
24
  REC_CHAR_MODEL_FILES,
@@ -142,7 +143,7 @@ class FontOCRV1:
142
143
  y = (size - h) // 2 - bbox[1]
143
144
  draw.text((x, y), char, fill=0, font=render_font)
144
145
  if is_reflect:
145
- img = img.transpose(Image.FLIP_LEFT_RIGHT)
146
+ img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
146
147
 
147
148
  img_np = np.array(img)
148
149
  if np.unique(img_np).size == 1:
@@ -570,7 +570,7 @@ class FontOCRV2:
570
570
  else:
571
571
  ocr_fallback = raw_ocr
572
572
 
573
- # Vecembedding scores
573
+ # Vec-embedding scores
574
574
  raw_vec: list[tuple[str, float]] | list[list[tuple[str, float]]] = (
575
575
  self.match_text_by_embedding(fallback_imgs, top_k=top_k)
576
576
  if (self.use_vec and fallback_imgs)
@@ -624,7 +624,7 @@ class FontOCRV2:
624
624
  else:
625
625
  fused_batch.append(next(fallback_iter))
626
626
 
627
- # Unwrap singleimage case
627
+ # Unwrap single-image case
628
628
  return fused_batch[0] if single else fused_batch
629
629
 
630
630
  def _chunked(self, seq: list[T], size: int) -> Generator[list[T], None, None]:
@@ -13,7 +13,6 @@ import logging
13
13
  from collections.abc import Callable
14
14
  from pathlib import Path
15
15
 
16
- import numpy as np
17
16
  from PIL import Image
18
17
 
19
18
  from .constants import HASH_STORE_FILE
@@ -100,25 +99,21 @@ class ImageHashStore:
100
99
  """Load store from disk and rebuild BK-Tree index."""
101
100
  if not self._path.exists():
102
101
  self._hash.clear()
103
- logger.info(
102
+ logger.debug(
104
103
  "[ImageHashStore] No file found at %s, starting empty.", self._path
105
104
  )
106
105
  return
107
- else:
108
- if self._path.suffix == ".npy":
109
- arr = np.load(self._path, allow_pickle=True).item()
110
- self._hash = {lbl: set(v) for lbl, v in arr.items()}
111
- else:
112
- txt = self._path.read_text(encoding="utf-8")
113
- obj = json.loads(txt) or {}
114
- self._hash = {lbl: set(obj.get(lbl, [])) for lbl in obj}
106
+
107
+ txt = self._path.read_text(encoding="utf-8")
108
+ obj = json.loads(txt) or {}
109
+ self._hash = {lbl: set(obj.get(lbl, [])) for lbl in obj}
115
110
 
116
111
  # rebuild reverse map and BK-Tree
117
112
  self._hash_to_labels.clear()
118
113
  for lbl, hs in self._hash.items():
119
114
  for h in hs:
120
115
  self._hash_to_labels.setdefault(h, []).append(lbl)
121
- logger.info(
116
+ logger.debug(
122
117
  "[ImageHashStore] Loaded hash store from %s with %d hashes",
123
118
  self._path,
124
119
  sum(len(v) for v in self._hash.values()),
@@ -134,7 +129,7 @@ class ImageHashStore:
134
129
  self._bk_root = _BKNode(h)
135
130
  else:
136
131
  self._bk_root.add(h, self._hd)
137
- logger.info(
132
+ logger.debug(
138
133
  "[ImageHashStore] BK-tree index built with %d unique hashes",
139
134
  len(self._hash_to_labels),
140
135
  )
@@ -143,12 +138,9 @@ class ImageHashStore:
143
138
  """Persist current store to disk."""
144
139
  self._path.parent.mkdir(parents=True, exist_ok=True)
145
140
  data = {lbl: list(s) for lbl, s in self._hash.items()}
146
- if self._path.suffix == ".npy":
147
- np.save(self._path, data)
148
- else:
149
- txt = json.dumps(data, ensure_ascii=False, indent=2)
150
- self._path.write_text(txt, encoding="utf-8")
151
- logger.info("[ImageHashStore] Saved hash store to %s", self._path)
141
+ txt = json.dumps(data, ensure_ascii=False, indent=2)
142
+ self._path.write_text(txt, encoding="utf-8")
143
+ logger.debug("[ImageHashStore] Saved hash store to %s", self._path)
152
144
 
153
145
  def _maybe_save(self) -> None:
154
146
  if self._auto:
@@ -16,6 +16,7 @@ Provides:
16
16
  """
17
17
 
18
18
  import numpy as np
19
+ from numpy.typing import NDArray
19
20
  from PIL import Image
20
21
  from scipy.fft import dct as dct_1d
21
22
 
@@ -24,7 +25,7 @@ HASH_SIZE = 10 # default is 8
24
25
  HASH_DISTANCE_THRESHOLD = 5
25
26
 
26
27
 
27
- def hash_to_int(hash_array: np.ndarray) -> int:
28
+ def hash_to_int(hash_array: NDArray[np.bool_]) -> int:
28
29
  """
29
30
  Convert a boolean hash array to an integer.
30
31
 
@@ -60,7 +61,7 @@ def fast_hamming_distance(hash_1: int, hash_2: int) -> int:
60
61
  return count
61
62
 
62
63
 
63
- def _threshold_and_pack(dct_low: np.ndarray) -> int:
64
+ def _threshold_and_pack(dct_low: NDArray[np.float64]) -> int:
64
65
  """
65
66
  Convert a low-frequency DCT matrix into a binary hash.
66
67
 
@@ -11,11 +11,10 @@ import logging
11
11
  from datetime import datetime
12
12
  from logging.handlers import TimedRotatingFileHandler
13
13
  from pathlib import Path
14
- from typing import Literal
15
14
 
16
- from .constants import LOGGER_DIR, LOGGER_NAME
15
+ from novel_downloader.models import LogLevel
17
16
 
18
- LogLevel = Literal["DEBUG", "INFO", "WARNING", "ERROR"]
17
+ from .constants import LOGGER_DIR, LOGGER_NAME
19
18
 
20
19
  LOG_LEVELS: dict[LogLevel, int] = {
21
20
  "DEBUG": logging.DEBUG,
@@ -16,7 +16,7 @@ from urllib.parse import unquote, urlparse
16
16
  import requests
17
17
 
18
18
  from .constants import DEFAULT_HEADERS, DEFAULT_IMAGE_SUFFIX
19
- from .file_utils.io import _get_non_conflicting_path, _write_file, read_binary_file
19
+ from .file_utils.io import _get_non_conflicting_path, _write_file
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
@@ -84,28 +84,29 @@ def image_url_to_filename(url: str) -> str:
84
84
  return filename
85
85
 
86
86
 
87
- def download_image_as_bytes(
87
+ def download_image(
88
88
  url: str,
89
89
  target_folder: str | Path | None = None,
90
+ target_name: str | None = None,
90
91
  *,
91
92
  timeout: int = 10,
92
93
  retries: int = 3,
93
94
  backoff: float = 0.5,
95
+ headers: dict[str, str] | None = None,
94
96
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
95
- ) -> bytes | None:
97
+ ) -> Path | None:
96
98
  """
97
- Download an image from a given URL and return its content as bytes.
98
-
99
- If on_exist='skip' and the file already exists, it will be read from disk
100
- instead of being downloaded again.
99
+ Download an image from `url` and save it to `target_folder`, returning the Path.
100
+ Can override the filename via `target_name`.
101
101
 
102
102
  :param url: Image URL. Can start with 'http', '//', or without protocol.
103
- :param target_folder: Optional folder to save the image (str or Path).
103
+ :param target_folder: Directory to save into (defaults to cwd).
104
+ :param target_name: Optional filename (with or without extension).
104
105
  :param timeout: Request timeout in seconds.
105
106
  :param retries: Number of retry attempts.
106
107
  :param backoff: Base delay between retries (exponential backoff).
107
108
  :param on_exist: What to do if file exists: 'overwrite', 'skip', or 'rename'.
108
- :return: Image content as bytes, or None if failed.
109
+ :return: Path to the saved image, or `None` on any failure.
109
110
  """
110
111
  # Normalize URL
111
112
  if url.startswith("//"):
@@ -113,42 +114,55 @@ def download_image_as_bytes(
113
114
  elif not url.startswith("http"):
114
115
  url = "https://" + url
115
116
 
116
- save_path = None
117
- if target_folder:
118
- target_folder = Path(target_folder)
119
- filename = image_url_to_filename(url)
120
- save_path = target_folder / filename
121
-
122
- if on_exist == "skip" and save_path.exists():
123
- logger.info(
124
- "[image] '%s' exists, skipping download and reading from disk.",
125
- save_path,
126
- )
127
- return read_binary_file(save_path)
117
+ folder = Path(target_folder) if target_folder else Path.cwd()
118
+ folder.mkdir(parents=True, exist_ok=True)
119
+
120
+ if target_name:
121
+ name = target_name
122
+ if not Path(name).suffix:
123
+ # infer ext from URL-derived name
124
+ name += Path(image_url_to_filename(url)).suffix
125
+ else:
126
+ name = image_url_to_filename(url)
127
+ save_path = folder / name
128
+
129
+ # Handle existing file
130
+ if save_path.exists():
131
+ if on_exist == "skip":
132
+ logger.debug("Skipping download; file exists: %s", save_path)
133
+ return save_path
134
+ if on_exist == "rename":
135
+ save_path = _get_non_conflicting_path(save_path)
128
136
 
129
137
  # Proceed with download
130
- response = http_get_with_retry(
138
+ resp = http_get_with_retry(
131
139
  url,
132
140
  retries=retries,
133
141
  timeout=timeout,
134
142
  backoff=backoff,
135
- headers=DEFAULT_HEADERS,
143
+ headers=headers or DEFAULT_HEADERS,
136
144
  stream=False,
137
145
  )
138
146
 
139
- if response and response.ok:
140
- content = response.content
141
-
142
- if save_path:
143
- _write_file(
144
- content=content,
145
- filepath=save_path,
146
- mode="wb",
147
- on_exist=on_exist,
148
- )
149
-
150
- return content
147
+ if not (resp and resp.ok):
148
+ logger.warning(
149
+ "Failed to download %s (status=%s)",
150
+ url,
151
+ getattr(resp, "status_code", None),
152
+ )
153
+ return None
151
154
 
155
+ # Write to disk
156
+ try:
157
+ _write_file(
158
+ content=resp.content,
159
+ filepath=save_path,
160
+ mode="wb",
161
+ on_exist=on_exist,
162
+ )
163
+ return save_path
164
+ except Exception:
165
+ logger.exception("Error saving image to %s", save_path)
152
166
  return None
153
167
 
154
168
 
@@ -191,7 +205,7 @@ def download_font_file(
191
205
 
192
206
  # If skip and file exists -> return immediately
193
207
  if on_exist == "skip" and font_path.exists():
194
- logger.info("[font] File exists, skipping download: %s", font_path)
208
+ logger.debug("[font] File exists, skipping download: %s", font_path)
195
209
  return font_path
196
210
 
197
211
  # Retry download with exponential backoff
@@ -214,7 +228,7 @@ def download_font_file(
214
228
  if chunk:
215
229
  f.write(chunk)
216
230
 
217
- logger.info("[font] Font saved to: %s", font_path)
231
+ logger.debug("[font] Font saved to: %s", font_path)
218
232
  return font_path
219
233
 
220
234
  except Exception as e:
@@ -258,7 +272,7 @@ def download_js_file(
258
272
  save_path = target_folder / filename
259
273
 
260
274
  if on_exist == "skip" and save_path.exists():
261
- logger.info("[js] File exists, skipping download: %s", save_path)
275
+ logger.debug("[js] File exists, skipping download: %s", save_path)
262
276
  return save_path
263
277
 
264
278
  response = http_get_with_retry(
@@ -278,7 +292,7 @@ def download_js_file(
278
292
 
279
293
  try:
280
294
  _write_file(content=content, filepath=save_path, mode="wb")
281
- logger.info("[js] JS file saved to: %s", save_path)
295
+ logger.debug("[js] JS file saved to: %s", save_path)
282
296
  return save_path
283
297
  except Exception as e:
284
298
  logger.error("[js] Error writing JS to disk: %s", e)
@@ -6,13 +6,17 @@ novel_downloader.utils.text_utils.chapter_formatting
6
6
  Format chapter content with title, paragraph blocks, and optional author notes.
7
7
  """
8
8
 
9
+ import re
10
+
11
+ _IMG_TAG_RE = re.compile(r"<img[^>]*>")
12
+
9
13
 
10
14
  def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -> str:
11
15
  """
12
16
  Build a formatted chapter string with title, paragraphs, and optional author note.
13
17
 
14
18
  :param title: The chapter title.
15
- :param paragraphs: Raw multiline string; lines are treated as paragraphs.
19
+ :param paragraphs: Raw multi-line string; lines are treated as paragraphs.
16
20
  :param author_say: Optional author comment to append at the end.
17
21
  :return: A single string where title, paragraphs, and author note
18
22
  are separated by blank lines.
@@ -20,6 +24,7 @@ def format_chapter(title: str, paragraphs: str, author_say: str | None = None) -
20
24
  parts: list[str] = [title.strip()]
21
25
 
22
26
  # add each nonempty paragraph line
27
+ paragraphs = _IMG_TAG_RE.sub("", paragraphs)
23
28
  for ln in paragraphs.splitlines():
24
29
  line = ln.strip()
25
30
  if line:
@@ -18,7 +18,7 @@ def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
18
18
 
19
19
  :param text: The input string, possibly containing obfuscated font chars.
20
20
  :param font_map: A dict mapping obfuscated chars to real chars.
21
- :return: The deobfuscated text.
21
+ :return: The de-obfuscated text.
22
22
  """
23
23
  return "".join(font_map.get(ch, ch) for ch in text)
24
24
 
@@ -37,7 +37,7 @@ def clean_chapter_title(title: str) -> str:
37
37
 
38
38
  def is_promotional_line(line: str) -> bool:
39
39
  """
40
- Check if a line of text likely contains promotional or adlike content.
40
+ Check if a line of text likely contains promotional or ad-like content.
41
41
 
42
42
  :param line: A single line of text.
43
43
  :return: True if it contains promo keywords or a '###k' vote count pattern.
@@ -106,9 +106,9 @@ def calculate_time_difference(
106
106
  """
107
107
  Calculate the difference between two datetime values.
108
108
 
109
- :param from_time_str: Datetime string "YYYY-MM-DD HH:MM:SS" for the start.
109
+ :param from_time_str: Date-time string "YYYY-MM-DD HH:MM:SS" for the start.
110
110
  :param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
111
- :param to_time_str: Optional datetime string for the end; if None, uses now().
111
+ :param to_time_str: Optional date-time string for the end; if None, uses now().
112
112
  :param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.
113
113
  :return: Tuple (days, hours, minutes, seconds).
114
114
  """
@@ -138,7 +138,7 @@ def calculate_time_difference(
138
138
 
139
139
  except Exception as e:
140
140
  logger.warning("[time] Failed to calculate time difference: %s", e)
141
- return 999, 23, 59, 59
141
+ return 0, 0, 0, 0
142
142
 
143
143
 
144
144
  __all__ = [
@@ -56,7 +56,7 @@ def sleep_with_random_delay(
56
56
  if max_sleep is not None:
57
57
  duration = min(duration, max_sleep)
58
58
 
59
- logger.info("[time] Sleeping for %.2f seconds", duration)
59
+ logger.debug("[time] Sleeping for %.2f seconds", duration)
60
60
  time.sleep(duration)
61
61
  return
62
62
 
@@ -82,7 +82,7 @@ async def async_sleep_with_random_delay(
82
82
  :param mul_spread: Maximum multiplier factor for base; drawn from [1.0, mul_spread].
83
83
  :param max_sleep: Optional upper limit for the final sleep duration.
84
84
  """
85
- if base < 0 or add_spread < 0 or mul_spread < 0:
85
+ if base < 0 or add_spread < 0 or mul_spread < 1.0:
86
86
  logger.warning(
87
87
  "[async sleep] Invalid parameters: base=%s, add_spread=%s, mul_spread=%s",
88
88
  base,
@@ -98,7 +98,7 @@ async def async_sleep_with_random_delay(
98
98
  if max_sleep is not None:
99
99
  duration = min(duration, max_sleep)
100
100
 
101
- logger.info("[async time] Sleeping for %.2f seconds", duration)
101
+ logger.debug("[async time] Sleeping for %.2f seconds", duration)
102
102
  await asyncio.sleep(duration)
103
103
 
104
104