novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -6,295 +6,154 @@ novel_downloader.utils.network
 Utilities for handling HTTP requests and downloading remote resources.
 """
 
+__all__ = ["download"]
+
 import logging
-import random
-import time
 from pathlib import Path
 from typing import Literal
 from urllib.parse import unquote, urlparse
 
 import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
-from .constants import DEFAULT_HEADERS, DEFAULT_IMAGE_SUFFIX
-from .file_utils.io import _get_non_conflicting_path, _write_file
+from .constants import DEFAULT_HEADERS
+from .file_utils import sanitize_filename
+from .file_utils.io import _get_non_conflicting_path, write_file
 
 logger = logging.getLogger(__name__)
-
 _DEFAULT_CHUNK_SIZE = 8192  # 8KB per chunk for streaming downloads
 
 
-def http_get_with_retry(
-    url: str,
-    *,
-    retries: int = 3,
-    timeout: int = 10,
-    backoff: float = 0.5,
-    headers: dict[str, str] | None = None,
-    stream: bool = False,
-) -> requests.Response | None:
+def _normalize_url(url: str) -> str:
     """
-    Perform a GET request with retry support.
-
-    :param url: URL to request.
-    :param retries: Number of retry attempts.
-    :param timeout: Timeout in seconds per request.
-    :param backoff: Base backoff delay between retries.
-    :param headers: Optional HTTP headers.
-    :param stream: Whether to stream the response.
-    :return: Response object if successful, else None.
+    Ensure URL has scheme, defaulting to https:// if missing.
     """
-    for attempt in range(1, retries + 1):
-        try:
-            response = requests.get(
-                url, timeout=timeout, headers=headers, stream=stream
-            )
-            response.raise_for_status()
-            return response
-        except requests.RequestException as e:
-            logger.warning("[http] Attempt %s/%s failed: %s", attempt, retries, e)
-            if attempt < retries:
-                sleep_time = backoff * (2 ** (attempt - 1)) + random.uniform(0, 0.1)
-                time.sleep(sleep_time)
-        except Exception as e:
-            logger.error("[http] Unexpected error: %s", e)
-            break
-
-    logger.error("[http] Failed after %s attempts: %s", retries, url)
-    return None
-
+    if url.startswith("//"):
+        return "https:" + url
+    if not url.startswith(("http://", "https://")):
+        return "https://" + url
+    return url
 
-def image_url_to_filename(url: str) -> str:
-    """
-    Parse and sanitize a image filename from a URL.
-    If no filename or suffix exists, fallback to default name and extension.
 
-    :param url: URL string
-    :return: Safe filename string
-    """
+def _build_filepath(
+    folder: Path,
+    url: str,
+    filename: str | None,
+    default_suffix: str,
+    on_exist: Literal["overwrite", "skip", "rename"],
+) -> Path:
     parsed_url = urlparse(url)
-    path = unquote(parsed_url.path)
-    filename = Path(path).name
-
-    if not filename:
-        filename = "image"
-
-    if not Path(filename).suffix:
-        filename += DEFAULT_IMAGE_SUFFIX
-
-    return filename
+    url_path = Path(unquote(parsed_url.path))
+
+    raw_name = filename or url_path.name or "unnamed"
+    name = sanitize_filename(raw_name)
+    suffix = default_suffix or url_path.suffix
+    if suffix and not suffix.startswith("."):
+        suffix = "." + suffix
+
+    file_path = folder / name
+    if not file_path.suffix and suffix:
+        file_path = file_path.with_suffix(suffix)
+
+    if on_exist == "rename":
+        file_path = _get_non_conflicting_path(file_path)
+    return file_path
+
+
+def _make_session(
+    retries: int,
+    backoff: float,
+    headers: dict[str, str] | None,
+) -> requests.Session:
+    session = requests.Session()
+    session.headers.update(headers or DEFAULT_HEADERS)
+
+    retry = Retry(
+        total=retries,
+        backoff_factor=backoff,
+        status_forcelist=[429, 500, 502, 503, 504],
+        allowed_methods={"GET", "HEAD", "OPTIONS"},
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount("https://", adapter)
+    session.mount("http://", adapter)
+    return session
 
 
-def download_image(
+def download(
     url: str,
-    target_folder: str | Path | None = None,
-    target_name: str | None = None,
+    target_dir: str | Path | None = None,
+    filename: str | None = None,
     *,
     timeout: int = 10,
     retries: int = 3,
     backoff: float = 0.5,
     headers: dict[str, str] | None = None,
+    stream: bool = False,
     on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
+    default_suffix: str = "",
+    chunk_size: int = _DEFAULT_CHUNK_SIZE,
 ) -> Path | None:
     """
-    Download an image from `url` and save it to `target_folder`, returning the Path.
-    Can override the filename via `target_name`.
-
-    :param url: Image URL. Can start with 'http', '//', or without protocol.
-    :param target_folder: Directory to save into (defaults to cwd).
-    :param target_name: Optional filename (with or without extension).
-    :param timeout: Request timeout in seconds.
-    :param retries: Number of retry attempts.
-    :param backoff: Base delay between retries (exponential backoff).
-    :param on_exist: What to do if file exists: 'overwrite', 'skip', or 'rename'.
-    :return: Path to the saved image, or `None` on any failure.
+    Download a URL to disk, with retries, optional rename/skip, and cleanup on failure.
+
+    :param url: the file URL.
+    :param target_dir: directory to save into.
+    :param filename: override the basename (else from URL path).
+    :param timeout: per-request timeout.
+    :param retries: GET retry count.
+    :param backoff: exponential backoff base.
+    :param headers: optional headers.
+    :param stream: Whether to stream the response.
+    :param on_exist: if 'skip', return filepath; if 'rename', auto-rename.
+    :param default_suffix: used if no suffix in URL or filename.
+    :param chunk_size: streaming chunk size.
+    :return: path to the downloaded file.
     """
-    # Normalize URL
-    if url.startswith("//"):
-        url = "https:" + url
-    elif not url.startswith("http"):
-        url = "https://" + url
+    url = _normalize_url(url)
 
-    folder = Path(target_folder) if target_folder else Path.cwd()
+    folder = Path(target_dir) if target_dir else Path.cwd()
     folder.mkdir(parents=True, exist_ok=True)
 
-    if target_name:
-        name = target_name
-        if not Path(name).suffix:
-            # infer ext from URL-derived name
-            name += Path(image_url_to_filename(url)).suffix
-    else:
-        name = image_url_to_filename(url)
-    save_path = folder / name
-
-    # Handle existing file
-    if save_path.exists():
-        if on_exist == "skip":
-            logger.debug("Skipping download; file exists: %s", save_path)
-            return save_path
-        if on_exist == "rename":
-            save_path = _get_non_conflicting_path(save_path)
-
-    # Proceed with download
-    resp = http_get_with_retry(
-        url,
-        retries=retries,
-        timeout=timeout,
-        backoff=backoff,
-        headers=headers or DEFAULT_HEADERS,
-        stream=False,
-    )
-
-    if not (resp and resp.ok):
-        logger.warning(
-            "Failed to download %s (status=%s)",
-            url,
-            getattr(resp, "status_code", None),
-        )
-        return None
-
-    # Write to disk
-    try:
-        _write_file(
-            content=resp.content,
-            filepath=save_path,
-            mode="wb",
-            on_exist=on_exist,
-        )
-        return save_path
-    except Exception:
-        logger.exception("Error saving image to %s", save_path)
-        return None
-
-
-def download_font_file(
-    url: str,
-    target_folder: str | Path,
-    *,
-    timeout: int = 10,
-    retries: int = 3,
-    backoff: float = 0.5,
-    on_exist: Literal["overwrite", "skip", "rename"] = "skip",
-) -> Path | None:
-    """
-    Download a font file from a URL and save it locally with retry and overwrite control
-
-    :param url: Fully-qualified font file URL.
-    :param target_folder: Local folder to save the font file.
-    :param timeout: Timeout for each request (in seconds).
-    :param retries: Number of retry attempts.
-    :param backoff: Base backoff time between retries (in seconds).
-    :param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
-    :return: Path to the saved font file, or None if failed.
-    """
-    # Validate and parse URL
-    parsed = urlparse(url)
-    if not parsed.scheme or not parsed.netloc:
-        logger.warning("[font] Invalid URL: %s", url)
-        return None
-
-    # Determine filename
-    filename = Path(unquote(parsed.path)).name
-    if not filename:
-        logger.warning("[font] Could not extract filename from URL: %s", url)
-        return None
-
-    # Resolve save path
-    target_folder = Path(target_folder)
-    target_folder.mkdir(parents=True, exist_ok=True)
-    font_path = target_folder / filename
-
-    # If skip and file exists -> return immediately
-    if on_exist == "skip" and font_path.exists():
-        logger.debug("[font] File exists, skipping download: %s", font_path)
-        return font_path
-
-    # Retry download with exponential backoff
-    response = http_get_with_retry(
+    save_path = _build_filepath(
+        folder,
         url,
-        retries=retries,
-        timeout=timeout,
-        backoff=backoff,
-        headers=DEFAULT_HEADERS,
-        stream=True,
+        filename,
+        default_suffix,
+        on_exist,
     )
 
-    if response:
-        try:
-            if on_exist == "rename":
-                font_path = _get_non_conflicting_path(font_path)
-
-            with open(font_path, "wb") as f:
-                for chunk in response.iter_content(chunk_size=_DEFAULT_CHUNK_SIZE):
-                    if chunk:
-                        f.write(chunk)
-
-            logger.debug("[font] Font saved to: %s", font_path)
-            return font_path
-
-        except Exception as e:
-            logger.error("[font] Error writing font to disk: %s", e)
-
-    return None
-
-
-def download_js_file(
-    url: str,
-    target_folder: str | Path,
-    *,
-    timeout: int = 10,
-    retries: int = 3,
-    backoff: float = 0.5,
-    on_exist: Literal["overwrite", "skip", "rename"] = "skip",
-) -> Path | None:
-    """
-    Download a JavaScript (.js) file from a URL and save it locally.
-
-    :param url: Fully-qualified JS file URL.
-    :param target_folder: Local folder to save the JS file.
-    :param timeout: Timeout for each request (in seconds).
-    :param retries: Number of retry attempts.
-    :param backoff: Base backoff time between retries (in seconds).
-    :param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
-    :return: Path to the saved JS file, or None if failed.
-    """
-    parsed = urlparse(url)
-    if not parsed.scheme or not parsed.netloc:
-        logger.warning("[js] Invalid URL: %s", url)
-        return None
-
-    # Determine filename
-    filename = Path(unquote(parsed.path)).name
-    if not filename.endswith(".js"):
-        filename += ".js"
-
-    target_folder = Path(target_folder)
-    target_folder.mkdir(parents=True, exist_ok=True)
-    save_path = target_folder / filename
-
-    if on_exist == "skip" and save_path.exists():
-        logger.debug("[js] File exists, skipping download: %s", save_path)
+    # Handle existing file
+    if save_path.exists() and on_exist == "skip":
+        logger.debug("Skipping download; file exists: %s", save_path)
         return save_path
 
-    response = http_get_with_retry(
-        url,
-        retries=retries,
-        timeout=timeout,
-        backoff=backoff,
-        headers=DEFAULT_HEADERS,
-        stream=False,
-    )
-
-    if response and response.ok:
-        content = response.content
-
-        if on_exist == "rename":
-            save_path = _get_non_conflicting_path(save_path)
-
+    with _make_session(retries, backoff, headers) as session:
         try:
-            _write_file(content=content, filepath=save_path, mode="wb")
-            logger.debug("[js] JS file saved to: %s", save_path)
-            return save_path
+            resp = session.get(url, timeout=timeout, stream=stream)
+            resp.raise_for_status()
         except Exception as e:
-            logger.error("[js] Error writing JS to disk: %s", e)
-
+            logger.warning("[download] request failed: %s", e)
+            return None
+
+        # Write to disk
+        if stream:
+            try:
+                with open(save_path, "wb") as f:
+                    for chunk in resp.iter_content(chunk_size=chunk_size):
+                        if chunk:
+                            f.write(chunk)
+                return save_path
+            except Exception as e:
+                logger.warning("[download] write failed: %s", e)
+                save_path.unlink(missing_ok=True)
+                return None
+        else:
+            return write_file(
+                content=resp.content,
+                filepath=save_path,
+                write_mode="wb",
+                on_exist=on_exist,
+            )
     return None
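
In 2.0.0 the three special-purpose helpers (download_image, download_font_file, download_js_file) collapse into a single download() entry point, and retry behaviour moves from a hand-rolled sleep loop to a urllib3 Retry mounted on a requests.Session. A minimal usage sketch against the new signature shown above; the URL and target directory are illustrative, not taken from the package:

    from pathlib import Path

    from novel_downloader.utils.network import download

    # Stream a (hypothetical) font file into ./fonts/, keeping any existing copy.
    font_path = download(
        "https://example.com/assets/reader-font.woff2",
        target_dir=Path("fonts"),
        on_exist="skip",
        stream=True,
    )
    if font_path is None:
        # None signals that the request or the write failed after retries.
        print("download failed")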
@@ -2,23 +2,22 @@
 """
 novel_downloader.utils.state
 ----------------------------
-State management for user preferences and runtime flags.
 
-Supported sections:
-- general: global preferences (e.g. language)
-- sites: per-site flags & data (e.g. manual_login, cookies)
+State management for user preferences and runtime flags.
 """
+
+__all__ = ["StateManager", "state_mgr"]
+
 import json
 from pathlib import Path
 from typing import Any
 
-from .constants import STATE_FILE
+from novel_downloader.utils.constants import STATE_FILE
 
 
 class StateManager:
     """
     Manages persistent state for user preferences and runtime flags.
-    Stores data in JSON at STATE_FILE.
     """
 
     def __init__(self, path: Path = STATE_FILE) -> None:
@@ -49,26 +48,6 @@ class StateManager:
         content = json.dumps(self._data, ensure_ascii=False, indent=2)
         self._path.write_text(content, encoding="utf-8")
 
-    def _parse_cookie_string(self, cookie_str: str) -> dict[str, str]:
-        """
-        Parse a Cookie header string into a dict.
-
-        :param cookie_str: e.g. 'k1=v1; k2=v2; k3'
-        :return: mapping cookie names to values (missing '=' yields empty string)
-        :rtype: Dict[str, str]
-        """
-        cookies: dict[str, str] = {}
-        for item in cookie_str.split(";"):
-            item = item.strip()
-            if not item:
-                continue
-            if "=" in item:
-                k, v = item.split("=", 1)
-                cookies[k.strip()] = v.strip()
-            else:
-                cookies[item] = ""
-        return cookies
-
     def get_language(self) -> str:
         """
         Load the user's language preference, defaulting to 'zh'.
@@ -87,69 +66,5 @@ class StateManager:
         self._data.setdefault("general", {})["lang"] = lang
         self._save()
 
-    def get_manual_login_flag(self, site: str) -> bool:
-        """
-        Retrieve the manual login requirement flag for a specific site.
-
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        :return: True if manual login is required (defaults to True)
-        """
-        val = self._data.get("sites", {}).get(site, {}).get("manual_login", True)
-        return bool(val)
-
-    def set_manual_login_flag(self, site: str, flag: bool) -> None:
-        """
-        Set the 'manual_login' flag for a specific site.
-
-        :param flag: True if the site requires manual login.
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        """
-        sites = self._data.setdefault("sites", {})
-        site_data = sites.setdefault(site, {})
-        site_data["manual_login"] = flag
-        self._save()
-
-    def get_cookies(self, site: str) -> dict[str, str]:
-        """
-        Retrieve the persisted cookies for a specific site.
-
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        :return: A dict mapping cookie names to values. Returns empty dict if not set.
-        """
-        cookies = self._data.get("sites", {}).get(site, {}).get("cookies", {})
-        return {str(k): str(v) for k, v in cookies.items()}
-
-    def set_cookies(self, site: str, cookies: str | dict[str, str]) -> None:
-        """
-        Persist (overwrite) the cookies for a specific site.
-
-        :param site: Site identifier (e.g. 'qidian', 'bqg')
-        :param cookies: Either a dict mapping cookie names to values,
-                        or a string (JSON or 'k=v; k2=v2') to be parsed.
-        :raises TypeError: if cookies is neither str nor dict
-        """
-        # 1) normalize to dict
-        if isinstance(cookies, dict):
-            cookies_dict = cookies
-        elif isinstance(cookies, str):
-            # try JSON first
-            try:
-                parsed = json.loads(cookies)
-                if isinstance(parsed, dict):
-                    cookies_dict = parsed  # OK!
-                else:
-                    raise ValueError
-            except Exception:
-                # fallback to "k=v; k2=v2" format
-                cookies_dict = self._parse_cookie_string(cookies)
-        else:
-            raise TypeError("`cookies` must be a dict or a str")
-
-        # 2) persist
-        sites = self._data.setdefault("sites", {})
-        site_data = sites.setdefault(site, {})
-        site_data["cookies"] = {str(k): str(v) for k, v in cookies_dict.items()}
-        self._save()
-
 
 state_mgr = StateManager()
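
After this change StateManager keeps only general preferences; cookie and manual-login persistence is dropped from the state file entirely. A small sketch of the surviving surface, using only what is visible in the hunks above:

    from novel_downloader.utils.state import state_mgr

    # get_language() is kept (shown above) and defaults to "zh" when unset;
    # the matching setter writes to the "general" section and re-saves the JSON file.
    print(state_mgr.get_language())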
@@ -3,31 +3,26 @@
 novel_downloader.utils.text_utils
 ---------------------------------
 
-Utility modules for text formatting, font mapping, cleaning, and diff display.
-
-Submodules:
-- font_mapping: Replace obfuscated characters using font maps
-- chapter_formatting: Build structured chapter strings from raw content
-- text_cleaning: Remove promo text and check for spam lines
-- diff_display: Generate inline diffs with aligned character markers
+Utility modules for text formatting, cleaning, and diff display.
 """
 
-from .chapter_formatting import format_chapter
-from .diff_display import diff_inline_display
-from .font_mapping import apply_font_mapping
-from .text_cleaning import (
-    clean_chapter_title,
-    content_prefix,
-    is_promotional_line,
-    truncate_half_lines,
-)
-
 __all__ = [
-    "apply_font_mapping",
-    "format_chapter",
-    "clean_chapter_title",
-    "is_promotional_line",
+    "TextCleaner",
+    "get_cleaner",
     "content_prefix",
     "truncate_half_lines",
+    "chinese_to_arabic",
+    "arabic_to_chinese",
     "diff_inline_display",
 ]
+
+from .diff_display import diff_inline_display
+from .numeric_conversion import (
+    arabic_to_chinese,
+    chinese_to_arabic,
+)
+from .text_cleaner import TextCleaner, get_cleaner
+from .truncate_utils import (
+    content_prefix,
+    truncate_half_lines,
+)
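
The re-exports above define the new flat public API of text_utils: the old font_mapping, chapter_formatting, and text_cleaning helpers give way to TextCleaner/get_cleaner, truncation utilities, and Chinese/Arabic numeral conversion. A hedged sketch of how the renamed pieces might be called; only the names come from __all__ above, the argument and return types are assumptions:

    from novel_downloader.utils.text_utils import chinese_to_arabic, get_cleaner

    # Assumed behaviour: convert a Chinese numeral string to an integer.
    chapter_no = chinese_to_arabic("三百二十一")

    # Assumed behaviour: get_cleaner() returns a configured TextCleaner instance.
    cleaner = get_cleaner()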
@@ -6,6 +6,8 @@ novel_downloader.utils.text_utils.diff_display
 Generate inline character-level diff between two strings with visual markers.
 """
 
+__all__ = ["diff_inline_display"]
+
 import difflib
 import unicodedata
 
@@ -18,10 +20,10 @@ def _char_width_space(
 
     Fullwidth (F) or Wide (W) characters map to `asian_char`, else `normal_char`.
 
-    :param c: A single character.
+    :param c: A single character.
     :param normal_char: Replacement for narrow chars (default U+0020).
-    :param asian_char: Replacement for wide chars (default U+3000).
-    :return: The appropriate space character.
+    :param asian_char: Replacement for wide chars (default U+3000).
+    :return: The appropriate space character.
     """
     return asian_char if unicodedata.east_asian_width(c) in ("F", "W") else normal_char
 
@@ -33,7 +35,7 @@ def diff_inline_display(old_str: str, new_str: str) -> str:
 
     :param old_str: Original string (prefixed '-' will be trimmed).
     :param new_str: Modified string (prefixed '+' will be trimmed).
-    :return: A multiline diff display with aligned markers.
+    :return: A multiline diff display with aligned markers.
     """
     space_1 = " "
     space_2 = "\u3000"
@@ -67,8 +69,3 @@ def diff_inline_display(old_str: str, new_str: str) -> str:
         marker_s2 += "".join(_char_width_space(c, mark_1, mark_2) for c in s2_seg)
     output_str = f"-{s1}\n {marker_s1}\n+{s2}\n {marker_s2}"
     return output_str
-
-
-__all__ = [
-    "diff_inline_display",
-]
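
diff_inline_display itself is unchanged apart from the relocated __all__ and docstring whitespace, so the call pattern from 1.4.5 still applies. Example based on the signature and output f-string visible above:

    from novel_downloader.utils.text_utils import diff_inline_display

    # Returns four lines: the old string, its marker row, the new string, its marker row,
    # padding wide (fullwidth) characters with U+3000 so the markers stay aligned.
    print(diff_inline_display("novel downloader", "nove1 down1oader"))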